Merge pull request #1518 from OpenRefine/issue/1509

Fix issue #1509
This commit is contained in:
Jacky 2018-02-28 19:42:07 -05:00 committed by GitHub
commit 54553da733
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1101 additions and 23 deletions

View File

@ -92,6 +92,7 @@ import com.google.refine.ProjectManager;
import com.google.refine.RefineServlet;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ColumnModel;
import com.google.refine.model.Project;
@ -1126,22 +1127,7 @@ public class ImportingUtilities {
ProjectManager.singleton.registerProject(project, pm);
// infer the column type
if (project.columnModel.columns.get(0).getType().isEmpty()) {
List<Object[]> listCells = new ArrayList<Object[]>(INFER_ROW_LIMIT);
List<Row> rows = project.rows
.stream()
.limit(INFER_ROW_LIMIT)
.collect(Collectors.toList());
rows.forEach(r->listCells.add(r.cells.toArray()));
try {
JSONObject fieldsJSON = TypeInferrer.getInstance().infer(listCells,
project.columnModel.getColumnNames().toArray(new String[0]),
100);
populateColumnTypes(project.columnModel, fieldsJSON.getJSONArray(Schema.JSON_KEY_FIELDS));
} catch (TypeInferringException e) {
logger.error("infer column type exception.", ExceptionUtils.getStackTrace(e));
}
}
inferColumnType(project);
job.setProjectID(project.id);
job.setState("created-project");
@ -1152,6 +1138,36 @@ public class ImportingUtilities {
job.updating = false;
}
}
/**
 * Infers column types from a sample of the project's rows and stores them in
 * the project's column model.
 *
 * <p>Inference runs only when the first column's type is still empty; if the
 * project has no columns, or the first column already has a type, the method
 * returns without doing anything. At most {@code INFER_ROW_LIMIT} rows are
 * sampled.
 *
 * <p>A {@code TypeInferringException} raised during inference is logged and
 * not rethrown, so the column model is left as it was.
 *
 * @param project the project whose column model is to be populated
 */
public static void inferColumnType(final Project project) {
    // Guard the get(0) below: a project without columns has nothing to infer.
    if (project.columnModel.columns.isEmpty()
            || !project.columnModel.columns.get(0).getType().isEmpty()) {
        return;
    }

    // The sample is taken from Row.dup() results rather than from project.rows
    // directly, so the placeholder cells written below go into the duplicates.
    // NOTE(review): presumably dup() copies the cell list — confirm.
    List<Row> sampleRows = project.rows
            .stream()
            .limit(INFER_ROW_LIMIT)
            .map(Row::dup)
            .collect(Collectors.toList());

    List<Object[]> listCells = new ArrayList<Object[]>(INFER_ROW_LIMIT);
    for (Row row : sampleRows) {
        // Replace null cells with an empty-string cell to prevent an NPE
        // during inference.
        for (int i = 0; i < row.cells.size(); i++) {
            if (row.cells.get(i) == null) {
                row.cells.set(i, new Cell(StringUtils.EMPTY, null));
            }
        }
        listCells.add(row.cells.toArray());
    }

    try {
        JSONObject fieldsJSON = TypeInferrer.getInstance().infer(listCells,
                project.columnModel.getColumnNames().toArray(new String[0]),
                100);
        populateColumnTypes(project.columnModel, fieldsJSON.getJSONArray(Schema.JSON_KEY_FIELDS));
    } catch (TypeInferringException e) {
        // Pass the exception itself so the logger records its stack trace.
        // The message has no placeholder, so a pre-rendered stack-trace
        // string passed as a format argument would not be printed
        // (assuming an SLF4J-style logger — confirm).
        logger.error("infer column type exception.", e);
    }
}
private static void populateDataPackageMetadata(Project project, ProjectMetadata pmd, DataPackageMetadata metadata) {
// project metadata

1002
main/tests/data/jorf.xml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,8 +5,11 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import static org.mockito.Mockito.when;
import org.json.JSONArray;
import org.json.JSONObject;
import org.mockito.Mockito;
@ -38,7 +41,9 @@ abstract public class ImporterTest extends RefineTest {
ImportingManager.initialize(servlet);
project = new Project();
metadata = new ProjectMetadata();
job = ImportingManager.createJob();
ImportingJob spiedJob = ImportingManager.createJob();
job = Mockito.spy(spiedJob);
when(job.getRetrievalRecord()).thenReturn(new JSONObject());
options = Mockito.mock(JSONObject.class);
}

View File

@ -1,21 +1,76 @@
package com.google.refine.tests.importing;
import java.io.InputStream;
import java.util.LinkedList;
import org.json.JSONArray;
import org.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.google.refine.importers.XmlImporter;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Column;
import com.google.refine.model.medadata.ProjectMetadata;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.importers.ImporterTest;
import com.google.refine.util.JSONUtilities;
public class ImportingUtilitiesTests extends RefineTest {
public class ImportingUtilitiesTests extends ImporterTest {
@Test
public void createProjectMetadataTest() throws Exception {
JSONObject optionObj = new JSONObject("{\"projectName\":\"acme\",\"projectTags\":[],\"created\":\"2017-12-18T13:28:40.659\",\"modified\":\"2017-12-20T09:28:06.654\",\"creator\":\"\",\"contributors\":\"\",\"subject\":\"\",\"description\":\"\",\"rowCount\":50,\"customMetadata\":{}}");
ProjectMetadata pm = ImportingUtilities.createProjectMetadata(optionObj);
/**
 * Per-test fixture setup. Re-declares the inherited method with
 * {@code @BeforeMethod} and delegates entirely to the superclass
 * implementation; no additional state is initialised here.
 */
@Override
@BeforeMethod
public void setUp(){
super.setUp();
}
/**
 * Checks that {@code ImportingUtilities.createProjectMetadata} builds metadata
 * from a JSON options object: the project name is taken from the options, the
 * encoding is expected to be UTF-8 (the options do not specify one), and an
 * empty {@code projectTags} array yields zero tags.
 */
@Test
public void createProjectMetadataTest()
        throws Exception {
    JSONObject optionObj = new JSONObject(
            "{\"projectName\":\"acme\",\"projectTags\":[],\"created\":\"2017-12-18T13:28:40.659\",\"modified\":\"2017-12-20T09:28:06.654\",\"creator\":\"\",\"contributors\":\"\",\"subject\":\"\",\"description\":\"\",\"rowCount\":50,\"customMetadata\":{}}");
    ProjectMetadata pm = ImportingUtilities.createProjectMetadata(optionObj);
    Assert.assertEquals(pm.getName(), "acme");
    Assert.assertEquals(pm.getEncoding(), "UTF-8");
    // assertEquals reports the actual tag count on failure, unlike assertTrue(x == 0).
    Assert.assertEquals(pm.getTags().length, 0);
}
/**
 * Parses the {@code jorf.xml} test resource with the XML importer, runs
 * {@code ImportingUtilities.inferColumnType} on the resulting project, and
 * checks the number of columns and the types inferred for two of them.
 */
@Test
public void inferColumnTypeTest()
        throws Exception {
    ImportingManager.registerFormat("text/xml", "XML files", "XmlParserUI", new XmlImporter());
    XmlImporter xmlImporter = new XmlImporter();
    String fileName = "jorf.xml";

    options = getNestedOptions(job, xmlImporter);
    // NOTE(review): the return value is discarded, so this call appears to
    // have no effect; kept as in the original test.
    job.getRetrievalRecord();

    // try-with-resources closes the stream even if parsing throws.
    try (InputStream in = this.getClass().getClassLoader().getResourceAsStream(fileName)) {
        // Fail with a clear message rather than a downstream NPE if the
        // resource is not on the classpath.
        Assert.assertNotNull(in, "test resource not found on classpath: " + fileName);
        parseOneInputStream(xmlImporter, in, options);
    }

    ImportingUtilities.inferColumnType(project);

    // assertEquals reports actual vs. expected on failure.
    Assert.assertEquals(project.columnModel.columns.size(), 58);
    Assert.assertEquals(project.columnModel.getColumnByName("result - source_id").getType(), "string");
    Assert.assertEquals(project.columnModel.getColumnByName("result - person - sexe").getType(), "boolean");
}
/**
 * Builds parser options for the given tree parser: starts from the parser's
 * UI initialization data and sets {@code recordPath} to
 * {@code ["results", "result"]}.
 *
 * @param job    the importing job passed to the parser
 * @param parser the tree parser that supplies the initial options
 * @return the options object with {@code recordPath} set
 */
private JSONObject getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
    // Path of element names that identifies one record in the input.
    JSONArray recordPath = new JSONArray();
    JSONUtilities.append(recordPath, "results");
    JSONUtilities.append(recordPath, "result");

    // NOTE(review): the format passed here is "text/json" although this test
    // class drives an XML importer — confirm this is intended.
    JSONObject parserOptions = parser.createParserUIInitializationData(
            job, new LinkedList<JSONObject>(), "text/json");
    JSONUtilities.safePut(parserOptions, "recordPath", recordPath);
    return parserOptions;
}
}