commit
54553da733
@ -92,6 +92,7 @@ import com.google.refine.ProjectManager;
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.importing.ImportingManager.Format;
|
||||
import com.google.refine.importing.UrlRewriter.Result;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ColumnModel;
|
||||
import com.google.refine.model.Project;
|
||||
@ -1126,22 +1127,7 @@ public class ImportingUtilities {
|
||||
ProjectManager.singleton.registerProject(project, pm);
|
||||
|
||||
// infer the column type
|
||||
if (project.columnModel.columns.get(0).getType().isEmpty()) {
|
||||
List<Object[]> listCells = new ArrayList<Object[]>(INFER_ROW_LIMIT);
|
||||
List<Row> rows = project.rows
|
||||
.stream()
|
||||
.limit(INFER_ROW_LIMIT)
|
||||
.collect(Collectors.toList());
|
||||
rows.forEach(r->listCells.add(r.cells.toArray()));
|
||||
try {
|
||||
JSONObject fieldsJSON = TypeInferrer.getInstance().infer(listCells,
|
||||
project.columnModel.getColumnNames().toArray(new String[0]),
|
||||
100);
|
||||
populateColumnTypes(project.columnModel, fieldsJSON.getJSONArray(Schema.JSON_KEY_FIELDS));
|
||||
} catch (TypeInferringException e) {
|
||||
logger.error("infer column type exception.", ExceptionUtils.getStackTrace(e));
|
||||
}
|
||||
}
|
||||
inferColumnType(project);
|
||||
|
||||
job.setProjectID(project.id);
|
||||
job.setState("created-project");
|
||||
@ -1153,6 +1139,36 @@ public class ImportingUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
public static void inferColumnType(final Project project) {
|
||||
if (project.columnModel.columns.get(0).getType().isEmpty()) {
|
||||
List<Object[]> listCells = new ArrayList<Object[]>(INFER_ROW_LIMIT);
|
||||
List<Row> rows = project.rows
|
||||
.stream()
|
||||
.limit(INFER_ROW_LIMIT)
|
||||
.map(Row::dup)
|
||||
.collect(Collectors.toList());
|
||||
// convert the null object to prevent the NPE
|
||||
for (Row row : rows) {
|
||||
for (int i = 0; i < row.cells.size(); i++) {
|
||||
Cell cell = row.cells.get(i);
|
||||
if (cell == null) {
|
||||
row.cells.set(i, new Cell(StringUtils.EMPTY, null));
|
||||
}
|
||||
}
|
||||
listCells.add(row.cells.toArray());
|
||||
}
|
||||
|
||||
try {
|
||||
JSONObject fieldsJSON = TypeInferrer.getInstance().infer(listCells,
|
||||
project.columnModel.getColumnNames().toArray(new String[0]),
|
||||
100);
|
||||
populateColumnTypes(project.columnModel, fieldsJSON.getJSONArray(Schema.JSON_KEY_FIELDS));
|
||||
} catch (TypeInferringException e) {
|
||||
logger.error("infer column type exception.", ExceptionUtils.getStackTrace(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void populateDataPackageMetadata(Project project, ProjectMetadata pmd, DataPackageMetadata metadata) {
|
||||
// project metadata
|
||||
JSONObject pkg = metadata.getPackage().getJson();
|
||||
|
1002
main/tests/data/jorf.xml
Normal file
1002
main/tests/data/jorf.xml
Normal file
File diff suppressed because it is too large
Load Diff
@ -5,8 +5,11 @@ import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.mockito.Mockito.when;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
@ -38,7 +41,9 @@ abstract public class ImporterTest extends RefineTest {
|
||||
ImportingManager.initialize(servlet);
|
||||
project = new Project();
|
||||
metadata = new ProjectMetadata();
|
||||
job = ImportingManager.createJob();
|
||||
ImportingJob spiedJob = ImportingManager.createJob();
|
||||
job = Mockito.spy(spiedJob);
|
||||
when(job.getRetrievalRecord()).thenReturn(new JSONObject());
|
||||
|
||||
options = Mockito.mock(JSONObject.class);
|
||||
}
|
||||
|
@ -1,21 +1,76 @@
|
||||
|
||||
package com.google.refine.tests.importing;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.importers.XmlImporter;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.medadata.ProjectMetadata;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.tests.importers.ImporterTest;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class ImportingUtilitiesTests extends RefineTest {
|
||||
public class ImportingUtilitiesTests extends ImporterTest {
|
||||
|
||||
@Override
|
||||
@BeforeMethod
|
||||
public void setUp(){
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createProjectMetadataTest() throws Exception {
|
||||
JSONObject optionObj = new JSONObject("{\"projectName\":\"acme\",\"projectTags\":[],\"created\":\"2017-12-18T13:28:40.659\",\"modified\":\"2017-12-20T09:28:06.654\",\"creator\":\"\",\"contributors\":\"\",\"subject\":\"\",\"description\":\"\",\"rowCount\":50,\"customMetadata\":{}}");
|
||||
public void createProjectMetadataTest()
|
||||
throws Exception {
|
||||
JSONObject optionObj = new JSONObject(
|
||||
"{\"projectName\":\"acme\",\"projectTags\":[],\"created\":\"2017-12-18T13:28:40.659\",\"modified\":\"2017-12-20T09:28:06.654\",\"creator\":\"\",\"contributors\":\"\",\"subject\":\"\",\"description\":\"\",\"rowCount\":50,\"customMetadata\":{}}");
|
||||
ProjectMetadata pm = ImportingUtilities.createProjectMetadata(optionObj);
|
||||
Assert.assertEquals(pm.getName(), "acme");
|
||||
Assert.assertEquals(pm.getEncoding(), "UTF-8");
|
||||
Assert.assertTrue(pm.getTags().length == 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void inferColumnTypeTest()
|
||||
throws Exception {
|
||||
ImportingManager.registerFormat("text/xml", "XML files", "XmlParserUI", new com.google.refine.importers.XmlImporter());
|
||||
XmlImporter xmlImporter = new XmlImporter();
|
||||
String fileName = "jorf.xml";
|
||||
InputStream in = this.getClass().getClassLoader()
|
||||
.getResourceAsStream(fileName);
|
||||
options = getNestedOptions(job, xmlImporter);
|
||||
job.getRetrievalRecord();
|
||||
|
||||
parseOneInputStream(new XmlImporter(),
|
||||
in,
|
||||
options);
|
||||
|
||||
ImportingUtilities.inferColumnType(project);
|
||||
|
||||
Assert.assertTrue(project.columnModel.columns.size() == 58);
|
||||
Assert.assertTrue(project.columnModel.getColumnByName("result - source_id").getType().equals("string"));
|
||||
Assert.assertTrue(project.columnModel.getColumnByName("result - person - sexe").getType().equals("boolean"));
|
||||
}
|
||||
|
||||
private JSONObject getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
JSONObject options = parser.createParserUIInitializationData(
|
||||
job, new LinkedList<JSONObject>(), "text/json");
|
||||
|
||||
JSONArray path = new JSONArray();
|
||||
JSONUtilities.append(path, "results");
|
||||
JSONUtilities.append(path, "result");
|
||||
// JSONUtilities.append(path, "object");
|
||||
|
||||
JSONUtilities.safePut(options, "recordPath", path);
|
||||
return options;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user