From b3f5fada9594cc694b0ae4bef5ad9bec71cce3a1 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Sat, 8 Sep 2012 01:20:25 +0000 Subject: [PATCH] FIXED - task 578 & 596: Clean up JSON importer http://code.google.com/p/google-refine/issues/detail?id=578 http://code.google.com/p/google-refine/issues/detail?id=596 Extend tree parser framework to allow any Serializable instead of just Strings. Use this in JSON importer to: Import keywords null, true, false; Import empty strings and don't trim whitespace from strings on import; Import numbers directly instead of importing them as text and then parsing them ourselves. Add tests to verify all this stuff git-svn-id: http://google-refine.googlecode.com/svn/trunk@2543 7d457c2a-affb-35e4-300a-418c747d4874 --- .classpath | 2 +- .../google/refine/importers/JsonImporter.java | 52 +++++- .../google/refine/importers/XmlImporter.java | 9 +- .../importers/tree/TreeImportUtilities.java | 25 ++- .../tree/TreeImportingParserBase.java | 28 ++- .../refine/importers/tree/TreeReader.java | 6 + .../importers/tree/XmlImportUtilities.java | 162 ++++++++++++++---- main/src/com/google/refine/model/Cell.java | 6 +- .../com/google/refine/tests/RefineTest.java | 14 ++ .../refine/tests/importers/ImporterTest.java | 2 +- .../tests/importers/JsonImporterTests.java | 101 ++++++++++- .../importers/XmlImportUtilitiesStub.java | 50 +++++- .../importers/XmlImportUtilitiesTests.java | 6 +- .../parser-interfaces/json-parser-ui.html | 6 + .../index/parser-interfaces/json-parser-ui.js | 14 ++ .../parser-interfaces/xml-parser-ui.html | 6 + .../index/parser-interfaces/xml-parser-ui.js | 13 ++ 17 files changed, 432 insertions(+), 70 deletions(-) diff --git a/.classpath b/.classpath index 90ba1d7e8..5c89de9ac 100644 --- a/.classpath +++ b/.classpath @@ -23,7 +23,7 @@ - + diff --git a/main/src/com/google/refine/importers/JsonImporter.java b/main/src/com/google/refine/importers/JsonImporter.java index 5cf347d93..b81005e62 100644 --- 
a/main/src/com/google/refine/importers/JsonImporter.java +++ b/main/src/com/google/refine/importers/JsonImporter.java @@ -38,11 +38,13 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.io.Serializable; import java.util.List; import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonParser.NumberType; import org.codehaus.jackson.JsonToken; import org.json.JSONArray; import org.json.JSONObject; @@ -114,9 +116,9 @@ public class JsonImporter extends TreeImportingParserBase { case VALUE_STRING: return parser.getText(); case VALUE_NUMBER_INT: - return Integer.valueOf(parser.getIntValue()); + return Long.valueOf(parser.getLongValue()); case VALUE_NUMBER_FLOAT: - return Float.valueOf(parser.getFloatValue()); + return Double.valueOf(parser.getDoubleValue()); case VALUE_TRUE: return Boolean.TRUE; case VALUE_FALSE: @@ -215,7 +217,7 @@ public class JsonImporter extends TreeImportingParserBase { private JsonToken current = null; private JsonToken next = null; private String fieldName = ANONYMOUS; - private String fieldValue = null; + private Serializable fieldValue = null; public JSONTreeReader(Reader reader) { @@ -284,14 +286,52 @@ public class JsonImporter extends TreeImportingParserBase { @Override public String getFieldValue() throws TreeReaderException { + return fieldValue.toString(); + } + + @Override + public Serializable getValue() + throws TreeReaderException { return fieldValue; } - @Override public boolean hasNext() { return next != null; } - + + private Serializable getValue(JsonParser parser, JsonToken token) throws IOException { + if (token != null) { + switch (token) { + case VALUE_STRING: + return parser.getText(); + case VALUE_NUMBER_INT: + if (parser.getNumberType() == NumberType.INT || parser.getNumberType() == NumberType.LONG) { + return Long.valueOf(parser.getLongValue()); + } 
else { + return parser.getNumberValue(); + } + case VALUE_NUMBER_FLOAT: + if (parser.getNumberType() == NumberType.FLOAT) { + return Float.valueOf(parser.getFloatValue()); + } else if (parser.getNumberType() == NumberType.DOUBLE) { + return Double.valueOf(parser.getDoubleValue()); + } else { + return parser.getNumberValue(); + } + case VALUE_TRUE: + return Boolean.TRUE; + case VALUE_FALSE: + return Boolean.FALSE; + case VALUE_NULL: + return null; + case END_ARRAY: + default: + break; + } + } + return null; + } + @Override public Token next() throws TreeReaderException { JsonToken previous = current; @@ -300,7 +340,7 @@ public class JsonImporter extends TreeImportingParserBase { try { if (current != null) { if (current.isScalarValue()) { - fieldValue = parser.getText(); + fieldValue = getValue(parser,current); } else { fieldValue = null; } diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java index 5900de6fb..bc3cb9f86 100644 --- a/main/src/com/google/refine/importers/XmlImporter.java +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -38,6 +38,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; +import java.io.Serializable; import java.util.List; import javax.xml.stream.XMLInputFactory; @@ -287,7 +288,13 @@ public class XmlImporter extends TreeImportingParserBase { public String getFieldValue(){ return parser.getText(); } - + + @Override + public Serializable getValue() { + // XML parser only does string types + return getFieldValue(); + } + @Override public int getAttributeCount(){ return parser.getAttributeCount(); diff --git a/main/src/com/google/refine/importers/tree/TreeImportUtilities.java b/main/src/com/google/refine/importers/tree/TreeImportUtilities.java index d149fa610..99ba4656a 100644 --- a/main/src/com/google/refine/importers/tree/TreeImportUtilities.java +++ 
b/main/src/com/google/refine/importers/tree/TreeImportUtilities.java @@ -114,6 +114,7 @@ public abstract class TreeImportUtilities { } } + @Deprecated static protected void addCell( Project project, ImportColumnGroup columnGroup, @@ -121,12 +122,30 @@ public abstract class TreeImportUtilities { String columnLocalName, String text ) { - if (text == null || (text).isEmpty()) { + addCell(project, columnGroup, record, columnLocalName, text, true, true); + } + + static protected void addCell( + Project project, + ImportColumnGroup columnGroup, + ImportRecord record, + String columnLocalName, + String text, + boolean storeEmptyString, + boolean guessDataType + ) { + Serializable value = text; + if (!storeEmptyString && (text == null || (text).isEmpty())) { return; } + if (guessDataType) { + value = ImporterUtilities.parseCellValue(text); + } + addCell(project, columnGroup, record, columnLocalName, value); + } - Serializable value = ImporterUtilities.parseCellValue(text); - + protected static void addCell(Project project, ImportColumnGroup columnGroup, ImportRecord record, + String columnLocalName, Serializable value) { ImportColumn column = getColumn(project, columnGroup, columnLocalName); int cellIndex = column.cellIndex; diff --git a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java index 1ce6b5195..866657d94 100644 --- a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java +++ b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java @@ -45,8 +45,8 @@ import org.json.JSONObject; import com.google.refine.ProjectMetadata; import com.google.refine.importers.ImporterUtilities; import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress; +import com.google.refine.importers.ImportingParserBase; import com.google.refine.importing.ImportingJob; -import com.google.refine.importing.ImportingParser; import 
com.google.refine.importing.ImportingUtilities; import com.google.refine.model.Project; import com.google.refine.util.JSONUtilities; @@ -55,19 +55,23 @@ import com.google.refine.util.JSONUtilities; * Abstract class for importer parsers which handle tree-shaped data * (currently XML & JSON). */ -abstract public class TreeImportingParserBase implements ImportingParser { - final protected boolean useInputStream; - - protected TreeImportingParserBase(boolean useInputStream) { - this.useInputStream = useInputStream; +abstract public class TreeImportingParserBase extends ImportingParserBase { + + protected TreeImportingParserBase(final boolean useInputStream) { + super(useInputStream); } @Override public JSONObject createParserUIInitializationData(ImportingJob job, List fileRecords, String format) { - JSONObject options = new JSONObject(); + JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); + + JSONUtilities.safePut(options, "trimStrings", false); + JSONUtilities.safePut(options, "guessCellValueTypes", false); + JSONUtilities.safePut(options, "storeEmptyStrings", true); return options; } + @Override public void parse(Project project, ProjectMetadata metadata, @@ -200,6 +204,14 @@ abstract public class TreeImportingParserBase implements ImportingParser { if (limit2 == 0) { // shouldn't really happen, but be sure since 0 is stop signal limit2 = -1; } - XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2); + + // NOTE: these defaults are solely to preserve historical behavior. 
+ // All new code should override them to keep input data from being modified + boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true); + boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false); + boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true); + + XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings, + storeEmptyStrings,guessCellValueTypes); } } diff --git a/main/src/com/google/refine/importers/tree/TreeReader.java b/main/src/com/google/refine/importers/tree/TreeReader.java index b1c9cf842..724d77815 100644 --- a/main/src/com/google/refine/importers/tree/TreeReader.java +++ b/main/src/com/google/refine/importers/tree/TreeReader.java @@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.importers.tree; +import java.io.Serializable; + /** * Interface for all tree-shaped parsers. * @@ -54,8 +56,12 @@ public interface TreeReader { public String getFieldName() throws TreeReaderException; public String getPrefix(); + + @Deprecated public String getFieldValue() throws TreeReaderException; + public Serializable getValue() throws TreeReaderException; + public int getAttributeCount(); public String getAttributeValue(int index); public String getAttributePrefix(int index); diff --git a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java index ca2b99221..e5c78f574 100644 --- a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
package com.google.refine.importers.tree; +import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -41,6 +42,8 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import javax.servlet.ServletException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -235,21 +238,33 @@ public class XmlImportUtilities extends TreeImportUtilities { return null; } - + @Deprecated + static public void importTreeData( + TreeReader parser, + Project project, + String[] recordPath, + ImportColumnGroup rootColumnGroup, + int limit + ) { + importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true); + } static public void importTreeData( TreeReader parser, Project project, String[] recordPath, ImportColumnGroup rootColumnGroup, - int limit + int limit, + boolean trimStrings, + boolean storeEmptyStrings, + boolean guessDataType ) { logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)"); try { while (parser.hasNext()) { Token eventType = parser.next(); if (eventType == Token.StartEntity) { - findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--); + findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); } } } catch (TreeReaderException e) { @@ -258,7 +273,17 @@ public class XmlImportUtilities extends TreeImportUtilities { } } - + @Deprecated + static protected void findRecord( + Project project, + TreeReader parser, + String[] recordPath, + int pathIndex, + ImportColumnGroup rootColumnGroup, + int limit + ) throws TreeReaderException { + findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true); + } /** * @@ -275,7 +300,10 @@ public class XmlImportUtilities extends TreeImportUtilities { String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup, - int limit + int limit, + boolean trimStrings, + boolean storeEmptyStrings, + boolean 
guessDataType ) throws TreeReaderException { logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath)); @@ -293,7 +321,7 @@ public class XmlImportUtilities extends TreeImportUtilities { while (parser.hasNext() && limit != 0) { Token eventType = parser.next(); if (eventType == Token.StartEntity) { - findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--); + findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); } else if (eventType == Token.EndEntity) { break; } else if (eventType == Token.Value) { @@ -302,13 +330,13 @@ public class XmlImportUtilities extends TreeImportUtilities { String desiredFieldName = recordPath[pathIndex + 1]; String currentFieldName = parser.getFieldName(); if (desiredFieldName.equals(currentFieldName)) { - processFieldAsRecord(project, parser, rootColumnGroup); + processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType); } } } } } else { - processRecord(project, parser, rootColumnGroup); + processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); } } else { skip(parser); @@ -326,6 +354,18 @@ public class XmlImportUtilities extends TreeImportUtilities { } } + /** + * @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} + */ + @Deprecated + static protected void processRecord( + Project project, + TreeReader parser, + ImportColumnGroup rootColumnGroup + ) throws TreeReaderException { + processRecord(project, parser, rootColumnGroup, true, false, true); + } + /** * processRecord parses Tree data for a single element and it's sub-elements, * adding the parsed data as a row to the project @@ -337,15 +377,31 @@ public class XmlImportUtilities extends TreeImportUtilities { static protected void processRecord( Project project, TreeReader 
parser, - ImportColumnGroup rootColumnGroup + ImportColumnGroup rootColumnGroup, + boolean trimStrings, + boolean storeEmptyStrings, + boolean guessDataType ) throws TreeReaderException { logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); ImportRecord record = new ImportRecord(); - processSubRecord(project, parser, rootColumnGroup, record, 0); + processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType); addImportRecordToProject(record, project); } + /** + * @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} + */ + @Deprecated + static protected void processFieldAsRecord( + Project project, + TreeReader parser, + ImportColumnGroup rootColumnGroup + ) throws TreeReaderException { + processFieldAsRecord(project, parser, rootColumnGroup, true, false, true); + } + + /** * processFieldAsRecord parses Tree data for a single element and it's sub-elements, * adding the parsed data as a row to the project @@ -357,20 +413,43 @@ public class XmlImportUtilities extends TreeImportUtilities { static protected void processFieldAsRecord( Project project, TreeReader parser, - ImportColumnGroup rootColumnGroup + ImportColumnGroup rootColumnGroup, + boolean trimStrings, + boolean storeEmptyStrings, + boolean guessDataType ) throws TreeReaderException { logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); - String text = parser.getFieldValue().trim(); - if (text.length() > 0) { - ImportRecord record = new ImportRecord(); + Serializable value = parser.getValue(); + ImportRecord record = null; + if (value instanceof String) { + String text = (String) value; + if (trimStrings) { + text = text.trim(); + } + if (text.length() > 0 | !storeEmptyStrings) { + record = new ImportRecord(); + addCell( + project, + rootColumnGroup, + record, + parser.getFieldName(), + (String) value, + storeEmptyStrings, + guessDataType + ); 
+ } + } else { + record = new ImportRecord(); addCell( project, rootColumnGroup, record, parser.getFieldName(), - text + value ); + } + if (record != null) { addImportRecordToProject(record, project); } } @@ -396,6 +475,19 @@ public class XmlImportUtilities extends TreeImportUtilities { } } + /** + * @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)} + */ + @Deprecated + static protected void processSubRecord( Project project, + TreeReader parser, + ImportColumnGroup columnGroup, + ImportRecord record, + int level + ) throws TreeReaderException { + processSubRecord(project, parser, columnGroup, record, level, true, false, true); + } + /** * * @param project @@ -409,7 +501,10 @@ public class XmlImportUtilities extends TreeImportUtilities { TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record, - int level + int level, + boolean trimStrings, + boolean storeEmptyStrings, + boolean guessDataType ) throws TreeReaderException { logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup); @@ -426,14 +521,19 @@ public class XmlImportUtilities extends TreeImportUtilities { int attributeCount = parser.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { - String text = parser.getAttributeValue(i).trim(); - if (text.length() > 0) { + String text = parser.getAttributeValue(i); + if (trimStrings) { + text = text.trim(); + } + if (text.length() > 0 | !storeEmptyStrings) { addCell( project, thisColumnGroup, record, composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), - text + text, + storeEmptyStrings, + guessDataType ); } } @@ -446,23 +546,21 @@ public class XmlImportUtilities extends TreeImportUtilities { parser, thisColumnGroup, record, - level+1 + level+1, + trimStrings, + storeEmptyStrings, + guessDataType ); } else if (//eventType == XMLStreamConstants.CDATA || eventType == Token.Value) 
{ //XMLStreamConstants.CHARACTERS) { - String text = parser.getFieldValue(); + Serializable value = parser.getValue(); String colName = parser.getFieldName(); - if(text != null){ - text = text.trim(); - if (text.length() > 0) { - addCell( - project, - thisColumnGroup, - record, - colName, - text - ); - } + if (value instanceof String) { + String text = (String) value; + addCell(project, thisColumnGroup, record, colName, text, + storeEmptyStrings, guessDataType); + } else { + addCell(project, thisColumnGroup, record, colName, value); } } else if (eventType == Token.EndEntity) { break; diff --git a/main/src/com/google/refine/model/Cell.java b/main/src/com/google/refine/model/Cell.java index 1b3acb303..7ab10cf16 100644 --- a/main/src/com/google/refine/model/Cell.java +++ b/main/src/com/google/refine/model/Cell.java @@ -93,8 +93,12 @@ public class Cell implements HasFields, Jsonizable { writer.key("t"); writer.value("date"); } else if (value instanceof Double && (((Double)value).isNaN() || ((Double)value).isInfinite())) { - // TODO: Skip? Write as string? + // write as a string writer.value(((Double)value).toString()); + } else if (value instanceof Float + && (((Float)value).isNaN() || ((Float)value).isInfinite())) { + // TODO: Skip? Write as string? + writer.value(((Float)value).toString()); } else { writer.value(value); } diff --git a/main/tests/server/src/com/google/refine/tests/RefineTest.java b/main/tests/server/src/com/google/refine/tests/RefineTest.java index 1f3689709..fe998e52b 100644 --- a/main/tests/server/src/com/google/refine/tests/RefineTest.java +++ b/main/tests/server/src/com/google/refine/tests/RefineTest.java @@ -75,6 +75,20 @@ public class RefineTest { Assert.assertEquals(project.rows.size(), numRows); } + /** + * Check that a project was created with the appropriate number of columns, rows, and records. 
+ * + * @param project project to check + * @param numCols expected column count + * @param numRows expected row count + * @param numRecords expected record count + */ + public static void assertProjectCreated(Project project, int numCols, int numRows, int numRecords) { + assertProjectCreated(project,numCols,numRows); + Assert.assertNotNull(project.recordModel); + Assert.assertEquals(project.recordModel.getRecordCount(),numRecords); + } + public void log(Project project) { // some quick and dirty debugging StringBuilder sb = new StringBuilder(); diff --git a/main/tests/server/src/com/google/refine/tests/importers/ImporterTest.java b/main/tests/server/src/com/google/refine/tests/importers/ImporterTest.java index 6c877f048..d5d65e018 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/ImporterTest.java +++ b/main/tests/server/src/com/google/refine/tests/importers/ImporterTest.java @@ -151,7 +151,7 @@ abstract class ImporterTest extends RefineTest { Project project, ImportColumnGroup rootColumnGroup, List exceptions) { XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); - project.columnModel.update(); + project.update(); for (Exception e : exceptions) { e.printStackTrace(); diff --git a/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java index fe03f56de..70a40b574 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java @@ -295,9 +295,78 @@ public class JsonImporterTests extends ImporterTest { } } + @Test + public void testJsonDatatypes(){ + RunTest(getSampleWithDataTypes()); + + log(project); + assertProjectCreated(project, 2, 21,4); + + Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id"); + Assert.assertEquals( 
project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell"); + + Row row = project.rows.get(8); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,""); // Make sure empty strings are preserved + + // null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 + + row = project.rows.get(12); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertNull(row.cells.get(1).value); + + row = project.rows.get(13); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Boolean.TRUE); + + row = project.rows.get(14); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Boolean.FALSE); + + row = project.rows.get(15); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Long.valueOf(0)); + + row = project.rows.get(16); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Long.valueOf(1)); + + row = project.rows.get(17); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Double.parseDouble("-2.1")); + + row = project.rows.get(18); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)0.23)); + + row = project.rows.get(19); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)-0.24)); + + row = project.rows.get(20); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(),2); + Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value)); + Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)3.14e100)); + + // null, true, false 
0,1,-2.1,0.23,-0.24,3.14e100 + + + // TODO: check data types + } + //------------helper methods--------------- - public static String getTypicalElement(int id){ + private static String getTypicalElement(int id){ return "{ \"id\" : " + id + "," + "\"author\" : \"Author " + id + ", The\"," + "\"title\" : \"Book title " + id + "\"," + @@ -305,7 +374,7 @@ public class JsonImporterTests extends ImporterTest { "}"; } - public static String getElementWithDuplicateSubElement(int id){ + private static String getElementWithDuplicateSubElement(int id){ return "{ \"id\" : " + id + "," + "\"authors\":[" + "{\"name\" : \"Author " + id + ", The\"}," + @@ -316,7 +385,7 @@ public class JsonImporterTests extends ImporterTest { "}"; } - public static String getSample(){ + static String getSample(){ StringBuilder sb = new StringBuilder(); sb.append("["); for(int i = 1; i < 7; i++){ @@ -329,7 +398,7 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - public static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) { + private static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) { JSONObject options = parser.createParserUIInitializationData( job, new LinkedList(), "text/json"); @@ -338,10 +407,14 @@ public class JsonImporterTests extends ImporterTest { JSONUtilities.append(path, JsonImporter.ANONYMOUS); JSONUtilities.safePut(options, "recordPath", path); + JSONUtilities.safePut(options, "trimStrings", false); + JSONUtilities.safePut(options, "storeEmptyStrings", true); + JSONUtilities.safePut(options, "guessCellValueTypes", false); + return options; } - public static String getSampleWithDuplicateNestedElements(){ + private static String getSampleWithDuplicateNestedElements(){ StringBuilder sb = new StringBuilder(); sb.append("["); for(int i = 1; i < 7; i++){ @@ -354,7 +427,7 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - public static String getSampleWithLineBreak(){ + 
private static String getSampleWithLineBreak(){ StringBuilder sb = new StringBuilder(); sb.append("["); for(int i = 1; i < 4; i++){ @@ -373,7 +446,7 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - public static String getSampleWithVaryingStructure(){ + private static String getSampleWithVaryingStructure(){ StringBuilder sb = new StringBuilder(); sb.append("["); for(int i = 1; i < 6; i++){ @@ -390,7 +463,7 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - public static String getSampleWithTreeStructure(){ + private static String getSampleWithTreeStructure(){ StringBuilder sb = new StringBuilder(); sb.append("["); for(int i = 1; i < 7; i++){ @@ -407,6 +480,18 @@ public class JsonImporterTests extends ImporterTest { sb.append("]"); return sb.toString(); } + + private static String getSampleWithDataTypes() { + StringBuilder sb = new StringBuilder(); + sb.append("["); + int i = 1; + sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n"); + sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n"); + sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n"); + sb.append("]"); + return sb.toString(); + } + private void RunTest(String testString) { RunTest(testString, getOptions(job, SUT)); diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java index 195d3c091..4aa9a84f0 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
package com.google.refine.tests.importers; +import java.io.Serializable; import java.util.List; import com.google.refine.importers.tree.ImportColumnGroup; @@ -47,19 +48,54 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities { return super.detectRecordElement(parser, tag); } - public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record, int level) throws Exception{ - super.processSubRecord(project, parser, columnGroup, record, level); + public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, + ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) + throws Exception { + super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType); } - public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{ - super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1); + @Deprecated + public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, + ImportRecord record, int level) + throws Exception { + super.processSubRecord(project, parser, columnGroup, record, level, false, true, false); } - public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{ - super.processRecord(project, parser, rootColumnGroup); + public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, + ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) + throws Exception { + super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType); } + @Deprecated + public void findRecordWrapper(Project project, TreeReader parser, 
String[] recordPath, int pathIndex, + ImportColumnGroup rootColumnGroup) + throws Exception { + super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true); + } + + public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup, + boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) + throws Exception { + super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); + } + + @Deprecated + public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) + throws Exception { + super.processRecord(project, parser, rootColumnGroup, true, false, true); + } + + public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) { + super.addCell(project, columnGroup, record, columnLocalName, value); + } + + public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) { + super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings); + } + + @Deprecated public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) { - super.addCell(project, columnGroup, record, columnLocalName, text); + super.addCell(project, columnGroup, record, columnLocalName, text, false, true); } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index 13ec00277..1d2890948 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ 
b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -209,7 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest { loadSampleXml(); String[] recordPath = new String[]{"library","book"}; - XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1); + XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, + false); log(project); assertProjectCreated(project, 0, 6); @@ -229,7 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest { loadData(XmlImporterTests.getSampleWithVaryingStructure()); String[] recordPath = new String[]{"library", "book"}; - XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1); + XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, + false); log(project); assertProjectCreated(project, 0, 6); diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.html index b2c7e863b..0572216f9 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.html @@ -14,6 +14,12 @@ + + + + + + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.js index 3801e45fa..e0dbbb337 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/json-parser-ui.js @@ -86,6 +86,11 @@ Refine.JsonParserUI.prototype.getOptions = function() { } else { options.limit = -1; } + + options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked; + options.guessCellValueTypes = 
this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked; + options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked; + options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; return options; @@ -103,6 +108,15 @@ Refine.JsonParserUI.prototype._initialize = function() { this._optionContainerElmts.limitCheckbox.attr("checked", "checked"); this._optionContainerElmts.limitInput[0].value = this._config.limit.toString(); } + if (this._config.trimStrings) { + this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked"); + } + if (this._config.guessCellValueTypes) { + this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked"); + } + if (this._config.storeEmptyStrings) { + this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked"); + } if (this._config.includeFileSources) { this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked"); } diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.html index c0b0d2ee0..3aff4f11b 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.html @@ -15,6 +15,12 @@ + + + + + + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.js index 46015bebc..acb1acc56 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/xml-parser-ui.js @@ -85,6 +85,10 @@ Refine.XmlParserUI.prototype.getOptions = function() { } else { options.limit = -1; } + options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked; + options.guessCellValueTypes = 
this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked; + options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked; + options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; return options; @@ -102,6 +106,15 @@ Refine.XmlParserUI.prototype._initialize = function() { this._optionContainerElmts.limitCheckbox.attr("checked", "checked"); this._optionContainerElmts.limitInput[0].value = this._config.limit.toString(); } + if (this._config.trimStrings) { + this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked"); + } + if (this._config.guessCellValueTypes) { + this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked"); + } + if (this._config.storeEmptyStrings) { + this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked"); + } if (this._config.includeFileSources) { this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked"); }