FIXED - task 578 & 596: Clean up JSON importer

http://code.google.com/p/google-refine/issues/detail?id=578
http://code.google.com/p/google-refine/issues/detail?id=596

Extend tree parser framework to allow any Serializable instead of just Strings. Use this in JSON importer to: Import keywords null, true, false; Import empty strings and don't trim whitespace from strings on import;  Import numbers directly instead of importing them as text and then parsing them ourselves. Add tests to verify all this stuff

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2543 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2012-09-08 01:20:25 +00:00
parent 9f7d0bc057
commit b3f5fada95
17 changed files with 432 additions and 70 deletions

View File

@ -23,7 +23,7 @@
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jackson-core-asl-1.9.9.jar" sourcepath="main/webapp/WEB-INF/lib-src/jackson-src-1.9.9.zip"/> <classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jackson-core-asl-1.9.9.jar" sourcepath="main/webapp/WEB-INF/lib-src/jackson-src-1.9.9.zip"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar" sourcepath="main/webapp/WEB-INF/lib-src/json-20100208-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/>

View File

@ -38,11 +38,13 @@ import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.Reader; import java.io.Reader;
import java.io.Serializable;
import java.util.List; import java.util.List;
import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonParser.NumberType;
import org.codehaus.jackson.JsonToken; import org.codehaus.jackson.JsonToken;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
@ -114,9 +116,9 @@ public class JsonImporter extends TreeImportingParserBase {
case VALUE_STRING: case VALUE_STRING:
return parser.getText(); return parser.getText();
case VALUE_NUMBER_INT: case VALUE_NUMBER_INT:
return Integer.valueOf(parser.getIntValue()); return Long.valueOf(parser.getLongValue());
case VALUE_NUMBER_FLOAT: case VALUE_NUMBER_FLOAT:
return Float.valueOf(parser.getFloatValue()); return Double.valueOf(parser.getDoubleValue());
case VALUE_TRUE: case VALUE_TRUE:
return Boolean.TRUE; return Boolean.TRUE;
case VALUE_FALSE: case VALUE_FALSE:
@ -215,7 +217,7 @@ public class JsonImporter extends TreeImportingParserBase {
private JsonToken current = null; private JsonToken current = null;
private JsonToken next = null; private JsonToken next = null;
private String fieldName = ANONYMOUS; private String fieldName = ANONYMOUS;
private String fieldValue = null; private Serializable fieldValue = null;
public JSONTreeReader(Reader reader) { public JSONTreeReader(Reader reader) {
@ -284,14 +286,52 @@ public class JsonImporter extends TreeImportingParserBase {
@Override @Override
public String getFieldValue() throws TreeReaderException { public String getFieldValue() throws TreeReaderException {
return fieldValue; return fieldValue.toString();
} }
/**
 * Returns the value stored for the current token in its native type
 * (not converted to text).
 *
 * @return the stored value; may be {@code null}
 */
@Override
public Serializable getValue()
throws TreeReaderException {
return fieldValue;
}
@Override @Override
public boolean hasNext() { public boolean hasNext() {
return next != null; return next != null;
} }
/**
 * Converts the scalar the parser is currently positioned on into a Java value.
 *
 * Strings are returned as-is, integers that Jackson reports as INT or LONG become
 * {@link Long}, floating point numbers become {@link Float} or {@link Double}
 * according to the reported number type, and anything else numeric is returned
 * as whatever {@code getNumberValue()} yields. Booleans map to the
 * {@link Boolean} constants.
 *
 * @param parser parser positioned on the token to convert
 * @param token  the token type to convert; may be {@code null}
 * @return the converted value, or {@code null} for a JSON null, a {@code null}
 *         token, or any non-value token
 * @throws IOException if the parser fails to read the value
 */
private Serializable getValue(JsonParser parser, JsonToken token) throws IOException {
    if (token == null) {
        return null;
    }
    switch (token) {
        case VALUE_STRING:
            return parser.getText();
        case VALUE_NUMBER_INT: {
            NumberType integralType = parser.getNumberType();
            if (integralType == NumberType.INT || integralType == NumberType.LONG) {
                return Long.valueOf(parser.getLongValue());
            }
            // Too large for a long - hand back the parser's own representation
            return parser.getNumberValue();
        }
        case VALUE_NUMBER_FLOAT: {
            NumberType realType = parser.getNumberType();
            if (realType == NumberType.FLOAT) {
                return Float.valueOf(parser.getFloatValue());
            }
            if (realType == NumberType.DOUBLE) {
                return Double.valueOf(parser.getDoubleValue());
            }
            return parser.getNumberValue();
        }
        case VALUE_TRUE:
            return Boolean.TRUE;
        case VALUE_FALSE:
            return Boolean.FALSE;
        case VALUE_NULL:
        case END_ARRAY:
        default:
            // No value to report for these tokens
            return null;
    }
}
@Override @Override
public Token next() throws TreeReaderException { public Token next() throws TreeReaderException {
JsonToken previous = current; JsonToken previous = current;
@ -300,7 +340,7 @@ public class JsonImporter extends TreeImportingParserBase {
try { try {
if (current != null) { if (current != null) {
if (current.isScalarValue()) { if (current.isScalarValue()) {
fieldValue = parser.getText(); fieldValue = getValue(parser,current);
} else { } else {
fieldValue = null; fieldValue = null;
} }

View File

@ -38,6 +38,7 @@ import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.io.Serializable;
import java.util.List; import java.util.List;
import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLInputFactory;
@ -288,6 +289,12 @@ public class XmlImporter extends TreeImportingParserBase {
return parser.getText(); return parser.getText();
} }
/**
 * Returns the current value. This simply forwards to {@code getFieldValue()},
 * so the result is the same text that method returns.
 */
@Override
public Serializable getValue() {
// XML parser only does string types
return getFieldValue();
}
@Override @Override
public int getAttributeCount(){ public int getAttributeCount(){
return parser.getAttributeCount(); return parser.getAttributeCount();

View File

@ -114,6 +114,7 @@ public abstract class TreeImportUtilities {
} }
} }
@Deprecated
static protected void addCell( static protected void addCell(
Project project, Project project,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
@ -121,12 +122,30 @@ public abstract class TreeImportUtilities {
String columnLocalName, String columnLocalName,
String text String text
) { ) {
if (text == null || (text).isEmpty()) { addCell(project, columnGroup, record, columnLocalName, text, true, true);
}
static protected void addCell(
Project project,
ImportColumnGroup columnGroup,
ImportRecord record,
String columnLocalName,
String text,
boolean storeEmptyString,
boolean guessDataType
) {
Serializable value = text;
if (!storeEmptyString && (text == null || (text).isEmpty())) {
return; return;
} }
if (guessDataType) {
value = ImporterUtilities.parseCellValue(text);
}
addCell(project, columnGroup, record, columnLocalName, value);
}
Serializable value = ImporterUtilities.parseCellValue(text); protected static void addCell(Project project, ImportColumnGroup columnGroup, ImportRecord record,
String columnLocalName, Serializable value) {
ImportColumn column = getColumn(project, columnGroup, columnLocalName); ImportColumn column = getColumn(project, columnGroup, columnLocalName);
int cellIndex = column.cellIndex; int cellIndex = column.cellIndex;

View File

@ -45,8 +45,8 @@ import org.json.JSONObject;
import com.google.refine.ProjectMetadata; import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities; import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress; import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
@ -55,20 +55,24 @@ import com.google.refine.util.JSONUtilities;
* Abstract class for importer parsers which handle tree-shaped data * Abstract class for importer parsers which handle tree-shaped data
* (currently XML & JSON). * (currently XML & JSON).
*/ */
abstract public class TreeImportingParserBase implements ImportingParser { abstract public class TreeImportingParserBase extends ImportingParserBase {
final protected boolean useInputStream;
protected TreeImportingParserBase(boolean useInputStream) { protected TreeImportingParserBase(final boolean useInputStream) {
this.useInputStream = useInputStream; super(useInputStream);
} }
@Override @Override
public JSONObject createParserUIInitializationData(ImportingJob job, public JSONObject createParserUIInitializationData(ImportingJob job,
List<JSONObject> fileRecords, String format) { List<JSONObject> fileRecords, String format) {
JSONObject options = new JSONObject(); JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
JSONUtilities.safePut(options, "trimStrings", false);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
return options; return options;
} }
@Override @Override
public void parse(Project project, ProjectMetadata metadata, public void parse(Project project, ProjectMetadata metadata,
ImportingJob job, List<JSONObject> fileRecords, String format, ImportingJob job, List<JSONObject> fileRecords, String format,
@ -200,6 +204,14 @@ abstract public class TreeImportingParserBase implements ImportingParser {
if (limit2 == 0) { // shouldn't really happen, but be sure since 0 is stop signal if (limit2 == 0) { // shouldn't really happen, but be sure since 0 is stop signal
limit2 = -1; limit2 = -1;
} }
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2);
// NOTE: these defaults are solely to preserve historical behavior.
// All new code should override them to keep input data from being modified
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings,
storeEmptyStrings,guessCellValueTypes);
} }
} }

View File

@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree; package com.google.refine.importers.tree;
import java.io.Serializable;
/** /**
* Interface for all tree-shaped parsers. * Interface for all tree-shaped parsers.
* *
@ -54,8 +56,12 @@ public interface TreeReader {
public String getFieldName() throws TreeReaderException; public String getFieldName() throws TreeReaderException;
public String getPrefix(); public String getPrefix();
@Deprecated
public String getFieldValue() throws TreeReaderException; public String getFieldValue() throws TreeReaderException;
public Serializable getValue() throws TreeReaderException;
public int getAttributeCount(); public int getAttributeCount();
public String getAttributeValue(int index); public String getAttributeValue(int index);
public String getAttributePrefix(int index); public String getAttributePrefix(int index);

View File

@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree; package com.google.refine.importers.tree;
import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -41,6 +42,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import javax.servlet.ServletException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -235,21 +238,33 @@ public class XmlImportUtilities extends TreeImportUtilities {
return null; return null;
} }
/**
 * Imports tree data without explicit string-handling options.
 *
 * @deprecated use {@link #importTreeData(TreeReader, Project, String[], ImportColumnGroup, int, boolean, boolean, boolean)}
 *             and pass the options explicitly. This overload forwards
 *             trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 */
@Deprecated
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit
) {
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true);
}
static public void importTreeData( static public void importTreeData(
TreeReader parser, TreeReader parser,
Project project, Project project,
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
int limit int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) { ) {
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)"); logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--); findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
} }
} }
} catch (TreeReaderException e) { } catch (TreeReaderException e) {
@ -258,7 +273,17 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
/**
 * Searches for records without explicit string-handling options.
 *
 * @deprecated use {@link #findRecord(Project, TreeReader, String[], int, ImportColumnGroup, int, boolean, boolean, boolean)}
 *             and pass the options explicitly. This overload forwards
 *             trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 */
@Deprecated
static protected void findRecord(
Project project,
TreeReader parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
) throws TreeReaderException {
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true);
}
/** /**
* *
@ -275,7 +300,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
String[] recordPath, String[] recordPath,
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
int limit int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath)); logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
@ -293,7 +321,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && limit != 0) { while (parser.hasNext() && limit != 0) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--); findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
} else if (eventType == Token.EndEntity) { } else if (eventType == Token.EndEntity) {
break; break;
} else if (eventType == Token.Value) { } else if (eventType == Token.Value) {
@ -302,13 +330,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
String desiredFieldName = recordPath[pathIndex + 1]; String desiredFieldName = recordPath[pathIndex + 1];
String currentFieldName = parser.getFieldName(); String currentFieldName = parser.getFieldName();
if (desiredFieldName.equals(currentFieldName)) { if (desiredFieldName.equals(currentFieldName)) {
processFieldAsRecord(project, parser, rootColumnGroup); processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType);
} }
} }
} }
} }
} else { } else {
processRecord(project, parser, rootColumnGroup); processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
} }
} else { } else {
skip(parser); skip(parser);
@ -326,6 +354,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
/**
 * Processes a record without explicit string-handling options. Forwards
 * trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 *
 * @deprecated on 20120907 by tfmorris - use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processRecord(project, parser, rootColumnGroup, true, false, true);
}
/** /**
* processRecord parses Tree data for a single element and its sub-elements, * processRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project * adding the parsed data as a row to the project
@ -337,15 +377,31 @@ public class XmlImportUtilities extends TreeImportUtilities {
static protected void processRecord( static protected void processRecord(
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record, 0); processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType);
addImportRecordToProject(record, project); addImportRecordToProject(record, project);
} }
/**
 * Processes a single field as a record without explicit string-handling
 * options. Forwards trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 *
 * @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true);
}
/** /**
* processFieldAsRecord parses Tree data for a single element and its sub-elements, * processFieldAsRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project * adding the parsed data as a row to the project
@ -357,20 +413,43 @@ public class XmlImportUtilities extends TreeImportUtilities {
static protected void processFieldAsRecord( static protected void processFieldAsRecord(
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
String text = parser.getFieldValue().trim(); Serializable value = parser.getValue();
if (text.length() > 0) { ImportRecord record = null;
ImportRecord record = new ImportRecord(); if (value instanceof String) {
String text = (String) value;
if (trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
record = new ImportRecord();
addCell(
project,
rootColumnGroup,
record,
parser.getFieldName(),
(String) value,
storeEmptyStrings,
guessDataType
);
}
} else {
record = new ImportRecord();
addCell( addCell(
project, project,
rootColumnGroup, rootColumnGroup,
record, record,
parser.getFieldName(), parser.getFieldName(),
text value
); );
}
if (record != null) {
addImportRecordToProject(record, project); addImportRecordToProject(record, project);
} }
} }
@ -396,6 +475,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
/**
 * Processes a sub-record without explicit string-handling options. Forwards
 * trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 *
 * @deprecated by tfmorris - use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processSubRecord( Project project,
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record,
int level
) throws TreeReaderException {
processSubRecord(project, parser, columnGroup, record, level, true, false, true);
}
/** /**
* *
* @param project * @param project
@ -409,7 +501,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
TreeReader parser, TreeReader parser,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record, ImportRecord record,
int level int level,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup); logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
@ -426,14 +521,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
int attributeCount = parser.getAttributeCount(); int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) { for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i).trim(); String text = parser.getAttributeValue(i);
if (text.length() > 0) { if (trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
addCell( addCell(
project, project,
thisColumnGroup, thisColumnGroup,
record, record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text text,
storeEmptyStrings,
guessDataType
); );
} }
} }
@ -446,23 +546,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
parser, parser,
thisColumnGroup, thisColumnGroup,
record, record,
level+1 level+1,
trimStrings,
storeEmptyStrings,
guessDataType
); );
} else if (//eventType == XMLStreamConstants.CDATA || } else if (//eventType == XMLStreamConstants.CDATA ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) { eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getFieldValue(); Serializable value = parser.getValue();
String colName = parser.getFieldName(); String colName = parser.getFieldName();
if(text != null){ if (value instanceof String) {
text = text.trim(); String text = (String) value;
if (text.length() > 0) { addCell(project, thisColumnGroup, record, colName, text,
addCell( storeEmptyStrings, guessDataType);
project, } else {
thisColumnGroup, addCell(project, thisColumnGroup, record, colName, value);
record,
colName,
text
);
}
} }
} else if (eventType == Token.EndEntity) { } else if (eventType == Token.EndEntity) {
break; break;

View File

@ -93,8 +93,12 @@ public class Cell implements HasFields, Jsonizable {
writer.key("t"); writer.value("date"); writer.key("t"); writer.value("date");
} else if (value instanceof Double } else if (value instanceof Double
&& (((Double)value).isNaN() || ((Double)value).isInfinite())) { && (((Double)value).isNaN() || ((Double)value).isInfinite())) {
// TODO: Skip? Write as string? // write as a string
writer.value(((Double)value).toString()); writer.value(((Double)value).toString());
} else if (value instanceof Float
&& (((Float)value).isNaN() || ((Float)value).isInfinite())) {
// TODO: Skip? Write as string?
writer.value(((Float)value).toString());
} else { } else {
writer.value(value); writer.value(value);
} }

View File

@ -75,6 +75,20 @@ public class RefineTest {
Assert.assertEquals(project.rows.size(), numRows); Assert.assertEquals(project.rows.size(), numRows);
} }
/**
 * Check that a project was created with the appropriate number of columns, rows, and records.
 * Also asserts that the project's record model is not null.
 *
 * @param project project to check
 * @param numCols expected column count
 * @param numRows expected row count
 * @param numRecords expected record count
 */
public static void assertProjectCreated(Project project, int numCols, int numRows, int numRecords) {
assertProjectCreated(project,numCols,numRows);
Assert.assertNotNull(project.recordModel);
Assert.assertEquals(project.recordModel.getRecordCount(),numRecords);
}
public void log(Project project) { public void log(Project project) {
// some quick and dirty debugging // some quick and dirty debugging
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -151,7 +151,7 @@ abstract class ImporterTest extends RefineTest {
Project project, ImportColumnGroup rootColumnGroup, List<Exception> exceptions) { Project project, ImportColumnGroup rootColumnGroup, List<Exception> exceptions) {
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
project.columnModel.update(); project.update();
for (Exception e : exceptions) { for (Exception e : exceptions) {
e.printStackTrace(); e.printStackTrace();

View File

@ -295,9 +295,78 @@ public class JsonImporterTests extends ImporterTest {
} }
} }
/**
 * Imports the data type sample and verifies that each JSON scalar arrives as the
 * expected Java value: empty strings are kept, null stays null, booleans become
 * Boolean, integers become Long and floating point numbers become Double.
 */
@Test
public void testJsonDatatypes(){
    RunTest(getSampleWithDataTypes());
    log(project);
    assertProjectCreated(project, 2, 21,4);

    Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
    Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");

    // Make sure empty strings are preserved
    Assert.assertEquals(secondCellValueOfRow(8), "");

    // Rows 12..20 hold, in order: null, true, false, 0, 1, -2.1, 0.23, -0.24, 3.14e100
    Assert.assertNull(secondCellValueOfRow(12));
    Assert.assertEquals(secondCellValueOfRow(13), Boolean.TRUE);
    Assert.assertEquals(secondCellValueOfRow(14), Boolean.FALSE);
    Assert.assertEquals(secondCellValueOfRow(15), Long.valueOf(0));
    Assert.assertEquals(secondCellValueOfRow(16), Long.valueOf(1));
    Assert.assertEquals(secondCellValueOfRow(17), Double.valueOf(-2.1));
    Assert.assertEquals(secondCellValueOfRow(18), Double.valueOf(0.23));
    Assert.assertEquals(secondCellValueOfRow(19), Double.valueOf(-0.24));

    Object largeExponent = secondCellValueOfRow(20);
    Assert.assertFalse(Double.isNaN((Double) largeExponent));
    Assert.assertEquals(largeExponent, Double.valueOf(3.14e100));

    // TODO: check data types
}

/**
 * Asserts that the given row exists and has exactly two cells, then returns the
 * value held by its second cell.
 */
private Object secondCellValueOfRow(int rowIndex) {
    Row row = project.rows.get(rowIndex);
    Assert.assertNotNull(row);
    Assert.assertEquals(row.cells.size(),2);
    return row.cells.get(1).value;
}
//------------helper methods--------------- //------------helper methods---------------
public static String getTypicalElement(int id){ private static String getTypicalElement(int id){
return "{ \"id\" : " + id + "," + return "{ \"id\" : " + id + "," +
"\"author\" : \"Author " + id + ", The\"," + "\"author\" : \"Author " + id + ", The\"," +
"\"title\" : \"Book title " + id + "\"," + "\"title\" : \"Book title " + id + "\"," +
@ -305,7 +374,7 @@ public class JsonImporterTests extends ImporterTest {
"}"; "}";
} }
public static String getElementWithDuplicateSubElement(int id){ private static String getElementWithDuplicateSubElement(int id){
return "{ \"id\" : " + id + "," + return "{ \"id\" : " + id + "," +
"\"authors\":[" + "\"authors\":[" +
"{\"name\" : \"Author " + id + ", The\"}," + "{\"name\" : \"Author " + id + ", The\"}," +
@ -316,7 +385,7 @@ public class JsonImporterTests extends ImporterTest {
"}"; "}";
} }
public static String getSample(){ static String getSample(){
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for(int i = 1; i < 7; i++){
@ -329,7 +398,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
public static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) { private static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) {
JSONObject options = parser.createParserUIInitializationData( JSONObject options = parser.createParserUIInitializationData(
job, new LinkedList<JSONObject>(), "text/json"); job, new LinkedList<JSONObject>(), "text/json");
@ -338,10 +407,14 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.append(path, JsonImporter.ANONYMOUS); JSONUtilities.append(path, JsonImporter.ANONYMOUS);
JSONUtilities.safePut(options, "recordPath", path); JSONUtilities.safePut(options, "recordPath", path);
JSONUtilities.safePut(options, "trimStrings", false);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
return options; return options;
} }
public static String getSampleWithDuplicateNestedElements(){ private static String getSampleWithDuplicateNestedElements(){
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for(int i = 1; i < 7; i++){
@ -354,7 +427,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
public static String getSampleWithLineBreak(){ private static String getSampleWithLineBreak(){
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 4; i++){ for(int i = 1; i < 4; i++){
@ -373,7 +446,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
public static String getSampleWithVaryingStructure(){ private static String getSampleWithVaryingStructure(){
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 6; i++){ for(int i = 1; i < 6; i++){
@ -390,7 +463,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
public static String getSampleWithTreeStructure(){ private static String getSampleWithTreeStructure(){
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for(int i = 1; i < 7; i++){
@ -408,6 +481,18 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
/**
 * Builds a JSON array of three records for the data type test: two records whose
 * cells are all strings (the second contains an empty string) and one record with
 * a null id whose cells cover null, booleans, integers and floating point numbers.
 */
private static String getSampleWithDataTypes() {
    int nextId = 1;
    String stringRecord = "{\"id\":" + nextId + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n";
    nextId++;
    String emptyStringRecord = "{\"id\":" + nextId + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n";
    String typedRecord = "{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n";
    return "[" + stringRecord + emptyStringRecord + typedRecord + "]";
}
private void RunTest(String testString) { private void RunTest(String testString) {
RunTest(testString, getOptions(job, SUT)); RunTest(testString, getOptions(job, SUT));
} }

View File

@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers; package com.google.refine.tests.importers;
import java.io.Serializable;
import java.util.List; import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
@ -47,19 +48,54 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
return super.detectRecordElement(parser, tag); return super.detectRecordElement(parser, tag);
} }
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record, int level) throws Exception{ public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
super.processSubRecord(project, parser, columnGroup, record, level); ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType);
} }
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{ @Deprecated
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1); public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, false, true, false);
} }
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{ public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
super.processRecord(project, parser, rootColumnGroup); ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType);
} }
/**
 * Legacy overload of {@code findRecordWrapper} that takes no string-handling flags.
 * Delegates to {@code super.findRecord} with -1 as the sixth argument and the fixed
 * flag values trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 *
 * NOTE(review): the deprecated {@code ProcessSubRecordWrapper} overload in this class
 * passes (false, true, false) for the same three flags - confirm which defaults are intended.
 *
 * @throws Exception whatever {@code super.findRecord} throws
 * @deprecated use the overload that takes trimStrings, storeEmptyStrings and guessDataType explicitly
 */
@Deprecated
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true);
}
/**
 * Public pass-through to {@code super.processRecord}; every argument is forwarded unchanged.
 *
 * @param project           forwarded to {@code super.processRecord}
 * @param parser            forwarded to {@code super.processRecord}
 * @param rootColumnGroup   forwarded to {@code super.processRecord}
 * @param trimStrings       forwarded as-is (presumably controls whitespace trimming of string values - confirm in the superclass)
 * @param storeEmptyStrings forwarded as-is (presumably controls whether empty strings are kept - confirm in the superclass)
 * @param guessDataType     forwarded as-is (presumably controls parsing of text into typed values - confirm in the superclass)
 * @throws Exception whatever {@code super.processRecord} throws
 */
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup,
boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
}
/**
 * Legacy overload of {@code processRecordWrapper} that takes no string-handling flags.
 * Delegates to {@code super.processRecord} with the fixed flag values
 * trimStrings=true, storeEmptyStrings=false, guessDataType=true.
 *
 * NOTE(review): the deprecated {@code ProcessSubRecordWrapper} overload in this class
 * passes (false, true, false) for the same three flags - confirm which defaults are intended.
 *
 * @throws Exception whatever {@code super.processRecord} throws
 * @deprecated use the overload that takes trimStrings, storeEmptyStrings and guessDataType explicitly
 */
@Deprecated
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, true, false, true);
}
/**
 * Public pass-through to the {@code super.addCell} variant that accepts an arbitrary
 * {@link Serializable} cell value.
 *
 * @param commonStartingRowIndex accepted but not forwarded; {@code super.addCell} is called without it
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, value);
}
/**
 * Public pass-through to the {@code super.addCell} variant that takes a text value
 * together with the trimStrings and storeEmptyStrings flags, both forwarded unchanged.
 *
 * @param commonStartingRowIndex accepted but not forwarded; {@code super.addCell} is called without it
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) {
super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings);
}
@Deprecated
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) { public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, text); super.addCell(project, columnGroup, record, columnLocalName, text, false, true);
} }
} }

View File

@ -209,7 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml(); loadSampleXml();
String[] recordPath = new String[]{"library","book"}; String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1); XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);
@ -229,7 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadData(XmlImporterTests.getSampleWithVaryingStructure()); loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"}; String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1); XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);

View File

@ -14,6 +14,12 @@
<td><label for="$limit">Load at most</label></td> <td><label for="$limit">Load at most</label></td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> <td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
<label for="$limit">record(s) of data</label></td></tr> <label for="$limit">record(s) of data</label></td></tr>
<!-- "Preserve empty strings" option; the checked state is set from the parser config by the UI script. -->
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" value="true" /></td>
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
<!-- "Trim strings" option; the label must target the $trim checkbox so clicking it toggles the right control. -->
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td> <tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr> <td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
</table></div></td> </table></div></td>

View File

@ -86,6 +86,11 @@ Refine.JsonParserUI.prototype.getOptions = function() {
} else { } else {
options.limit = -1; options.limit = -1;
} }
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
return options; return options;
@ -103,6 +108,15 @@ Refine.JsonParserUI.prototype._initialize = function() {
this._optionContainerElmts.limitCheckbox.attr("checked", "checked"); this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString(); this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
} }
if (this._config.trimStrings) {
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
}
if (this._config.guessCellValueTypes) {
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
}
if (this._config.storeEmptyStrings) {
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
}
if (this._config.includeFileSources) { if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked"); this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
} }

View File

@ -15,6 +15,12 @@
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> <td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
<label for="$limit">record(s) of data</label></td></tr> <label for="$limit">record(s) of data</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" /></td>
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
<!-- "Trim strings" option; the label must target the $trim checkbox so clicking it toggles the right control. -->
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td> <tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr> <td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
</table></div></td> </table></div></td>

View File

@ -85,6 +85,10 @@ Refine.XmlParserUI.prototype.getOptions = function() {
} else { } else {
options.limit = -1; options.limit = -1;
} }
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
return options; return options;
@ -102,6 +106,15 @@ Refine.XmlParserUI.prototype._initialize = function() {
this._optionContainerElmts.limitCheckbox.attr("checked", "checked"); this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString(); this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
} }
if (this._config.trimStrings) {
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
}
if (this._config.guessCellValueTypes) {
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
}
if (this._config.storeEmptyStrings) {
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
}
if (this._config.includeFileSources) { if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked"); this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
} }