FIXED - task 578 & 596: Clean up JSON importer
http://code.google.com/p/google-refine/issues/detail?id=578 http://code.google.com/p/google-refine/issues/detail?id=596 Extend the tree parser framework to allow any Serializable value instead of just Strings. Use this in the JSON importer to: import the keywords null, true, and false; import empty strings and not trim whitespace from strings on import; import numbers directly instead of importing them as text and then parsing them ourselves. Add tests to verify all of this. git-svn-id: http://google-refine.googlecode.com/svn/trunk@2543 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
9f7d0bc057
commit
b3f5fada95
@ -23,7 +23,7 @@
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jackson-core-asl-1.9.9.jar" sourcepath="main/webapp/WEB-INF/lib-src/jackson-src-1.9.9.zip"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar" sourcepath="main/webapp/WEB-INF/lib-src/json-20100208-sources.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/>
|
||||
|
@ -38,11 +38,13 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonParser.NumberType;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
@ -114,9 +116,9 @@ public class JsonImporter extends TreeImportingParserBase {
|
||||
case VALUE_STRING:
|
||||
return parser.getText();
|
||||
case VALUE_NUMBER_INT:
|
||||
return Integer.valueOf(parser.getIntValue());
|
||||
return Long.valueOf(parser.getLongValue());
|
||||
case VALUE_NUMBER_FLOAT:
|
||||
return Float.valueOf(parser.getFloatValue());
|
||||
return Double.valueOf(parser.getDoubleValue());
|
||||
case VALUE_TRUE:
|
||||
return Boolean.TRUE;
|
||||
case VALUE_FALSE:
|
||||
@ -215,7 +217,7 @@ public class JsonImporter extends TreeImportingParserBase {
|
||||
private JsonToken current = null;
|
||||
private JsonToken next = null;
|
||||
private String fieldName = ANONYMOUS;
|
||||
private String fieldValue = null;
|
||||
private Serializable fieldValue = null;
|
||||
|
||||
|
||||
public JSONTreeReader(Reader reader) {
|
||||
@ -284,14 +286,52 @@ public class JsonImporter extends TreeImportingParserBase {
|
||||
|
||||
@Override
|
||||
public String getFieldValue() throws TreeReaderException {
|
||||
return fieldValue.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Serializable getValue()
|
||||
throws TreeReaderException {
|
||||
return fieldValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return next != null;
|
||||
}
|
||||
|
||||
|
||||
private Serializable getValue(JsonParser parser, JsonToken token) throws IOException {
|
||||
if (token != null) {
|
||||
switch (token) {
|
||||
case VALUE_STRING:
|
||||
return parser.getText();
|
||||
case VALUE_NUMBER_INT:
|
||||
if (parser.getNumberType() == NumberType.INT || parser.getNumberType() == NumberType.LONG) {
|
||||
return Long.valueOf(parser.getLongValue());
|
||||
} else {
|
||||
return parser.getNumberValue();
|
||||
}
|
||||
case VALUE_NUMBER_FLOAT:
|
||||
if (parser.getNumberType() == NumberType.FLOAT) {
|
||||
return Float.valueOf(parser.getFloatValue());
|
||||
} else if (parser.getNumberType() == NumberType.DOUBLE) {
|
||||
return Double.valueOf(parser.getDoubleValue());
|
||||
} else {
|
||||
return parser.getNumberValue();
|
||||
}
|
||||
case VALUE_TRUE:
|
||||
return Boolean.TRUE;
|
||||
case VALUE_FALSE:
|
||||
return Boolean.FALSE;
|
||||
case VALUE_NULL:
|
||||
return null;
|
||||
case END_ARRAY:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() throws TreeReaderException {
|
||||
JsonToken previous = current;
|
||||
@ -300,7 +340,7 @@ public class JsonImporter extends TreeImportingParserBase {
|
||||
try {
|
||||
if (current != null) {
|
||||
if (current.isScalarValue()) {
|
||||
fieldValue = parser.getText();
|
||||
fieldValue = getValue(parser,current);
|
||||
} else {
|
||||
fieldValue = null;
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
@ -287,7 +288,13 @@ public class XmlImporter extends TreeImportingParserBase {
|
||||
public String getFieldValue(){
|
||||
return parser.getText();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Serializable getValue() {
|
||||
// XML parser only does string types
|
||||
return getFieldValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAttributeCount(){
|
||||
return parser.getAttributeCount();
|
||||
|
@ -114,6 +114,7 @@ public abstract class TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
static protected void addCell(
|
||||
Project project,
|
||||
ImportColumnGroup columnGroup,
|
||||
@ -121,12 +122,30 @@ public abstract class TreeImportUtilities {
|
||||
String columnLocalName,
|
||||
String text
|
||||
) {
|
||||
if (text == null || (text).isEmpty()) {
|
||||
addCell(project, columnGroup, record, columnLocalName, text, true, true);
|
||||
}
|
||||
|
||||
static protected void addCell(
|
||||
Project project,
|
||||
ImportColumnGroup columnGroup,
|
||||
ImportRecord record,
|
||||
String columnLocalName,
|
||||
String text,
|
||||
boolean storeEmptyString,
|
||||
boolean guessDataType
|
||||
) {
|
||||
Serializable value = text;
|
||||
if (!storeEmptyString && (text == null || (text).isEmpty())) {
|
||||
return;
|
||||
}
|
||||
if (guessDataType) {
|
||||
value = ImporterUtilities.parseCellValue(text);
|
||||
}
|
||||
addCell(project, columnGroup, record, columnLocalName, value);
|
||||
}
|
||||
|
||||
Serializable value = ImporterUtilities.parseCellValue(text);
|
||||
|
||||
protected static void addCell(Project project, ImportColumnGroup columnGroup, ImportRecord record,
|
||||
String columnLocalName, Serializable value) {
|
||||
ImportColumn column = getColumn(project, columnGroup, columnLocalName);
|
||||
int cellIndex = column.cellIndex;
|
||||
|
||||
|
@ -45,8 +45,8 @@ import org.json.JSONObject;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
|
||||
import com.google.refine.importers.ImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingParser;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
@ -55,19 +55,23 @@ import com.google.refine.util.JSONUtilities;
|
||||
* Abstract class for importer parsers which handle tree-shaped data
|
||||
* (currently XML & JSON).
|
||||
*/
|
||||
abstract public class TreeImportingParserBase implements ImportingParser {
|
||||
final protected boolean useInputStream;
|
||||
|
||||
protected TreeImportingParserBase(boolean useInputStream) {
|
||||
this.useInputStream = useInputStream;
|
||||
abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
|
||||
protected TreeImportingParserBase(final boolean useInputStream) {
|
||||
super(useInputStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||
List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = new JSONObject();
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
JSONUtilities.safePut(options, "trimStrings", false);
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||
JSONUtilities.safePut(options, "storeEmptyStrings", true);
|
||||
return options;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void parse(Project project, ProjectMetadata metadata,
|
||||
@ -200,6 +204,14 @@ abstract public class TreeImportingParserBase implements ImportingParser {
|
||||
if (limit2 == 0) { // shouldn't really happen, but be sure since 0 is stop signal
|
||||
limit2 = -1;
|
||||
}
|
||||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2);
|
||||
|
||||
// NOTE: these defaults are solely to preserve historical behavior.
|
||||
// All new code should override them to keep input data from being modified
|
||||
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
|
||||
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
|
||||
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
||||
|
||||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings,
|
||||
storeEmptyStrings,guessCellValueTypes);
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Interface for all tree-shaped parsers.
|
||||
*
|
||||
@ -54,8 +56,12 @@ public interface TreeReader {
|
||||
|
||||
public String getFieldName() throws TreeReaderException;
|
||||
public String getPrefix();
|
||||
|
||||
@Deprecated
|
||||
public String getFieldValue() throws TreeReaderException;
|
||||
|
||||
public Serializable getValue() throws TreeReaderException;
|
||||
|
||||
public int getAttributeCount();
|
||||
public String getAttributeValue(int index);
|
||||
public String getAttributePrefix(int index);
|
||||
|
@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
@ -41,6 +42,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -235,21 +238,33 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Deprecated
|
||||
static public void importTreeData(
|
||||
TreeReader parser,
|
||||
Project project,
|
||||
String[] recordPath,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
) {
|
||||
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true);
|
||||
}
|
||||
|
||||
static public void importTreeData(
|
||||
TreeReader parser,
|
||||
Project project,
|
||||
String[] recordPath,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
int limit,
|
||||
boolean trimStrings,
|
||||
boolean storeEmptyStrings,
|
||||
boolean guessDataType
|
||||
) {
|
||||
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--);
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
|
||||
}
|
||||
}
|
||||
} catch (TreeReaderException e) {
|
||||
@ -258,7 +273,17 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Deprecated
|
||||
static protected void findRecord(
|
||||
Project project,
|
||||
TreeReader parser,
|
||||
String[] recordPath,
|
||||
int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
) throws TreeReaderException {
|
||||
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
@ -275,7 +300,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
String[] recordPath,
|
||||
int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
int limit,
|
||||
boolean trimStrings,
|
||||
boolean storeEmptyStrings,
|
||||
boolean guessDataType
|
||||
) throws TreeReaderException {
|
||||
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
|
||||
|
||||
@ -293,7 +321,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
while (parser.hasNext() && limit != 0) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--);
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
|
||||
} else if (eventType == Token.EndEntity) {
|
||||
break;
|
||||
} else if (eventType == Token.Value) {
|
||||
@ -302,13 +330,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
String desiredFieldName = recordPath[pathIndex + 1];
|
||||
String currentFieldName = parser.getFieldName();
|
||||
if (desiredFieldName.equals(currentFieldName)) {
|
||||
processFieldAsRecord(project, parser, rootColumnGroup);
|
||||
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
processRecord(project, parser, rootColumnGroup);
|
||||
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
|
||||
}
|
||||
} else {
|
||||
skip(parser);
|
||||
@ -326,6 +354,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated on 20120907 by tfmorris - use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
|
||||
*/
|
||||
@Deprecated
|
||||
static protected void processRecord(
|
||||
Project project,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws TreeReaderException {
|
||||
processRecord(project, parser, rootColumnGroup, true, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* processRecord parses Tree data for a single element and its sub-elements,
|
||||
* adding the parsed data as a row to the project
|
||||
@ -337,15 +377,31 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
static protected void processRecord(
|
||||
Project project,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
boolean trimStrings,
|
||||
boolean storeEmptyStrings,
|
||||
boolean guessDataType
|
||||
) throws TreeReaderException {
|
||||
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
|
||||
ImportRecord record = new ImportRecord();
|
||||
|
||||
processSubRecord(project, parser, rootColumnGroup, record, 0);
|
||||
processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType);
|
||||
addImportRecordToProject(record, project);
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
|
||||
*/
|
||||
@Deprecated
|
||||
static protected void processFieldAsRecord(
|
||||
Project project,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws TreeReaderException {
|
||||
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* processFieldAsRecord parses Tree data for a single element and its sub-elements,
|
||||
* adding the parsed data as a row to the project
|
||||
@ -357,20 +413,43 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
static protected void processFieldAsRecord(
|
||||
Project project,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
boolean trimStrings,
|
||||
boolean storeEmptyStrings,
|
||||
boolean guessDataType
|
||||
) throws TreeReaderException {
|
||||
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
|
||||
|
||||
String text = parser.getFieldValue().trim();
|
||||
if (text.length() > 0) {
|
||||
ImportRecord record = new ImportRecord();
|
||||
Serializable value = parser.getValue();
|
||||
ImportRecord record = null;
|
||||
if (value instanceof String) {
|
||||
String text = (String) value;
|
||||
if (trimStrings) {
|
||||
text = text.trim();
|
||||
}
|
||||
if (text.length() > 0 | !storeEmptyStrings) {
|
||||
record = new ImportRecord();
|
||||
addCell(
|
||||
project,
|
||||
rootColumnGroup,
|
||||
record,
|
||||
parser.getFieldName(),
|
||||
(String) value,
|
||||
storeEmptyStrings,
|
||||
guessDataType
|
||||
);
|
||||
}
|
||||
} else {
|
||||
record = new ImportRecord();
|
||||
addCell(
|
||||
project,
|
||||
rootColumnGroup,
|
||||
record,
|
||||
parser.getFieldName(),
|
||||
text
|
||||
value
|
||||
);
|
||||
}
|
||||
if (record != null) {
|
||||
addImportRecordToProject(record, project);
|
||||
}
|
||||
}
|
||||
@ -396,6 +475,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
|
||||
*/
|
||||
@Deprecated
|
||||
static protected void processSubRecord( Project project,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup columnGroup,
|
||||
ImportRecord record,
|
||||
int level
|
||||
) throws TreeReaderException {
|
||||
processSubRecord(project, parser, columnGroup, record, level, true, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param project
|
||||
@ -409,7 +501,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
TreeReader parser,
|
||||
ImportColumnGroup columnGroup,
|
||||
ImportRecord record,
|
||||
int level
|
||||
int level,
|
||||
boolean trimStrings,
|
||||
boolean storeEmptyStrings,
|
||||
boolean guessDataType
|
||||
) throws TreeReaderException {
|
||||
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
|
||||
|
||||
@ -426,14 +521,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
int attributeCount = parser.getAttributeCount();
|
||||
for (int i = 0; i < attributeCount; i++) {
|
||||
String text = parser.getAttributeValue(i).trim();
|
||||
if (text.length() > 0) {
|
||||
String text = parser.getAttributeValue(i);
|
||||
if (trimStrings) {
|
||||
text = text.trim();
|
||||
}
|
||||
if (text.length() > 0 | !storeEmptyStrings) {
|
||||
addCell(
|
||||
project,
|
||||
thisColumnGroup,
|
||||
record,
|
||||
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
|
||||
text
|
||||
text,
|
||||
storeEmptyStrings,
|
||||
guessDataType
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -446,23 +546,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
parser,
|
||||
thisColumnGroup,
|
||||
record,
|
||||
level+1
|
||||
level+1,
|
||||
trimStrings,
|
||||
storeEmptyStrings,
|
||||
guessDataType
|
||||
);
|
||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getFieldValue();
|
||||
Serializable value = parser.getValue();
|
||||
String colName = parser.getFieldName();
|
||||
if(text != null){
|
||||
text = text.trim();
|
||||
if (text.length() > 0) {
|
||||
addCell(
|
||||
project,
|
||||
thisColumnGroup,
|
||||
record,
|
||||
colName,
|
||||
text
|
||||
);
|
||||
}
|
||||
if (value instanceof String) {
|
||||
String text = (String) value;
|
||||
addCell(project, thisColumnGroup, record, colName, text,
|
||||
storeEmptyStrings, guessDataType);
|
||||
} else {
|
||||
addCell(project, thisColumnGroup, record, colName, value);
|
||||
}
|
||||
} else if (eventType == Token.EndEntity) {
|
||||
break;
|
||||
|
@ -93,8 +93,12 @@ public class Cell implements HasFields, Jsonizable {
|
||||
writer.key("t"); writer.value("date");
|
||||
} else if (value instanceof Double
|
||||
&& (((Double)value).isNaN() || ((Double)value).isInfinite())) {
|
||||
// TODO: Skip? Write as string?
|
||||
// write as a string
|
||||
writer.value(((Double)value).toString());
|
||||
} else if (value instanceof Float
|
||||
&& (((Float)value).isNaN() || ((Float)value).isInfinite())) {
|
||||
// TODO: Skip? Write as string?
|
||||
writer.value(((Float)value).toString());
|
||||
} else {
|
||||
writer.value(value);
|
||||
}
|
||||
|
@ -75,6 +75,20 @@ public class RefineTest {
|
||||
Assert.assertEquals(project.rows.size(), numRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that a project was created with the appropriate number of columns, rows, and records.
|
||||
*
|
||||
* @param project project to check
|
||||
* @param numCols expected column count
|
||||
* @param numRows expected row count
|
||||
* @param numRecords expected record count
|
||||
*/
|
||||
public static void assertProjectCreated(Project project, int numCols, int numRows, int numRecords) {
|
||||
assertProjectCreated(project,numCols,numRows);
|
||||
Assert.assertNotNull(project.recordModel);
|
||||
Assert.assertEquals(project.recordModel.getRecordCount(),numRecords);
|
||||
}
|
||||
|
||||
public void log(Project project) {
|
||||
// some quick and dirty debugging
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
@ -151,7 +151,7 @@ abstract class ImporterTest extends RefineTest {
|
||||
Project project, ImportColumnGroup rootColumnGroup, List<Exception> exceptions) {
|
||||
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
project.columnModel.update();
|
||||
project.update();
|
||||
|
||||
for (Exception e : exceptions) {
|
||||
e.printStackTrace();
|
||||
|
@ -295,9 +295,78 @@ public class JsonImporterTests extends ImporterTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJsonDatatypes(){
|
||||
RunTest(getSampleWithDataTypes());
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 2, 21,4);
|
||||
|
||||
Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
|
||||
Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");
|
||||
|
||||
Row row = project.rows.get(8);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,""); // Make sure empty strings are preserved
|
||||
|
||||
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
|
||||
|
||||
row = project.rows.get(12);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertNull(row.cells.get(1).value);
|
||||
|
||||
row = project.rows.get(13);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Boolean.TRUE);
|
||||
|
||||
row = project.rows.get(14);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Boolean.FALSE);
|
||||
|
||||
row = project.rows.get(15);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(0));
|
||||
|
||||
row = project.rows.get(16);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(1));
|
||||
|
||||
row = project.rows.get(17);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Double.parseDouble("-2.1"));
|
||||
|
||||
row = project.rows.get(18);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)0.23));
|
||||
|
||||
row = project.rows.get(19);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)-0.24));
|
||||
|
||||
row = project.rows.get(20);
|
||||
Assert.assertNotNull(row);
|
||||
Assert.assertEquals(row.cells.size(),2);
|
||||
Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value));
|
||||
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)3.14e100));
|
||||
|
||||
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
|
||||
|
||||
|
||||
// TODO: check data types
|
||||
}
|
||||
|
||||
//------------helper methods---------------
|
||||
|
||||
public static String getTypicalElement(int id){
|
||||
private static String getTypicalElement(int id){
|
||||
return "{ \"id\" : " + id + "," +
|
||||
"\"author\" : \"Author " + id + ", The\"," +
|
||||
"\"title\" : \"Book title " + id + "\"," +
|
||||
@ -305,7 +374,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
"}";
|
||||
}
|
||||
|
||||
public static String getElementWithDuplicateSubElement(int id){
|
||||
private static String getElementWithDuplicateSubElement(int id){
|
||||
return "{ \"id\" : " + id + "," +
|
||||
"\"authors\":[" +
|
||||
"{\"name\" : \"Author " + id + ", The\"}," +
|
||||
@ -316,7 +385,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
"}";
|
||||
}
|
||||
|
||||
public static String getSample(){
|
||||
static String getSample(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 7; i++){
|
||||
@ -329,7 +398,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
private static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
JSONObject options = parser.createParserUIInitializationData(
|
||||
job, new LinkedList<JSONObject>(), "text/json");
|
||||
|
||||
@ -338,10 +407,14 @@ public class JsonImporterTests extends ImporterTest {
|
||||
JSONUtilities.append(path, JsonImporter.ANONYMOUS);
|
||||
|
||||
JSONUtilities.safePut(options, "recordPath", path);
|
||||
JSONUtilities.safePut(options, "trimStrings", false);
|
||||
JSONUtilities.safePut(options, "storeEmptyStrings", true);
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
public static String getSampleWithDuplicateNestedElements(){
|
||||
private static String getSampleWithDuplicateNestedElements(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 7; i++){
|
||||
@ -354,7 +427,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String getSampleWithLineBreak(){
|
||||
private static String getSampleWithLineBreak(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 4; i++){
|
||||
@ -373,7 +446,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String getSampleWithVaryingStructure(){
|
||||
private static String getSampleWithVaryingStructure(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 6; i++){
|
||||
@ -390,7 +463,7 @@ public class JsonImporterTests extends ImporterTest {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String getSampleWithTreeStructure(){
|
||||
private static String getSampleWithTreeStructure(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 7; i++){
|
||||
@ -407,6 +480,18 @@ public class JsonImporterTests extends ImporterTest {
|
||||
sb.append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String getSampleWithDataTypes() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[");
|
||||
int i = 1;
|
||||
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n");
|
||||
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n");
|
||||
sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n");
|
||||
sb.append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
private void RunTest(String testString) {
|
||||
RunTest(testString, getOptions(job, SUT));
|
||||
|
@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
@ -47,19 +48,54 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
|
||||
return super.detectRecordElement(parser, tag);
|
||||
}
|
||||
|
||||
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record, int level) throws Exception{
|
||||
super.processSubRecord(project, parser, columnGroup, record, level);
|
||||
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
|
||||
ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
|
||||
throws Exception {
|
||||
super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType);
|
||||
}
|
||||
|
||||
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1);
|
||||
@Deprecated
|
||||
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
|
||||
ImportRecord record, int level)
|
||||
throws Exception {
|
||||
super.processSubRecord(project, parser, columnGroup, record, level, false, true, false);
|
||||
}
|
||||
|
||||
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||
super.processRecord(project, parser, rootColumnGroup);
|
||||
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
|
||||
throws Exception {
|
||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType);
|
||||
}
|
||||
|
||||
/**
 * Legacy overload without the import flags.
 * Delegates to the superclass {@code findRecord} with {@code -1} after
 * rootColumnGroup and the trailing flags fixed to {@code true, false, true};
 * by position these line up with trimStrings, storeEmptyStrings and
 * guessDataType of the full overload.
 *
 * @deprecated use the overload that takes the three flags explicitly
 * @throws Exception whatever the superclass call throws
 */
@Deprecated
|
||||
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup)
|
||||
throws Exception {
|
||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true);
|
||||
}
|
||||
|
||||
/**
 * Public pass-through to the superclass {@code processRecord}.
 * Every argument, including the three import flags, is forwarded unchanged.
 *
 * @throws Exception whatever the superclass call throws
 */
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup,
|
||||
boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
|
||||
throws Exception {
|
||||
super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
|
||||
}
|
||||
|
||||
/**
 * Legacy three-argument overload.
 * Delegates to the superclass {@code processRecord} with the trailing flags
 * fixed to {@code true, false, true}; by position these line up with
 * trimStrings, storeEmptyStrings and guessDataType of the full overload.
 *
 * @deprecated use the overload that takes the three flags explicitly
 * @throws Exception whatever the superclass call throws
 */
@Deprecated
|
||||
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup)
|
||||
throws Exception {
|
||||
super.processRecord(project, parser, rootColumnGroup, true, false, true);
|
||||
}
|
||||
|
||||
/**
 * Public pass-through to the superclass {@code addCell} overload that takes
 * an arbitrary {@code Serializable} cell value.
 *
 * @param commonStartingRowIndex accepted but not used: it is not passed on
 *                               to the superclass call
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) {
|
||||
super.addCell(project, columnGroup, record, columnLocalName, value);
|
||||
}
|
||||
|
||||
/**
 * Public pass-through to the superclass {@code addCell} overload that takes
 * a text value plus the trimStrings / storeEmptyStrings flags.
 *
 * @param commonStartingRowIndex accepted but not used: it is not passed on
 *                               to the superclass call
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) {
|
||||
super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings);
|
||||
}
|
||||
|
||||
/**
 * Legacy overload without the string-handling flags.
 * The flagged call passes {@code false, true}, which by position line up
 * with trimStrings and storeEmptyStrings of the full overload.
 * commonStartingRowIndex is accepted but not passed on.
 *
 * NOTE(review): two super.addCell calls appear below. This listing looks like
 * a flattened diff, so the five-argument call is presumably the removed line
 * and the seven-argument call its replacement -- verify against the actual
 * file; if both were live the cell would be added twice.
 *
 * @deprecated use the overload that takes trimStrings and storeEmptyStrings
 */
@Deprecated
|
||||
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) {
|
||||
super.addCell(project, columnGroup, record, columnLocalName, text);
|
||||
super.addCell(project, columnGroup, record, columnLocalName, text, false, true);
|
||||
}
|
||||
}
|
||||
|
@ -209,7 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
loadSampleXml();
|
||||
|
||||
String[] recordPath = new String[]{"library","book"};
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
|
||||
false);
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
@ -229,7 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
loadData(XmlImporterTests.getSampleWithVaryingStructure());
|
||||
|
||||
String[] recordPath = new String[]{"library", "book"};
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
|
||||
false);
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
|
@ -14,6 +14,12 @@
|
||||
<td><label for="$limit">Load at most</label></td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$limit">record(s) of data</label></td></tr>
|
||||
<!-- value is quoted so the trailing "/" is not parsed as part of the attribute value -->
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" value="true" /></td>
|
||||
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
|
||||
<!-- label targets the trim checkbox ($trim), not the guess-types checkbox -->
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
|
||||
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
|
||||
</table></div></td>
|
||||
|
@ -86,6 +86,11 @@ Refine.JsonParserUI.prototype.getOptions = function() {
|
||||
} else {
|
||||
options.limit = -1;
|
||||
}
|
||||
|
||||
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
|
||||
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
|
||||
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
|
||||
|
||||
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
||||
|
||||
return options;
|
||||
@ -103,6 +108,15 @@ Refine.JsonParserUI.prototype._initialize = function() {
|
||||
this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
|
||||
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
|
||||
}
|
||||
if (this._config.trimStrings) {
|
||||
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.guessCellValueTypes) {
|
||||
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.storeEmptyStrings) {
|
||||
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.includeFileSources) {
|
||||
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
|
||||
}
|
||||
|
@ -15,6 +15,12 @@
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$limit">record(s) of data</label></td></tr>
|
||||
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" /></td>
|
||||
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
|
||||
<!-- label targets the trim checkbox ($trim), not the guess-types checkbox -->
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
|
||||
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
|
||||
</table></div></td>
|
||||
|
@ -85,6 +85,10 @@ Refine.XmlParserUI.prototype.getOptions = function() {
|
||||
} else {
|
||||
options.limit = -1;
|
||||
}
|
||||
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
|
||||
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
|
||||
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
|
||||
|
||||
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
||||
|
||||
return options;
|
||||
@ -102,6 +106,15 @@ Refine.XmlParserUI.prototype._initialize = function() {
|
||||
this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
|
||||
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
|
||||
}
|
||||
if (this._config.trimStrings) {
|
||||
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.guessCellValueTypes) {
|
||||
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.storeEmptyStrings) {
|
||||
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
|
||||
}
|
||||
if (this._config.includeFileSources) {
|
||||
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user