FIXED - task 578 & 596: Clean up JSON importer

http://code.google.com/p/google-refine/issues/detail?id=578
http://code.google.com/p/google-refine/issues/detail?id=596

Extend the tree parser framework to allow any Serializable value instead of just Strings. Use this in the JSON importer to: import the keywords null, true, and false; import empty strings and stop trimming whitespace from strings on import; and import numbers directly instead of importing them as text and then parsing them ourselves. Add tests to verify all of this.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2543 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2012-09-08 01:20:25 +00:00
parent 9f7d0bc057
commit b3f5fada95
17 changed files with 432 additions and 70 deletions

View File

@ -23,7 +23,7 @@
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jackson-core-asl-1.9.9.jar" sourcepath="main/webapp/WEB-INF/lib-src/jackson-src-1.9.9.zip"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/json-20100208.jar" sourcepath="main/webapp/WEB-INF/lib-src/json-20100208-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/>

View File

@ -38,11 +38,13 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
import java.util.List;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonParser.NumberType;
import org.codehaus.jackson.JsonToken;
import org.json.JSONArray;
import org.json.JSONObject;
@ -114,9 +116,9 @@ public class JsonImporter extends TreeImportingParserBase {
case VALUE_STRING:
return parser.getText();
case VALUE_NUMBER_INT:
return Integer.valueOf(parser.getIntValue());
return Long.valueOf(parser.getLongValue());
case VALUE_NUMBER_FLOAT:
return Float.valueOf(parser.getFloatValue());
return Double.valueOf(parser.getDoubleValue());
case VALUE_TRUE:
return Boolean.TRUE;
case VALUE_FALSE:
@ -215,7 +217,7 @@ public class JsonImporter extends TreeImportingParserBase {
private JsonToken current = null;
private JsonToken next = null;
private String fieldName = ANONYMOUS;
private String fieldValue = null;
private Serializable fieldValue = null;
public JSONTreeReader(Reader reader) {
@ -284,14 +286,52 @@ public class JsonImporter extends TreeImportingParserBase {
/**
 * Returns the value of the current token rendered as text.
 *
 * @return the string form of the current value, or {@code null} when no
 *         value is held (the current token is not a scalar, or it is a
 *         JSON {@code null})
 * @throws TreeReaderException declared by the {@code TreeReader} contract
 */
@Override
public String getFieldValue() throws TreeReaderException {
    // fieldValue is reset to null for non-scalar tokens and for JSON null,
    // so calling toString() unconditionally would throw a NullPointerException.
    return fieldValue == null ? null : fieldValue.toString();
}
/**
 * Returns the value captured for the current token as a typed object
 * rather than as text.
 *
 * @return the content of {@code fieldValue}; may be {@code null}
 */
@Override
public Serializable getValue()
throws TreeReaderException {
return fieldValue;
}
/**
 * Reports whether another token is available.
 *
 * @return {@code true} when the {@code next} field is non-null
 */
@Override
public boolean hasNext() {
return next != null;
}
/**
 * Converts the scalar JSON token the parser is positioned on into a Java value.
 * <ul>
 * <li>strings are returned as {@link String}</li>
 * <li>integral numbers reported as INT or LONG are returned as {@link Long};
 *     any other integral type is returned as whatever the parser's
 *     {@code getNumberValue()} yields</li>
 * <li>floating point numbers are returned as {@link Float} or {@link Double}
 *     according to the parser's number type, otherwise as
 *     {@code getNumberValue()}</li>
 * <li>{@code true} / {@code false} are returned as {@link Boolean}</li>
 * <li>JSON {@code null}, a {@code null} token, and every non-scalar token
 *     yield {@code null}</li>
 * </ul>
 *
 * @param parser parser positioned on the token to convert
 * @param token the token type the parser is positioned on; may be {@code null}
 * @return the converted value, or {@code null}
 * @throws IOException if the parser fails while reading the value
 */
private Serializable getValue(JsonParser parser, JsonToken token) throws IOException {
    if (token == null) {
        return null;
    }
    if (token == JsonToken.VALUE_STRING) {
        return parser.getText();
    }
    if (token == JsonToken.VALUE_NUMBER_INT) {
        final NumberType integralType = parser.getNumberType();
        final boolean fitsInLong = integralType == NumberType.INT || integralType == NumberType.LONG;
        return fitsInLong ? Long.valueOf(parser.getLongValue()) : parser.getNumberValue();
    }
    if (token == JsonToken.VALUE_NUMBER_FLOAT) {
        final NumberType fractionalType = parser.getNumberType();
        if (fractionalType == NumberType.FLOAT) {
            return Float.valueOf(parser.getFloatValue());
        }
        if (fractionalType == NumberType.DOUBLE) {
            return Double.valueOf(parser.getDoubleValue());
        }
        return parser.getNumberValue();
    }
    if (token == JsonToken.VALUE_TRUE) {
        return Boolean.TRUE;
    }
    if (token == JsonToken.VALUE_FALSE) {
        return Boolean.FALSE;
    }
    // VALUE_NULL, END_ARRAY and every other token carry no value
    return null;
}
@Override
public Token next() throws TreeReaderException {
JsonToken previous = current;
@ -300,7 +340,7 @@ public class JsonImporter extends TreeImportingParserBase {
try {
if (current != null) {
if (current.isScalarValue()) {
fieldValue = parser.getText();
fieldValue = getValue(parser,current);
} else {
fieldValue = null;
}

View File

@ -38,6 +38,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.Serializable;
import java.util.List;
import javax.xml.stream.XMLInputFactory;
@ -287,7 +288,13 @@ public class XmlImporter extends TreeImportingParserBase {
public String getFieldValue(){
return parser.getText();
}
/**
 * Returns the current value as a typed object. For this reader it is the
 * same text that {@link #getFieldValue()} returns.
 *
 * @return the current text value
 */
@Override
public Serializable getValue() {
// XML parser only does string types
return getFieldValue();
}
@Override
public int getAttributeCount(){
return parser.getAttributeCount();

View File

@ -114,6 +114,7 @@ public abstract class TreeImportUtilities {
}
}
@Deprecated
static protected void addCell(
Project project,
ImportColumnGroup columnGroup,
@ -121,12 +122,30 @@ public abstract class TreeImportUtilities {
String columnLocalName,
String text
) {
if (text == null || (text).isEmpty()) {
addCell(project, columnGroup, record, columnLocalName, text, true, true);
}
/**
 * Adds a text cell to the record, optionally skipping empty text and
 * optionally converting the text through
 * {@code ImporterUtilities.parseCellValue}.
 *
 * @param project project the cell belongs to
 * @param columnGroup column group that owns the target column
 * @param record record being assembled
 * @param columnLocalName local name of the target column
 * @param text cell text; may be {@code null} or empty
 * @param storeEmptyString when {@code false}, {@code null} or empty text is dropped
 * @param guessDataType when {@code true}, the text is passed through
 *        {@code ImporterUtilities.parseCellValue} before being stored
 */
static protected void addCell(
        Project project,
        ImportColumnGroup columnGroup,
        ImportRecord record,
        String columnLocalName,
        String text,
        boolean storeEmptyString,
        boolean guessDataType
) {
    final boolean nothingToStore = text == null || text.isEmpty();
    if (nothingToStore && !storeEmptyString) {
        return;
    }
    final Serializable value = guessDataType ? ImporterUtilities.parseCellValue(text) : text;
    addCell(project, columnGroup, record, columnLocalName, value);
}
Serializable value = ImporterUtilities.parseCellValue(text);
protected static void addCell(Project project, ImportColumnGroup columnGroup, ImportRecord record,
String columnLocalName, Serializable value) {
ImportColumn column = getColumn(project, columnGroup, columnLocalName);
int cellIndex = column.cellIndex;

View File

@ -45,8 +45,8 @@ import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
@ -55,19 +55,23 @@ import com.google.refine.util.JSONUtilities;
* Abstract class for importer parsers which handle tree-shaped data
* (currently XML & JSON).
*/
abstract public class TreeImportingParserBase implements ImportingParser {
final protected boolean useInputStream;
protected TreeImportingParserBase(boolean useInputStream) {
this.useInputStream = useInputStream;
abstract public class TreeImportingParserBase extends ImportingParserBase {
protected TreeImportingParserBase(final boolean useInputStream) {
super(useInputStream);
}
@Override
public JSONObject createParserUIInitializationData(ImportingJob job,
List<JSONObject> fileRecords, String format) {
JSONObject options = new JSONObject();
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
JSONUtilities.safePut(options, "trimStrings", false);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
return options;
}
@Override
public void parse(Project project, ProjectMetadata metadata,
@ -200,6 +204,14 @@ abstract public class TreeImportingParserBase implements ImportingParser {
if (limit2 == 0) { // shouldn't really happen, but be sure since 0 is stop signal
limit2 = -1;
}
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2);
// NOTE: these defaults are solely to preserve historical behavior.
// All new code should override them to keep input data from being modified
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings,
storeEmptyStrings,guessCellValueTypes);
}
}

View File

@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree;
import java.io.Serializable;
/**
* Interface for all tree-shaped parsers.
*
@ -54,8 +56,12 @@ public interface TreeReader {
public String getFieldName() throws TreeReaderException;
public String getPrefix();
/**
 * Returns the current value as text.
 * NOTE(review): presumably superseded by {@link #getValue()}, which is
 * declared next to it and returns a typed value - confirm.
 */
@Deprecated
public String getFieldValue() throws TreeReaderException;
/**
 * Returns the current value as a {@link Serializable} object, so readers
 * are not limited to returning text.
 */
public Serializable getValue() throws TreeReaderException;
public int getAttributeCount();
public String getAttributeValue(int index);
public String getAttributePrefix(int index);

View File

@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@ -41,6 +42,8 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import javax.servlet.ServletException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -235,21 +238,33 @@ public class XmlImportUtilities extends TreeImportUtilities {
return null;
}
/**
 * Imports tree data using the historical string handling: trim strings,
 * drop empty strings, and guess cell data types.
 *
 * @deprecated use {@link #importTreeData(TreeReader, Project, String[], ImportColumnGroup, int, boolean, boolean, boolean)}
 */
@Deprecated
static public void importTreeData(
        TreeReader parser,
        Project project,
        String[] recordPath,
        ImportColumnGroup rootColumnGroup,
        int limit
) {
    final boolean trimStrings = true;
    final boolean storeEmptyStrings = false;
    final boolean guessDataType = true;
    importTreeData(parser, project, recordPath, rootColumnGroup, limit,
            trimStrings, storeEmptyStrings, guessDataType);
}
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) {
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
try {
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--);
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
}
}
} catch (TreeReaderException e) {
@ -258,7 +273,17 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
/**
 * Locates records using the historical string handling: trim strings,
 * drop empty strings, and guess cell data types.
 *
 * @deprecated use the overload that takes explicit {@code trimStrings},
 *             {@code storeEmptyStrings} and {@code guessDataType} flags
 */
@Deprecated
static protected void findRecord(
        Project project,
        TreeReader parser,
        String[] recordPath,
        int pathIndex,
        ImportColumnGroup rootColumnGroup,
        int limit
) throws TreeReaderException {
    final boolean trimStrings = true;
    final boolean storeEmptyStrings = false;
    final boolean guessDataType = true;
    findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit,
            trimStrings, storeEmptyStrings, guessDataType);
}
/**
*
@ -275,7 +300,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
@ -293,7 +321,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && limit != 0) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--);
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
} else if (eventType == Token.EndEntity) {
break;
} else if (eventType == Token.Value) {
@ -302,13 +330,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
String desiredFieldName = recordPath[pathIndex + 1];
String currentFieldName = parser.getFieldName();
if (desiredFieldName.equals(currentFieldName)) {
processFieldAsRecord(project, parser, rootColumnGroup);
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType);
}
}
}
}
} else {
processRecord(project, parser, rootColumnGroup);
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
}
} else {
skip(parser);
@ -326,6 +354,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
/**
 * Processes a single record using the historical string handling: trim
 * strings, drop empty strings, and guess cell data types.
 *
 * @deprecated since 2012-09-07 (tfmorris) - use
 *             {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processRecord(
        Project project,
        TreeReader parser,
        ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
    final boolean trimStrings = true;
    final boolean storeEmptyStrings = false;
    final boolean guessDataType = true;
    processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
}
/**
* processRecord parses Tree data for a single element and it's sub-elements,
* adding the parsed data as a row to the project
@ -337,15 +377,31 @@ public class XmlImportUtilities extends TreeImportUtilities {
static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record, 0);
processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType);
addImportRecordToProject(record, project);
}
/**
 * Processes a single field as a record using the historical string
 * handling: trim strings, drop empty strings, and guess cell data types.
 *
 * @deprecated since 2012-09-07 (tfmorris) - use
 *             {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processFieldAsRecord(
        Project project,
        TreeReader parser,
        ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
    final boolean trimStrings = true;
    final boolean storeEmptyStrings = false;
    final boolean guessDataType = true;
    processFieldAsRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
}
/**
* processFieldAsRecord parses Tree data for a single element and it's sub-elements,
* adding the parsed data as a row to the project
@ -357,20 +413,43 @@ public class XmlImportUtilities extends TreeImportUtilities {
static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
String text = parser.getFieldValue().trim();
if (text.length() > 0) {
ImportRecord record = new ImportRecord();
Serializable value = parser.getValue();
ImportRecord record = null;
if (value instanceof String) {
String text = (String) value;
if (trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
record = new ImportRecord();
addCell(
project,
rootColumnGroup,
record,
parser.getFieldName(),
(String) value,
storeEmptyStrings,
guessDataType
);
}
} else {
record = new ImportRecord();
addCell(
project,
rootColumnGroup,
record,
parser.getFieldName(),
text
value
);
}
if (record != null) {
addImportRecordToProject(record, project);
}
}
@ -396,6 +475,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
/**
 * Processes a sub-record using the historical string handling: trim
 * strings, drop empty strings, and guess cell data types.
 *
 * @deprecated (tfmorris) use
 *             {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
 */
@Deprecated
static protected void processSubRecord(
        Project project,
        TreeReader parser,
        ImportColumnGroup columnGroup,
        ImportRecord record,
        int level
) throws TreeReaderException {
    final boolean trimStrings = true;
    final boolean storeEmptyStrings = false;
    final boolean guessDataType = true;
    processSubRecord(project, parser, columnGroup, record, level,
            trimStrings, storeEmptyStrings, guessDataType);
}
/**
*
* @param project
@ -409,7 +501,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record,
int level
int level,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
@ -426,14 +521,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i).trim();
if (text.length() > 0) {
String text = parser.getAttributeValue(i);
if (trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
addCell(
project,
thisColumnGroup,
record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text
text,
storeEmptyStrings,
guessDataType
);
}
}
@ -446,23 +546,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
parser,
thisColumnGroup,
record,
level+1
level+1,
trimStrings,
storeEmptyStrings,
guessDataType
);
} else if (//eventType == XMLStreamConstants.CDATA ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getFieldValue();
Serializable value = parser.getValue();
String colName = parser.getFieldName();
if(text != null){
text = text.trim();
if (text.length() > 0) {
addCell(
project,
thisColumnGroup,
record,
colName,
text
);
}
if (value instanceof String) {
String text = (String) value;
addCell(project, thisColumnGroup, record, colName, text,
storeEmptyStrings, guessDataType);
} else {
addCell(project, thisColumnGroup, record, colName, value);
}
} else if (eventType == Token.EndEntity) {
break;

View File

@ -93,8 +93,12 @@ public class Cell implements HasFields, Jsonizable {
writer.key("t"); writer.value("date");
} else if (value instanceof Double
&& (((Double)value).isNaN() || ((Double)value).isInfinite())) {
// TODO: Skip? Write as string?
// write as a string
writer.value(((Double)value).toString());
} else if (value instanceof Float
&& (((Float)value).isNaN() || ((Float)value).isInfinite())) {
// TODO: Skip? Write as string?
writer.value(((Float)value).toString());
} else {
writer.value(value);
}

View File

@ -75,6 +75,20 @@ public class RefineTest {
Assert.assertEquals(project.rows.size(), numRows);
}
/**
 * Check that a project was created with the appropriate number of columns, rows, and records.
 * Also asserts that the project's record model is not null.
 *
 * @param project project to check
 * @param numCols expected column count
 * @param numRows expected row count
 * @param numRecords expected record count
 */
public static void assertProjectCreated(Project project, int numCols, int numRows, int numRecords) {
assertProjectCreated(project,numCols,numRows);
Assert.assertNotNull(project.recordModel);
Assert.assertEquals(project.recordModel.getRecordCount(),numRecords);
}
public void log(Project project) {
// some quick and dirty debugging
StringBuilder sb = new StringBuilder();

View File

@ -151,7 +151,7 @@ abstract class ImporterTest extends RefineTest {
Project project, ImportColumnGroup rootColumnGroup, List<Exception> exceptions) {
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
project.columnModel.update();
project.update();
for (Exception e : exceptions) {
e.printStackTrace();

View File

@ -295,9 +295,78 @@ public class JsonImporterTests extends ImporterTest {
}
}
/**
 * Imports the data-type sample and verifies that empty strings, null,
 * booleans, integers and floating point numbers arrive as typed values.
 */
@Test
public void testJsonDatatypes(){
    RunTest(getSampleWithDataTypes());
    log(project);
    assertProjectCreated(project, 2, 21,4);

    Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
    Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");

    // Third cell of the second record: make sure empty strings are preserved
    Row current = project.rows.get(8);
    Assert.assertNotNull(current);
    Assert.assertEquals(current.cells.size(),2);
    Assert.assertEquals(current.cells.get(1).value,"");

    // Last record holds: null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 (rows 12..20)
    current = project.rows.get(12);
    Assert.assertNotNull(current);
    Assert.assertEquals(current.cells.size(),2);
    Assert.assertNull(current.cells.get(1).value);

    // Rows 13..19: values that only need an equality check
    final Object[] expectedTypedValues = {
        Boolean.TRUE,
        Boolean.FALSE,
        Long.valueOf(0),
        Long.valueOf(1),
        Double.parseDouble("-2.1"),
        Double.valueOf((double)0.23),
        Double.valueOf((double)-0.24)
    };
    for (int offset = 0; offset < expectedTypedValues.length; offset++) {
        current = project.rows.get(13 + offset);
        Assert.assertNotNull(current);
        Assert.assertEquals(current.cells.size(),2);
        Assert.assertEquals(current.cells.get(1).value, expectedTypedValues[offset]);
    }

    // Row 20: the large exponent must be a real number, not NaN
    current = project.rows.get(20);
    Assert.assertNotNull(current);
    Assert.assertEquals(current.cells.size(),2);
    Assert.assertFalse(Double.isNaN((Double) current.cells.get(1).value));
    Assert.assertEquals(current.cells.get(1).value,Double.valueOf((double)3.14e100));

    // TODO: check data types
}
//------------helper methods---------------
public static String getTypicalElement(int id){
private static String getTypicalElement(int id){
return "{ \"id\" : " + id + "," +
"\"author\" : \"Author " + id + ", The\"," +
"\"title\" : \"Book title " + id + "\"," +
@ -305,7 +374,7 @@ public class JsonImporterTests extends ImporterTest {
"}";
}
public static String getElementWithDuplicateSubElement(int id){
private static String getElementWithDuplicateSubElement(int id){
return "{ \"id\" : " + id + "," +
"\"authors\":[" +
"{\"name\" : \"Author " + id + ", The\"}," +
@ -316,7 +385,7 @@ public class JsonImporterTests extends ImporterTest {
"}";
}
public static String getSample(){
static String getSample(){
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
@ -329,7 +398,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
public static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) {
private static JSONObject getOptions(ImportingJob job, TreeImportingParserBase parser) {
JSONObject options = parser.createParserUIInitializationData(
job, new LinkedList<JSONObject>(), "text/json");
@ -338,10 +407,14 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.append(path, JsonImporter.ANONYMOUS);
JSONUtilities.safePut(options, "recordPath", path);
JSONUtilities.safePut(options, "trimStrings", false);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
return options;
}
public static String getSampleWithDuplicateNestedElements(){
private static String getSampleWithDuplicateNestedElements(){
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
@ -354,7 +427,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
public static String getSampleWithLineBreak(){
private static String getSampleWithLineBreak(){
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 4; i++){
@ -373,7 +446,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
public static String getSampleWithVaryingStructure(){
private static String getSampleWithVaryingStructure(){
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 6; i++){
@ -390,7 +463,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
public static String getSampleWithTreeStructure(){
private static String getSampleWithTreeStructure(){
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
@ -407,6 +480,18 @@ public class JsonImporterTests extends ImporterTest {
sb.append("]");
return sb.toString();
}
/**
 * Builds a JSON array of three records: two with string-only cells (the
 * second containing an empty string) and one whose id is null and whose
 * cells hold null, booleans, integers and floating point numbers.
 *
 * @return the JSON sample text
 */
private static String getSampleWithDataTypes() {
    int nextId = 1;
    final String stringRecord =
            "{\"id\":" + nextId++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n";
    final String emptyStringRecord =
            "{\"id\":" + nextId++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n";
    final String typedRecord =
            "{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n";
    return "[" + stringRecord + emptyStringRecord + typedRecord + "]";
}
private void RunTest(String testString) {
RunTest(testString, getOptions(job, SUT));

View File

@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import java.io.Serializable;
import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup;
@ -47,19 +48,54 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
return super.detectRecordElement(parser, tag);
}
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record, int level) throws Exception{
super.processSubRecord(project, parser, columnGroup, record, level);
/**
 * Exposes the protected {@code processSubRecord} overload that takes
 * explicit string-handling flags, passing every argument through unchanged.
 */
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType);
}
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1);
/**
 * Calls {@code processSubRecord} with fixed flags: no trimming, empty
 * strings stored, no data type guessing.
 * NOTE(review): the deprecated findRecordWrapper and processRecordWrapper
 * in this stub pass (true, false, true) instead - confirm that the
 * (false, true, false) used here is intentional.
 */
@Deprecated
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, false, true, false);
}
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{
super.processRecord(project, parser, rootColumnGroup);
/**
 * Exposes the protected {@code findRecord} overload that takes explicit
 * string-handling flags. The limit argument is always passed as -1.
 */
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType);
}
/**
 * Calls {@code findRecord} with a limit of -1 and fixed flags: trim
 * strings, do not store empty strings, guess data types.
 */
@Deprecated
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true);
}
/**
 * Exposes the protected {@code processRecord} overload that takes
 * explicit string-handling flags, passing every argument through unchanged.
 */
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup,
boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
}
/**
 * Calls {@code processRecord} with fixed flags: trim strings, do not
 * store empty strings, guess data types.
 */
@Deprecated
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, true, false, true);
}
/**
 * Exposes the protected {@code addCell} overload that stores an already
 * typed value. The {@code commonStartingRowIndex} argument is accepted
 * but not used by this wrapper.
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, value);
}
/**
 * Exposes the protected {@code addCell} overload that takes a text value
 * and two boolean flags. The {@code commonStartingRowIndex} argument is
 * accepted but not used by this wrapper.
 * NOTE(review): the flags are named trimStrings/storeEmptyStrings here,
 * but the TreeImportUtilities.addCell overload with this arity names them
 * storeEmptyString/guessDataType - confirm the intended mapping.
 */
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) {
super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings);
}
@Deprecated
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, text);
super.addCell(project, columnGroup, record, columnLocalName, text, false, true);
}
}

View File

@ -209,7 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml();
String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
log(project);
assertProjectCreated(project, 0, 6);
@ -229,7 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
log(project);
assertProjectCreated(project, 0, 6);

View File

@ -14,6 +14,12 @@
<td><label for="$limit">Load at most</label></td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
<label for="$limit">record(s) of data</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" value=true/></td>
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
</table></div></td>

View File

@ -86,6 +86,11 @@ Refine.JsonParserUI.prototype.getOptions = function() {
} else {
options.limit = -1;
}
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
return options;
@ -103,6 +108,15 @@ Refine.JsonParserUI.prototype._initialize = function() {
this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
}
if (this._config.trimStrings) {
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
}
if (this._config.guessCellValueTypes) {
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
}
if (this._config.storeEmptyStrings) {
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
}
if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
}

View File

@ -15,6 +15,12 @@
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
<label for="$limit">record(s) of data</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeEmptyStringsCheckbox" id="$store-empty-strings" /></td>
<td colspan="2"><label for="$store-empty-strings">Preserve empty strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim">Trim leading &amp; trailing whitespace from strings</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
<td><label for="$guess">Parse cell text into<br/>numbers, dates, ...</label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
<td colspan="2"><label for="$include-file-sources">Store file source (file names, URLs) in each row</label></td></tr>
</table></div></td>

View File

@ -85,6 +85,10 @@ Refine.XmlParserUI.prototype.getOptions = function() {
} else {
options.limit = -1;
}
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
options.storeEmptyStrings = this._optionContainerElmts.storeEmptyStringsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
return options;
@ -102,6 +106,15 @@ Refine.XmlParserUI.prototype._initialize = function() {
this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
}
if (this._config.trimStrings) {
this._optionContainerElmts.trimStringsCheckbox.attr("checked", "checked");
}
if (this._config.guessCellValueTypes) {
this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
}
if (this._config.storeEmptyStrings) {
this._optionContainerElmts.storeEmptyStringsCheckbox.attr("checked", "checked");
}
if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
}