fix issue #512 to save the file location as a table column

This commit is contained in:
jackyq2015 2015-08-27 15:13:20 -04:00
parent ee38f9edde
commit 4e6f584cde
5 changed files with 102 additions and 140 deletions

View File

@ -0,0 +1,28 @@
package com.google.refine.importers.tree;
/**
 * Groups the options that are handed down through the tree importer
 * as a single argument instead of a list of separate flags.
 *
 * Fields are package-visible and are read directly by the importer
 * classes in this package.
 */
public class ImportParameters {
    /** When true, text values are trimmed before they are stored. */
    boolean trimStrings;
    /** Flag controlling the handling of empty string values. */
    boolean storeEmptyStrings;
    /** Flag controlling whether cell value types are guessed. */
    boolean guessDataType;
    /** When true, the file source is recorded alongside the imported rows. */
    boolean includeFileSources;
    /** Name/location of the file being imported; empty when not supplied. */
    String fileSource;

    /**
     * Creates parameters without file-source tracking: the file source
     * flag is off and the file source is the empty string.
     *
     * @param trimStrings         value for {@link #trimStrings}
     * @param storeEmptyStrings   value for {@link #storeEmptyStrings}
     * @param guessCellValueTypes value for {@link #guessDataType}
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
        this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
    }

    /**
     * Creates fully specified parameters.
     *
     * @param trimStrings         value for {@link #trimStrings}
     * @param storeEmptyStrings   value for {@link #storeEmptyStrings}
     * @param guessCellValueTypes value for {@link #guessDataType}
     * @param includeFileSources  value for {@link #includeFileSources}
     * @param fileSource          value for {@link #fileSource}
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
            boolean includeFileSources, String fileSource) {
        this.fileSource = fileSource;
        this.includeFileSources = includeFileSources;
        this.guessDataType = guessCellValueTypes;
        this.storeEmptyStrings = storeEmptyStrings;
        this.trimStrings = trimStrings;
    }
}

View File

@ -48,7 +48,11 @@ import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase; import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
/** /**
@ -211,7 +215,21 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false); boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true); boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings, // copied from TabularImportingParserBase
storeEmptyStrings,guessCellValueTypes); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File";
if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
}
}
}
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource));
} }
} }

View File

@ -245,26 +245,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
return null; return null;
} }
@Deprecated
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit
) {
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true);
}
static public void importTreeData( static public void importTreeData(
TreeReader parser, TreeReader parser,
Project project, Project project,
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
int limit, int limit,
boolean trimStrings, ImportParameters parameters
boolean storeEmptyStrings,
boolean guessDataType
) { ) {
if (logger.isTraceEnabled()) { if (logger.isTraceEnabled()) {
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)"); logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
@ -273,7 +260,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext()) { while (parser.hasNext()) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,parameters);
} }
} }
} catch (TreeReaderException e) { } catch (TreeReaderException e) {
@ -282,18 +269,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
@Deprecated
static protected void findRecord(
Project project,
TreeReader parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
) throws TreeReaderException {
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true);
}
/** /**
* *
* @param project * @param project
@ -310,9 +285,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
int limit, int limit,
boolean trimStrings, ImportParameters parameters
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
if (logger.isTraceEnabled()) { if (logger.isTraceEnabled()) {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath)); logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
@ -331,7 +304,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && limit != 0) { while (parser.hasNext() && limit != 0) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,
parameters);
} else if (eventType == Token.EndEntity) { } else if (eventType == Token.EndEntity) {
break; break;
} else if (eventType == Token.Value) { } else if (eventType == Token.Value) {
@ -340,13 +314,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
String desiredFieldName = recordPath[pathIndex + 1]; String desiredFieldName = recordPath[pathIndex + 1];
String currentFieldName = parser.getFieldName(); String currentFieldName = parser.getFieldName();
if (desiredFieldName.equals(currentFieldName)) { if (desiredFieldName.equals(currentFieldName)) {
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType); processFieldAsRecord(project, parser, rootColumnGroup,parameters);
} }
} }
} }
} }
} else { } else {
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); processRecord(project, parser, rootColumnGroup, parameters);
} }
} else { } else {
skip(parser); skip(parser);
@ -364,17 +338,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
/**
* @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processRecord(project, parser, rootColumnGroup, true, false, true);
}
/** /**
* processRecord parses Tree data for a single element and its sub-elements, * processRecord parses Tree data for a single element and its sub-elements,
@ -388,29 +351,15 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
boolean trimStrings, ImportParameters parameter
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
if (logger.isTraceEnabled()) { if (logger.isTraceEnabled()) {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
} }
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType); processSubRecord(project, parser, rootColumnGroup, record, 0, parameter);
addImportRecordToProject(record, project); addImportRecordToProject(record, project, parameter.includeFileSources, parameter.fileSource);
}
/**
* @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true);
} }
@ -426,9 +375,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
boolean trimStrings, ImportParameters parameter
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
if (logger.isTraceEnabled()) { if (logger.isTraceEnabled()) {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
@ -437,10 +384,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
ImportRecord record = null; ImportRecord record = null;
if (value instanceof String) { if (value instanceof String) {
String text = (String) value; String text = (String) value;
if (trimStrings) { if (parameter.trimStrings) {
text = text.trim(); text = text.trim();
} }
if (text.length() > 0 | !storeEmptyStrings) { if (text.length() > 0 | !parameter.storeEmptyStrings) {
record = new ImportRecord(); record = new ImportRecord();
addCell( addCell(
project, project,
@ -448,8 +395,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
record, record,
parser.getFieldName(), parser.getFieldName(),
(String) value, (String) value,
storeEmptyStrings, parameter.storeEmptyStrings,
guessDataType parameter.guessDataType
); );
} }
} else { } else {
@ -463,20 +410,25 @@ public class XmlImportUtilities extends TreeImportUtilities {
); );
} }
if (record != null) { if (record != null) {
addImportRecordToProject(record, project); addImportRecordToProject(record, project,
parameter.includeFileSources, parameter.fileSource);
} }
} }
static protected void addImportRecordToProject(ImportRecord record, Project project) { static protected void addImportRecordToProject(ImportRecord record, Project project,
boolean includeFileSources, String fileSource) {
for (List<Cell> row : record.rows) { for (List<Cell> row : record.rows) {
if (row.size() > 0) { if (row.size() > 0) {
Row realRow = null; Row realRow = new Row(row.size()); ;
for (int c = 0; c < row.size(); c++) { for (int c = 0; c < row.size(); c++) {
if (c == 0 && includeFileSources) { // to add the file source:
realRow.setCell(
0,
new Cell(fileSource, null));
continue;
}
Cell cell = row.get(c); Cell cell = row.get(c);
if (cell != null) { if (cell != null) {
if (realRow == null) {
realRow = new Row(row.size());
}
realRow.setCell(c, cell); realRow.setCell(c, cell);
} }
} }
@ -487,19 +439,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
/**
* @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processSubRecord( Project project,
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record,
int level
) throws TreeReaderException {
processSubRecord(project, parser, columnGroup, record, level, true, false, true);
}
/** /**
* *
* @param project * @param project
@ -514,9 +453,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record, ImportRecord record,
int level, int level,
boolean trimStrings, ImportParameters parameter
boolean storeEmptyStrings,
boolean guessDataType
) throws TreeReaderException { ) throws TreeReaderException {
if (logger.isTraceEnabled()) { if (logger.isTraceEnabled()) {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup); logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
@ -536,18 +473,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
int attributeCount = parser.getAttributeCount(); int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) { for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i); String text = parser.getAttributeValue(i);
if (trimStrings) { if (parameter.trimStrings) {
text = text.trim(); text = text.trim();
} }
if (text.length() > 0 | !storeEmptyStrings) { if (text.length() > 0 | !parameter.storeEmptyStrings) {
addCell( addCell(
project, project,
thisColumnGroup, thisColumnGroup,
record, record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text, text,
storeEmptyStrings, parameter.storeEmptyStrings,
guessDataType parameter.guessDataType
); );
} }
} }
@ -561,9 +498,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
thisColumnGroup, thisColumnGroup,
record, record,
level+1, level+1,
trimStrings, parameter
storeEmptyStrings,
guessDataType
); );
} else if (//eventType == XMLStreamConstants.CDATA || } else if (//eventType == XMLStreamConstants.CDATA ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) { eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
@ -572,7 +507,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
if (value instanceof String) { if (value instanceof String) {
String text = (String) value; String text = (String) value;
addCell(project, thisColumnGroup, record, colName, text, addCell(project, thisColumnGroup, record, colName, text,
storeEmptyStrings, guessDataType); parameter.storeEmptyStrings, parameter.guessDataType);
} else { } else {
addCell(project, thisColumnGroup, record, colName, value); addCell(project, thisColumnGroup, record, colName, value);
} }

View File

@ -37,6 +37,7 @@ import java.io.Serializable;
import java.util.List; import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportParameters;
import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.XmlImportUtilities; import com.google.refine.importers.tree.XmlImportUtilities;
@ -49,41 +50,23 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
} }
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) ImportRecord record, int level, ImportParameters parameter)
throws Exception { throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType); super.processSubRecord(project, parser, columnGroup, record, level, parameter);
}
@Deprecated
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, false, true, false);
} }
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception { throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType); super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1,
} new ImportParameters(trimStrings, storeEmptyStrings, guessDataType));
@Deprecated
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true);
} }
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup, public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup,
boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception { throws Exception {
super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); super.processRecord(project, parser, rootColumnGroup,
} new ImportParameters(trimStrings, storeEmptyStrings, guessDataType));
@Deprecated
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, true, false, true);
} }
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) { public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) {
@ -93,9 +76,4 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) { public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) {
super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings); super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings);
} }
@Deprecated
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, text, false, true);
}
} }

View File

@ -53,6 +53,7 @@ import com.google.refine.importers.JsonImporter.JSONTreeReader;
import com.google.refine.importers.XmlImporter.XmlParser; import com.google.refine.importers.XmlImporter.XmlParser;
import com.google.refine.importers.tree.ImportColumn; import com.google.refine.importers.tree.ImportColumn;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportParameters;
import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException; import com.google.refine.importers.tree.TreeReaderException;
@ -208,8 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml(); loadSampleXml();
String[] recordPath = new String[]{"library","book"}; String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1,
false); new ImportParameters(false, true, false));
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);
@ -229,8 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadData(XmlImporterTests.getSampleWithVaryingStructure()); loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"}; String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1,
false); new ImportParameters(false, true, false));
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);
@ -283,7 +284,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
int pathIndex = 0; int pathIndex = 0;
try { try {
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup); SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup,
false, false, false);
} catch (Exception e) { } catch (Exception e) {
Assert.fail(); Assert.fail();
} }
@ -302,7 +304,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip(); ParserSkip();
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) { } catch (Exception e) {
Assert.fail(); Assert.fail();
} }
@ -323,7 +325,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip(); ParserSkip();
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) { } catch (Exception e) {
Assert.fail(); Assert.fail();
} }
@ -348,7 +350,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip(); ParserSkip();
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) { } catch (Exception e) {
Assert.fail(); Assert.fail();
} }
@ -372,7 +374,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip(); ParserSkip();
try { try {
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0); SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0,
new ImportParameters(false, false, false));
} catch (Exception e) { } catch (Exception e) {
Assert.fail(); Assert.fail();
} }