Fix issue #512: save the file location as a table column

This commit is contained in:
jackyq2015 2015-08-27 15:13:20 -04:00
parent ee38f9edde
commit 4e6f584cde
5 changed files with 102 additions and 140 deletions

View File

@ -0,0 +1,28 @@
package com.google.refine.importers.tree;
/**
 * Groups the option values that the tree import routines hand from one
 * call to the next, so that they travel as a single argument.
 *
 * The fields are package-private and are read directly by code in this
 * package.
 */
public class ImportParameters {
    /** When true, text values are trimmed before being examined or stored. */
    boolean trimStrings;
    /** Forwarded to the cell-adding routine along with each text value. */
    boolean storeEmptyStrings;
    /** Forwarded to the cell-adding routine along with each text value. */
    boolean guessDataType;
    /** When true, the file source is written into each imported row. */
    boolean includeFileSources;
    /** The file source value to record; the empty string when none was given. */
    String fileSource;

    /**
     * Creates a parameter set with every option given explicitly.
     *
     * @param trimStrings value for {@link #trimStrings}
     * @param storeEmptyStrings value for {@link #storeEmptyStrings}
     * @param guessCellValueTypes value for {@link #guessDataType}
     * @param includeFileSources value for {@link #includeFileSources}
     * @param fileSource value for {@link #fileSource}
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
            boolean includeFileSources, String fileSource) {
        this.fileSource = fileSource;
        this.includeFileSources = includeFileSources;
        this.guessDataType = guessCellValueTypes;
        this.storeEmptyStrings = storeEmptyStrings;
        this.trimStrings = trimStrings;
    }

    /**
     * Creates a parameter set without file-source recording:
     * {@link #includeFileSources} is false and {@link #fileSource} is the
     * empty string.
     *
     * @param trimStrings value for {@link #trimStrings}
     * @param storeEmptyStrings value for {@link #storeEmptyStrings}
     * @param guessCellValueTypes value for {@link #guessDataType}
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
        this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
    }
}

View File

@ -48,7 +48,11 @@ import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
/**
@ -210,8 +214,22 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings,
storeEmptyStrings,guessCellValueTypes);
// copied from TabularImportingParserBase
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File";
if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
}
}
}
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource));
}
}

View File

@ -245,26 +245,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
return null;
}
@Deprecated
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit
) {
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true);
}
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameters
) {
if (logger.isTraceEnabled()) {
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
@ -273,7 +260,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,parameters);
}
}
} catch (TreeReaderException e) {
@ -282,18 +269,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
@Deprecated
static protected void findRecord(
Project project,
TreeReader parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
) throws TreeReaderException {
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true);
}
/**
*
* @param project
@ -310,9 +285,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameters
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
@ -331,7 +304,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && limit != 0) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,
parameters);
} else if (eventType == Token.EndEntity) {
break;
} else if (eventType == Token.Value) {
@ -340,13 +314,13 @@ public class XmlImportUtilities extends TreeImportUtilities {
String desiredFieldName = recordPath[pathIndex + 1];
String currentFieldName = parser.getFieldName();
if (desiredFieldName.equals(currentFieldName)) {
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType);
processFieldAsRecord(project, parser, rootColumnGroup,parameters);
}
}
}
}
} else {
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
processRecord(project, parser, rootColumnGroup, parameters);
}
} else {
skip(parser);
@ -364,17 +338,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
/**
* @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processRecord(project, parser, rootColumnGroup, true, false, true);
}
/**
* processRecord parses Tree data for a single element and its sub-elements,
@ -388,32 +351,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
}
ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType);
addImportRecordToProject(record, project);
processSubRecord(project, parser, rootColumnGroup, record, 0, parameter);
addImportRecordToProject(record, project, parameter.includeFileSources, parameter.fileSource);
}
/**
* @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true);
}
/**
* processFieldAsRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project
@ -426,9 +375,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
@ -437,10 +384,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
ImportRecord record = null;
if (value instanceof String) {
String text = (String) value;
if (trimStrings) {
if (parameter.trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
if (text.length() > 0 | !parameter.storeEmptyStrings) {
record = new ImportRecord();
addCell(
project,
@ -448,8 +395,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
record,
parser.getFieldName(),
(String) value,
storeEmptyStrings,
guessDataType
parameter.storeEmptyStrings,
parameter.guessDataType
);
}
} else {
@ -463,20 +410,25 @@ public class XmlImportUtilities extends TreeImportUtilities {
);
}
if (record != null) {
addImportRecordToProject(record, project);
addImportRecordToProject(record, project,
parameter.includeFileSources, parameter.fileSource);
}
}
static protected void addImportRecordToProject(ImportRecord record, Project project) {
static protected void addImportRecordToProject(ImportRecord record, Project project,
boolean includeFileSources, String fileSource) {
for (List<Cell> row : record.rows) {
if (row.size() > 0) {
Row realRow = null;
Row realRow = new Row(row.size()); ;
for (int c = 0; c < row.size(); c++) {
if (c == 0 && includeFileSources) { // to add the file source:
realRow.setCell(
0,
new Cell(fileSource, null));
continue;
}
Cell cell = row.get(c);
if (cell != null) {
if (realRow == null) {
realRow = new Row(row.size());
}
realRow.setCell(c, cell);
}
}
@ -486,19 +438,6 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
}
/**
* @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processSubRecord( Project project,
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record,
int level
) throws TreeReaderException {
processSubRecord(project, parser, columnGroup, record, level, true, false, true);
}
/**
*
@ -514,9 +453,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
ImportColumnGroup columnGroup,
ImportRecord record,
int level,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
@ -536,18 +473,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i);
if (trimStrings) {
if (parameter.trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
if (text.length() > 0 | !parameter.storeEmptyStrings) {
addCell(
project,
thisColumnGroup,
record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text,
storeEmptyStrings,
guessDataType
parameter.storeEmptyStrings,
parameter.guessDataType
);
}
}
@ -561,9 +498,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
thisColumnGroup,
record,
level+1,
trimStrings,
storeEmptyStrings,
guessDataType
parameter
);
} else if (//eventType == XMLStreamConstants.CDATA ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
@ -572,7 +507,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
if (value instanceof String) {
String text = (String) value;
addCell(project, thisColumnGroup, record, colName, text,
storeEmptyStrings, guessDataType);
parameter.storeEmptyStrings, parameter.guessDataType);
} else {
addCell(project, thisColumnGroup, record, colName, value);
}

View File

@ -37,6 +37,7 @@ import java.io.Serializable;
import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportParameters;
import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.XmlImportUtilities;
@ -49,42 +50,24 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
}
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
ImportRecord record, int level, ImportParameters parameter)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType);
}
@Deprecated
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup,
ImportRecord record, int level)
throws Exception {
super.processSubRecord(project, parser, columnGroup, record, level, false, true, false);
super.processSubRecord(project, parser, columnGroup, record, level, parameter);
}
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType);
}
@Deprecated
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex,
ImportColumnGroup rootColumnGroup)
throws Exception {
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true);
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1,
new ImportParameters(trimStrings, storeEmptyStrings, guessDataType));
}
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup,
boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
super.processRecord(project, parser, rootColumnGroup,
new ImportParameters(trimStrings, storeEmptyStrings, guessDataType));
}
@Deprecated
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup)
throws Exception {
super.processRecord(project, parser, rootColumnGroup, true, false, true);
}
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, value);
@ -93,9 +76,4 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) {
super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings);
}
@Deprecated
public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) {
super.addCell(project, columnGroup, record, columnLocalName, text, false, true);
}
}

View File

@ -53,6 +53,7 @@ import com.google.refine.importers.JsonImporter.JSONTreeReader;
import com.google.refine.importers.XmlImporter.XmlParser;
import com.google.refine.importers.tree.ImportColumn;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportParameters;
import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
@ -208,8 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml();
String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1,
new ImportParameters(false, true, false));
log(project);
assertProjectCreated(project, 0, 6);
@ -229,8 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true,
false);
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1,
new ImportParameters(false, true, false));
log(project);
assertProjectCreated(project, 0, 6);
@ -283,7 +284,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
int pathIndex = 0;
try {
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup,
false, false, false);
} catch (Exception e) {
Assert.fail();
}
@ -302,7 +304,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip();
try {
SUT.processRecordWrapper(project, parser, columnGroup);
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) {
Assert.fail();
}
@ -323,7 +325,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip();
try {
SUT.processRecordWrapper(project, parser, columnGroup);
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) {
Assert.fail();
}
@ -348,7 +350,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip();
try {
SUT.processRecordWrapper(project, parser, columnGroup);
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
} catch (Exception e) {
Assert.fail();
}
@ -372,7 +374,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
ParserSkip();
try {
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0);
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0,
new ImportParameters(false, false, false));
} catch (Exception e) {
Assert.fail();
}