From 4e6f584cdee2bf7636b877737f49e0ddae11e797 Mon Sep 17 00:00:00 2001 From: jackyq2015 Date: Thu, 27 Aug 2015 15:13:20 -0400 Subject: [PATCH] fix issue #512 to save the file location as a table column --- .../importers/tree/ImportParameters.java | 28 ++++ .../tree/TreeImportingParserBase.java | 24 +++- .../importers/tree/XmlImportUtilities.java | 133 +++++------------- .../importers/XmlImportUtilitiesStub.java | 36 +---- .../importers/XmlImportUtilitiesTests.java | 21 +-- 5 files changed, 102 insertions(+), 140 deletions(-) create mode 100644 main/src/com/google/refine/importers/tree/ImportParameters.java diff --git a/main/src/com/google/refine/importers/tree/ImportParameters.java b/main/src/com/google/refine/importers/tree/ImportParameters.java new file mode 100644 index 000000000..141bcdcfb --- /dev/null +++ b/main/src/com/google/refine/importers/tree/ImportParameters.java @@ -0,0 +1,28 @@ +package com.google.refine.importers.tree; + + +public class ImportParameters { + boolean trimStrings; + boolean storeEmptyStrings; + boolean guessDataType; + boolean includeFileSources; + String fileSource; + + public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes, + boolean includeFileSources, String fileSource) { + this.trimStrings = trimStrings; + this.storeEmptyStrings = storeEmptyStrings; + this.guessDataType = guessCellValueTypes; + this.includeFileSources = includeFileSources; + this.fileSource = fileSource; + } + + public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) { + this.trimStrings = trimStrings; + this.storeEmptyStrings = storeEmptyStrings; + this.guessDataType = guessCellValueTypes; + this.includeFileSources = false; + this.fileSource = ""; + } + +} \ No newline at end of file diff --git a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java index 866657d94..dcf7b3d5d 100644 --- a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java +++ b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java @@ -48,7 +48,11 @@ import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress; import com.google.refine.importers.ImportingParserBase; import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingUtilities; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; import com.google.refine.model.Project; +import com.google.refine.model.Row; import com.google.refine.util.JSONUtilities; /** @@ -210,8 +214,22 @@ abstract public class TreeImportingParserBase extends ImportingParserBase { boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true); boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false); boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true); - - XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings, - storeEmptyStrings,guessCellValueTypes); + + // copied from TabularImportingParserBase + boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); + String fileNameColumnName = "File"; + if (includeFileSources) { + if (project.columnModel.getColumnByName(fileNameColumnName) == null) { + try { + project.columnModel.addColumn( + 0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false); + } catch (ModelException e) { + // Ignore: We already checked for duplicate name. + } + } + } + + XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, + new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource)); } } diff --git a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java index 90401100d..65b287c81 100644 --- a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java @@ -245,26 +245,13 @@ public class XmlImportUtilities extends TreeImportUtilities { return null; } - @Deprecated - static public void importTreeData( - TreeReader parser, - Project project, - String[] recordPath, - ImportColumnGroup rootColumnGroup, - int limit - ) { - importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true); - } - static public void importTreeData( TreeReader parser, Project project, String[] recordPath, ImportColumnGroup rootColumnGroup, int limit, - boolean trimStrings, - boolean storeEmptyStrings, - boolean guessDataType + ImportParameters parameters ) { if (logger.isTraceEnabled()) { logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)"); @@ -273,7 +260,7 @@ public class XmlImportUtilities extends TreeImportUtilities { while (parser.hasNext()) { Token eventType = parser.next(); if (eventType == Token.StartEntity) { - findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); + findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,parameters); } } } catch (TreeReaderException e) { @@ -282,18 +269,6 @@ public class XmlImportUtilities extends TreeImportUtilities { } } - @Deprecated - static protected void findRecord( - Project project, - TreeReader parser, - String[] recordPath, - int pathIndex, - ImportColumnGroup rootColumnGroup, - int limit - ) throws TreeReaderException { - findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true); - } - /** * * @param project @@ -310,9 +285,7 @@ public class XmlImportUtilities extends TreeImportUtilities { int pathIndex, ImportColumnGroup rootColumnGroup, int limit, - boolean trimStrings, - boolean storeEmptyStrings, - boolean guessDataType + ImportParameters parameters ) throws TreeReaderException { if (logger.isTraceEnabled()) { logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath)); @@ -331,7 +304,8 @@ public class XmlImportUtilities extends TreeImportUtilities { while (parser.hasNext() && limit != 0) { Token eventType = parser.next(); if (eventType == Token.StartEntity) { - findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); + findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--, + parameters); } else if (eventType == Token.EndEntity) { break; } else if (eventType == Token.Value) { @@ -340,13 +314,13 @@ public class XmlImportUtilities extends TreeImportUtilities { String desiredFieldName = recordPath[pathIndex + 1]; String currentFieldName = parser.getFieldName(); if (desiredFieldName.equals(currentFieldName)) { - processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType); + processFieldAsRecord(project, parser, rootColumnGroup,parameters); } } } } } else { - processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); + processRecord(project, parser, rootColumnGroup, parameters); } } else { skip(parser); @@ -364,17 +338,6 @@ public class XmlImportUtilities extends TreeImportUtilities { } } - /** - * @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} - */ - @Deprecated - static protected void processRecord( - Project project, - TreeReader parser, - ImportColumnGroup rootColumnGroup - ) throws TreeReaderException { - processRecord(project, parser, rootColumnGroup, true, false, true); - } /** * processRecord parses Tree data for a single element and it's sub-elements, @@ -388,32 +351,18 @@ public class XmlImportUtilities extends TreeImportUtilities { Project project, TreeReader parser, ImportColumnGroup rootColumnGroup, - boolean trimStrings, - boolean storeEmptyStrings, - boolean guessDataType + ImportParameters parameter ) throws TreeReaderException { if (logger.isTraceEnabled()) { logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); } ImportRecord record = new ImportRecord(); - processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType); - addImportRecordToProject(record, project); + processSubRecord(project, parser, rootColumnGroup, record, 0, parameter); + addImportRecordToProject(record, project, parameter.includeFileSources, parameter.fileSource); } - /** - * @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} - */ - @Deprecated - static protected void processFieldAsRecord( - Project project, - TreeReader parser, - ImportColumnGroup rootColumnGroup - ) throws TreeReaderException { - processFieldAsRecord(project, parser, rootColumnGroup, true, false, true); - } - - + /** * processFieldAsRecord parses Tree data for a single element and it's sub-elements, * adding the parsed data as a row to the project @@ -426,9 +375,7 @@ public class XmlImportUtilities extends TreeImportUtilities { Project project, TreeReader parser, ImportColumnGroup rootColumnGroup, - boolean trimStrings, - boolean storeEmptyStrings, - boolean guessDataType + ImportParameters parameter ) throws TreeReaderException { if (logger.isTraceEnabled()) { logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); @@ -437,10 +384,10 @@ public class XmlImportUtilities extends TreeImportUtilities { ImportRecord record = null; if (value instanceof String) { String text = (String) value; - if (trimStrings) { + if (parameter.trimStrings) { text = text.trim(); } - if (text.length() > 0 | !storeEmptyStrings) { + if (text.length() > 0 | !parameter.storeEmptyStrings) { record = new ImportRecord(); addCell( project, @@ -448,8 +395,8 @@ public class XmlImportUtilities extends TreeImportUtilities { record, parser.getFieldName(), (String) value, - storeEmptyStrings, - guessDataType + parameter.storeEmptyStrings, + parameter.guessDataType ); } } else { @@ -463,20 +410,25 @@ public class XmlImportUtilities extends TreeImportUtilities { ); } if (record != null) { - addImportRecordToProject(record, project); + addImportRecordToProject(record, project, + parameter.includeFileSources, parameter.fileSource); } } - static protected void addImportRecordToProject(ImportRecord record, Project project) { + static protected void addImportRecordToProject(ImportRecord record, Project project, + boolean includeFileSources, String fileSource) { for (List row : record.rows) { if (row.size() > 0) { - Row realRow = null; + Row realRow = new Row(row.size()); ; for (int c = 0; c < row.size(); c++) { + if (c == 0 && includeFileSources) { // to add the file source: + realRow.setCell( + 0, + new Cell(fileSource, null)); + continue; + } Cell cell = row.get(c); if (cell != null) { - if (realRow == null) { - realRow = new Row(row.size()); - } realRow.setCell(c, cell); } } @@ -486,19 +438,6 @@ public class XmlImportUtilities extends TreeImportUtilities { } } } - - /** - * @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)} - */ - @Deprecated - static protected void processSubRecord( Project project, - TreeReader parser, - ImportColumnGroup columnGroup, - ImportRecord record, - int level - ) throws TreeReaderException { - processSubRecord(project, parser, columnGroup, record, level, true, false, true); - } /** * @@ -514,9 +453,7 @@ public class XmlImportUtilities extends TreeImportUtilities { ImportColumnGroup columnGroup, ImportRecord record, int level, - boolean trimStrings, - boolean storeEmptyStrings, - boolean guessDataType + ImportParameters parameter ) throws TreeReaderException { if (logger.isTraceEnabled()) { logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup); @@ -536,18 +473,18 @@ public class XmlImportUtilities extends TreeImportUtilities { int attributeCount = parser.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { String text = parser.getAttributeValue(i); - if (trimStrings) { + if (parameter.trimStrings) { text = text.trim(); } - if (text.length() > 0 | !storeEmptyStrings) { + if (text.length() > 0 | !parameter.storeEmptyStrings) { addCell( project, thisColumnGroup, record, composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), text, - storeEmptyStrings, - guessDataType + parameter.storeEmptyStrings, + parameter.guessDataType ); } } @@ -561,9 +498,7 @@ public class XmlImportUtilities extends TreeImportUtilities { thisColumnGroup, record, level+1, - trimStrings, - storeEmptyStrings, - guessDataType + parameter ); } else if (//eventType == XMLStreamConstants.CDATA || eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) { @@ -572,7 +507,7 @@ public class XmlImportUtilities extends TreeImportUtilities { if (value instanceof String) { String text = (String) value; addCell(project, thisColumnGroup, record, colName, text, - storeEmptyStrings, guessDataType); + parameter.storeEmptyStrings, parameter.guessDataType); } else { addCell(project, thisColumnGroup, record, colName, value); } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java index 4aa9a84f0..5962597a9 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java @@ -37,6 +37,7 @@ import java.io.Serializable; import java.util.List; import com.google.refine.importers.tree.ImportColumnGroup; +import com.google.refine.importers.tree.ImportParameters; import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.XmlImportUtilities; @@ -49,42 +50,24 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities { } public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, - ImportRecord record, int level,boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) + ImportRecord record, int level, ImportParameters parameter) throws Exception { - super.processSubRecord(project, parser, columnGroup, record, level, trimStrings, storeEmptyStrings, guessDataType); - } - - @Deprecated - public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, - ImportRecord record, int level) - throws Exception { - super.processSubRecord(project, parser, columnGroup, record, level, false, true, false); + super.processSubRecord(project, parser, columnGroup, record, level, parameter); } public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) throws Exception { - super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, trimStrings, storeEmptyStrings, guessDataType); - } - - @Deprecated - public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, - ImportColumnGroup rootColumnGroup) - throws Exception { - super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, true, false, true); + super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1, + new ImportParameters(trimStrings, storeEmptyStrings, guessDataType)); } public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup, boolean trimStrings, boolean storeEmptyStrings, boolean guessDataType) throws Exception { - super.processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); + super.processRecord(project, parser, rootColumnGroup, + new ImportParameters(trimStrings, storeEmptyStrings, guessDataType)); } - - @Deprecated - public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) - throws Exception { - super.processRecord(project, parser, rootColumnGroup, true, false, true); - } public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, Serializable value, int commonStartingRowIndex) { super.addCell(project, columnGroup, record, columnLocalName, value); @@ -93,9 +76,4 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities { public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex, boolean trimStrings, boolean storeEmptyStrings) { super.addCell(project, columnGroup, record, columnLocalName, text, trimStrings, storeEmptyStrings); } - - @Deprecated - public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) { - super.addCell(project, columnGroup, record, columnLocalName, text, false, true); - } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index 1270b96c0..9d161f371 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -53,6 +53,7 @@ import com.google.refine.importers.JsonImporter.JSONTreeReader; import com.google.refine.importers.XmlImporter.XmlParser; import com.google.refine.importers.tree.ImportColumn; import com.google.refine.importers.tree.ImportColumnGroup; +import com.google.refine.importers.tree.ImportParameters; import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReaderException; @@ -208,8 +209,8 @@ public class XmlImportUtilitiesTests extends RefineTest { loadSampleXml(); String[] recordPath = new String[]{"library","book"}; - XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, - false); + XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, + new ImportParameters(false, true, false)); log(project); assertProjectCreated(project, 0, 6); @@ -229,8 +230,8 @@ public class XmlImportUtilitiesTests extends RefineTest { loadData(XmlImporterTests.getSampleWithVaryingStructure()); String[] recordPath = new String[]{"library", "book"}; - XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, false, true, - false); + XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1, + new ImportParameters(false, true, false)); log(project); assertProjectCreated(project, 0, 6); @@ -283,7 +284,8 @@ public class XmlImportUtilitiesTests extends RefineTest { int pathIndex = 0; try { - SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup); + SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup, + false, false, false); } catch (Exception e) { Assert.fail(); } @@ -302,7 +304,7 @@ public class XmlImportUtilitiesTests extends RefineTest { ParserSkip(); try { - SUT.processRecordWrapper(project, parser, columnGroup); + SUT.processRecordWrapper(project, parser, columnGroup, false, false, false); } catch (Exception e) { Assert.fail(); } @@ -323,7 +325,7 @@ public class XmlImportUtilitiesTests extends RefineTest { ParserSkip(); try { - SUT.processRecordWrapper(project, parser, columnGroup); + SUT.processRecordWrapper(project, parser, columnGroup, false, false, false); } catch (Exception e) { Assert.fail(); } @@ -348,7 +350,7 @@ public class XmlImportUtilitiesTests extends RefineTest { ParserSkip(); try { - SUT.processRecordWrapper(project, parser, columnGroup); + SUT.processRecordWrapper(project, parser, columnGroup, false, false, false); } catch (Exception e) { Assert.fail(); } @@ -372,7 +374,8 @@ public class XmlImportUtilitiesTests extends RefineTest { ParserSkip(); try { - SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0); + SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record,0, + new ImportParameters(false, false, false)); } catch (Exception e) { Assert.fail(); }