From afb7953eaccc5fa7545dc1ba8dc3d5a6be2441b1 Mon Sep 17 00:00:00 2001
From: David Huynh
Date: Sun, 14 Aug 2011 02:53:19 +0000
Subject: [PATCH] Fixed problem for importing from an archive file containing fixed width column files: we used to create totally new columns for each contained file, yielding too many columns.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2203 7d457c2a-affb-35e4-300a-418c747d4874
---
 .../refine/importers/FixedWidthImporter.java  | 23 +++++++++++++------
 .../refine/importers/ImporterUtilities.java   | 20 ++++++++++------
 .../importers/TabularImportingParserBase.java |  4 +++-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/main/src/com/google/refine/importers/FixedWidthImporter.java b/main/src/com/google/refine/importers/FixedWidthImporter.java
index 3f219cee6..45b181c86 100644
--- a/main/src/com/google/refine/importers/FixedWidthImporter.java
+++ b/main/src/com/google/refine/importers/FixedWidthImporter.java
@@ -66,18 +66,27 @@ public class FixedWidthImporter extends TabularImportingParserBase {
         // String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
         final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
-        final List columnNames;
+        List retrievedColumnNames = null;
         if (options.has("columnNames")) {
-            columnNames = new ArrayList();
             String[] strings = JSONUtilities.getStringArray(options, "columnNames");
-            for (String s : strings) {
-                columnNames.add(s);
+            if (strings.length > 0) {
+                retrievedColumnNames = new ArrayList();
+                for (String s : strings) {
+                    s = s.trim();
+                    if (!s.isEmpty()) {
+                        retrievedColumnNames.add(s);
+                    }
+                }
+
+                if (retrievedColumnNames.size() > 0) {
+                    JSONUtilities.safePut(options, "headerLines", 1);
+                } else {
+                    retrievedColumnNames = null;
+                }
             }
-            JSONUtilities.safePut(options, "headerLines", 1);
-        } else {
-            columnNames = null;
         }
+        final List columnNames = retrievedColumnNames;
         final LineNumberReader lnReader = new LineNumberReader(reader);
         TableDataReader dataReader = new TableDataReader() {
diff --git a/main/src/com/google/refine/importers/ImporterUtilities.java b/main/src/com/google/refine/importers/ImporterUtilities.java
index a4ceb66ab..6b0725bf7 100644
--- a/main/src/com/google/refine/importers/ImporterUtilities.java
+++ b/main/src/com/google/refine/importers/ImporterUtilities.java
@@ -128,19 +128,25 @@ public class ImporterUtilities {
         }
     }
-    static public Column getOrAllocateColumn(Project project, List currentFileColumnNames, int index) {
+    static public Column getOrAllocateColumn(Project project, List currentFileColumnNames,
+            int index, boolean hasOurOwnColumnNames) {
         if (index < currentFileColumnNames.size()) {
             return project.columnModel.getColumnByName(currentFileColumnNames.get(index));
-        } else if (index == currentFileColumnNames.size()) {
+        } else if (index >= currentFileColumnNames.size()) {
             String prefix = "Column ";
-            int i = 1;
+            int i = index + 1;
             while (true) {
                 String columnName = prefix + i;
-                if (project.columnModel.getColumnByName(columnName) != null) {
-                    // Already taken name
-                    i++;
+                Column column = project.columnModel.getColumnByName(columnName);
+                if (column != null) {
+                    if (hasOurOwnColumnNames) {
+                        // Already taken name
+                        i++;
+                    } else {
+                        return column;
+                    }
                 } else {
-                    Column column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
+                    column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
                     try {
                         project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
                     } catch (ModelException e) {
diff --git a/main/src/com/google/refine/importers/TabularImportingParserBase.java b/main/src/com/google/refine/importers/TabularImportingParserBase.java
index f53b90225..0ae49b732 100644
--- a/main/src/com/google/refine/importers/TabularImportingParserBase.java
+++ b/main/src/com/google/refine/importers/TabularImportingParserBase.java
@@ -117,6 +117,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
         }
         List columnNames = new ArrayList();
+        boolean hasOurOwnColumnNames = headerLines > 0;
         List cells = null;
         int rowsWithData = 0;
@@ -161,7 +162,8 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
                 if (skipDataLines <= 0 || rowsWithData > skipDataLines) {
                     boolean rowHasData = false;
                     for (int c = 0; c < cells.size(); c++) {
-                        Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c);
+                        Column column = ImporterUtilities.getOrAllocateColumn(
+                            project, columnNames, c, hasOurOwnColumnNames);
                         Object value = cells.get(c);
                         if (value != null && value instanceof Cell) {