Fixed a problem when importing from an archive file containing fixed-width column files: we used to create an entirely new set of columns for each contained file, yielding too many columns.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2203 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-08-14 02:53:19 +00:00
parent 33d99186ea
commit afb7953eac
3 changed files with 32 additions and 15 deletions

View File

@ -66,18 +66,27 @@ public class FixedWidthImporter extends TabularImportingParserBase {
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
final List<Object> columnNames;
List<Object> retrievedColumnNames = null;
if (options.has("columnNames")) {
columnNames = new ArrayList<Object>();
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
if (strings.length > 0) {
retrievedColumnNames = new ArrayList<Object>();
for (String s : strings) {
columnNames.add(s);
s = s.trim();
if (!s.isEmpty()) {
retrievedColumnNames.add(s);
}
JSONUtilities.safePut(options, "headerLines", 1);
} else {
columnNames = null;
}
if (retrievedColumnNames.size() > 0) {
JSONUtilities.safePut(options, "headerLines", 1);
} else {
retrievedColumnNames = null;
}
}
}
final List<Object> columnNames = retrievedColumnNames;
final LineNumberReader lnReader = new LineNumberReader(reader);
TableDataReader dataReader = new TableDataReader() {

View File

@ -128,19 +128,25 @@ public class ImporterUtilities {
}
}
static public Column getOrAllocateColumn(Project project, List<String> currentFileColumnNames, int index) {
static public Column getOrAllocateColumn(Project project, List<String> currentFileColumnNames,
int index, boolean hasOurOwnColumnNames) {
if (index < currentFileColumnNames.size()) {
return project.columnModel.getColumnByName(currentFileColumnNames.get(index));
} else if (index == currentFileColumnNames.size()) {
} else if (index >= currentFileColumnNames.size()) {
String prefix = "Column ";
int i = 1;
int i = index + 1;
while (true) {
String columnName = prefix + i;
if (project.columnModel.getColumnByName(columnName) != null) {
Column column = project.columnModel.getColumnByName(columnName);
if (column != null) {
if (hasOurOwnColumnNames) {
// Already taken name
i++;
} else {
Column column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
return column;
}
} else {
column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
try {
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
} catch (ModelException e) {

View File

@ -117,6 +117,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
}
List<String> columnNames = new ArrayList<String>();
boolean hasOurOwnColumnNames = headerLines > 0;
List<Object> cells = null;
int rowsWithData = 0;
@ -161,7 +162,8 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
if (skipDataLines <= 0 || rowsWithData > skipDataLines) {
boolean rowHasData = false;
for (int c = 0; c < cells.size(); c++) {
Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c);
Column column = ImporterUtilities.getOrAllocateColumn(
project, columnNames, c, hasOurOwnColumnNames);
Object value = cells.get(c);
if (value != null && value instanceof Cell) {