diff --git a/src/main/java/com/metaweb/gridworks/importers/ExcelImporter.java b/src/main/java/com/metaweb/gridworks/importers/ExcelImporter.java index b2845c8c3..e3f003a21 100644 --- a/src/main/java/com/metaweb/gridworks/importers/ExcelImporter.java +++ b/src/main/java/com/metaweb/gridworks/importers/ExcelImporter.java @@ -42,6 +42,7 @@ public class ExcelImporter implements Importer { } public void read(InputStream inputStream, Project project, Properties options) throws Exception { + int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); int limit = ImporterUtilities.getIntegerOption("limit",options,-1); int skip = ImporterUtilities.getIntegerOption("skip",options,0); @@ -64,7 +65,7 @@ public class ExcelImporter implements Importer { int lastRow = sheet.getLastRowNum(); int r = firstRow; - List nonBlankIndices = null; + List nonBlankIndices = null; List nonBlankHeaderStrings = null; /* @@ -74,6 +75,9 @@ public class ExcelImporter implements Importer { org.apache.poi.ss.usermodel.Row row = sheet.getRow(r); if (row == null) { continue; + } else if (ignoreLines > 0) { + ignoreLines--; + continue; } short firstCell = row.getFirstCellNum(); diff --git a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java index 7b453ca36..672b25a31 100644 --- a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java +++ b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java @@ -1,8 +1,15 @@ package com.metaweb.gridworks.importers; import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Properties; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; + public class ImporterUtilities { static public Serializable parseCellValue(String text) { @@ -51,4 +58,53 @@ public class ImporterUtilities { return value; } + static public void appendColumnName(List columnNames, int index, String name) { + name = name.trim(); + + while (columnNames.size() <= index) { + columnNames.add(""); + } + + if (!name.isEmpty()) { + String oldName = columnNames.get(index); + if (!oldName.isEmpty()) { + name = oldName + " " + name; + } + + columnNames.set(index, name); + } + } + + static public void ensureColumnsInRowExist(List columnNames, Row row) { + int count = row.cells.size(); + while (count > columnNames.size()) { + columnNames.add(""); + } + } + + static public void setupColumns(Project project, List columnNames) { + Map nameToIndex = new HashMap(); + for (int c = 0; c < columnNames.size(); c++) { + String cell = columnNames.get(c).trim(); + if (cell.isEmpty()) { + cell = "Column"; + } else if (cell.startsWith("\"") && cell.endsWith("\"")) { + cell = cell.substring(1, cell.length() - 1).trim(); + } + + if (nameToIndex.containsKey(cell)) { + int index = nameToIndex.get(cell); + nameToIndex.put(cell, index + 1); + + cell = cell.contains(" ") ? (cell + " " + index) : (cell + index); + } else { + nameToIndex.put(cell, 2); + } + + Column column = new Column(c, cell); + + project.columnModel.columns.add(column); + } + } + } diff --git a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java index d99923612..cd37a17b3 100644 --- a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java +++ b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java @@ -3,9 +3,8 @@ package com.metaweb.gridworks.importers; import java.io.InputStream; import java.io.LineNumberReader; import java.io.Reader; -import java.util.HashMap; +import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Properties; import org.apache.commons.lang.NotImplementedException; @@ -14,27 +13,32 @@ import org.apache.commons.lang.StringUtils; import com.metaweb.gridworks.importers.parsers.CSVRowParser; import com.metaweb.gridworks.importers.parsers.RowParser; import com.metaweb.gridworks.importers.parsers.SeparatorRowParser; -import com.metaweb.gridworks.model.Column; import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Row; public class TsvCsvImporter implements Importer { - public void read(Reader reader, Project project, Properties options) throws Exception { + String sep = options.getProperty("separator"); // auto-detect if not present + int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); + int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); + int limit = ImporterUtilities.getIntegerOption("limit",options,-1); int skip = ImporterUtilities.getIntegerOption("skip",options,0); boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true); + + List columnNames = new ArrayList(); LineNumberReader lnReader = new LineNumberReader(reader); - String sep = options.getProperty("separator"); // auto-detect if not present - String line = null; - boolean first = true; - int cellCount = 1; - RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep); + RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep); + String line = null; int rowsWithData = 0; + while ((line = lnReader.readLine()) != null) { - if (StringUtils.isBlank(line)) { + if (ignoreLines > 0) { + ignoreLines--; + continue; + } else if (StringUtils.isBlank(line)) { continue; } @@ -49,34 +53,17 @@ public class TsvCsvImporter implements Importer { } } - if (first) { + if (headerLines > 0) { + headerLines--; + List cells = parser.split(line); - Map nameToIndex = new HashMap(); - - first = false; for (int c = 0; c < cells.size(); c++) { String cell = cells.get(c).trim(); - if (cell.startsWith("\"") && cell.endsWith("\"")) { - cell = cell.substring(1, cell.length() - 1).trim(); - } - if (nameToIndex.containsKey(cell)) { - int index = nameToIndex.get(cell); - nameToIndex.put(cell, index + 1); - - cell = cell.contains(" ") ? (cell + " " + index) : (cell + index); - } else { - nameToIndex.put(cell, 2); - } - - Column column = new Column(c, cell); - - project.columnModel.columns.add(column); + ImporterUtilities.appendColumnName(columnNames, c, cell); } - - cellCount = cells.size(); } else { - Row row = new Row(cellCount); + Row row = new Row(columnNames.size()); if (parser.parseRow(row, line, guessValueType)) { rowsWithData++; @@ -85,6 +72,8 @@ public class TsvCsvImporter implements Importer { project.rows.add(row); project.columnModel.setMaxCellIndex(row.cells.size()); + ImporterUtilities.ensureColumnsInRowExist(columnNames, row); + if (limit > 0 && project.rows.size() >= limit) { break; } @@ -92,6 +81,8 @@ public class TsvCsvImporter implements Importer { } } } + + ImporterUtilities.setupColumns(project, columnNames); } public void read(InputStream inputStream, Project project, Properties options) throws Exception { diff --git a/src/main/webapp/index.html b/src/main/webapp/index.html index d5db0d808..25593749e 100644 --- a/src/main/webapp/index.html +++ b/src/main/webapp/index.html @@ -1 +1 @@ - Freebase Gridworks
Gridworks
Gridworks

Upload Data File

Data File:
Project Name:
Load up to: data rows (optional)
Skip: initial data rows (optional)
Column separator: (optional, default to comma or tab)
Guess Value Type: (try to parse cells' content into numbers, dates, etc.)

Import Existing Project

Project .tar or .tar.gz File:
Re-name Project: (optional)
\ No newline at end of file + Freebase Gridworks
Gridworks
Gridworks

Upload Data File

Data File:
Project Name:
Column separator: leave blank to guess comma or tab
Guess Value Type: (try to parse cells' content into numbers, dates, etc.)
Ignore: initial non-blank lines
Header lines: (can be zero)
Skip: initial data rows
Load up to: data rows (leave blank to load all rows)

Import Existing Project

Project .tar or .tar.gz File:
Re-name Project: (optional)
\ No newline at end of file diff --git a/src/main/webapp/scripts/index.js b/src/main/webapp/scripts/index.js index 571a26efa..7f45c70c5 100644 --- a/src/main/webapp/scripts/index.js +++ b/src/main/webapp/scripts/index.js @@ -8,9 +8,11 @@ function onClickUploadFileButton(evt) { } else { $("#file-upload-form").attr("action", "/command/create-project-from-upload?" + [ + "separator=" + $("#separator-input")[0].value, + "ignore=" + $("#ignore-input")[0].value, + "header-lines=" + $("#header-lines-input")[0].value, "skip=" + $("#skip-input")[0].value, "limit=" + $("#limit-input")[0].value, - "separator=" + $("#separator-input")[0].value, "guess-value-type=" + $("#guess-value-type-input")[0].checked ].join("&")); }