diff --git a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java
index 07d334f34..06fd015ae 100644
--- a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java
+++ b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java
@@ -209,7 +209,7 @@ public class CreateProjectCommand extends Command {
}
CharsetDetector detector = new CharsetDetector();
- detector.setDeclaredEncoding("utf8"); // the content on the web is encoded in UTF-8 so assume that
+ detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that
Reader reader = null;
CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll();
diff --git a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java
index d4e4387dc..31649182a 100644
--- a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java
+++ b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java
@@ -2,10 +2,6 @@ package com.metaweb.gridworks.importers;
import java.io.Serializable;
-import com.metaweb.gridworks.expr.ExpressionUtils;
-import com.metaweb.gridworks.model.Cell;
-import com.metaweb.gridworks.model.Row;
-
public class ImporterUtilities {
static public Serializable parseCellValue(String text) {
@@ -27,78 +23,4 @@ public class ImporterUtilities {
return text;
}
- static public boolean parseCSVIntoRow(Row row, String line) {
- boolean hasData = false;
-
- int start = 0;
- while (start < line.length()) {
- String text = null;
-
- if (line.charAt(start) == '"') {
- StringBuffer sb = new StringBuffer();
-
- start++; // skip over "
- while (start < line.length()) {
- int quote = line.indexOf('"', start);
- if (quote < 0) {
- sb.append(line.substring(start));
- start = line.length();
- break;
- } else {
- if (quote < line.length() - 1 && line.charAt(quote + 1) == '"') {
- sb.append(line.substring(start, quote + 1)); // include " as well
- start = quote + 2;
- } else {
- sb.append(line.substring(start, quote));
- start = quote + 1;
- if (start < line.length() && line.charAt(start) == ',') {
- start++; // skip ,
- }
- break;
- }
- }
- }
-
- text = sb.toString();
- } else {
- int next = line.indexOf(',', start);
- if (next < 0) {
- text = line.substring(start);
- start = line.length();
- } else {
- text = line.substring(start, next);
- start = next + 1;
- }
- }
-
- Serializable value = parseCellValue(text);
- if (ExpressionUtils.isNonBlankData(value)) {
- row.cells.add(new Cell(value, null));
- hasData = true;
- } else {
- row.cells.add(null);
- }
- }
-
- return hasData;
- }
-
- static public boolean parseTSVIntoRow(Row row, String line) {
- boolean hasData = false;
-
- String[] cells = line.split("\t");
- for (int c = 0; c < cells.length; c++) {
- String text = cells[c];
-
- Serializable value = parseCellValue(text);
- if (ExpressionUtils.isNonBlankData(value)) {
- row.cells.add(new Cell(value, null));
- hasData = true;
- } else {
- row.cells.add(null);
- }
- }
- return hasData;
- }
-
}
diff --git a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java
index f91b3f9b6..64559733e 100644
--- a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java
+++ b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java
@@ -6,7 +6,11 @@ import java.io.Reader;
import java.util.Properties;
import org.apache.commons.lang.NotImplementedException;
+import org.apache.commons.lang.StringUtils;
+import com.metaweb.gridworks.importers.parsers.CSVRowParser;
+import com.metaweb.gridworks.importers.parsers.RowParser;
+import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
@@ -18,10 +22,11 @@ public class TsvCsvImporter implements Importer {
LineNumberReader lnReader = new LineNumberReader(reader);
try {
- String sep = null; // auto-detect TSV or CSV
- String line = null;
+ String sep = options.getProperty("separator"); // auto-detect if not present
+ String line = null;
boolean first = true;
int cellCount = 1;
+ RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep);
int rowsWithData = 0;
while ((line = lnReader.readLine()) != null) {
@@ -29,18 +34,20 @@ public class TsvCsvImporter implements Importer {
continue;
}
- if (sep == null) {
+ if (parser == null) {
int tab = line.indexOf('\t');
if (tab >= 0) {
sep = "\t";
+ parser = new SeparatorRowParser(sep);
} else {
sep = ",";
+ parser = new CSVRowParser();
}
}
if (first) {
- String[] cells = line.split(sep);
-
+ String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
+
first = false;
for (int c = 0; c < cells.length; c++) {
String cell = cells[c];
@@ -57,7 +64,7 @@ public class TsvCsvImporter implements Importer {
} else {
Row row = new Row(cellCount);
- if ((sep.charAt(0) == ',') ? ImporterUtilities.parseCSVIntoRow(row, line) : ImporterUtilities.parseTSVIntoRow(row, line)) {
+ if (parser.parseRow(row, line)) {
rowsWithData++;
if (skip <= 0 || rowsWithData > skip) {
diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java
new file mode 100644
index 000000000..1ae8e4be2
--- /dev/null
+++ b/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java
@@ -0,0 +1,97 @@
+package com.metaweb.gridworks.importers.parsers;
+
+import java.io.Serializable;
+
+import com.metaweb.gridworks.expr.ExpressionUtils;
+import com.metaweb.gridworks.importers.ImporterUtilities;
+import com.metaweb.gridworks.model.Cell;
+import com.metaweb.gridworks.model.Row;
+
+/**
+ * Row parser for comma-separated lines with optional double-quoted fields.
+ * <p>
+ * A field that starts with {@code "} runs up to the next unescaped quote, and a
+ * doubled quote ({@code ""}) inside it stands for one literal quote character.
+ * The parser only ever sees a single line, so a quoted field cannot span lines.
+ */
+public class CSVRowParser extends RowParser {
+
+    /**
+     * Splits {@code line} into fields and appends one entry per field to
+     * {@code row.cells}: a {@link Cell} for a non-blank value, {@code null} otherwise.
+     * A trailing comma is followed by one final empty field, which goes through the
+     * same value check as every other field.
+     *
+     * @param row  the row whose {@code cells} list receives the parsed values
+     * @param line one line of CSV text
+     * @return {@code true} if at least one non-blank value was added
+     */
+    @Override
+    public boolean parseRow(Row row, String line) {
+        boolean hasData = false;
+        final int length = line.length();
+
+        int start = 0;
+        // Set whenever a comma has just been consumed: one more field follows,
+        // even if it is empty because the line ends right after the comma.
+        boolean fieldPending = false;
+
+        while (start < length || fieldPending) {
+            fieldPending = false;
+            String text;
+
+            if (start < length && line.charAt(start) == '"') {
+                // StringBuilder: the buffer is local, so StringBuffer's locking buys nothing.
+                StringBuilder sb = new StringBuilder();
+
+                start++; // skip over the opening "
+                while (start < length) {
+                    int quote = line.indexOf('"', start);
+                    if (quote < 0) {
+                        // Unterminated quoted field: take the rest of the line.
+                        sb.append(line, start, length);
+                        start = length;
+                        break;
+                    }
+                    if (quote < length - 1 && line.charAt(quote + 1) == '"') {
+                        // Escaped quote: keep one " and continue after the pair.
+                        sb.append(line, start, quote + 1);
+                        start = quote + 2;
+                    } else {
+                        // Closing quote: the field ends here.
+                        sb.append(line, start, quote);
+                        start = quote + 1;
+                        if (start < length && line.charAt(start) == ',') {
+                            start++; // skip ,
+                            fieldPending = true;
+                        }
+                        break;
+                    }
+                }
+
+                text = sb.toString();
+            } else {
+                int next = line.indexOf(',', start);
+                if (next < 0) {
+                    text = line.substring(start);
+                    start = length;
+                } else {
+                    text = line.substring(start, next);
+                    start = next + 1;
+                    fieldPending = true;
+                }
+            }
+
+            Serializable value = ImporterUtilities.parseCellValue(text);
+            if (ExpressionUtils.isNonBlankData(value)) {
+                row.cells.add(new Cell(value, null));
+                hasData = true;
+            } else {
+                row.cells.add(null);
+            }
+        }
+
+        return hasData;
+    }
+
+}
diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java
new file mode 100644
index 000000000..e424312b4
--- /dev/null
+++ b/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java
@@ -0,0 +1,8 @@
+package com.metaweb.gridworks.importers.parsers;
+
+import com.metaweb.gridworks.model.Row;
+/** Base class for parsers that turn one line of delimited text into the cells of a {@link Row}; each subclass decides how the line is split into fields. */
+public abstract class RowParser {
+ /** Appends the cells parsed from {@code line} to {@code row}; the implementations in this package return {@code true} when at least one non-blank cell was added. */
+ public abstract boolean parseRow(Row row, String line);
+}
diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java
new file mode 100644
index 000000000..1a1f0947c
--- /dev/null
+++ b/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java
@@ -0,0 +1,57 @@
+package com.metaweb.gridworks.importers.parsers;
+
+import java.io.Serializable;
+
+import org.apache.commons.lang.StringUtils;
+
+import com.metaweb.gridworks.expr.ExpressionUtils;
+import com.metaweb.gridworks.importers.ImporterUtilities;
+import com.metaweb.gridworks.model.Cell;
+import com.metaweb.gridworks.model.Row;
+
+/**
+ * Row parser that splits a line on a caller-supplied separator, without any quoting rules.
+ * <p>
+ * Splitting is delegated to {@link StringUtils#splitPreserveAllTokens(String, String)}, which
+ * treats each character of the separator string as a delimiter and keeps empty tokens, so
+ * adjacent separators yield empty fields rather than being collapsed.
+ */
+public class SeparatorRowParser extends RowParser {
+
+    /** Separator characters handed to the splitter; never reassigned after construction. */
+    private final String sep;
+
+    /**
+     * @param sep separator characters to split each line on
+     */
+    public SeparatorRowParser(String sep) {
+        this.sep = sep;
+    }
+
+    /**
+     * Splits {@code line} on the separator and appends one entry per field to
+     * {@code row.cells}: a {@link Cell} for a non-blank value, {@code null} otherwise.
+     *
+     * @param row  the row whose {@code cells} list receives the parsed values
+     * @param line one line of delimited text
+     * @return {@code true} if at least one non-blank value was added
+     */
+    @Override
+    public boolean parseRow(Row row, String line) {
+        boolean hasData = false;
+
+        // NOTE(review): a multi-character separator is treated as a set of single-character
+        // delimiters here, not as one token -- confirm that is what the UI intends.
+        for (String text : StringUtils.splitPreserveAllTokens(line, sep)) {
+            Serializable value = ImporterUtilities.parseCellValue(text);
+            if (ExpressionUtils.isNonBlankData(value)) {
+                row.cells.add(new Cell(value, null));
+                hasData = true;
+            } else {
+                row.cells.add(null);
+            }
+        }
+        return hasData;
+    }
+
+}
diff --git a/src/main/webapp/index.html b/src/main/webapp/index.html
index 5c9a8930a..c5d737726 100644
--- a/src/main/webapp/index.html
+++ b/src/main/webapp/index.html
@@ -1 +1 @@
-
Freebase Gridworks
Gridworks |
|
Gridworks |
|
|
Upload Data File
Import Existing Project
\ No newline at end of file
+
Freebase Gridworks
Gridworks |
|
Gridworks |
|
|
Upload Data File
Import Existing Project
\ No newline at end of file
diff --git a/src/main/webapp/scripts/index.js b/src/main/webapp/scripts/index.js
index d43e7065b..f47726763 100644
--- a/src/main/webapp/scripts/index.js
+++ b/src/main/webapp/scripts/index.js
@@ -29,7 +29,9 @@ function onClickUploadFileButton(evt) {
$("#file-upload-form").attr("action",
"/command/create-project-from-upload?" + [
"skip=" + $("#skip-input")[0].value,
- "limit=" + $("#limit-input")[0].value
+ "limit=" + $("#limit-input")[0].value,
+ // The separator is free text (e.g. a tab, "&" or "#"), so escape it for the query string.
+ "separator=" + encodeURIComponent($("#separator-input")[0].value)
].join("&"));
}
}