Fixed misspelling in clustering dialog.

Added option for not splitting lines into columns on import.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@508 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-20 07:26:07 +00:00
parent d85a0e1851
commit 35da36b0e8
5 changed files with 57 additions and 10 deletions

View File

@ -11,6 +11,7 @@ import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import com.metaweb.gridworks.importers.parsers.CSVRowParser; import com.metaweb.gridworks.importers.parsers.CSVRowParser;
import com.metaweb.gridworks.importers.parsers.NonSplitRowParser;
import com.metaweb.gridworks.importers.parsers.RowParser; import com.metaweb.gridworks.importers.parsers.RowParser;
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser; import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Project;
@ -18,6 +19,8 @@ import com.metaweb.gridworks.model.Row;
public class TsvCsvImporter implements Importer { public class TsvCsvImporter implements Importer {
public void read(Reader reader, Project project, Properties options) throws Exception { public void read(Reader reader, Project project, Properties options) throws Exception {
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
String sep = options.getProperty("separator"); // auto-detect if not present String sep = options.getProperty("separator"); // auto-detect if not present
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
@ -29,7 +32,8 @@ public class TsvCsvImporter implements Importer {
List<String> columnNames = new ArrayList<String>(); List<String> columnNames = new ArrayList<String>();
LineNumberReader lnReader = new LineNumberReader(reader); LineNumberReader lnReader = new LineNumberReader(reader);
RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep); RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
String line = null; String line = null;
int rowsWithData = 0; int rowsWithData = 0;
@ -43,13 +47,17 @@ public class TsvCsvImporter implements Importer {
} }
if (parser == null) { if (parser == null) {
int tab = line.indexOf('\t'); if (splitIntoColumns) {
if (tab >= 0) { int tab = line.indexOf('\t');
sep = "\t"; if (tab >= 0) {
parser = new SeparatorRowParser(sep); sep = "\t";
parser = new SeparatorRowParser(sep);
} else {
sep = ",";
parser = new CSVRowParser();
}
} else { } else {
sep = ","; parser = new NonSplitRowParser();
parser = new CSVRowParser();
} }
} }

View File

@ -0,0 +1,38 @@
package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.importers.ImporterUtilities;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row;
/**
 * Row parser that keeps each input line whole instead of breaking it into
 * several columns: every line yields at most one value.
 */
public class NonSplitRowParser extends RowParser {

    /**
     * Wraps the whitespace-trimmed line in a single-element list.
     *
     * @param line the raw input line
     * @return a list containing exactly one entry, the trimmed line
     */
    public List<String> split(String line) {
        List<String> single = new ArrayList<String>(1);
        String trimmed = line.trim();
        single.add(trimmed);
        return single;
    }

    /**
     * Turns the trimmed line into at most one cell appended to {@code row}.
     * A line that is empty after trimming leaves the row untouched. Otherwise
     * one entry is always appended: a new cell when the value counts as
     * non-blank data, or {@code null} when it does not.
     *
     * @param row the row whose cell list receives the entry
     * @param line the raw input line
     * @param guessValueType when true, the trimmed text is passed through
     *        {@code ImporterUtilities.parseCellValue}; when false, the
     *        trimmed text itself is used as the value
     * @return true only if a cell with non-blank data was appended
     */
    public boolean parseRow(Row row, String line, boolean guessValueType) {
        String text = line.trim();
        if (text.isEmpty()) {
            // Nothing to record for a blank line; the row is not modified.
            return false;
        }

        Serializable value;
        if (guessValueType) {
            value = ImporterUtilities.parseCellValue(text);
        } else {
            value = text;
        }

        boolean hasData = ExpressionUtils.isNonBlankData(value);
        // A null placeholder is still appended when the value is blank.
        row.cells.add(hasData ? new Cell(value, null) : null);
        return hasData;
    }
}

File diff suppressed because one or more lines are too long

View File

@ -33,7 +33,7 @@ ClusteringDialog.prototype._createDialog = function() {
'<td>' + '<td>' +
'Method: <select bind="methodSelector">' + 'Method: <select bind="methodSelector">' +
'<option selected="true">key collision</option>' + '<option selected="true">key collision</option>' +
'<option>nearest neightbor</option>' + '<option>nearest neighbor</option>' +
'</select>' + '</select>' +
'</td>' + '</td>' +
'<td>' + '<td>' +
@ -77,7 +77,7 @@ ClusteringDialog.prototype._createDialog = function() {
body.find(".knn-controls").hide(); body.find(".knn-controls").hide();
self._method = "binning"; self._method = "binning";
self._elmts.keyingFunctionSelector.change(); self._elmts.keyingFunctionSelector.change();
} else if (selection === 'nearest neightbor') { } else if (selection === 'nearest neighbor') {
body.find(".binning-controls").hide(); body.find(".binning-controls").hide();
body.find(".knn-controls").show(); body.find(".knn-controls").show();
self._method = "knn"; self._method = "knn";

View File

@ -8,6 +8,7 @@ function onClickUploadFileButton(evt) {
} else { } else {
$("#file-upload-form").attr("action", $("#file-upload-form").attr("action",
"/command/create-project-from-upload?" + [ "/command/create-project-from-upload?" + [
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
"separator=" + $("#separator-input")[0].value, "separator=" + $("#separator-input")[0].value,
"ignore=" + $("#ignore-input")[0].value, "ignore=" + $("#ignore-input")[0].value,
"header-lines=" + $("#header-lines-input")[0].value, "header-lines=" + $("#header-lines-input")[0].value,