Fixed misspelling in clustering dialog.

Added option for not splitting lines into columns on import.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@508 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-20 07:26:07 +00:00
parent d85a0e1851
commit 35da36b0e8
5 changed files with 57 additions and 10 deletions

View File

@ -11,6 +11,7 @@ import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
import com.metaweb.gridworks.importers.parsers.NonSplitRowParser;
import com.metaweb.gridworks.importers.parsers.RowParser;
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
import com.metaweb.gridworks.model.Project;
@ -18,6 +19,8 @@ import com.metaweb.gridworks.model.Row;
public class TsvCsvImporter implements Importer {
public void read(Reader reader, Project project, Properties options) throws Exception {
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
String sep = options.getProperty("separator"); // auto-detect if not present
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
@ -29,7 +32,8 @@ public class TsvCsvImporter implements Importer {
List<String> columnNames = new ArrayList<String>();
LineNumberReader lnReader = new LineNumberReader(reader);
RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep);
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
String line = null;
int rowsWithData = 0;
@ -43,13 +47,17 @@ public class TsvCsvImporter implements Importer {
}
if (parser == null) {
int tab = line.indexOf('\t');
if (tab >= 0) {
sep = "\t";
parser = new SeparatorRowParser(sep);
if (splitIntoColumns) {
int tab = line.indexOf('\t');
if (tab >= 0) {
sep = "\t";
parser = new SeparatorRowParser(sep);
} else {
sep = ",";
parser = new CSVRowParser();
}
} else {
sep = ",";
parser = new CSVRowParser();
parser = new NonSplitRowParser();
}
}

View File

@ -0,0 +1,38 @@
package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.importers.ImporterUtilities;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row;
/**
 * A {@link RowParser} that does not split lines into columns: every input
 * line is handled as one single value.
 */
public class NonSplitRowParser extends RowParser {

    /**
     * Wraps the whole line, trimmed of leading and trailing whitespace, in a
     * one-element list.
     *
     * @param line the raw input line
     * @return a new list whose only element is the trimmed line
     */
    public List<String> split(String line) {
        List<String> single = new ArrayList<String>(1);
        single.add(line.trim());
        return single;
    }

    /**
     * Appends at most one cell to {@code row}, built from the trimmed line.
     * <p>
     * A line that is empty after trimming leaves the row untouched. Otherwise
     * the value (optionally run through
     * {@code ImporterUtilities.parseCellValue}) is added as a cell when it is
     * non-blank data, and a {@code null} placeholder is added when it is not.
     *
     * @param row            the row to append to
     * @param line           the raw input line
     * @param guessValueType whether to pass the text through
     *                       {@code ImporterUtilities.parseCellValue} instead of
     *                       storing it as a plain string
     * @return {@code true} only when a cell holding non-blank data was added
     */
    public boolean parseRow(Row row, String line, boolean guessValueType) {
        String text = line.trim();
        if (text.isEmpty()) {
            // Nothing to store; the row is left exactly as it was.
            return false;
        }

        Serializable value;
        if (guessValueType) {
            value = ImporterUtilities.parseCellValue(text);
        } else {
            value = text;
        }

        boolean hasData = ExpressionUtils.isNonBlankData(value);
        // Blank data still occupies the column slot, as a null cell.
        row.cells.add(hasData ? new Cell(value, null) : null);
        return hasData;
    }
}

File diff suppressed because one or more lines are too long

View File

@ -33,7 +33,7 @@ ClusteringDialog.prototype._createDialog = function() {
'<td>' +
'Method: <select bind="methodSelector">' +
'<option selected="true">key collision</option>' +
'<option>nearest neightbor</option>' +
'<option>nearest neighbor</option>' +
'</select>' +
'</td>' +
'<td>' +
@ -77,7 +77,7 @@ ClusteringDialog.prototype._createDialog = function() {
body.find(".knn-controls").hide();
self._method = "binning";
self._elmts.keyingFunctionSelector.change();
} else if (selection === 'nearest neightbor') {
} else if (selection === 'nearest neighbor') {
body.find(".binning-controls").hide();
body.find(".knn-controls").show();
self._method = "knn";

View File

@ -8,6 +8,7 @@ function onClickUploadFileButton(evt) {
} else {
$("#file-upload-form").attr("action",
"/command/create-project-from-upload?" + [
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
"separator=" + $("#separator-input")[0].value,
"ignore=" + $("#ignore-input")[0].value,
"header-lines=" + $("#header-lines-input")[0].value,