Fixed misspelling in clustering dialog.
Added option for not splitting lines into columns on import.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@508 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
d85a0e1851
commit
35da36b0e8
@ -11,6 +11,7 @@ import org.apache.commons.lang.NotImplementedException;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.NonSplitRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.RowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
@ -18,6 +19,8 @@ import com.metaweb.gridworks.model.Row;
|
||||
|
||||
public class TsvCsvImporter implements Importer {
|
||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
||||
|
||||
String sep = options.getProperty("separator"); // auto-detect if not present
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
@ -29,7 +32,8 @@ public class TsvCsvImporter implements Importer {
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep);
|
||||
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
|
||||
new SeparatorRowParser(sep) : null;
|
||||
|
||||
String line = null;
|
||||
int rowsWithData = 0;
|
||||
@ -43,13 +47,17 @@ public class TsvCsvImporter implements Importer {
|
||||
}
|
||||
|
||||
if (parser == null) {
|
||||
int tab = line.indexOf('\t');
|
||||
if (tab >= 0) {
|
||||
sep = "\t";
|
||||
parser = new SeparatorRowParser(sep);
|
||||
if (splitIntoColumns) {
|
||||
int tab = line.indexOf('\t');
|
||||
if (tab >= 0) {
|
||||
sep = "\t";
|
||||
parser = new SeparatorRowParser(sep);
|
||||
} else {
|
||||
sep = ",";
|
||||
parser = new CSVRowParser();
|
||||
}
|
||||
} else {
|
||||
sep = ",";
|
||||
parser = new CSVRowParser();
|
||||
parser = new NonSplitRowParser();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,38 @@
|
||||
package com.metaweb.gridworks.importers.parsers;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.metaweb.gridworks.expr.ExpressionUtils;
|
||||
import com.metaweb.gridworks.importers.ImporterUtilities;
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
|
||||
public class NonSplitRowParser extends RowParser {
|
||||
|
||||
public List<String> split(String line) {
|
||||
List<String> results = new ArrayList<String>(1);
|
||||
|
||||
results.add(line.trim());
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public boolean parseRow(Row row, String line, boolean guessValueType) {
|
||||
line = line.trim();
|
||||
if (line.isEmpty()) {
|
||||
return false;
|
||||
} else {
|
||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(line) : line;
|
||||
if (ExpressionUtils.isNonBlankData(value)) {
|
||||
row.cells.add(new Cell(value, null));
|
||||
return true;
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -33,7 +33,7 @@ ClusteringDialog.prototype._createDialog = function() {
|
||||
'<td>' +
|
||||
'Method: <select bind="methodSelector">' +
|
||||
'<option selected="true">key collision</option>' +
|
||||
'<option>nearest neightbor</option>' +
|
||||
'<option>nearest neighbor</option>' +
|
||||
'</select>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
@ -77,7 +77,7 @@ ClusteringDialog.prototype._createDialog = function() {
|
||||
body.find(".knn-controls").hide();
|
||||
self._method = "binning";
|
||||
self._elmts.keyingFunctionSelector.change();
|
||||
} else if (selection === 'nearest neightbor') {
|
||||
} else if (selection === 'nearest neighbor') {
|
||||
body.find(".binning-controls").hide();
|
||||
body.find(".knn-controls").show();
|
||||
self._method = "knn";
|
||||
|
@ -8,6 +8,7 @@ function onClickUploadFileButton(evt) {
|
||||
} else {
|
||||
$("#file-upload-form").attr("action",
|
||||
"/command/create-project-from-upload?" + [
|
||||
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
|
||||
"separator=" + $("#separator-input")[0].value,
|
||||
"ignore=" + $("#ignore-input")[0].value,
|
||||
"header-lines=" + $("#header-lines-input")[0].value,
|
||||
|
Loading…
Reference in New Issue
Block a user