Merge pull request #1731 from xseris/master
Custom column names for separator based importer
This commit is contained in:
commit
22be1ec323
main
src/com/google/refine/importers
tests/server/src/com/google/refine/tests/importers
webapp/modules/core
langs
scripts/index/parser-interfaces
@ -101,6 +101,28 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
||||||
boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false);
|
boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false);
|
||||||
|
|
||||||
|
|
||||||
|
List<Object> retrievedColumnNames = null;
|
||||||
|
if (options.has("columnNames")) {
|
||||||
|
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||||
|
if (strings.length > 0) {
|
||||||
|
retrievedColumnNames = new ArrayList<Object>();
|
||||||
|
for (String s : strings) {
|
||||||
|
s = s.trim();
|
||||||
|
if (!s.isEmpty()) {
|
||||||
|
retrievedColumnNames.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!retrievedColumnNames.isEmpty()) {
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
|
} else {
|
||||||
|
retrievedColumnNames = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final List<Object> columnNames = retrievedColumnNames;
|
||||||
|
|
||||||
Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER;
|
Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER;
|
||||||
String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null);
|
String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null);
|
||||||
if (quoteCharacter != null && quoteCharacter.trim().length() == 1) {
|
if (quoteCharacter != null && quoteCharacter.trim().length() == 1) {
|
||||||
@ -118,8 +140,13 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
final LineNumberReader lnReader = new LineNumberReader(reader);
|
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
TableDataReader dataReader = new TableDataReader() {
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
|
boolean usedColumnNames = false;
|
||||||
@Override
|
@Override
|
||||||
public List<Object> getNextRowOfCells() throws IOException {
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
if (columnNames != null && !usedColumnNames) {
|
||||||
|
usedColumnNames = true;
|
||||||
|
return columnNames;
|
||||||
|
} else {
|
||||||
String line = lnReader.readLine();
|
String line = lnReader.readLine();
|
||||||
if (line == null) {
|
if (line == null) {
|
||||||
return null;
|
return null;
|
||||||
@ -127,6 +154,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
return getCells(line, parser, lnReader);
|
return getCells(line, parser, lnReader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
@ -38,6 +38,7 @@ import static org.mockito.Mockito.verify;
|
|||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -514,6 +515,27 @@ public class TsvCsvImporterTests extends ImporterTest {
|
|||||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||||
|
public void readCustomColumnNames(String sep){
|
||||||
|
//create input
|
||||||
|
String inputSeparator = sep == null ? "\t" : sep;
|
||||||
|
String input = "data1" + inputSeparator + "data2" + inputSeparator + "data3\n";
|
||||||
|
|
||||||
|
try {
|
||||||
|
prepareOptions(sep, -1, 0, 0, 1, false, false,"\"","[col1,col2,col3]");
|
||||||
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Exception during file parse",e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
||||||
|
}
|
||||||
|
|
||||||
//---------------------read tests------------------------
|
//---------------------read tests------------------------
|
||||||
@Test
|
@Test
|
||||||
public void readCsvWithProperties() {
|
public void readCsvWithProperties() {
|
||||||
@ -580,6 +602,13 @@ public class TsvCsvImporterTests extends ImporterTest {
|
|||||||
String sep, int limit, int skip, int ignoreLines,
|
String sep, int limit, int skip, int ignoreLines,
|
||||||
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) {
|
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) {
|
||||||
|
|
||||||
|
prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void prepareOptions(
|
||||||
|
String sep, int limit, int skip, int ignoreLines,
|
||||||
|
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter, String columnNames) {
|
||||||
|
|
||||||
whenGetStringOption("separator", options, sep);
|
whenGetStringOption("separator", options, sep);
|
||||||
whenGetStringOption("quoteCharacter", options, quoteCharacter);
|
whenGetStringOption("quoteCharacter", options, quoteCharacter);
|
||||||
whenGetIntegerOption("limit", options, limit);
|
whenGetIntegerOption("limit", options, limit);
|
||||||
@ -589,6 +618,7 @@ public class TsvCsvImporterTests extends ImporterTest {
|
|||||||
whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
|
whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
|
||||||
whenGetBooleanOption("processQuotes", options, !ignoreQuotes);
|
whenGetBooleanOption("processQuotes", options, !ignoreQuotes);
|
||||||
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
||||||
|
whenGetArrayOption("columnNames", options, new JSONArray(columnNames));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyOptions() {
|
private void verifyOptions() {
|
||||||
@ -601,6 +631,7 @@ public class TsvCsvImporterTests extends ImporterTest {
|
|||||||
verify(options, times(1)).getBoolean("guessCellValueTypes");
|
verify(options, times(1)).getBoolean("guessCellValueTypes");
|
||||||
verify(options, times(1)).getBoolean("processQuotes");
|
verify(options, times(1)).getBoolean("processQuotes");
|
||||||
verify(options, times(1)).getBoolean("storeBlankCellsAsNulls");
|
verify(options, times(1)).getBoolean("storeBlankCellsAsNulls");
|
||||||
|
verify(options, times(1)).getJSONArray("columnNames");
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
Assert.fail("JSON exception",e);
|
Assert.fail("JSON exception",e);
|
||||||
}
|
}
|
||||||
|
@ -155,7 +155,9 @@
|
|||||||
"escape": "Escape special characters with \\",
|
"escape": "Escape special characters with \\",
|
||||||
"use-quote": "Use character",
|
"use-quote": "Use character",
|
||||||
"quote-delimits-cells": "to enclose cells containing column separators",
|
"quote-delimits-cells": "to enclose cells containing column separators",
|
||||||
"click-xml": "Click on the first XML element corresponding to the first record to load."
|
"click-xml": "Click on the first XML element corresponding to the first record to load.",
|
||||||
|
"column-names-label": "Column names (comma separated)",
|
||||||
|
"column-names-optional":"comma separated"
|
||||||
},
|
},
|
||||||
"core-dialogs": {
|
"core-dialogs": {
|
||||||
"cluster-edit": "Cluster & Edit column",
|
"cluster-edit": "Cluster & Edit column",
|
||||||
|
@ -55,9 +55,14 @@
|
|||||||
</table></div></td>
|
</table></div></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td> </td>
|
<!-- Custom column names: a checkbox that enables the option, and below it the text
     input holding the names. The label is tied to the checkbox through "for", the
     same way the "$guess" checkbox/label pair in this file is wired. -->
<td><div class="grid-layout layout-tightest" style="width:fit-content;"><table>
<tr><td width="1%"><input type="checkbox" bind="columnNamesCheckbox" id="$check-column-names" />
<label for="$check-column-names" id="or-import-columnNames"></label></td></tr>
<tr>
<td><input style="width: 25em;" bind="columnNamesInput" /></td>
</tr></table></div></td>
|
||||||
|
|
||||||
<td><div class="grid-layout layout-tightest"><table>
|
<td colspan="1"><div class="grid-layout layout-tightest"><table>
|
||||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||||
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
|
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
|
||||||
</table></div></td>
|
</table></div></td>
|
||||||
|
@ -118,6 +118,13 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
|
|||||||
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
|
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
|
||||||
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
||||||
|
|
||||||
|
if (this._optionContainerElmts.columnNamesCheckbox[0].checked) {
|
||||||
|
var columnNames = this._optionContainerElmts.columnNamesInput.val();
|
||||||
|
if (columnNames != undefined && columnNames != null && columnNames != '') {
|
||||||
|
options.columnNames = columnNames.split(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return options;
|
return options;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -136,6 +143,10 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
|||||||
$('#or-import-tabs').html($.i18n._('core-index-parser')["tabs"]);
|
$('#or-import-tabs').html($.i18n._('core-index-parser')["tabs"]);
|
||||||
$('#or-import-custom').html($.i18n._('core-index-parser')["custom"]);
|
$('#or-import-custom').html($.i18n._('core-index-parser')["custom"]);
|
||||||
$('#or-import-escape').html($.i18n._('core-index-parser')["escape"]);
|
$('#or-import-escape').html($.i18n._('core-index-parser')["escape"]);
|
||||||
|
$('#or-import-columnNames').html($.i18n._('core-index-parser')["column-names-label"] + ':');
|
||||||
|
$('#or-import-optional').html($.i18n._('core-index-parser')["column-names-optional"]);
|
||||||
|
|
||||||
|
self._optionContainerElmts.columnNamesInput.prop('disabled', true);
|
||||||
|
|
||||||
$('#or-import-ignore').text($.i18n._('core-index-parser')["ignore-first"]);
|
$('#or-import-ignore').text($.i18n._('core-index-parser')["ignore-first"]);
|
||||||
$('#or-import-lines').text($.i18n._('core-index-parser')["lines-beg"]);
|
$('#or-import-lines').text($.i18n._('core-index-parser')["lines-beg"]);
|
||||||
@ -160,6 +171,31 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Keeps the header-lines checkbox and the custom-column-names controls mutually
// exclusive: ticking the header-lines checkbox switches custom names off (input
// disabled, checkbox cleared, text emptied); unticking it switches them on.
this._optionContainerElmts.headerLinesCheckbox.on("click", function() {
  if ($(this).is(':checked')) {
    // This branch used to be guarded by $('textbox').prop('disabled'). 'textbox' is
    // an element selector and HTML has no <textbox> element, so the lookup always
    // produced undefined and the guard always passed. The three resets below are
    // idempotent, so they are applied unconditionally instead.
    self._optionContainerElmts.columnNamesInput.prop('disabled', true);
    self._optionContainerElmts.columnNamesCheckbox.prop("checked", false);
    self._optionContainerElmts.columnNamesInput.val('');
  } else {
    self._optionContainerElmts.columnNamesInput.prop('disabled', false);
    self._optionContainerElmts.columnNamesCheckbox.prop("checked", true);
  }
});
|
||||||
|
|
||||||
|
// Toggles custom column names. When the checkbox is ticked the header-lines
// checkbox is cleared and the names input is enabled; when it is unticked the
// header-lines checkbox is set again and the names input is emptied and disabled.
this._optionContainerElmts.columnNamesCheckbox.on("click", function() {
  var elmts = self._optionContainerElmts;
  var useCustomNames = $(this).is(':checked');

  elmts.headerLinesCheckbox.prop("checked", !useCustomNames);
  if (!useCustomNames) {
    // Drop whatever was typed before locking the field.
    elmts.columnNamesInput.val('');
  }
  elmts.columnNamesInput.prop('disabled', !useCustomNames);
});
|
||||||
|
|
||||||
var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
|
var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
|
||||||
((this._config.separator == "\\t") ? 'tab' : 'custom');
|
((this._config.separator == "\\t") ? 'tab' : 'custom');
|
||||||
this._optionContainer.find(
|
this._optionContainer.find(
|
||||||
@ -206,6 +242,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
|||||||
};
|
};
|
||||||
this._optionContainer.find("input").bind("change", onChange);
|
this._optionContainer.find("input").bind("change", onChange);
|
||||||
this._optionContainer.find("select").bind("change", onChange);
|
this._optionContainer.find("select").bind("change", onChange);
|
||||||
|
this._optionContainerElmts.columnNamesInput.bind("keyup",onChange);
|
||||||
};
|
};
|
||||||
|
|
||||||
Refine.SeparatorBasedParserUI.prototype._scheduleUpdatePreview = function() {
|
Refine.SeparatorBasedParserUI.prototype._scheduleUpdatePreview = function() {
|
||||||
|
Loading…
Reference in New Issue
Block a user