Merge branch 'develop/1086-quotecharacter' of https://github.com/stundzig/OpenRefine into stundzig-develop/1086-quotecharacter

This commit is contained in:
Antonin Delpeuch 2018-02-03 07:56:30 +00:00
commit c9395d564c
2 changed files with 43 additions and 3 deletions

View File

@ -50,6 +50,7 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONObject;
import au.com.bytecode.opencsv.CSVParser;
@ -75,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "processQuotes", true);
JSONUtilities.safePut(options, "quoteCharacter", CSVParser.DEFAULT_QUOTE_CHARACTER);
return options;
}
@ -98,9 +100,15 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false);
Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER;
String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null);
if (!StringUtils.isBlank(quoteCharacter)) {
quote = quoteCharacter.charAt(0);
}
final CSVParser parser = new CSVParser(
sep,
CSVParser.DEFAULT_QUOTE_CHARACTER,
quote,
(char) 0, // we don't want escape processing
strictQuotes,
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,

View File

@ -487,6 +487,33 @@ public class TsvCsvImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2");
}
/**
 * Parses one header line and one data row whose fields are all wrapped in
 * single quotes, with the quote character option set to {@code '}, and
 * checks that column names and cell values come back without the quotes.
 *
 * @param sep separator handed to the importer options; when {@code null}
 *            the input itself is built with a tab between fields
 */
@Test(dataProvider = "CSV-TSV-AutoDetermine")
public void readSimpleData_CSV_1Header_1Row_singleQuote(String sep){
    // Pick the delimiter actually written into the test input.
    String delimiter;
    if (sep == null) {
        delimiter = "\t";
    } else {
        delimiter = sep;
    }

    // Assemble the header and the single data row, every field single-quoted.
    String headerLine = "'col1'" + delimiter + "'col2'" + delimiter + "'col3'\n";
    String dataLine = "'data1'" + delimiter + "'data2'" + delimiter + "'data3'";
    String content = headerLine + dataLine;

    try {
        // The last argument selects the single quote as the quote character.
        prepareOptions(sep, -1, 0, 0, 1, false, false, "'");
        parseOneFile(SUT, new StringReader(content));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    // Column names must have had their surrounding quotes removed.
    String[] expectedNames = { "col1", "col2", "col3" };
    Assert.assertEquals(project.columnModel.columns.size(), 3);
    for (int i = 0; i < expectedNames.length; i++) {
        Assert.assertEquals(project.columnModel.columns.get(i).getName(), expectedNames[i]);
    }

    // Exactly one data row, whose cells are likewise unquoted.
    String[] expectedValues = { "data1", "data2", "data3" };
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), 3);
    for (int i = 0; i < expectedValues.length; i++) {
        Assert.assertEquals(project.rows.get(0).cells.get(i).value, expectedValues[i]);
    }
}
//---------------------read tests------------------------
@Test
public void readCsvWithProperties() {
@ -542,12 +569,17 @@ public class TsvCsvImporterTests extends ImporterTest {
{","},{"\t"},{null}
};
}
/**
 * Convenience overload that keeps the original seven-argument call sites
 * working: it forwards every argument unchanged to the eight-argument
 * variant and supplies a double quote as the quote character.
 */
private void prepareOptions(
        String sep, int limit, int skip, int ignoreLines,
        int headerLines, boolean guessValueType, boolean ignoreQuotes) {
    final String doubleQuote = "\"";
    prepareOptions(
            sep, limit, skip, ignoreLines,
            headerLines, guessValueType, ignoreQuotes, doubleQuote);
}
private void prepareOptions(
String sep, int limit, int skip, int ignoreLines,
int headerLines, boolean guessValueType, boolean ignoreQuotes) {
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) {
whenGetStringOption("separator", options, sep);
whenGetStringOption("quoteCharacter", options, quoteCharacter);
whenGetIntegerOption("limit", options, limit);
whenGetIntegerOption("skipDataLines", options, skip);
whenGetIntegerOption("ignoreLines", options, ignoreLines);