[FIX] json/xml trim whitespace configuration option (#2415)

* apply the trimStrings condition to string values in XmlImportUtilities

* added tests for trimStrings in the XML importer

* added trimStrings tests for the JSON importer
This commit is contained in:
Lisa Chandra 2020-03-15 21:34:01 +05:30 committed by GitHub
parent afd5cf70f0
commit a91691cb6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 99 additions and 4 deletions

View File

@ -506,6 +506,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
String colName = parser.getFieldName();
if (value instanceof String) {
String text = (String) value;
if(parameter.trimStrings) {
text = text.trim();
}
addCell(project, thisColumnGroup, record, colName, text,
parameter.storeEmptyStrings, parameter.guessDataType);
} else {

View File

@ -106,6 +106,48 @@ public class JsonImporterTests extends ImporterTest {
Assert.assertEquals(row.getCell(1).value, "Author 1, The");
}
/**
 * Imports one JSON record whose string values are padded with whitespace,
 * with the trimStrings option enabled, and verifies that the first two cells
 * are stored without the surrounding whitespace.
 */
@Test
public void trimLeadingTrailingWhitespaceOnTrimStrings(){
    final String paddedJson =
            "[\n" +
            "{\n" +
            " \"school\": \" University of Cambridge \",\n" +
            " \"name\": \" Amy Zhang \",\n" +
            " \"student-faculty-score\": \"100\",\n" +
            " \"intl-student-score\": \"95\"\n" +
            " }\n" +
            "]\n";

    // The boolean overload forwards the flag as the "trimStrings" option.
    RunTest(paddedJson, true);
    log(project);
    assertProjectCreated(project, 4, 1);

    Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow);
    Assert.assertNotNull(firstRow.getCell(1));
    // Padding must be gone from both string cells.
    Assert.assertEquals(firstRow.getCell(0).value, "University of Cambridge");
    Assert.assertEquals(firstRow.getCell(1).value, "Amy Zhang");
}
/**
 * Imports one JSON record whose string values are padded with whitespace,
 * with the trimStrings option disabled, and verifies that the first two cells
 * keep the surrounding whitespace exactly as supplied.
 */
@Test
public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(){
    final String paddedJson =
            "[\n" +
            "{\n" +
            " \"school\": \" University of Cambridge \",\n" +
            " \"name\": \" Amy Zhang \",\n" +
            " \"student-faculty-score\": \"100\",\n" +
            " \"intl-student-score\": \"95\"\n" +
            " }\n" +
            "]\n";

    // The single-argument overload runs the import with trimStrings set to false.
    RunTest(paddedJson);
    log(project);
    assertProjectCreated(project, 4, 1);

    Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow);
    Assert.assertNotNull(firstRow.getCell(1));
    // Padding must survive untouched in both string cells.
    Assert.assertEquals(firstRow.getCell(0).value, " University of Cambridge ");
    Assert.assertEquals(firstRow.getCell(1).value, " Amy Zhang ");
}
@Test
public void canParseSampleWithDuplicateNestedElements(){
RunTest(getSampleWithDuplicateNestedElements());
@ -414,7 +456,7 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
private static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser, String pathSelector) {
private static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser, String pathSelector, boolean trimStrings) {
ObjectNode options = parser.createParserUIInitializationData(
job, new LinkedList<>(), "text/json");
@ -423,7 +465,7 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.append(path, pathSelector);
JSONUtilities.safePut(options, "recordPath", path);
JSONUtilities.safePut(options, "trimStrings", false);
JSONUtilities.safePut(options, "trimStrings", trimStrings);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
@ -510,11 +552,15 @@ public class JsonImporterTests extends ImporterTest {
// Runs the import on testString using the JsonImporter.ANONYMOUS path selector.
// NOTE(review): diff markers were lost in this rendering; the 3-argument getOptions
// call appears to be the removed line and the 4-argument call (trimStrings = false)
// its replacement — confirm against the repository.
private void RunTest(String testString) {
RunTest(testString, getOptions(job, SUT, JsonImporter.ANONYMOUS));
RunTest(testString, getOptions(job, SUT, JsonImporter.ANONYMOUS, false));
}
// Runs the import on testString using "institutes" as the path selector.
// NOTE(review): diff markers were lost in this rendering; the 3-argument getOptions
// call appears to be the removed line and the 4-argument call (trimStrings = false)
// its replacement — confirm against the repository.
private void RunComplexJSONTest(String testString) {
RunTest(testString, getOptions(job, SUT, "institutes"));
RunTest(testString, getOptions(job, SUT, "institutes", false));
}
/**
 * Runs the import on {@code testString} using the
 * {@code JsonImporter.ANONYMOUS} path selector, passing the caller's
 * {@code trimStrings} choice through to the parser options.
 */
private void RunTest(String testString, boolean trimStrings) {
    ObjectNode importOptions = getOptions(job, SUT, JsonImporter.ANONYMOUS, trimStrings);
    RunTest(testString, importOptions);
}
private void RunTest(String testString, ObjectNode options) {

View File

@ -396,6 +396,52 @@ public class XmlImportUtilitiesTests extends RefineTest {
//TODO check record
}
/**
 * Processes one XML record whose text nodes are padded with whitespace, with
 * the first boolean argument of processRecordWrapper set to true, and verifies
 * that cells 1 and 2 are stored without the surrounding whitespace.
 */
@Test
public void trimLeadingTrailingWhitespaceOnTrimString(){
    loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name> author1 </author-name><author-dob> a date </author-dob></author><genre>genre1</genre></book></library>");
    createXmlParser();
    ParserSkip();
    try {
        SUT.processRecordWrapper(project, parser, columnGroup, true, false, false);
    } catch (Exception e) {
        // Keep the original exception attached so the test report shows why processing failed.
        Assert.fail("processRecordWrapper threw an unexpected exception", e);
    }
    log(project);
    Assert.assertNotNull(project.rows);
    Assert.assertEquals(project.rows.size(), 1);
    Row row = project.rows.get(0);
    Assert.assertNotNull(row);
    Assert.assertEquals(row.cells.size(), 4);
    Assert.assertNotNull(row.getCell(1));
    Assert.assertEquals(row.getCell(1).value, "author1");
    Assert.assertNotNull(row.getCell(2));
    Assert.assertEquals(row.getCell(2).value, "a date");
}
/**
 * Processes one XML record whose text nodes are padded with whitespace, with
 * the first boolean argument of processRecordWrapper set to false, and verifies
 * that cells 1 and 2 keep the surrounding whitespace exactly as supplied.
 */
@Test
public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimString(){
    loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name> author1 </author-name><author-dob> a date </author-dob></author><genre>genre1</genre></book></library>");
    createXmlParser();
    ParserSkip();
    try {
        SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
    } catch (Exception e) {
        // Keep the original exception attached so the test report shows why processing failed.
        Assert.fail("processRecordWrapper threw an unexpected exception", e);
    }
    log(project);
    Assert.assertNotNull(project.rows);
    Assert.assertEquals(project.rows.size(), 1);
    Row row = project.rows.get(0);
    Assert.assertNotNull(row);
    Assert.assertEquals(row.cells.size(), 4);
    Assert.assertNotNull(row.getCell(1));
    Assert.assertEquals(row.getCell(1).value, " author1 ");
    Assert.assertNotNull(row.getCell(2));
    Assert.assertEquals(row.getCell(2).value, " a date ");
}
@Test
public void addCellTest(){
String columnLocalName = "author";