Convert illegal characters into legal ones. (#2431)

* Convert illegal characters into legal ones.

* Test tab in key & value string

Also fix up test that depended on previous TAB
related error message and clean up logging

Co-authored-by: Tom Morris <tfmorris@gmail.com>
This commit is contained in:
chuhao zeng 2020-06-14 03:47:58 -04:00 committed by GitHub
parent 3a7812aef7
commit 9b03ecae41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 31 deletions

View File

@ -90,6 +90,7 @@ public class JsonImporter extends TreeImportingParserBase {
File file = ImportingUtilities.getFile(job, firstFileRecord); File file = ImportingUtilities.getFile(job, firstFileRecord);
JsonFactory factory = new JsonFactory(); JsonFactory factory = new JsonFactory();
JsonParser parser = factory.createParser(file); JsonParser parser = factory.createParser(file);
parser.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
PreviewParsingState state = new PreviewParsingState(); PreviewParsingState state = new PreviewParsingState();
JsonNode rootValue = parseForPreview(parser, state); JsonNode rootValue = parseForPreview(parser, state);
@ -224,6 +225,7 @@ public class JsonImporter extends TreeImportingParserBase {
public JSONTreeReader(InputStream is) { public JSONTreeReader(InputStream is) {
try { try {
parser = factory.createParser(is); parser = factory.createParser(is);
parser.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
current = null; current = null;
next = parser.nextToken(); next = parser.nextToken();
} catch (IOException e) { } catch (IOException e) {

View File

@ -38,11 +38,13 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.ArrayList; import java.util.ArrayList;
import java.lang.reflect.Method;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
import org.testng.ITestResult;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest; import org.testng.annotations.BeforeTest;
@ -75,16 +77,16 @@ public class JsonImporterTests extends ImporterTest {
//System Under Test //System Under Test
JsonImporter SUT = null; JsonImporter SUT = null;
@Override
@BeforeMethod @BeforeMethod
public void setUp(){ public void setUp(Method method){
super.setUp(); super.setUp();
SUT = new JsonImporter(); SUT = new JsonImporter();
logger.info("About to run test method: " + method.getName());
} }
@Override
@AfterMethod @AfterMethod
public void tearDown() { public void tearDown(ITestResult result) {
// logger.info("Finished test method: " + result.getMethod().getMethodName());
SUT = null; SUT = null;
if (inputStream != null) { if (inputStream != null) {
try { try {
@ -100,8 +102,6 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void canParseSample(){ public void canParseSample(){
RunTest(getSample()); RunTest(getSample());
log(project);
assertProjectCreated(project, 4, 6); assertProjectCreated(project, 4, 6);
Row row = project.rows.get(0); Row row = project.rows.get(0);
@ -122,7 +122,6 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "storeEmptyStrings", true); JSONUtilities.safePut(options, "storeEmptyStrings", true);
JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "guessCellValueTypes", false);
try { try {
inputStream = new ByteArrayInputStream(errJSON.getBytes( "UTF-8" ) ); inputStream = new ByteArrayInputStream(errJSON.getBytes( "UTF-8" ) );
} catch (UnsupportedEncodingException e1) { } catch (UnsupportedEncodingException e1) {
@ -143,10 +142,10 @@ public class JsonImporterTests extends ImporterTest {
exceptions exceptions
); );
Assert.assertFalse(exceptions.isEmpty()); Assert.assertFalse(exceptions.isEmpty());
Assert.assertEquals(exceptions.get(0).getMessage(), "Illegal unquoted " + Assert.assertEquals("Unexpected character (';' (code 59)): was expecting comma to separate Object entries",
"character ((CTRL-CHAR, code 10)): has to be escaped using backslash to be included in string value"); exceptions.get(0).getMessage());
} }
@Test @Test
public void trimLeadingTrailingWhitespaceOnTrimStrings(){ public void trimLeadingTrailingWhitespaceOnTrimStrings(){
String ScraperwikiOutput = String ScraperwikiOutput =
@ -159,7 +158,6 @@ public class JsonImporterTests extends ImporterTest {
" }\n" + " }\n" +
"]\n"; "]\n";
RunTest(ScraperwikiOutput, true); RunTest(ScraperwikiOutput, true);
log(project);
assertProjectCreated(project, 4, 1); assertProjectCreated(project, 4, 1);
Row row = project.rows.get(0); Row row = project.rows.get(0);
Assert.assertNotNull(row); Assert.assertNotNull(row);
@ -180,7 +178,6 @@ public class JsonImporterTests extends ImporterTest {
" }\n" + " }\n" +
"]\n"; "]\n";
RunTest(ScraperwikiOutput); RunTest(ScraperwikiOutput);
log(project);
assertProjectCreated(project, 4, 1); assertProjectCreated(project, 4, 1);
Row row = project.rows.get(0); Row row = project.rows.get(0);
Assert.assertNotNull(row); Assert.assertNotNull(row);
@ -192,8 +189,6 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void canParseSampleWithDuplicateNestedElements(){ public void canParseSampleWithDuplicateNestedElements(){
RunTest(getSampleWithDuplicateNestedElements()); RunTest(getSampleWithDuplicateNestedElements());
log(project);
assertProjectCreated(project, 4, 12); assertProjectCreated(project, 4, 12);
Row row = project.rows.get(0); Row row = project.rows.get(0);
@ -206,10 +201,7 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void testCanParseLineBreak(){ public void testCanParseLineBreak(){
RunTest(getSampleWithLineBreak()); RunTest(getSampleWithLineBreak());
log(project);
assertProjectCreated(project, 4, 6); assertProjectCreated(project, 4, 6);
Row row = project.rows.get(3); Row row = project.rows.get(3);
@ -222,8 +214,6 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void testElementsWithVaryingStructure(){ public void testElementsWithVaryingStructure(){
RunTest(getSampleWithVaryingStructure()); RunTest(getSampleWithVaryingStructure());
log(project);
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre"); Assert.assertEquals( project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre");
@ -240,7 +230,6 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void testElementWithNestedTree(){ public void testElementWithNestedTree(){
RunTest(getSampleWithTreeStructure()); RunTest(getSampleWithTreeStructure());
log(project);
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals(project.columnModel.columnGroups.size(),1); Assert.assertEquals(project.columnModel.columnGroups.size(),1);
@ -264,7 +253,6 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "recordPath", path); JSONUtilities.safePut(options, "recordPath", path);
RunTest(mqlOutput, options); RunTest(mqlOutput, options);
log(project);
assertProjectCreated(project,3,16); assertProjectCreated(project,3,16);
} }
@ -298,7 +286,6 @@ public class JsonImporterTests extends ImporterTest {
" }\n" + " }\n" +
"]\n"; "]\n";
RunTest(ScraperwikiOutput); RunTest(ScraperwikiOutput);
log(project);
assertProjectCreated(project,9,2); assertProjectCreated(project,9,2);
} }
@ -379,11 +366,36 @@ public class JsonImporterTests extends ImporterTest {
} }
} }
/**
 * Checks that {@code JSONTreeReader} can read JSON text containing raw
 * (un-escaped) TAB characters inside a field name, inside a string value,
 * and between the colon and the value.
 *
 * <p>The third token returned by the reader is expected to be the value
 * token, with the TAB characters preserved in both the field name and the
 * field value.
 *
 * @throws Exception if the sample cannot be encoded or the reader fails; the
 *         exception is deliberately propagated (rather than replaced with a
 *         bare {@code Assert.fail()}) so the test report shows the real cause
 */
@Test
public void testCanParseTab() throws Exception {
    // The "\t" escapes below put literal TAB characters into the JSON text itself.
    String sampleJson = "{\"\tfield\":\t\"\tvalue\"}";
    JSONTreeReader parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson.getBytes("UTF-8")));
    boolean valueChecked = false;
    int i = 0;
    Token token;
    while ((token = parser.next()) != null) {
        i++;
        if (i == 3) {
            // TestNG's assertEquals takes (actual, expected).
            Assert.assertEquals(token, Token.Value);
            Assert.assertEquals(parser.getFieldName(), "\tfield");
            Assert.assertEquals(parser.getFieldValue(), "\tvalue");
            valueChecked = true;
        }
    }
    // Guard against a vacuous pass when the reader stops before the third token.
    Assert.assertTrue(valueChecked, "Reader returned only " + i + " token(s); the value token was never checked");
}
@Test @Test
public void testJsonDatatypes(){ public void testJsonDatatypes(){
RunTest(getSampleWithDataTypes()); RunTest(getSampleWithDataTypes());
log(project);
assertProjectCreated(project, 2, 21,4); assertProjectCreated(project, 2, 21,4);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id"); Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
@ -454,15 +466,13 @@ public class JsonImporterTests extends ImporterTest {
String fileName = "grid_small.json"; String fileName = "grid_small.json";
RunComplexJSONTest(getComplexJSON(fileName)); RunComplexJSONTest(getComplexJSON(fileName));
log(project); logger.debug("************************ columnu number:" + project.columnModel.columns.size() +
logger.info("************************ columnu number:" + project.columnModel.columns.size() + ". \tcolumn groups number:" + project.columnModel.columnGroups.size() +
". \tcolumn groups number:" + project.columnModel.columnGroups.size() +
".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()) ; ".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()) ;
assertProjectCreated(project, 63, 63, 8); assertProjectCreated(project, 63, 63, 8);
} }
//------------helper methods--------------- //------------helper methods---------------
private static String getTypicalElement(int id){ private static String getTypicalElement(int id){