From d3f223c19695e2c9ac995c5c2a8451ed142a5ed5 Mon Sep 17 00:00:00 2001 From: Iain Sproat Date: Mon, 4 Oct 2010 10:02:50 +0000 Subject: [PATCH] The JsonImporter now passes all current unit tests. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1421 7d457c2a-affb-35e4-300a-418c747d4874 --- .../google/refine/importers/JsonImporter.java | 1 - .../refine/importers/TreeImportUtilities.java | 2 - .../refine/importers/XmlImportUtilities.java | 71 +++++++---- .../refine/importers/parsers/JSONParser.java | 115 ++++++++++++++---- .../refine/importers/parsers/TreeParser.java | 4 +- .../importers/parsers/TreeParserToken.java | 2 +- .../refine/importers/parsers/XmlParser.java | 34 +++++- .../tests/importers/JsonImporterTests.java | 106 +++++++++++++--- .../importers/XmlImportUtilitiesTests.java | 97 +++++++++------ .../tests/importers/XmlImporterTests.java | 2 + 10 files changed, 319 insertions(+), 115 deletions(-) diff --git a/main/src/com/google/refine/importers/JsonImporter.java b/main/src/com/google/refine/importers/JsonImporter.java index 32057e86a..b8fdcec35 100644 --- a/main/src/com/google/refine/importers/JsonImporter.java +++ b/main/src/com/google/refine/importers/JsonImporter.java @@ -62,7 +62,6 @@ public class JsonImporter implements StreamImporter{ if (recordPath == null) return; - ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup); XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); diff --git a/main/src/com/google/refine/importers/TreeImportUtilities.java b/main/src/com/google/refine/importers/TreeImportUtilities.java index e2e6852f4..d342910ea 100644 --- a/main/src/com/google/refine/importers/TreeImportUtilities.java +++ b/main/src/com/google/refine/importers/TreeImportUtilities.java @@ -168,8 +168,6 @@ public abstract class TreeImportUtilities { row.add(null); } - logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1)); - row.set(cellIndex, new Cell(value, null)); column.nextRowIndex = rowIndex + 1; diff --git a/main/src/com/google/refine/importers/XmlImportUtilities.java b/main/src/com/google/refine/importers/XmlImportUtilities.java index 6e939ed06..0a63d29e0 100644 --- a/main/src/com/google/refine/importers/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/XmlImportUtilities.java @@ -110,7 +110,7 @@ public class XmlImportUtilities extends TreeImportUtilities { try { while (parser.hasNext()) { TreeParserToken eventType = parser.next(); - if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) { + if (eventType == TreeParserToken.StartEntity) { RecordElementCandidate candidate = detectRecordElement( parser, @@ -146,13 +146,17 @@ public class XmlImportUtilities extends TreeImportUtilities { try { while (parser.hasNext()) { TreeParserToken eventType = parser.next(); - if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) { + if (eventType == TreeParserToken.EndEntity ) { break; - } else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) { - if (parser.getText().trim().length() > 0) { - textNodeCount++; + } else if (eventType == TreeParserToken.Value) { + try{ + if (parser.getText().trim().length() > 0) { + textNodeCount++; + } + }catch(Exception e){ + //silent } - } else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + } else if (eventType == TreeParserToken.StartEntity) { childElementNodeCount++; String tagName = parser.getLocalName(); @@ -234,15 +238,15 @@ public class XmlImportUtilities extends TreeImportUtilities { String[] recordPath, ImportColumnGroup rootColumnGroup ) { + logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)"); try { while (parser.hasNext()) { TreeParserToken eventType = parser.next(); - if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + if (eventType == TreeParserToken.StartEntity) { findRecord(project, parser, recordPath, 0, rootColumnGroup); } } } catch (Exception e) { - e.printStackTrace(); // silent } } @@ -265,18 +269,21 @@ public class XmlImportUtilities extends TreeImportUtilities { int pathIndex, ImportColumnGroup rootColumnGroup ) throws ServletException { - if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){ + logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup"); + + if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){ logger.warn("Cannot use findRecord method for START_DOCUMENT event"); return; } + String tagName = parser.getLocalName(); if (tagName.equals(recordPath[pathIndex])) { if (pathIndex < recordPath.length - 1) { while (parser.hasNext()) { TreeParserToken eventType = parser.next(); - if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + if (eventType == TreeParserToken.StartEntity) { findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); - } else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) { + } else if (eventType == TreeParserToken.EndEntity ) { break; } } @@ -312,6 +319,7 @@ public class XmlImportUtilities extends TreeImportUtilities { TreeParser parser, ImportColumnGroup rootColumnGroup ) throws ServletException { + logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)"); ImportRecord record = new ImportRecord(); processSubRecord(project, parser, rootColumnGroup, record); @@ -350,13 +358,18 @@ public class XmlImportUtilities extends TreeImportUtilities { ImportColumnGroup columnGroup, ImportRecord record ) throws ServletException { + logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)"); + + if(parser.getEventType() == TreeParserToken.StartDocument) + return; + ImportColumnGroup thisColumnGroup = getColumnGroup( - project, - columnGroup, - composeName(parser.getPrefix(), parser.getLocalName())); - + project, + columnGroup, + composeName(parser.getPrefix(), parser.getLocalName())); + thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); - + int attributeCount = parser.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { String text = parser.getAttributeValue(i).trim(); @@ -373,7 +386,7 @@ public class XmlImportUtilities extends TreeImportUtilities { while (parser.hasNext()) { TreeParserToken eventType = parser.next(); - if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + if (eventType == TreeParserToken.StartEntity) { processSubRecord( project, parser, @@ -382,17 +395,21 @@ public class XmlImportUtilities extends TreeImportUtilities { ); } else if (//eventType == XMLStreamConstants.CDATA || eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) { - String text = parser.getText().trim(); - if (text.length() > 0) { - addCell( - project, - thisColumnGroup, - record, - null, - parser.getText() - ); + String text = parser.getText(); + String colName = parser.getLocalName(); + if(text != null){ + text = text.trim(); + if (text.length() > 0) { + addCell( + project, + thisColumnGroup, + record, + colName, + parser.getText() + ); + } } - } else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) { + } else if (eventType == TreeParserToken.EndEntity) { break; } } diff --git a/main/src/com/google/refine/importers/parsers/JSONParser.java b/main/src/com/google/refine/importers/parsers/JSONParser.java index 633652bcf..dd9a18fdb 100644 --- a/main/src/com/google/refine/importers/parsers/JSONParser.java +++ b/main/src/com/google/refine/importers/parsers/JSONParser.java @@ -8,40 +8,58 @@ import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonToken; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class JSONParser implements TreeParser{ + final static Logger logger = LoggerFactory.getLogger("JsonParser"); + JsonFactory factory = new JsonFactory(); JsonParser parser = null; + + //The following is a workaround for inconsistent Jackson JsonParser + Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false; + Boolean thisTokenIsAFieldName = false; + String lastFieldName = null; + //end of workaround public JSONParser(InputStream inputStream){ try { parser = factory.createJsonParser(inputStream); - } catch (JsonParseException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block + } catch (Exception e) { e.printStackTrace(); } } + + /** + * Does nothing. All Json is treated as elements + */ @Override public int getAttributeCount() { // TODO Auto-generated method stub return 0; } + /** + * Does nothing. All Json is treated as elements + */ @Override public String getAttributeLocalName(int index) { - // TODO Auto-generated method stub return null; } + /** + * Does nothing. All Json is treated as elements + */ @Override public String getAttributePrefix(int index) { // TODO Auto-generated method stub return null; } + /** + * Does nothing. All Json is treated as elements + */ @Override public String getAttributeValue(int index) { // TODO Auto-generated method stub @@ -50,31 +68,49 @@ public class JSONParser implements TreeParser{ @Override public TreeParserToken getEventType() throws ServletException { - return this.convertToTreeParserToken(parser.getCurrentToken()); + return this.mapToTreeParserToken(parser.getCurrentToken()); } @Override - public String getLocalName() { - // TODO Auto-generated method stub - return null; + public String getLocalName() throws ServletException{ + try { + String text = parser.getCurrentName(); + + //The following is a workaround for inconsistent Jackson JsonParser + if(text == null){ + if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity) + text = this.lastFieldName; + else + text = "__anonymous__"; + } + //end of workaround + + return text; + } catch (Exception e) { + throw new ServletException(e); + } } + /** + * Does nothing. Json does not have prefixes + */ @Override public String getPrefix() { - // TODO Auto-generated method stub return null; } @Override - public String getText() { - // TODO Auto-generated method stub - return null; + public String getText() throws ServletException { + try { + return parser.getText(); + } catch (Exception e) { + throw new ServletException(e); + } } @Override public boolean hasNext() throws ServletException { - // TODO Auto-generated method stub - return false; + return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?) } @Override @@ -83,29 +119,58 @@ public class JSONParser implements TreeParser{ try { next = parser.nextToken(); } catch (JsonParseException e) { - throw new ServletException(e.getMessage()); + throw new ServletException(e); } catch (IOException e) { - throw new ServletException(e.getMessage()); + throw new ServletException(e); } if(next == null) throw new ServletException("No more Json Tokens in stream"); - return convertToTreeParserToken(next); + //The following is a workaround for inconsistent Jackson JsonParser + if(next == JsonToken.FIELD_NAME){ + try { + this.thisTokenIsAFieldName = true; + this.lastFieldName = parser.getCurrentName(); + } catch (Exception e) { + //silent + } + }else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){ + if(this.thisTokenIsAFieldName){ + this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true; + this.thisTokenIsAFieldName = false; + }else{ + this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false; + this.lastFieldName = null; + } + }else{ + this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false; + this.lastFieldName = null; + this.thisTokenIsAFieldName = false; + } + //end of workaround + + return mapToTreeParserToken(next); } - protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException{ + protected TreeParserToken mapToTreeParserToken(JsonToken token){ switch(token){ case START_ARRAY: return TreeParserToken.StartEntity; case END_ARRAY: return TreeParserToken.EndEntity; case START_OBJECT: return TreeParserToken.StartEntity; case END_OBJECT: return TreeParserToken.EndEntity; case VALUE_STRING: return TreeParserToken.Value; - //Json does not have START_DOCUMENT - //Json does not have END_DOCUMENT - - //TODO finish the rest of the cases - default: throw new ServletException("Not yet implemented"); + case FIELD_NAME: return TreeParserToken.Ignorable; //returned by the getLocalName function() + case VALUE_NUMBER_INT: return TreeParserToken.Value; + //Json does not have START_DOCUMENT token type (so ignored as default) + //Json does not have END_DOCUMENT token type (so ignored as default) + case VALUE_TRUE : return TreeParserToken.Value; + case VALUE_NUMBER_FLOAT : return TreeParserToken.Value; + case VALUE_NULL : return TreeParserToken.Value; + case VALUE_FALSE : return TreeParserToken.Value; + case VALUE_EMBEDDED_OBJECT : return TreeParserToken.Ignorable; + case NOT_AVAILABLE : return TreeParserToken.Ignorable; + default: return TreeParserToken.Ignorable; } } diff --git a/main/src/com/google/refine/importers/parsers/TreeParser.java b/main/src/com/google/refine/importers/parsers/TreeParser.java index 0a90028db..00668464f 100644 --- a/main/src/com/google/refine/importers/parsers/TreeParser.java +++ b/main/src/com/google/refine/importers/parsers/TreeParser.java @@ -6,9 +6,9 @@ public interface TreeParser { public TreeParserToken next() throws ServletException; public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken public boolean hasNext() throws ServletException; - public String getLocalName(); + public String getLocalName() throws ServletException; //aka getFieldName public String getPrefix(); - public String getText(); + public String getText() throws ServletException; public int getAttributeCount(); public String getAttributeValue(int index); public String getAttributePrefix(int index); diff --git a/main/src/com/google/refine/importers/parsers/TreeParserToken.java b/main/src/com/google/refine/importers/parsers/TreeParserToken.java index 92db049cb..3ac86818f 100644 --- a/main/src/com/google/refine/importers/parsers/TreeParserToken.java +++ b/main/src/com/google/refine/importers/parsers/TreeParserToken.java @@ -8,5 +8,5 @@ public enum TreeParserToken { StartEntity, EndEntity, Value - //TODO + //append additional tokens as necessary (most are just mapped to Value or Ignorable) } diff --git a/main/src/com/google/refine/importers/parsers/XmlParser.java b/main/src/com/google/refine/importers/parsers/XmlParser.java index 7a43e62c0..8ee2c2558 100644 --- a/main/src/com/google/refine/importers/parsers/XmlParser.java +++ b/main/src/com/google/refine/importers/parsers/XmlParser.java @@ -9,7 +9,12 @@ import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class XmlParser implements TreeParser{ + final static Logger logger = LoggerFactory.getLogger("XmlParser"); + XMLStreamReader parser = null; public XmlParser(InputStream inputStream){ @@ -24,19 +29,20 @@ public class XmlParser implements TreeParser{ } } + @Override public TreeParserToken next() throws ServletException{ try { if(!parser.hasNext()) throw new ServletException("End of XML stream"); } catch (XMLStreamException e) { - throw new ServletException(e.getMessage()); + throw new ServletException(e); } int currentToken = -1; try { currentToken = parser.next(); } catch (XMLStreamException e) { - throw new ServletException(e.getMessage()); + throw new ServletException(e); } return convertToTreeParserToken(currentToken); @@ -44,6 +50,8 @@ public class XmlParser implements TreeParser{ protected TreeParserToken convertToTreeParserToken(int token) throws ServletException { switch(token){ + //Xml does not have StartArray element type + //Xml does not have EndArray element type case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity; case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity; case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value; @@ -53,44 +61,58 @@ public class XmlParser implements TreeParser{ //TODO default: return TreeParserToken.Ignorable; - //throw new ServletException("Not yet implemented"); } } + @Override public TreeParserToken getEventType() throws ServletException{ return this.convertToTreeParserToken(parser.getEventType()); } + @Override public boolean hasNext() throws ServletException{ try { return parser.hasNext(); } catch (XMLStreamException e) { - throw new ServletException(e.getMessage()); + throw new ServletException(e); } } - public String getLocalName(){ - return parser.getLocalName(); + @Override + public String getLocalName() throws ServletException{ + try{ + return parser.getLocalName(); + }catch(IllegalStateException e){ + return null; + } } + @Override public String getPrefix(){ return parser.getPrefix(); } + @Override public String getText(){ return parser.getText(); } + @Override public int getAttributeCount(){ return parser.getAttributeCount(); } + @Override public String getAttributeValue(int index){ return parser.getAttributeValue(index); } + + @Override public String getAttributePrefix(int index){ return parser.getAttributePrefix(index); } + + @Override public String getAttributeLocalName(int index){ return parser.getAttributeLocalName(index); } diff --git a/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java index 2c20b3ceb..7701fb398 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java @@ -16,6 +16,8 @@ import org.testng.annotations.Test; import com.google.refine.ProjectMetadata; import com.google.refine.importers.JsonImporter; +import com.google.refine.importers.parsers.JSONParser; +import com.google.refine.importers.parsers.TreeParserToken; import com.google.refine.model.Project; import com.google.refine.model.Row; import com.google.refine.tests.RefineTest; @@ -102,7 +104,7 @@ public class JsonImporterTests extends RefineTest { log(project); assertProjectCreated(project, 5, 6); - Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); + Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "__anonymous__ - genre"); Row row0 = project.rows.get(0); Assert.assertNotNull(row0); @@ -120,12 +122,87 @@ public class JsonImporterTests extends RefineTest { assertProjectCreated(project, 5, 6); Assert.assertEquals(project.columnModel.columnGroups.size(),1); - Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); - Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3); + Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3); Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); } + + + /** + * org.codehaus.Jackson.JsonParser has an inconsistency when returning getLocalName + * of an Entity_Start token which occurs after a Field_Name token + */ + @Test + public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{ + String sampleJson = "{\"field\":\"value\"}"; + String sampleJson2 = "{\"field\":{}}"; + String sampleJson3 = "{\"field\":[{},{}]}"; + + JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) )); + TreeParserToken token = TreeParserToken.Ignorable; + int i = 0; + try{ + while(token != null){ + token = parser.next(); + if(token == null) + break; + i++; + if(i == 3){ + Assert.assertEquals(TreeParserToken.Value, token); + Assert.assertEquals("field", parser.getLocalName()); + } + } + }catch(Exception e){ + //silent + } + + + parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) ); + token = TreeParserToken.Ignorable; + i = 0; + try{ + while(token != null){ + token = parser.next(); + if(token == null) + break; + i++; + if(i == 3){ + Assert.assertEquals(TreeParserToken.StartEntity, token); + Assert.assertEquals(parser.getLocalName(), "field"); + } + } + }catch(Exception e){ + //silent + } + + parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) ); + token = TreeParserToken.Ignorable; + i = 0; + try{ + while(token != null){ + token = parser.next(); + if(token == null) + break; + i++; + if(i == 3){ + Assert.assertEquals(token, TreeParserToken.StartEntity); + Assert.assertEquals(parser.getLocalName(), "field"); + } + if(i == 4){ + Assert.assertEquals(token, TreeParserToken.StartEntity); + Assert.assertEquals(parser.getLocalName(), "__anonymous__"); + } + if(i == 6){ + Assert.assertEquals(token, TreeParserToken.StartEntity); + Assert.assertEquals(parser.getLocalName(), "__anonymous__"); + } + } + }catch(Exception e){ + //silent + } + } //------------helper methods--------------- @@ -139,13 +216,13 @@ public class JsonImporterTests extends RefineTest { public static String getElementWithDuplicateSubElement(int id){ return "{ \"id\" : " + id + "," + - "\"authors\":[" + - "{\"author\" : \"Author " + id + ", The\"}," + - "{\"author\" : \"Author " + id + ", Another\"}" + - "]," + - "\"title\" : \"Book title " + id + "\"," + - "\"publish_date\" : \"2010-05-26\"" + - "}"; + "\"authors\":[" + + "{\"name\" : \"Author " + id + ", The\"}," + + "{\"name\" : \"Author " + id + ", Another\"}" + + "]," + + "\"title\" : \"Book title " + id + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "}"; } public static String getSample(){ @@ -181,8 +258,8 @@ public class JsonImporterTests extends RefineTest { sb.append(","); } sb.append("{\"id\" : 4," + - "\"author\" : \"With line\n break\"," + - "\"title\" : \"Book title 4\"" + + "\"author\" : \"With line\\n break\"," + //FIXME this line break is doubled - is this correct?? + "\"title\" : \"Book title 4\"," + "\"publish_date\" : \"2010-05-26\"" + "},"); sb.append(getTypicalElement(5)); @@ -197,12 +274,13 @@ public class JsonImporterTests extends RefineTest { sb.append("["); for(int i = 1; i < 6; i++){ sb.append(getTypicalElement(i)); + sb.append(","); } sb.append("{\"id\" : 6," + "\"author\" : \"Author 6, The\"," + "\"title\" : \"Book title 6\"," + "\"genre\" : \"New element not seen in other records\"," + - "\"publish_date\" : \"2010-05-26\"," + + "\"publish_date\" : \"2010-05-26\"" + "}"); sb.append("]"); return sb.toString(); @@ -217,7 +295,7 @@ public class JsonImporterTests extends RefineTest { "\"author-dob\" : \"1950-0" + i + "-15\"}," + "\"title\" : \"Book title " + i + "\"," + "\"publish_date\" : \"2010-05-26\"" + - "},"); + "}"); if(i < 6) sb.append(","); } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index a78fe4208..cacc6d43b 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -18,7 +18,9 @@ import org.testng.annotations.Test; import com.google.refine.importers.TreeImportUtilities.ImportColumn; import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup; import com.google.refine.importers.TreeImportUtilities.ImportRecord; +import com.google.refine.importers.parsers.JSONParser; import com.google.refine.importers.parsers.TreeParser; +import com.google.refine.importers.parsers.TreeParserToken; import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Project; import com.google.refine.model.Row; @@ -63,10 +65,11 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectPathFromTagTest(){ - loadXml("author1genre1"); + public void detectPathFromTagXmlTest(){ + loadData("author1genre1"); + String tag = "library"; - createParser(); + createXmlParser(); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); Assert.assertNotNull(response); @@ -75,11 +78,11 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectPathFromTagWithNestedElement(){ - loadXml("author1genre1"); + public void detectPathFromTagWithNestedElementXml(){ + loadData("author1genre1"); String tag = "book"; - createParser(); + createXmlParser(); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); Assert.assertNotNull(response); @@ -89,9 +92,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectRecordElementTest(){ - loadXml("author1genre1"); - createParser(); + public void detectRecordElementXmlTest(){ + loadData("author1genre1"); + createXmlParser(); String tag="library"; @@ -107,9 +110,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectRecordElementCanHandleWithNestedElements(){ - loadXml("author1genre1"); - createParser(); + public void detectRecordElementCanHandleWithNestedElementsXml(){ + loadData("author1genre1"); + createXmlParser(); String tag="book"; @@ -126,9 +129,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectRecordElementIsNullForUnfoundTag(){ - loadXml("author1genre1"); - createParser(); + public void detectRecordElementIsNullForUnfoundTagXml(){ + loadData("author1genre1"); + createXmlParser(); String tag=""; @@ -142,7 +145,7 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectRecordElementRegressionTest(){ + public void detectRecordElementRegressionXmlTest(){ loadSampleXml(); String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream)); @@ -151,9 +154,20 @@ public class XmlImportUtilitiesTests extends RefineTest { Assert.assertEquals(path[0], "library"); Assert.assertEquals(path[1], "book"); } + + @Test + public void detectRecordElementRegressionJsonTest(){ + loadSampleJson(); + + String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream)); + Assert.assertNotNull(path); + Assert.assertEquals(path.length, 2); + Assert.assertEquals(path[0], "__anonymous__"); + Assert.assertEquals(path[1], "__anonymous__"); + } @Test - public void importXmlTest(){ + public void importTreeDataXmlTest(){ loadSampleXml(); String[] recordPath = new String[]{"library","book"}; @@ -174,7 +188,7 @@ public class XmlImportUtilitiesTests extends RefineTest { @Test public void importXmlWithVaryingStructureTest(){ - loadXml(XmlImporterTests.getSampleWithVaryingStructure()); + loadData(XmlImporterTests.getSampleWithVaryingStructure()); String[] recordPath = new String[]{"library", "book"}; XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup); @@ -221,9 +235,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void findRecordTest(){ + public void findRecordTestXml(){ loadSampleXml(); - createParser(); + createXmlParser(); ParserSkip(); String[] recordPath = new String[]{"library","book"}; @@ -243,9 +257,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void processRecordTest(){ - loadXml("author1genre1"); - createParser(); + public void processRecordTestXml(){ + loadData("author1genre1"); + createXmlParser(); ParserSkip(); try { @@ -264,9 +278,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void processRecordTestDuplicateColumns(){ - loadXml("author1author2genre1"); - createParser(); + public void processRecordTestDuplicateColumnsXml(){ + loadData("author1author2genre1"); + createXmlParser(); ParserSkip(); try { @@ -289,9 +303,9 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void processRecordTestNestedElement(){ - loadXml("author1a dategenre1"); - createParser(); + public void processRecordTestNestedElementXml(){ + loadData("author1a dategenre1"); + createXmlParser(); ParserSkip(); try { @@ -313,9 +327,9 @@ public class XmlImportUtilitiesTests extends RefineTest { @Test - public void processSubRecordTest(){ - loadXml("author1genre1"); - createParser(); + public void processSubRecordTestXml(){ + loadData("author1genre1"); + createXmlParser(); ParserSkip(); try { @@ -365,10 +379,14 @@ public class XmlImportUtilitiesTests extends RefineTest { //----------------helpers------------- public void loadSampleXml(){ - loadXml( XmlImporterTests.getSample() ); + loadData( XmlImporterTests.getSample() ); + } + + public void loadSampleJson(){ + loadData( JsonImporterTests.getSample() ); } - public void loadXml(String xml){ + public void loadData(String xml){ try { inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) ); } catch (UnsupportedEncodingException e1) { @@ -378,13 +396,18 @@ public class XmlImportUtilitiesTests extends RefineTest { public void ParserSkip(){ try { - parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event + if(parser.getEventType() == TreeParserToken.StartDocument){ + parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event + } } catch (ServletException e1) { Assert.fail(); } } - public void createParser(){ - parser = new XmlParser(inputStream); + public void createXmlParser(){ + parser = new XmlParser(inputStream); + } + public void createJsonParser(){ + parser = new JSONParser(inputStream); } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java index 3af2ffde1..fe6ea2957 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java @@ -140,8 +140,10 @@ public class XmlImporterTests extends RefineTest { public static String getElementWithDuplicateSubElement(int id){ return "" + + "" + "Author " + id + ", The" + "Author " + id + ", Another" + + "" + "Book title " + id + "" + "2010-05-26" + "";