From d285999da820f05ae86e9e04645961fc16fcaee8 Mon Sep 17 00:00:00 2001 From: Iain Sproat Date: Mon, 27 Sep 2010 22:53:17 +0000 Subject: [PATCH] New JsonImporter, JsonParser and JsonImporterTests (copy of XmlImporterTests with syntax of the example data altered for Json). Renaming of TreeImporter to TreeImportUtilities (as per the current convention with the XmlImporter and XmlImportUtilities). NB the new JsonParser class does not work, and 5 of the new unit tests for JsonImporter currently fail. To be fixed in due course. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1361 7d457c2a-affb-35e4-300a-418c747d4874 --- .../google/refine/importers/JsonImporter.java | 96 +++++++ .../refine/importers/TreeImportUtilities.java | 244 ++++++++++++++++++ .../refine/importers/XmlImportUtilities.java | 35 +-- .../google/refine/importers/XmlImporter.java | 23 +- .../refine/importers/parsers/JSONParser.java | 86 ++++++ .../tests/importers/JsonImporterTests.java | 241 +++++++++++++++++ .../importers/XmlImportUtilitiesTests.java | 18 +- 7 files changed, 699 insertions(+), 44 deletions(-) create mode 100644 main/src/com/google/refine/importers/JsonImporter.java create mode 100644 main/src/com/google/refine/importers/TreeImportUtilities.java create mode 100644 main/src/com/google/refine/importers/parsers/JSONParser.java create mode 100644 main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java diff --git a/main/src/com/google/refine/importers/JsonImporter.java b/main/src/com/google/refine/importers/JsonImporter.java new file mode 100644 index 000000000..dcd32808f --- /dev/null +++ b/main/src/com/google/refine/importers/JsonImporter.java @@ -0,0 +1,96 @@ +package com.google.refine.importers; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.util.Properties; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.ProjectMetadata; +import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup; +import com.google.refine.importers.parsers.JSONParser; +import com.google.refine.importers.parsers.TreeParser; +import com.google.refine.importers.parsers.XmlParser; +import com.google.refine.model.Project; + +public class JsonImporter implements StreamImporter{ + final static Logger logger = LoggerFactory.getLogger("XmlImporter"); + + public static final int BUFFER_SIZE = 64 * 1024; + + @Override + public void read(InputStream inputStream, Project project, + ProjectMetadata metadata, Properties options) + throws ImportException { + //FIXME the below is a close duplicate of the XmlImporter code. + //Should wrap a lot of the below into methods and put them in a common superclass + logger.trace("JsonImporter.read"); + PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE); + + String[] recordPath = null; + { + byte[] buffer = new byte[BUFFER_SIZE]; + int bytes_read = 0; + try {//fill the buffer with data + while (bytes_read < BUFFER_SIZE) { + int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); + if (c == -1) break; + bytes_read +=c ; + } + pis.unread(buffer, 0, bytes_read); + } catch (IOException e) { + throw new ImportException("Read error",e); + } + + InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read); + TreeParser parser = new JSONParser(iStream); + if (options.containsKey("importer-record-tag")) { + try{ + recordPath = XmlImportUtilities.detectPathFromTag( + parser, + options.getProperty("importer-record-tag")); + }catch(Exception e){ + // silent + // e.printStackTrace(); + } + } else { + recordPath = XmlImportUtilities.detectRecordElement(parser); + } + } + + if (recordPath == null) + return; + + ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); + XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup); + XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); + + project.columnModel.update(); + + } + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + + if("application/json".equals(contentType) || + "text/json".equals(contentType)) { + return true; + } + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if ( + fileName.endsWith(".json") || + fileName.endsWith(".js") + ) { + return true; + } + } + return false; + } + +} diff --git a/main/src/com/google/refine/importers/TreeImportUtilities.java b/main/src/com/google/refine/importers/TreeImportUtilities.java new file mode 100644 index 000000000..2ca25f880 --- /dev/null +++ b/main/src/com/google/refine/importers/TreeImportUtilities.java @@ -0,0 +1,244 @@ +package com.google.refine.importers; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; + +public abstract class TreeImportUtilities { + final static Logger logger = LoggerFactory.getLogger("TreeImporter"); + + /** + * An element which holds sub-elements we + * shall import as records + */ + static protected class RecordElementCandidate { + String[] path; + int count; + } + + /** + * + * + * + */ + static protected abstract class ImportVertical { + public String name = ""; + public int nonBlankCount; + + abstract void tabulate(); + } + + /** + * A column group describes a branch in tree structured data + */ + static public class ImportColumnGroup extends ImportVertical { + public Map subgroups = new HashMap(); + public Map columns = new HashMap(); + public int nextRowIndex; + + @Override + void tabulate() { + for (ImportColumn c : columns.values()) { + c.tabulate(); + nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount); + } + for (ImportColumnGroup g : subgroups.values()) { + g.tabulate(); + nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount); + } + } + } + + /** + * A column is used to describe a branch-terminating element in a tree structure + * + */ + static public class ImportColumn extends ImportVertical { + public int cellIndex; + public int nextRowIndex; + public boolean blankOnFirstRow; + + public ImportColumn() {} + + public ImportColumn(String name) { //required for testing + super.name = name; + } + + @Override + void tabulate() { + // already done the tabulation elsewhere + } + } + + /** + * A record describes a data element in a tree-structure + * + */ + static public class ImportRecord { + public List> rows = new LinkedList>(); + } + + static protected void sortRecordElementCandidates(List list) { + Collections.sort(list, new Comparator() { + public int compare(RecordElementCandidate o1, RecordElementCandidate o2) { + return o2.count - o1.count; + } + }); + } + + static public void createColumnsFromImport( + Project project, + ImportColumnGroup columnGroup + ) { + int startColumnIndex = project.columnModel.columns.size(); + + List columns = new ArrayList(columnGroup.columns.values()); + Collections.sort(columns, new Comparator() { + public int compare(ImportColumn o1, ImportColumn o2) { + if (o1.blankOnFirstRow != o2.blankOnFirstRow) { + return o1.blankOnFirstRow ? 1 : -1; + } + + int c = o2.nonBlankCount - o1.nonBlankCount; + return c != 0 ? c : (o1.name.length() - o2.name.length()); + } + }); + + for (int i = 0; i < columns.size(); i++) { + ImportColumn c = columns.get(i); + + Column column = new com.google.refine.model.Column(c.cellIndex, c.name); + project.columnModel.columns.add(column); + } + + List subgroups = new ArrayList(columnGroup.subgroups.values()); + Collections.sort(subgroups, new Comparator() { + public int compare(ImportColumnGroup o1, ImportColumnGroup o2) { + int c = o2.nonBlankCount - o1.nonBlankCount; + return c != 0 ? c : (o1.name.length() - o2.name.length()); + } + }); + + for (ImportColumnGroup g : subgroups) { + createColumnsFromImport(project, g); + } + + int endColumnIndex = project.columnModel.columns.size(); + int span = endColumnIndex - startColumnIndex; + if (span > 1 && span < project.columnModel.columns.size()) { + project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex); + } + } + + static protected void addCell( + Project project, + ImportColumnGroup columnGroup, + ImportRecord record, + String columnLocalName, + String text + ) { + if (text == null || ((String) text).isEmpty()) { + return; + } + + Serializable value = ImporterUtilities.parseCellValue(text); + + ImportColumn column = getColumn(project, columnGroup, columnLocalName); + int cellIndex = column.cellIndex; + + int rowIndex = Math.max(columnGroup.nextRowIndex, column.nextRowIndex); + while (rowIndex >= record.rows.size()) { + record.rows.add(new ArrayList()); + } + + List row = record.rows.get(rowIndex); + while (cellIndex >= row.size()) { + row.add(null); + } + + logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1)); + + row.set(cellIndex, new Cell(value, null)); + + column.nextRowIndex = rowIndex + 1; + column.nonBlankCount++; + } + + + static protected ImportColumn getColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.columns.containsKey(localName)) { + return columnGroup.columns.get(localName); + } + + ImportColumn column = createColumn(project, columnGroup, localName); + columnGroup.columns.put(localName, column); + + return column; + } + + static protected ImportColumn createColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumn newColumn = new ImportColumn(); + + newColumn.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); + + newColumn.cellIndex = project.columnModel.allocateNewCellIndex(); + newColumn.nextRowIndex = columnGroup.nextRowIndex; + + return newColumn; + } + + static protected ImportColumnGroup getColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.subgroups.containsKey(localName)) { + return columnGroup.subgroups.get(localName); + } + + ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName); + columnGroup.subgroups.put(localName, subgroup); + + return subgroup; + } + + static protected ImportColumnGroup createColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumnGroup newGroup = new ImportColumnGroup(); + + newGroup.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); + + newGroup.nextRowIndex = columnGroup.nextRowIndex; + + return newGroup; + } +} diff --git a/main/src/com/google/refine/importers/XmlImportUtilities.java b/main/src/com/google/refine/importers/XmlImportUtilities.java index bfc80ef3e..c20f1658a 100644 --- a/main/src/com/google/refine/importers/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/XmlImportUtilities.java @@ -1,6 +1,5 @@ package com.google.refine.importers; -import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; @@ -16,12 +15,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.refine.importers.parsers.TreeParser; -import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Cell; import com.google.refine.model.Project; import com.google.refine.model.Row; -public class XmlImportUtilities extends TreeImporter { +public class XmlImportUtilities extends TreeImportUtilities { final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); static public String[] detectPathFromTag(TreeParser parser, String tag) { @@ -46,7 +44,7 @@ public class XmlImportUtilities extends TreeImporter { return null; } - + /** * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. * @@ -64,7 +62,7 @@ public class XmlImportUtilities extends TreeImporter { try{ if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic parser.next(); - + String localName = parser.getLocalName(); String fullName = composeName(parser.getPrefix(), localName); if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { @@ -92,11 +90,11 @@ public class XmlImportUtilities extends TreeImporter { } return null; } - + static protected String composeName(String prefix, String localName) { return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName; } - + /** * Seeks for recurring XML element in an InputStream * which are likely candidates for being data records @@ -106,14 +104,11 @@ public class XmlImportUtilities extends TreeImporter { * The path to the most numerous of the possible candidates. * null if no candidates were found (less than 6 recurrences) */ - static public String[] detectRecordElement(InputStream inputStream) { + static public String[] detectRecordElement(TreeParser parser) { logger.trace("detectRecordElement(inputStream)"); List candidates = new ArrayList(); try { - //XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - TreeParser parser = new XmlParser(inputStream); - while (parser.hasNext()) { int eventType = parser.next(); if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic @@ -232,17 +227,15 @@ public class XmlImportUtilities extends TreeImporter { return null; } - - static public void importXml( //FIXME could do with a name change to 'importTreeData' or similar - InputStream inputStream, + + static public void importTreeData( + TreeParser parser, Project project, String[] recordPath, ImportColumnGroup rootColumnGroup ) { try { - TreeParser parser = new XmlParser(inputStream); - while (parser.hasNext()) { int eventType = parser.next(); if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic @@ -337,7 +330,7 @@ public class XmlImportUtilities extends TreeImporter { cellCount++; } } - + if (cellCount > 0) { project.rows.add(realRow); } @@ -363,9 +356,9 @@ public class XmlImportUtilities extends TreeImporter { project, columnGroup, composeName(parser.getPrefix(), parser.getLocalName())); - + thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); - + int attributeCount = parser.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { String text = parser.getAttributeValue(i).trim(); @@ -405,7 +398,7 @@ public class XmlImportUtilities extends TreeImporter { break; } } - + int nextRowIndex = thisColumnGroup.nextRowIndex; for (ImportColumn column2 : thisColumnGroup.columns.values()) { nextRowIndex = Math.max(nextRowIndex, column2.nextRowIndex); @@ -418,5 +411,5 @@ public class XmlImportUtilities extends TreeImporter { - + } diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java index 2117dc26a..737f331b0 100644 --- a/main/src/com/google/refine/importers/XmlImporter.java +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -10,7 +10,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.refine.ProjectMetadata; -import com.google.refine.importers.TreeImporter.ImportColumnGroup; +import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup; import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Project; @@ -34,7 +34,7 @@ public class XmlImporter implements StreamImporter { { byte[] buffer = new byte[BUFFER_SIZE]; int bytes_read = 0; - try { + try {//fill the buffer with data while (bytes_read < BUFFER_SIZE) { int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); if (c == -1) break; @@ -44,13 +44,11 @@ public class XmlImporter implements StreamImporter { } catch (IOException e) { throw new ImportException("Read error",e); } - - if (options.containsKey("importer-record-tag")) { - InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read); - TreeParser parser = null; + InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read); + TreeParser parser = new XmlParser(iStream); + if (options.containsKey("importer-record-tag")) { try{ - parser = new XmlParser(iStream); recordPath = XmlImportUtilities.detectPathFromTag( parser, options.getProperty("importer-record-tag")); @@ -58,21 +56,18 @@ public class XmlImporter implements StreamImporter { // silent // e.printStackTrace(); } - } else { - recordPath = XmlImportUtilities.detectRecordElement( - new ByteArrayInputStream(buffer, 0, bytes_read)); + recordPath = XmlImportUtilities.detectRecordElement(parser); } } if (recordPath == null) return; - - ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); - XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup); + ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); + XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup); XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); - + project.columnModel.update(); } diff --git a/main/src/com/google/refine/importers/parsers/JSONParser.java b/main/src/com/google/refine/importers/parsers/JSONParser.java new file mode 100644 index 000000000..1a0e6d7b2 --- /dev/null +++ b/main/src/com/google/refine/importers/parsers/JSONParser.java @@ -0,0 +1,86 @@ +package com.google.refine.importers.parsers; + +import java.io.IOException; +import java.io.InputStream; + +import javax.servlet.ServletException; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.JsonParser; + +public class JSONParser implements TreeParser{ + JsonFactory factory = new JsonFactory(); + JsonParser parser = null; + + public JSONParser(InputStream inputStream){ + try { + parser = factory.createJsonParser(inputStream); + } catch (JsonParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + @Override + public int getAttributeCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public String getAttributeLocalName(int index) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getAttributePrefix(int index) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getAttributeValue(int index) { + // TODO Auto-generated method stub + return null; + } + + @Override + public int getEventType() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public String getLocalName() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPrefix() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getText() { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean hasNext() throws ServletException { + // TODO Auto-generated method stub + return false; + } + + @Override + public int next() throws ServletException { + // TODO Auto-generated method stub + return 0; + } + +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java new file mode 100644 index 000000000..2c20b3ceb --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/JsonImporterTests.java @@ -0,0 +1,241 @@ +package com.google.refine.tests.importers; + +import static org.mockito.Mockito.mock; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectMetadata; +import com.google.refine.importers.JsonImporter; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.RefineTest; + +public class JsonImporterTests extends RefineTest { + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + + //dependencies + Project project = null; + Properties options = null; + ByteArrayInputStream inputStream = null; + + //System Under Test + JsonImporter SUT = null; + + + @BeforeMethod + public void SetUp(){ + SUT = new JsonImporter(); + project = new Project(); + options = mock(Properties.class); + } + + @AfterMethod + public void TearDown() throws IOException{ + SUT = null; + project = null; + options = null; + if (inputStream != null) inputStream.close(); + inputStream = null; + } + + @Test + public void canParseSample(){ + RunTest(getSample()); + + log(project); + assertProjectCreated(project, 4, 6); + + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertNotNull(row.getCell(2)); + Assert.assertEquals(row.getCell(2).value, "Author 1, The"); + } + + @Test + public void canParseSampleWithDuplicateNestedElements(){ + RunTest(getSampleWithDuplicateNestedElements()); + + log(project); + assertProjectCreated(project, 4, 12); + + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 5); + Assert.assertNotNull(row.getCell(2)); + Assert.assertEquals(row.getCell(2).value, "Author 1, The"); + Assert.assertEquals(project.rows.get(1).getCell(2).value, "Author 1, Another"); + } + + @Test + public void testCanParseLineBreak(){ + + RunTest(getSampleWithLineBreak()); + + log(project); + assertProjectCreated(project, 4, 6); + + Row row = project.rows.get(3); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 5); + Assert.assertNotNull(row.getCell(2)); + Assert.assertEquals(row.getCell(2).value, "With line\n break"); + } + + @Test + public void testElementsWithVaryingStructure(){ + RunTest(getSampleWithVaryingStructure()); + + log(project); + assertProjectCreated(project, 5, 6); + + Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); + + Row row0 = project.rows.get(0); + Assert.assertNotNull(row0); + Assert.assertEquals(row0.cells.size(),5); + + Row row5 = project.rows.get(5); + Assert.assertNotNull(row5); + Assert.assertEquals(row5.cells.size(),6); + } + + @Test + public void testElementWithNestedTree(){ + RunTest(getSampleWithTreeStructure()); + log(project); + assertProjectCreated(project, 5, 6); + + Assert.assertEquals(project.columnModel.columnGroups.size(),1); + Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); + Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); + Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); + Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); + } + + //------------helper methods--------------- + + public static String getTypicalElement(int id){ + return "{ \"id\" : " + id + "," + + "\"author\" : \"Author " + id + ", The\"," + + "\"title\" : \"Book title " + id + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "}"; + } + + public static String getElementWithDuplicateSubElement(int id){ + return "{ \"id\" : " + id + "," + + "\"authors\":[" + + "{\"author\" : \"Author " + id + ", The\"}," + + "{\"author\" : \"Author " + id + ", Another\"}" + + "]," + + "\"title\" : \"Book title " + id + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "}"; + } + + public static String getSample(){ + StringBuilder sb = new StringBuilder(); + sb.append("["); + for(int i = 1; i < 7; i++){ + sb.append(getTypicalElement(i)); + if(i < 6) + sb.append(","); + } + sb.append("]"); + return sb.toString(); + } + + public static String getSampleWithDuplicateNestedElements(){ + StringBuilder sb = new StringBuilder(); + sb.append("["); + for(int i = 1; i < 7; i++){ + sb.append(getElementWithDuplicateSubElement(i)); + if(i < 6) + sb.append(","); + } + sb.append("]"); + return sb.toString(); + + } + + public static String getSampleWithLineBreak(){ + StringBuilder sb = new StringBuilder(); + sb.append("["); + for(int i = 1; i < 4; i++){ + sb.append(getTypicalElement(i)); + sb.append(","); + } + sb.append("{\"id\" : 4," + + "\"author\" : \"With line\n break\"," + + "\"title\" : \"Book title 4\"" + + "\"publish_date\" : \"2010-05-26\"" + + "},"); + sb.append(getTypicalElement(5)); + sb.append(","); + sb.append(getTypicalElement(6)); + sb.append("]"); + return sb.toString(); + } + + public static String getSampleWithVaryingStructure(){ + StringBuilder sb = new StringBuilder(); + sb.append("["); + for(int i = 1; i < 6; i++){ + sb.append(getTypicalElement(i)); + } + sb.append("{\"id\" : 6," + + "\"author\" : \"Author 6, The\"," + + "\"title\" : \"Book title 6\"," + + "\"genre\" : \"New element not seen in other records\"," + + "\"publish_date\" : \"2010-05-26\"," + + "}"); + sb.append("]"); + return sb.toString(); + } + + public static String getSampleWithTreeStructure(){ + StringBuilder sb = new StringBuilder(); + sb.append("["); + for(int i = 1; i < 7; i++){ + sb.append("{\"id\" : " + i + "," + + "\"author\" : {\"author-name\" : \"Author " + i + ", The\"," + + "\"author-dob\" : \"1950-0" + i + "-15\"}," + + "\"title\" : \"Book title " + i + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "},"); + if(i < 6) + sb.append(","); + } + sb.append("]"); + return sb.toString(); + } + + private void RunTest(String testString){ + try { + inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) ); + } catch (UnsupportedEncodingException e1) { + Assert.fail(); + } + + try { + SUT.read(inputStream, project, new ProjectMetadata(), options); + } catch (Exception e) { + Assert.fail(); + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index 2eab7c5f9..a78fe4208 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -15,9 +15,9 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; -import com.google.refine.importers.TreeImporter.ImportColumn; -import com.google.refine.importers.TreeImporter.ImportColumnGroup; -import com.google.refine.importers.TreeImporter.ImportRecord; +import com.google.refine.importers.TreeImportUtilities.ImportColumn; +import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup; +import com.google.refine.importers.TreeImportUtilities.ImportRecord; import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Project; @@ -78,7 +78,7 @@ public class XmlImportUtilitiesTests extends RefineTest { public void detectPathFromTagWithNestedElement(){ loadXml("author1genre1"); String tag = "book"; - + createParser(); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); @@ -145,7 +145,7 @@ public class XmlImportUtilitiesTests extends RefineTest { public void detectRecordElementRegressionTest(){ loadSampleXml(); - String[] path = XmlImportUtilitiesStub.detectRecordElement(inputStream); + String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream)); Assert.assertNotNull(path); Assert.assertEquals(path.length, 2); Assert.assertEquals(path[0], "library"); @@ -157,7 +157,7 @@ public class XmlImportUtilitiesTests extends RefineTest { loadSampleXml(); String[] recordPath = new String[]{"library","book"}; - XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup ); + XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup ); log(project); assertProjectCreated(project, 0, 6); @@ -177,7 +177,7 @@ public class XmlImportUtilitiesTests extends RefineTest { loadXml(XmlImporterTests.getSampleWithVaryingStructure()); String[] recordPath = new String[]{"library", "book"}; - XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup); + XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup); log(project); assertProjectCreated(project, 0, 6); @@ -277,13 +277,13 @@ public class XmlImportUtilitiesTests extends RefineTest { log(project); Assert.assertNotNull(project.rows); Assert.assertEquals(project.rows.size(), 2); - + Row row = project.rows.get(0); Assert.assertNotNull(row); Assert.assertEquals(row.cells.size(), 4); Assert.assertNotNull(row.getCell(2)); Assert.assertEquals(row.getCell(2).value, "author1"); - + row = project.rows.get(1); Assert.assertEquals(row.getCell(2).value, "author2"); }