diff --git a/main/src/com/metaweb/gridworks/importers/XmlImportUtilities.java b/main/src/com/metaweb/gridworks/importers/XmlImportUtilities.java index 78ab92ebe..3a7a3bf8e 100644 --- a/main/src/com/metaweb/gridworks/importers/XmlImportUtilities.java +++ b/main/src/com/metaweb/gridworks/importers/XmlImportUtilities.java @@ -27,11 +27,20 @@ import com.metaweb.gridworks.model.Row; public class XmlImportUtilities { final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); + /** + * An element which holds sub-elements we + * shall import as records + */ static protected class RecordElementCandidate { String[] path; int count; } + /** + * + * + * + */ static protected abstract class ImportVertical { public String name = ""; public int nonBlankCount; @@ -39,6 +48,9 @@ public class XmlImportUtilities { abstract void tabulate(); } + /** + * A column group describes a branch in tree structured data + */ static public class ImportColumnGroup extends ImportVertical { public Map subgroups = new HashMap(); public Map columns = new HashMap(); @@ -56,19 +68,32 @@ public class XmlImportUtilities { } } + /** + * A column is used to describe a branch-terminating element in a tree structure + * + */ static public class ImportColumn extends ImportVertical { public int cellIndex; public boolean blankOnFirstRow; + public ImportColumn(){} + public ImportColumn(String name){ //required for testing + super.name = name; + } + @Override void tabulate() { // already done the tabulation elsewhere } } + /** + * A record describes a data element in a tree-structure + * + */ static public class ImportRecord { - List> rows = new LinkedList>(); - List columnEmptyRowIndices = new ArrayList(); + public List> rows = new LinkedList>(); + public List columnEmptyRowIndices = new ArrayList(); } static public String[] detectPathFromTag(InputStream inputStream, String tag) { @@ -96,6 +121,19 @@ public class XmlImportUtilities { return null; } + /** + * Looks for an element with the given 
tag name in the Xml being parsed, returning the path hierarchy to reach it. + * + * @param parser + * @param tag + * The Xml element name (can be qualified) to search for + * @return + * If the tag is found, an array of strings is returned. + * If the tag is at the top level, the tag will be the only item in the array. + * If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index + * Null if the tag is not found. + * @throws XMLStreamException + */ static protected List detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException { if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) parser.next(); @@ -123,6 +161,15 @@ public class XmlImportUtilities { return null; } + /** + * Seeks recurring XML elements in an InputStream + * which are likely candidates for being data records + * @param inputStream + * The XML data as a stream + * @return + * The path to the most numerous of the possible candidates. + * null if no candidates were found (fewer than 6 recurrences) + */ static public String[] detectRecordElement(InputStream inputStream) { logger.trace("detectRecordElement(inputStream)"); List candidates = new ArrayList(); @@ -320,6 +367,15 @@ public class XmlImportUtilities { } } + /** + * + * @param project + * @param parser + * @param recordPath + * @param pathIndex + * @param rootColumnGroup + * @throws XMLStreamException + */ static protected void findRecord( Project project, XMLStreamReader parser, @@ -361,6 +417,14 @@ public class XmlImportUtilities { } } + /** + * processRecord parses XML for a single element and its sub-elements, + * adding the parsed data as a row to the project + * @param project + * @param parser + * @param rootColumnGroup + * @throws XMLStreamException + */ static protected void processRecord( Project project, XMLStreamReader parser, @@ -390,6 +454,14 @@ public class XmlImportUtilities { return prefix != null && prefix.length() > 0 ? 
(prefix + ":" + localName) : localName; } + /** + * + * @param project + * @param parser + * @param columnGroup + * @param record + * @throws XMLStreamException + */ static protected void processSubRecord( Project project, XMLStreamReader parser, @@ -495,7 +567,7 @@ public class XmlImportUtilities { row.add(null); } - logger.trace("Adding cell with value : " + value + " to row : " + rowIndex + " at cell index : " + (cellIndex-1)); + logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1)); row.set(cellIndex-1, new Cell(value, null)); diff --git a/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImportUtilitiesTests.java index ea251e673..d14f5d21e 100644 --- a/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImportUtilitiesTests.java @@ -27,12 +27,12 @@ import com.metaweb.gridworks.tests.GridworksTest; public class XmlImportUtilitiesTests extends GridworksTest { - + @BeforeTest public void init() { logger = LoggerFactory.getLogger(this.getClass()); } - + //dependencies Project project; XMLStreamReader parser; @@ -157,26 +157,51 @@ public class XmlImportUtilitiesTests extends GridworksTest { assertProjectCreated(project, 0, 6); Assert.assertEquals(project.rows.get(0).cells.size(), 4); - //TODO + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertNotNull(columnGroup.subgroups.get("book")); + Assert.assertEquals(columnGroup.subgroups.get("book").subgroups.size(), 3); + Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("author")); + Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("title")); + Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("publish_date")); } @Test - public void createColumnsFromImportTest() { + public 
void importXmlWithVaryingStructureTest(){ + loadXml(XmlImporterTests.getSampleWithVaryingStructure()); + + String[] recordPath = new String[]{"library", "book"}; + XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup); + + log(project); + assertProjectCreated(project, 0, 6); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(5).cells.size(), 5); + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertEquals(columnGroup.name, ""); + ImportColumnGroup book = columnGroup.subgroups.get("book"); + Assert.assertNotNull(book); + Assert.assertEquals(book.columns.size(), 1); + Assert.assertEquals(book.subgroups.size(), 4); + Assert.assertNotNull(book.subgroups.get("author")); + Assert.assertEquals(book.subgroups.get("author").columns.size(), 1); + Assert.assertNotNull(book.subgroups.get("title")); + Assert.assertNotNull(book.subgroups.get("publish_date")); + Assert.assertNotNull(book.subgroups.get("genre")); + } + + @Test + public void createColumnsFromImportTest(){ + ImportColumnGroup columnGroup = new ImportColumnGroup(); - ImportColumn ic1 = new ImportColumn(); - ic1.name = "hello"; - ImportColumn ic2 = new ImportColumn(); - ic2.name = "world"; ImportColumnGroup subGroup = new ImportColumnGroup(); - ImportColumn ic3 = new ImportColumn(); - ic3.name = "foo"; - ImportColumn ic4 = new ImportColumn(); - ic4.name = "bar"; - subGroup.columns.put("c", ic3); - subGroup.columns.put("d", ic4); - columnGroup.columns.put("a", ic1); - columnGroup.columns.put("b", ic2); + columnGroup.columns.put("a", new ImportColumn("hello")); + columnGroup.columns.put("b", new ImportColumn("world")); + subGroup.columns.put("c", new ImportColumn("foo")); + subGroup.columns.put("d", new ImportColumn("bar")); columnGroup.subgroups.put("e", subGroup); + XmlImportUtilitiesStub.createColumnsFromImport(project, columnGroup); log(project); assertProjectCreated(project, 4, 0); @@ -206,7 +231,7 @@ public class 
XmlImportUtilitiesTests extends GridworksTest { log(project); assertProjectCreated(project, 0, 6); - + Assert.assertEquals(project.rows.get(0).cells.size(), 4); //TODO } @@ -279,7 +304,7 @@ public class XmlImportUtilitiesTests extends GridworksTest { } - @Test(groups={"broken"}) + @Test public void processSubRecordTest(){ loadXml("author1genre1"); createParser(); @@ -291,20 +316,43 @@ public class XmlImportUtilitiesTests extends GridworksTest { Assert.fail(); } log(project); - Assert.fail(); - //TODO need to verify 'record' was set correctly which we can't do as ImportRecord is an internal class + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertEquals(columnGroup.name, ""); + + Assert.assertNotNull(columnGroup.subgroups.get("library")); + Assert.assertEquals(columnGroup.subgroups.get("library").subgroups.size(), 1); + + ImportColumnGroup book = columnGroup.subgroups.get("library").subgroups.get("book"); + Assert.assertNotNull(book); + Assert.assertEquals(book.subgroups.size(), 2); + Assert.assertNotNull(book.subgroups.get("author")); + Assert.assertNotNull(book.subgroups.get("genre")); + + //TODO check record } - @Test(groups={"broken"}) + @Test public void addCellTest(){ String columnLocalName = "author"; String text = "Author1, The"; int commonStartingRowIndex = 0; - project.rows.add(new Row(0)); SUT.addCellWrapper(project, columnGroup, record, columnLocalName, text, commonStartingRowIndex); - Assert.fail(); - //TODO need to verify 'record' was set correctly which we can't do as ImportRecord is an internal class + Assert.assertNotNull(record); + Assert.assertNotNull(record.rows); + Assert.assertNotNull(record.columnEmptyRowIndices); + Assert.assertEquals(record.rows.size(), 1); + Assert.assertEquals(record.columnEmptyRowIndices.size(), 2); + Assert.assertNotNull(record.rows.get(0)); + Assert.assertNotNull(record.columnEmptyRowIndices.get(0)); + Assert.assertNotNull(record.columnEmptyRowIndices.get(1)); + 
Assert.assertEquals(record.rows.get(0).size(), 2); + Assert.assertNotNull(record.rows.get(0).get(0)); + Assert.assertEquals(record.rows.get(0).get(0).value, "Author1, The"); + Assert.assertEquals(record.columnEmptyRowIndices.get(0).intValue(),0); + Assert.assertEquals(record.columnEmptyRowIndices.get(1).intValue(),1); + } //----------------helpers------------- diff --git a/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImporterTests.java b/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImporterTests.java index 1dc3146e5..60fdd06c6 100644 --- a/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImporterTests.java +++ b/main/tests/server/src/com/metaweb/gridworks/tests/importers/XmlImporterTests.java @@ -95,24 +95,27 @@ public class XmlImporterTests extends GridworksTest { Assert.assertEquals(row.getCell(1).value, "With line\n break"); } - @Test(groups={"broken"}) + @Test public void testElementsWithVaryingStructure(){ RunTest(getSampleWithVaryingStructure()); log(project); assertProjectCreated(project, 5, 6); + Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); + Row row0 = project.rows.get(0); Assert.assertNotNull(row0); - Assert.assertEquals(row0.cells.size(),6); + Assert.assertEquals(row0.cells.size(),4); Row row5 = project.rows.get(5); Assert.assertNotNull(row5); - Assert.assertEquals(row5.cells.size(),6); + Assert.assertEquals(row5.cells.size(),5); } - @Test(groups={"broken"}) + @Test public void testElementWithNestedTree(){ + RunTest(getSampleWithTreeStructure()); log(project); assertProjectCreated(project, 5, 6);