Two more XmlImport tests now work. Some documentation stubs were added to XmlImportUtilities.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@967 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
8a87ddaf3f
commit
a671551289
@ -27,11 +27,20 @@ import com.metaweb.gridworks.model.Row;
|
||||
public class XmlImportUtilities {
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
|
||||
|
||||
/**
|
||||
* An element which holds sub-elements we
|
||||
* shall import as records
|
||||
*/
|
||||
static protected class RecordElementCandidate {
|
||||
String[] path;
|
||||
int count;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
static protected abstract class ImportVertical {
|
||||
public String name = "";
|
||||
public int nonBlankCount;
|
||||
@ -39,6 +48,9 @@ public class XmlImportUtilities {
|
||||
abstract void tabulate();
|
||||
}
|
||||
|
||||
/**
|
||||
* A column group describes a branch in tree structured data
|
||||
*/
|
||||
static public class ImportColumnGroup extends ImportVertical {
|
||||
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
|
||||
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
|
||||
@ -56,19 +68,32 @@ public class XmlImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A column is used to describe a branch-terminating element in a tree structure
|
||||
*
|
||||
*/
|
||||
static public class ImportColumn extends ImportVertical {
|
||||
public int cellIndex;
|
||||
public boolean blankOnFirstRow;
|
||||
|
||||
public ImportColumn(){}
|
||||
public ImportColumn(String name){ //required for testing
|
||||
super.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
void tabulate() {
|
||||
// already done the tabulation elsewhere
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A record describes a data element in a tree-structure
|
||||
*
|
||||
*/
|
||||
static public class ImportRecord {
|
||||
List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
||||
List<Integer> columnEmptyRowIndices = new ArrayList<Integer>();
|
||||
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
||||
public List<Integer> columnEmptyRowIndices = new ArrayList<Integer>();
|
||||
}
|
||||
|
||||
static public String[] detectPathFromTag(InputStream inputStream, String tag) {
|
||||
@ -96,6 +121,19 @@ public class XmlImportUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
|
||||
*
|
||||
* @param parser
|
||||
* @param tag
|
||||
* The Xml element name (can be qualified) to search for
|
||||
* @return
|
||||
* If the tag is found, a list of strings is returned.
|
||||
* If the tag is at the top level, the tag will be the only item in the list.
|
||||
* If the tag is nested beneath the top level, the list is filled with the hierarchy with the tag name at the last index.
|
||||
* Null if the tag is not found.
|
||||
* @throws XMLStreamException
|
||||
*/
|
||||
static protected List<String> detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException {
|
||||
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT)
|
||||
parser.next();
|
||||
@ -123,6 +161,15 @@ public class XmlImportUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Seeks recurring XML elements in an InputStream
|
||||
* which are likely candidates for being data records
|
||||
* @param inputStream
|
||||
* The XML data as a stream
|
||||
* @return
|
||||
* The path to the most numerous of the possible candidates.
|
||||
* null if no candidates were found (fewer than 6 recurrences)
|
||||
*/
|
||||
static public String[] detectRecordElement(InputStream inputStream) {
|
||||
logger.trace("detectRecordElement(inputStream)");
|
||||
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
||||
@ -320,6 +367,15 @@ public class XmlImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param project
|
||||
* @param parser
|
||||
* @param recordPath
|
||||
* @param pathIndex
|
||||
* @param rootColumnGroup
|
||||
* @throws XMLStreamException
|
||||
*/
|
||||
static protected void findRecord(
|
||||
Project project,
|
||||
XMLStreamReader parser,
|
||||
@ -361,6 +417,14 @@ public class XmlImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* processRecord parses XML for a single element and its sub-elements,
|
||||
* adding the parsed data as a row to the project
|
||||
* @param project
|
||||
* @param parser
|
||||
* @param rootColumnGroup
|
||||
* @throws XMLStreamException
|
||||
*/
|
||||
static protected void processRecord(
|
||||
Project project,
|
||||
XMLStreamReader parser,
|
||||
@ -390,6 +454,14 @@ public class XmlImportUtilities {
|
||||
return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param project
|
||||
* @param parser
|
||||
* @param columnGroup
|
||||
* @param record
|
||||
* @throws XMLStreamException
|
||||
*/
|
||||
static protected void processSubRecord(
|
||||
Project project,
|
||||
XMLStreamReader parser,
|
||||
@ -495,7 +567,7 @@ public class XmlImportUtilities {
|
||||
row.add(null);
|
||||
}
|
||||
|
||||
logger.trace("Adding cell with value : " + value + " to row : " + rowIndex + " at cell index : " + (cellIndex-1));
|
||||
logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1));
|
||||
|
||||
row.set(cellIndex-1, new Cell(value, null));
|
||||
|
||||
|
@ -27,12 +27,12 @@ import com.metaweb.gridworks.tests.GridworksTest;
|
||||
|
||||
|
||||
public class XmlImportUtilitiesTests extends GridworksTest {
|
||||
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
}
|
||||
|
||||
|
||||
//dependencies
|
||||
Project project;
|
||||
XMLStreamReader parser;
|
||||
@ -157,26 +157,51 @@ public class XmlImportUtilitiesTests extends GridworksTest {
|
||||
assertProjectCreated(project, 0, 6);
|
||||
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
//TODO
|
||||
|
||||
Assert.assertEquals(columnGroup.subgroups.size(), 1);
|
||||
Assert.assertNotNull(columnGroup.subgroups.get("book"));
|
||||
Assert.assertEquals(columnGroup.subgroups.get("book").subgroups.size(), 3);
|
||||
Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("author"));
|
||||
Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("title"));
|
||||
Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("publish_date"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createColumnsFromImportTest() {
|
||||
public void importXmlWithVaryingStructureTest(){
|
||||
loadXml(XmlImporterTests.getSampleWithVaryingStructure());
|
||||
|
||||
String[] recordPath = new String[]{"library", "book"};
|
||||
XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup);
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
Assert.assertEquals(project.rows.get(5).cells.size(), 5);
|
||||
|
||||
Assert.assertEquals(columnGroup.subgroups.size(), 1);
|
||||
Assert.assertEquals(columnGroup.name, "");
|
||||
ImportColumnGroup book = columnGroup.subgroups.get("book");
|
||||
Assert.assertNotNull(book);
|
||||
Assert.assertEquals(book.columns.size(), 1);
|
||||
Assert.assertEquals(book.subgroups.size(), 4);
|
||||
Assert.assertNotNull(book.subgroups.get("author"));
|
||||
Assert.assertEquals(book.subgroups.get("author").columns.size(), 1);
|
||||
Assert.assertNotNull(book.subgroups.get("title"));
|
||||
Assert.assertNotNull(book.subgroups.get("publish_date"));
|
||||
Assert.assertNotNull(book.subgroups.get("genre"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createColumnsFromImportTest(){
|
||||
|
||||
ImportColumnGroup columnGroup = new ImportColumnGroup();
|
||||
ImportColumn ic1 = new ImportColumn();
|
||||
ic1.name = "hello";
|
||||
ImportColumn ic2 = new ImportColumn();
|
||||
ic2.name = "world";
|
||||
ImportColumnGroup subGroup = new ImportColumnGroup();
|
||||
ImportColumn ic3 = new ImportColumn();
|
||||
ic3.name = "foo";
|
||||
ImportColumn ic4 = new ImportColumn();
|
||||
ic4.name = "bar";
|
||||
subGroup.columns.put("c", ic3);
|
||||
subGroup.columns.put("d", ic4);
|
||||
columnGroup.columns.put("a", ic1);
|
||||
columnGroup.columns.put("b", ic2);
|
||||
columnGroup.columns.put("a", new ImportColumn("hello"));
|
||||
columnGroup.columns.put("b", new ImportColumn("world"));
|
||||
subGroup.columns.put("c", new ImportColumn("foo"));
|
||||
subGroup.columns.put("d", new ImportColumn("bar"));
|
||||
columnGroup.subgroups.put("e", subGroup);
|
||||
|
||||
XmlImportUtilitiesStub.createColumnsFromImport(project, columnGroup);
|
||||
log(project);
|
||||
assertProjectCreated(project, 4, 0);
|
||||
@ -206,7 +231,7 @@ public class XmlImportUtilitiesTests extends GridworksTest {
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
|
||||
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
//TODO
|
||||
}
|
||||
@ -279,7 +304,7 @@ public class XmlImportUtilitiesTests extends GridworksTest {
|
||||
}
|
||||
|
||||
|
||||
@Test(groups={"broken"})
|
||||
@Test
|
||||
public void processSubRecordTest(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
@ -291,20 +316,43 @@ public class XmlImportUtilitiesTests extends GridworksTest {
|
||||
Assert.fail();
|
||||
}
|
||||
log(project);
|
||||
Assert.fail();
|
||||
//TODO need to verify 'record' was set correctly which we can't do as ImportRecord is an internal class
|
||||
|
||||
Assert.assertEquals(columnGroup.subgroups.size(), 1);
|
||||
Assert.assertEquals(columnGroup.name, "");
|
||||
|
||||
Assert.assertNotNull(columnGroup.subgroups.get("library"));
|
||||
Assert.assertEquals(columnGroup.subgroups.get("library").subgroups.size(), 1);
|
||||
|
||||
ImportColumnGroup book = columnGroup.subgroups.get("library").subgroups.get("book");
|
||||
Assert.assertNotNull(book);
|
||||
Assert.assertEquals(book.subgroups.size(), 2);
|
||||
Assert.assertNotNull(book.subgroups.get("author"));
|
||||
Assert.assertNotNull(book.subgroups.get("genre"));
|
||||
|
||||
//TODO check record
|
||||
}
|
||||
|
||||
@Test(groups={"broken"})
|
||||
@Test
|
||||
public void addCellTest(){
|
||||
String columnLocalName = "author";
|
||||
String text = "Author1, The";
|
||||
int commonStartingRowIndex = 0;
|
||||
project.rows.add(new Row(0));
|
||||
SUT.addCellWrapper(project, columnGroup, record, columnLocalName, text, commonStartingRowIndex);
|
||||
|
||||
Assert.fail();
|
||||
//TODO need to verify 'record' was set correctly which we can't do as ImportRecord is an internal class
|
||||
Assert.assertNotNull(record);
|
||||
Assert.assertNotNull(record.rows);
|
||||
Assert.assertNotNull(record.columnEmptyRowIndices);
|
||||
Assert.assertEquals(record.rows.size(), 1);
|
||||
Assert.assertEquals(record.columnEmptyRowIndices.size(), 2);
|
||||
Assert.assertNotNull(record.rows.get(0));
|
||||
Assert.assertNotNull(record.columnEmptyRowIndices.get(0));
|
||||
Assert.assertNotNull(record.columnEmptyRowIndices.get(1));
|
||||
Assert.assertEquals(record.rows.get(0).size(), 2);
|
||||
Assert.assertNotNull(record.rows.get(0).get(0));
|
||||
Assert.assertEquals(record.rows.get(0).get(0).value, "Author1, The");
|
||||
Assert.assertEquals(record.columnEmptyRowIndices.get(0).intValue(),0);
|
||||
Assert.assertEquals(record.columnEmptyRowIndices.get(1).intValue(),1);
|
||||
|
||||
}
|
||||
|
||||
//----------------helpers-------------
|
||||
|
@ -95,24 +95,27 @@ public class XmlImporterTests extends GridworksTest {
|
||||
Assert.assertEquals(row.getCell(1).value, "With line\n break");
|
||||
}
|
||||
|
||||
@Test(groups={"broken"})
|
||||
@Test
|
||||
public void testElementsWithVaryingStructure(){
|
||||
RunTest(getSampleWithVaryingStructure());
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 5, 6);
|
||||
|
||||
Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre");
|
||||
|
||||
Row row0 = project.rows.get(0);
|
||||
Assert.assertNotNull(row0);
|
||||
Assert.assertEquals(row0.cells.size(),6);
|
||||
Assert.assertEquals(row0.cells.size(),4);
|
||||
|
||||
Row row5 = project.rows.get(5);
|
||||
Assert.assertNotNull(row5);
|
||||
Assert.assertEquals(row5.cells.size(),6);
|
||||
Assert.assertEquals(row5.cells.size(),5);
|
||||
}
|
||||
|
||||
@Test(groups={"broken"})
|
||||
@Test
|
||||
public void testElementWithNestedTree(){
|
||||
RunTest(getSampleWithTreeStructure());
|
||||
log(project);
|
||||
assertProjectCreated(project, 5, 6);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user