XmlImporter is now partially unit tested. One test is marked as broken: the case where record elements contain varying numbers of nested elements. (This covers Issue 61, which is still open at the time of this commit.)

XmlImportUtilities now logs a message when no RecordElementCandidate is found (i.e. the XML contains too few similar elements).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@862 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-05-26 19:22:38 +00:00
parent 1c47ff476b
commit 34cb1c4d07
3 changed files with 332 additions and 110 deletions

View File

@ -16,28 +16,33 @@ import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader; import javax.xml.stream.XMLStreamReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column; import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.Row;
public class XmlImportUtilities { public class XmlImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
static protected class RecordElementCandidate { static protected class RecordElementCandidate {
String[] path; String[] path;
int count; int count;
} }
static protected abstract class ImportVertical { static protected abstract class ImportVertical {
public String name = ""; public String name = "";
public int nonBlankCount; public int nonBlankCount;
abstract void tabulate(); abstract void tabulate();
} }
static public class ImportColumnGroup extends ImportVertical { static public class ImportColumnGroup extends ImportVertical {
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>(); public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>(); public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
@Override @Override
void tabulate() { void tabulate() {
for (ImportColumn c : columns.values()) { for (ImportColumn c : columns.values()) {
@ -50,7 +55,7 @@ public class XmlImportUtilities {
} }
} }
} }
static public class ImportColumn extends ImportVertical { static public class ImportColumn extends ImportVertical {
public int cellIndex; public int cellIndex;
public boolean blankOnFirstRow; public boolean blankOnFirstRow;
@ -60,27 +65,27 @@ public class XmlImportUtilities {
// already done the tabulation elsewhere // already done the tabulation elsewhere
} }
} }
static public class ImportRecord { static public class ImportRecord {
List<List<Cell>> rows = new LinkedList<List<Cell>>(); List<List<Cell>> rows = new LinkedList<List<Cell>>();
List<Integer> columnEmptyRowIndices = new ArrayList<Integer>(); List<Integer> columnEmptyRowIndices = new ArrayList<Integer>();
} }
static public String[] detectPathFromTag(InputStream inputStream, String tag) { static public String[] detectPathFromTag(InputStream inputStream, String tag) {
//List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>(); //List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try { try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag); List<String> path = detectRecordElement(parser, tag);
if (path != null) { if (path != null) {
String[] path2 = new String[path.size()]; String[] path2 = new String[path.size()];
path.toArray(path2); path.toArray(path2);
return path2; return path2;
} }
} }
@ -89,20 +94,20 @@ public class XmlImportUtilities {
// silent // silent
// e.printStackTrace(); // e.printStackTrace();
} }
return null; return null;
} }
static protected List<String> detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException { static protected List<String> detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException {
String localName = parser.getLocalName(); String localName = parser.getLocalName();
String fullName = composeName(parser.getPrefix(), localName); String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) {
List<String> path = new LinkedList<String>(); List<String> path = new LinkedList<String>();
path.add(localName); path.add(localName);
return path; return path;
} }
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.END_ELEMENT) { if (eventType == XMLStreamConstants.END_ELEMENT) {
@ -117,21 +122,22 @@ public class XmlImportUtilities {
} }
return null; return null;
} }
static public String[] detectRecordElement(InputStream inputStream) { static public String[] detectRecordElement(InputStream inputStream) {
logger.trace("detectRecordElement(inputStream)");
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try { try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {
RecordElementCandidate candidate = RecordElementCandidate candidate =
detectRecordElement( detectRecordElement(
parser, parser,
new String[] { parser.getLocalName() }); new String[] { parser.getLocalName() });
if (candidate != null) { if (candidate != null) {
candidates.add(candidate); candidates.add(candidate);
} }
@ -141,22 +147,24 @@ public class XmlImportUtilities {
// silent // silent
// e.printStackTrace(); // e.printStackTrace();
} }
if (candidates.size() > 0) { if (candidates.size() > 0) {
sortRecordElementCandidates(candidates); sortRecordElementCandidates(candidates);
return candidates.get(0).path; return candidates.get(0).path;
} }
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
return null; return null;
} }
static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) { static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) {
logger.trace("detectRecordElement(XMLStreamReader, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>(); Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
int textNodeCount = 0; int textNodeCount = 0;
int childElementNodeCount = 0; int childElementNodeCount = 0;
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
@ -168,18 +176,18 @@ public class XmlImportUtilities {
} }
} else if (eventType == XMLStreamConstants.START_ELEMENT) { } else if (eventType == XMLStreamConstants.START_ELEMENT) {
childElementNodeCount++; childElementNodeCount++;
String tagName = parser.getLocalName(); String tagName = parser.getLocalName();
immediateChildCandidateMap.put( immediateChildCandidateMap.put(
tagName, tagName,
immediateChildCandidateMap.containsKey(tagName) ? immediateChildCandidateMap.containsKey(tagName) ?
immediateChildCandidateMap.get(tagName) + 1 : 1); immediateChildCandidateMap.get(tagName) + 1 : 1);
String[] path2 = new String[path.length + 1]; String[] path2 = new String[path.length + 1];
System.arraycopy(path, 0, path2, 0, path.length); System.arraycopy(path, 0, path2, 0, path.length);
path2[path.length] = tagName; path2[path.length] = tagName;
RecordElementCandidate c = detectRecordElement(parser, path2); RecordElementCandidate c = detectRecordElement(parser, path2);
if (c != null) { if (c != null) {
descendantCandidates.add(c); descendantCandidates.add(c);
@ -190,12 +198,12 @@ public class XmlImportUtilities {
// silent // silent
// e.printStackTrace(); // e.printStackTrace();
} }
if (textNodeCount > 0 && childElementNodeCount > 0) { if (textNodeCount > 0 && childElementNodeCount > 0) {
// This is a mixed element // This is a mixed element
return null; return null;
} }
if (immediateChildCandidateMap.size() > 0) { if (immediateChildCandidateMap.size() > 0) {
List<RecordElementCandidate> immediateChildCandidates = new ArrayList<RecordElementCandidate>(immediateChildCandidateMap.size()); List<RecordElementCandidate> immediateChildCandidates = new ArrayList<RecordElementCandidate>(immediateChildCandidateMap.size());
for (Entry<String, Integer> entry : immediateChildCandidateMap.entrySet()) { for (Entry<String, Integer> entry : immediateChildCandidateMap.entrySet()) {
@ -204,38 +212,39 @@ public class XmlImportUtilities {
String[] path2 = new String[path.length + 1]; String[] path2 = new String[path.length + 1];
System.arraycopy(path, 0, path2, 0, path.length); System.arraycopy(path, 0, path2, 0, path.length);
path2[path.length] = entry.getKey(); path2[path.length] = entry.getKey();
RecordElementCandidate candidate = new RecordElementCandidate(); RecordElementCandidate candidate = new RecordElementCandidate();
candidate.path = path2; candidate.path = path2;
candidate.count = count; candidate.count = count;
immediateChildCandidates.add(candidate); immediateChildCandidates.add(candidate);
} }
} }
if (immediateChildCandidates.size() > 0 && immediateChildCandidates.size() < 5) { if (immediateChildCandidates.size() > 0 && immediateChildCandidates.size() < 5) {
// There are some promising immediate child elements, but not many, // There are some promising immediate child elements, but not many,
// that can serve as record elements. // that can serve as record elements.
sortRecordElementCandidates(immediateChildCandidates); sortRecordElementCandidates(immediateChildCandidates);
RecordElementCandidate ourCandidate = immediateChildCandidates.get(0); RecordElementCandidate ourCandidate = immediateChildCandidates.get(0);
logger.trace("ourCandidate.count : " + ourCandidate.count + "; immediateChildCandidates.size() : " + immediateChildCandidates.size());
if (ourCandidate.count / immediateChildCandidates.size() > 5) { if (ourCandidate.count / immediateChildCandidates.size() > 5) {
return ourCandidate; return ourCandidate;
} }
descendantCandidates.add(ourCandidate); descendantCandidates.add(ourCandidate);
} }
} }
if (descendantCandidates.size() > 0) { if (descendantCandidates.size() > 0) {
sortRecordElementCandidates(descendantCandidates); sortRecordElementCandidates(descendantCandidates);
RecordElementCandidate candidate = descendantCandidates.get(0); RecordElementCandidate candidate = descendantCandidates.get(0);
if (candidate.count / descendantCandidates.size() > 5) { if (candidate.count / descendantCandidates.size() > 5) {
return candidate; return candidate;
} }
} }
return null; return null;
} }
@ -246,16 +255,16 @@ public class XmlImportUtilities {
} }
}); });
} }
static public void importXml( static public void importXml(
InputStream inputStream, InputStream inputStream,
Project project, Project project,
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) { ) {
try { try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {
@ -266,32 +275,32 @@ public class XmlImportUtilities {
// silent // silent
} }
} }
static public void createColumnsFromImport( static public void createColumnsFromImport(
Project project, Project project,
ImportColumnGroup columnGroup ImportColumnGroup columnGroup
) { ) {
int startColumnIndex = project.columnModel.columns.size(); int startColumnIndex = project.columnModel.columns.size();
List<ImportColumn> columns = new ArrayList<ImportColumn>(columnGroup.columns.values()); List<ImportColumn> columns = new ArrayList<ImportColumn>(columnGroup.columns.values());
Collections.sort(columns, new Comparator<ImportColumn>() { Collections.sort(columns, new Comparator<ImportColumn>() {
public int compare(ImportColumn o1, ImportColumn o2) { public int compare(ImportColumn o1, ImportColumn o2) {
if (o1.blankOnFirstRow != o2.blankOnFirstRow) { if (o1.blankOnFirstRow != o2.blankOnFirstRow) {
return o1.blankOnFirstRow ? 1 : -1; return o1.blankOnFirstRow ? 1 : -1;
} }
int c = o2.nonBlankCount - o1.nonBlankCount; int c = o2.nonBlankCount - o1.nonBlankCount;
return c != 0 ? c : (o1.name.length() - o2.name.length()); return c != 0 ? c : (o1.name.length() - o2.name.length());
} }
}); });
for (int i = 0; i < columns.size(); i++) { for (int i = 0; i < columns.size(); i++) {
ImportColumn c = columns.get(i); ImportColumn c = columns.get(i);
Column column = new com.metaweb.gridworks.model.Column(c.cellIndex, c.name); Column column = new com.metaweb.gridworks.model.Column(c.cellIndex, c.name);
project.columnModel.columns.add(column); project.columnModel.columns.add(column);
} }
List<ImportColumnGroup> subgroups = new ArrayList<ImportColumnGroup>(columnGroup.subgroups.values()); List<ImportColumnGroup> subgroups = new ArrayList<ImportColumnGroup>(columnGroup.subgroups.values());
Collections.sort(subgroups, new Comparator<ImportColumnGroup>() { Collections.sort(subgroups, new Comparator<ImportColumnGroup>() {
public int compare(ImportColumnGroup o1, ImportColumnGroup o2) { public int compare(ImportColumnGroup o1, ImportColumnGroup o2) {
@ -299,20 +308,20 @@ public class XmlImportUtilities {
return c != 0 ? c : (o1.name.length() - o2.name.length()); return c != 0 ? c : (o1.name.length() - o2.name.length());
} }
}); });
for (ImportColumnGroup g : subgroups) { for (ImportColumnGroup g : subgroups) {
createColumnsFromImport(project, g); createColumnsFromImport(project, g);
} }
int endColumnIndex = project.columnModel.columns.size(); int endColumnIndex = project.columnModel.columns.size();
int span = endColumnIndex - startColumnIndex; int span = endColumnIndex - startColumnIndex;
if (span > 1 && span < project.columnModel.columns.size()) { if (span > 1 && span < project.columnModel.columns.size()) {
project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex); project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex);
} }
} }
static protected void findRecord( static protected void findRecord(
Project project, Project project,
XMLStreamReader parser, XMLStreamReader parser,
String[] recordPath, String[] recordPath,
int pathIndex, int pathIndex,
@ -336,7 +345,7 @@ public class XmlImportUtilities {
skip(parser); skip(parser);
} }
} }
static protected void skip(XMLStreamReader parser) throws XMLStreamException { static protected void skip(XMLStreamReader parser) throws XMLStreamException {
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
@ -347,71 +356,71 @@ public class XmlImportUtilities {
} }
} }
} }
static protected void processRecord( static protected void processRecord(
Project project, Project project,
XMLStreamReader parser, XMLStreamReader parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws XMLStreamException { ) throws XMLStreamException {
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record); processSubRecord(project, parser, rootColumnGroup, record);
if (record.rows.size() > 0) { if (record.rows.size() > 0) {
for (List<Cell> row : record.rows) { for (List<Cell> row : record.rows) {
Row realRow = new Row(row.size()); Row realRow = new Row(row.size());
for (int c = 0; c < row.size(); c++) { for (int c = 0; c < row.size(); c++) {
Cell cell = row.get(c); Cell cell = row.get(c);
if (cell != null) { if (cell != null) {
realRow.setCell(c, cell); realRow.setCell(c, cell);
} }
} }
project.rows.add(realRow); project.rows.add(realRow);
} }
} }
} }
static protected String composeName(String prefix, String localName) { static protected String composeName(String prefix, String localName) {
return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName; return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName;
} }
static protected void processSubRecord( static protected void processSubRecord(
Project project, Project project,
XMLStreamReader parser, XMLStreamReader parser,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record ImportRecord record
) throws XMLStreamException { ) throws XMLStreamException {
ImportColumnGroup thisColumnGroup = getColumnGroup( ImportColumnGroup thisColumnGroup = getColumnGroup(
project, project,
columnGroup, columnGroup,
composeName(parser.getPrefix(), parser.getLocalName())); composeName(parser.getPrefix(), parser.getLocalName()));
int commonStartingRowIndex = 0; int commonStartingRowIndex = 0;
for (ImportColumn column : thisColumnGroup.columns.values()) { for (ImportColumn column : thisColumnGroup.columns.values()) {
if (column.cellIndex < record.columnEmptyRowIndices.size()) { if (column.cellIndex < record.columnEmptyRowIndices.size()) {
commonStartingRowIndex = Math.max( commonStartingRowIndex = Math.max(
commonStartingRowIndex, commonStartingRowIndex,
record.columnEmptyRowIndices.get(column.cellIndex)); record.columnEmptyRowIndices.get(column.cellIndex));
} }
} }
int attributeCount = parser.getAttributeCount(); int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) { for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i).trim(); String text = parser.getAttributeValue(i).trim();
if (text.length() > 0) { if (text.length() > 0) {
addCell( addCell(
project, project,
thisColumnGroup, thisColumnGroup,
record, record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text, text,
commonStartingRowIndex commonStartingRowIndex
); );
} }
} }
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {
@ -421,14 +430,14 @@ public class XmlImportUtilities {
thisColumnGroup, thisColumnGroup,
record record
); );
} else if (//eventType == XMLStreamConstants.CDATA || } else if (//eventType == XMLStreamConstants.CDATA ||
eventType == XMLStreamConstants.CHARACTERS) { eventType == XMLStreamConstants.CHARACTERS) {
String text = parser.getText().trim(); String text = parser.getText().trim();
if (text.length() > 0) { if (text.length() > 0) {
addCell( addCell(
project, project,
thisColumnGroup, thisColumnGroup,
record, record,
null, null,
parser.getText(), parser.getText(),
commonStartingRowIndex commonStartingRowIndex
@ -438,10 +447,10 @@ public class XmlImportUtilities {
break; break;
} }
} }
if (commonStartingRowIndex < record.rows.size()) { if (commonStartingRowIndex < record.rows.size()) {
List<Cell> startingRow = record.rows.get(commonStartingRowIndex); List<Cell> startingRow = record.rows.get(commonStartingRowIndex);
for (ImportColumn c : thisColumnGroup.columns.values()) { for (ImportColumn c : thisColumnGroup.columns.values()) {
int cellIndex = c.cellIndex; int cellIndex = c.cellIndex;
if (cellIndex >= startingRow.size() || startingRow.get(cellIndex) == null) { if (cellIndex >= startingRow.size() || startingRow.get(cellIndex) == null) {
@ -450,7 +459,7 @@ public class XmlImportUtilities {
} }
} }
} }
static protected void addCell( static protected void addCell(
Project project, Project project,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
@ -462,33 +471,33 @@ public class XmlImportUtilities {
if (text == null || ((String) text).isEmpty()) { if (text == null || ((String) text).isEmpty()) {
return; return;
} }
Serializable value = ImporterUtilities.parseCellValue(text); Serializable value = ImporterUtilities.parseCellValue(text);
ImportColumn column = getColumn(project, columnGroup, columnLocalName); ImportColumn column = getColumn(project, columnGroup, columnLocalName);
int cellIndex = column.cellIndex; int cellIndex = column.cellIndex;
while (cellIndex >= record.columnEmptyRowIndices.size()) { while (cellIndex >= record.columnEmptyRowIndices.size()) {
record.columnEmptyRowIndices.add(commonStaringRowIndex); record.columnEmptyRowIndices.add(commonStaringRowIndex);
} }
int rowIndex = record.columnEmptyRowIndices.get(cellIndex); int rowIndex = record.columnEmptyRowIndices.get(cellIndex);
while (rowIndex >= record.rows.size()) { while (rowIndex >= record.rows.size()) {
record.rows.add(new ArrayList<Cell>()); record.rows.add(new ArrayList<Cell>());
} }
List<Cell> row = record.rows.get(rowIndex); List<Cell> row = record.rows.get(rowIndex);
while (cellIndex >= row.size()) { while (cellIndex >= row.size()) {
row.add(null); row.add(null);
} }
row.set(cellIndex, new Cell(value, null)); row.set(cellIndex, new Cell(value, null));
record.columnEmptyRowIndices.set(cellIndex, rowIndex + 1); record.columnEmptyRowIndices.set(cellIndex, rowIndex + 1);
column.nonBlankCount++; column.nonBlankCount++;
} }
static protected ImportColumn getColumn( static protected ImportColumn getColumn(
Project project, Project project,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
@ -497,27 +506,27 @@ public class XmlImportUtilities {
if (columnGroup.columns.containsKey(localName)) { if (columnGroup.columns.containsKey(localName)) {
return columnGroup.columns.get(localName); return columnGroup.columns.get(localName);
} }
ImportColumn column = createColumn(project, columnGroup, localName); ImportColumn column = createColumn(project, columnGroup, localName);
columnGroup.columns.put(localName, column); columnGroup.columns.put(localName, column);
return column; return column;
} }
static protected ImportColumn createColumn( static protected ImportColumn createColumn(
Project project, Project project,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
String localName String localName
) { ) {
ImportColumn newColumn = new ImportColumn(); ImportColumn newColumn = new ImportColumn();
newColumn.name = newColumn.name =
columnGroup.name.length() == 0 ? columnGroup.name.length() == 0 ?
(localName == null ? "Text" : localName) : (localName == null ? "Text" : localName) :
(localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName));
newColumn.cellIndex = project.columnModel.allocateNewCellIndex(); newColumn.cellIndex = project.columnModel.allocateNewCellIndex();
return newColumn; return newColumn;
} }
@ -529,25 +538,25 @@ public class XmlImportUtilities {
if (columnGroup.subgroups.containsKey(localName)) { if (columnGroup.subgroups.containsKey(localName)) {
return columnGroup.subgroups.get(localName); return columnGroup.subgroups.get(localName);
} }
ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName); ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName);
columnGroup.subgroups.put(localName, subgroup); columnGroup.subgroups.put(localName, subgroup);
return subgroup; return subgroup;
} }
static protected ImportColumnGroup createColumnGroup( static protected ImportColumnGroup createColumnGroup(
Project project, Project project,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
String localName String localName
) { ) {
ImportColumnGroup newGroup = new ImportColumnGroup(); ImportColumnGroup newGroup = new ImportColumnGroup();
newGroup.name = newGroup.name =
columnGroup.name.length() == 0 ? columnGroup.name.length() == 0 ?
(localName == null ? "Text" : localName) : (localName == null ? "Text" : localName) :
(localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName));
return newGroup; return newGroup;
} }
} }

View File

@ -6,11 +6,16 @@ import java.io.PushbackInputStream;
import java.io.Reader; import java.io.Reader;
import java.util.Properties; import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.metaweb.gridworks.importers.XmlImportUtilities.ImportColumnGroup; import com.metaweb.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Project;
public class XmlImporter implements Importer { public class XmlImporter implements Importer {
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
public static final int BUFFER_SIZE = 64 * 1024; public static final int BUFFER_SIZE = 64 * 1024;
public boolean takesReader() { public boolean takesReader() {
@ -28,6 +33,7 @@ public class XmlImporter implements Importer {
Project project, Project project,
Properties options Properties options
) throws Exception { ) throws Exception {
logger.trace("XmlImporter.read");
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE); PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
String[] recordPath = null; String[] recordPath = null;

View File

@ -0,0 +1,207 @@
package com.metaweb.gridworks.tests.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import static org.mockito.Mockito.mock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.metaweb.gridworks.importers.XmlImporter;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
 * TestNG tests for {@link XmlImporter}.
 *
 * <p>Each test builds a small XML document in memory, feeds it to
 * {@code XmlImporter.read} and then inspects the columns and rows that were
 * added to a fresh {@link Project}.
 */
public class XmlImporterTests {
    final static Logger logger = LoggerFactory.getLogger("XmlImporterTests");

    //dependencies
    Project project = null;
    Properties options = null;
    ByteArrayInputStream inputStream = null;

    //System Under Test
    XmlImporter SUT = null;

    /** Creates a fresh importer, an empty project and mocked options before every test. */
    @BeforeMethod
    public void SetUp(){
        SUT = new XmlImporter();
        project = new Project();
        options = mock(Properties.class);
    }

    /** Drops every per-test fixture so that no state leaks into the next test. */
    @AfterMethod
    public void TearDown(){
        SUT = null;
        project = null;
        options = null;
        // The stream is created by runTest; release it together with the other fixtures.
        inputStream = null;
    }

    /** Six identical-shaped book records are expected to yield a 4-column, 6-row grid. */
    @Test
    public void canParseSample(){
        runTest(getSample());

        assertGridCreate(project, 4, 6);
        printProject(project);

        Row row = project.rows.get(0);
        Assert.assertNotNull(row);
        Assert.assertNotNull(row.cells);
        Assert.assertNotNull(row.cells.get(2));
        Assert.assertEquals(row.cells.get(2).value, "Author 1, The");
    }

    /** A line break inside element text is expected to be kept verbatim in the cell value. */
    @Test
    public void testCanParseLineBreak(){
        runTest(getSampleWithLineBreak());

        assertGridCreate(project, 4, 6);
        printProject(project);

        // Row index 3 holds the fourth book, the one whose author contains the line break.
        Row row = project.rows.get(3);
        Assert.assertNotNull(row);
        Assert.assertNotNull(row.cells);
        Assert.assertNotNull(row.cells.get(2));
        Assert.assertEquals(row.cells.get(2).value, "With line\n break");
    }

    /**
     * Records whose nested elements differ (the last book carries an extra
     * {@code genre} element) are expected to yield a 5-column, 6-row grid in
     * which every row has six cells. Kept in the "broken" group.
     */
    @Test(groups={"broken"})
    public void testElementsWithVaryingStructure(){
        runTest(getSampleWithVaryingStructure());

        assertGridCreate(project, 5, 6);
        printProject(project);

        Row row0 = project.rows.get(0);
        Assert.assertNotNull(row0);
        Assert.assertNotNull(row0.cells);
        Assert.assertEquals(row0.cells.size(),6);

        Row row5 = project.rows.get(5);
        Assert.assertNotNull(row5);
        Assert.assertNotNull(row5.cells);
        Assert.assertEquals(row5.cells.size(),6);
    }

    //------------helper methods---------------

    /**
     * Builds one {@code <book>} element with an {@code id} attribute and
     * {@code author}, {@code title} and {@code publish_date} children.
     *
     * @param id number used for the id attribute and embedded in author and title
     * @return the XML fragment for a single book
     */
    protected String getTypicalElement(int id){
        return "<book id=\"" + id + "\">" +
            "<author>Author " + id + ", The</author>" +
            "<title>Book title " + id + "</title>" +
            "<publish_date>2010-05-26</publish_date>" +
            "</book>";
    }

    /** @return a {@code <library>} document holding typical books 1 to 6 */
    protected String getSample(){
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\"?><library>");
        for(int i = 1; i < 7; i++){
            sb.append(getTypicalElement(i));
        }
        sb.append("</library>");
        return sb.toString();
    }

    /** @return a {@code <library>} document of six books whose fourth author contains a line break */
    protected String getSampleWithLineBreak(){
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\"?><library>");
        for(int i = 1; i < 4; i++){
            sb.append(getTypicalElement(i));
        }
        sb.append("<book id=\"4\">" +
                "<author>With line\n break</author>" +
                "<title>Book title 4</title>" +
                "<publish_date>2010-05-26</publish_date>" +
                "</book>");
        sb.append(getTypicalElement(5));
        sb.append(getTypicalElement(6));
        sb.append("</library>");
        return sb.toString();
    }

    /** @return a {@code <library>} document of six books whose last book has an additional {@code genre} element */
    protected String getSampleWithVaryingStructure(){
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\"?><library>");
        for(int i = 1; i < 6; i++){
            sb.append(getTypicalElement(i));
        }
        sb.append("<book id=\"6\">" +
                "<author>With line\n break</author>" +
                "<title>Book title 6</title>" +
                "<genre>New element not seen in other records</genre>" +
                "<publish_date>2010-05-26</publish_date>" +
                "</book>");
        sb.append("</library>");
        return sb.toString();
    }

    /**
     * Encodes {@code testString} as UTF-8, runs the importer over it and closes
     * the stream. Any exception fails the test with the exception attached as
     * the cause, so that the report shows what actually went wrong.
     *
     * @param testString the XML document to import into {@link #project}
     */
    private void runTest(String testString){
        try {
            inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) );
        } catch (UnsupportedEncodingException e) {
            Assert.fail("UTF-8 encoding is not available", e);
        }

        try {
            SUT.read(inputStream, project, options);
        } catch (Exception e) {
            Assert.fail("XmlImporter.read threw an exception: " + e, e);
        }

        try {
            inputStream.close();
        } catch (IOException e) {
            Assert.fail("Could not close the test input stream", e);
        }
    }

    /**
     * Asserts that the project has a column model and rows of the expected sizes.
     *
     * @param project the project populated by the importer
     * @param numCols expected number of columns in the column model
     * @param numRows expected number of rows
     */
    private void assertGridCreate(Project project, int numCols, int numRows){
        Assert.assertNotNull(project);
        Assert.assertNotNull(project.columnModel);
        Assert.assertNotNull(project.columnModel.columns);
        Assert.assertEquals(project.columnModel.columns.size(), numCols);
        Assert.assertNotNull(project.rows);
        Assert.assertEquals(project.rows.size(), numRows);
    }

    /**
     * Logs the column names on one line and then every row on its own line,
     * writing "null" for missing cells. Debugging aid only; asserts nothing.
     *
     * @param project the project to dump to the logger
     */
    private void printProject(Project project){
        //some quick and dirty debugging
        StringBuilder sb = new StringBuilder();
        for(Column c : project.columnModel.columns){
            sb.append(c.getName());
            sb.append("; ");
        }
        logger.info(sb.toString());

        for(Row r : project.rows){
            sb = new StringBuilder();
            for(Cell c : r.cells){
                if(c != null){
                    sb.append(c.value);
                    sb.append("; ");
                }else{
                    sb.append("null; ");
                }
            }
            logger.info(sb.toString());
        }
    }
}