All methods in XmlImportUtilities now use the TreeParser interface.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1323 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-27 17:59:53 +00:00
parent d71c563831
commit e5ddfa6fdc
5 changed files with 74 additions and 61 deletions

View File

@ -9,15 +9,14 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
@ -112,11 +111,12 @@ public class XmlImportUtilities extends TreeImporter {
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try { try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); //XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
RecordElementCandidate candidate = RecordElementCandidate candidate =
detectRecordElement( detectRecordElement(
parser, parser,
@ -141,7 +141,7 @@ public class XmlImportUtilities extends TreeImporter {
return null; return null;
} }
static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) { static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
logger.trace("detectRecordElement(XMLStreamReader, String[])"); logger.trace("detectRecordElement(XMLStreamReader, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
@ -152,13 +152,13 @@ public class XmlImportUtilities extends TreeImporter {
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.END_ELEMENT) { if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
break; break;
} else if (eventType == XMLStreamConstants.CHARACTERS) { } else if (eventType == XMLStreamConstants.CHARACTERS) {//FIXME uses Xml, and is not generic
if (parser.getText().trim().length() > 0) { if (parser.getText().trim().length() > 0) {
textNodeCount++; textNodeCount++;
} }
} else if (eventType == XMLStreamConstants.START_ELEMENT) { } else if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
childElementNodeCount++; childElementNodeCount++;
String tagName = parser.getLocalName(); String tagName = parser.getLocalName();
@ -234,18 +234,18 @@ public class XmlImportUtilities extends TreeImporter {
static public void importXml( static public void importXml( //FIXME could do with a name change to 'importTreeData' or similar
InputStream inputStream, InputStream inputStream,
Project project, Project project,
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) { ) {
try { try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
findRecord(project, parser, recordPath, 0, rootColumnGroup); findRecord(project, parser, recordPath, 0, rootColumnGroup);
} }
} }
@ -268,12 +268,13 @@ public class XmlImportUtilities extends TreeImporter {
*/ */
static protected void findRecord( static protected void findRecord(
Project project, Project project,
XMLStreamReader parser, //XMLStreamReader parser,
TreeParser parser,
String[] recordPath, String[] recordPath,
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws XMLStreamException { ) throws ServletException {
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){ if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
logger.warn("Cannot use findRecord method for START_DOCUMENT event"); logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return; return;
} }
@ -282,9 +283,9 @@ public class XmlImportUtilities extends TreeImporter {
if (pathIndex < recordPath.length - 1) { if (pathIndex < recordPath.length - 1) {
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
} else if (eventType == XMLStreamConstants.END_ELEMENT) { } else if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
break; break;
} }
} }
@ -296,12 +297,12 @@ public class XmlImportUtilities extends TreeImporter {
} }
} }
static protected void skip(XMLStreamReader parser) throws XMLStreamException { static protected void skip(TreeParser parser) throws ServletException {
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
skip(parser); skip(parser);
} else if (eventType == XMLStreamConstants.END_ELEMENT) { } else if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
return; return;
} }
} }
@ -317,9 +318,9 @@ public class XmlImportUtilities extends TreeImporter {
*/ */
static protected void processRecord( static protected void processRecord(
Project project, Project project,
XMLStreamReader parser, TreeParser parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws XMLStreamException { ) throws ServletException {
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record); processSubRecord(project, parser, rootColumnGroup, record);
@ -354,10 +355,10 @@ public class XmlImportUtilities extends TreeImporter {
*/ */
static protected void processSubRecord( static protected void processSubRecord(
Project project, Project project,
XMLStreamReader parser, TreeParser parser,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record ImportRecord record
) throws XMLStreamException { ) throws ServletException {
ImportColumnGroup thisColumnGroup = getColumnGroup( ImportColumnGroup thisColumnGroup = getColumnGroup(
project, project,
columnGroup, columnGroup,

View File

@ -8,4 +8,9 @@ public interface TreeParser {
public boolean hasNext() throws ServletException; public boolean hasNext() throws ServletException;
public String getLocalName(); public String getLocalName();
public String getPrefix(); public String getPrefix();
/**
 * Text accessor for the parser's current event.
 * XmlParser implements this by delegating to its wrapped parser's getText().
 */
public String getText();
/**
 * Attribute count accessor.
 * XmlParser implements this by delegating to its wrapped parser's getAttributeCount();
 * presumably the count applies to the current element - confirm against the implementation.
 */
public int getAttributeCount();
/**
 * Value of the attribute at the given index (XmlParser delegates to its wrapped parser).
 * NOTE(review): index is presumably zero-based and bounded by getAttributeCount() - confirm.
 */
public String getAttributeValue(int index);
/**
 * Prefix of the attribute at the given index (XmlParser delegates to its wrapped parser).
 */
public String getAttributePrefix(int index);
/**
 * Local name of the attribute at the given index (XmlParser delegates to its wrapped parser).
 */
public String getAttributeLocalName(int index);
} }

View File

@ -51,4 +51,22 @@ public class XmlParser implements TreeParser{
public String getPrefix(){ public String getPrefix(){
return parser.getPrefix(); return parser.getPrefix();
} }
/**
 * Returns whatever the wrapped parser reports from getText().
 * NOTE(review): the wrapped parser is presumably a javax.xml.stream.XMLStreamReader,
 * whose getText() is only valid for text-bearing events - confirm.
 */
public String getText(){
return parser.getText();
}
/**
 * Returns the attribute count reported by the wrapped parser's getAttributeCount().
 */
public int getAttributeCount(){
return parser.getAttributeCount();
}
/**
 * Returns the wrapped parser's attribute value for the given index.
 *
 * @param index attribute position, passed through unchanged to the wrapped parser
 */
public String getAttributeValue(int index){
return parser.getAttributeValue(index);
}
/**
 * Returns the wrapped parser's attribute prefix for the given index.
 *
 * @param index attribute position, passed through unchanged to the wrapped parser
 */
public String getAttributePrefix(int index){
return parser.getAttributePrefix(index);
}
/**
 * Returns the wrapped parser's attribute local name for the given index.
 *
 * @param index attribute position, passed through unchanged to the wrapped parser
 */
public String getAttributeLocalName(int index){
return parser.getAttributeLocalName(index);
}
} }

View File

@ -3,8 +3,6 @@ package com.google.refine.tests.importers;
import java.util.List; import java.util.List;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import com.google.refine.importers.XmlImportUtilities; import com.google.refine.importers.XmlImportUtilities;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
@ -16,15 +14,15 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
return super.detectRecordElement(parser, tag); return super.detectRecordElement(parser, tag);
} }
public void ProcessSubRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws XMLStreamException{ public void ProcessSubRecordWrapper(Project project, TreeParser parser, ImportColumnGroup columnGroup, ImportRecord record) throws ServletException{
super.processSubRecord(project, parser, columnGroup, record); super.processSubRecord(project, parser, columnGroup, record);
} }
public void findRecordWrapper(Project project, XMLStreamReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ public void findRecordWrapper(Project project, TreeParser parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws ServletException{
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup); super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup);
} }
public void processRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ public void processRecordWrapper(Project project, TreeParser parser, ImportColumnGroup rootColumnGroup) throws ServletException{
super.processRecord(project, parser, rootColumnGroup); super.processRecord(project, parser, rootColumnGroup);
} }

View File

@ -7,10 +7,6 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
@ -22,6 +18,7 @@ import org.testng.annotations.Test;
import com.google.refine.importers.TreeImporter.ImportColumn; import com.google.refine.importers.TreeImporter.ImportColumn;
import com.google.refine.importers.TreeImporter.ImportColumnGroup; import com.google.refine.importers.TreeImporter.ImportColumnGroup;
import com.google.refine.importers.TreeImporter.ImportRecord; import com.google.refine.importers.TreeImporter.ImportRecord;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser; import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
@ -37,7 +34,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
//dependencies //dependencies
Project project; Project project;
XMLStreamReader parser; TreeParser parser;
ImportColumnGroup columnGroup; ImportColumnGroup columnGroup;
ImportRecord record; ImportRecord record;
ByteArrayInputStream inputStream; ByteArrayInputStream inputStream;
@ -69,9 +66,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectPathFromTagTest(){ public void detectPathFromTagTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "library"; String tag = "library";
//createParser(); createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response); Assert.assertNotNull(response);
Assert.assertEquals(response.length, 1); Assert.assertEquals(response.length, 1);
Assert.assertEquals(response[0], "library"); Assert.assertEquals(response[0], "library");
@ -82,9 +79,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book"; String tag = "book";
//createParser(); createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response); Assert.assertNotNull(response);
Assert.assertEquals(response.length, 2); Assert.assertEquals(response.length, 2);
Assert.assertEquals(response[0], "library"); Assert.assertEquals(response[0], "library");
@ -94,13 +91,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test @Test
public void detectRecordElementTest(){ public void detectRecordElementTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser(); createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String tag="library"; String tag="library";
List<String> response = new ArrayList<String>(); List<String> response = new ArrayList<String>();
try { try {
response = SUT.detectRecordElementWrapper(xmlParser, tag); response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) { } catch (ServletException e) {
Assert.fail(e.getMessage()); Assert.fail(e.getMessage());
} }
@ -112,13 +109,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test @Test
public void detectRecordElementCanHandleWithNestedElements(){ public void detectRecordElementCanHandleWithNestedElements(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser(); createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String tag="book"; String tag="book";
List<String> response = new ArrayList<String>(); List<String> response = new ArrayList<String>();
try { try {
response = SUT.detectRecordElementWrapper(xmlParser, tag); response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) { } catch (ServletException e) {
Assert.fail(e.getMessage()); Assert.fail(e.getMessage());
} }
@ -131,13 +128,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test @Test
public void detectRecordElementIsNullForUnfoundTag(){ public void detectRecordElementIsNullForUnfoundTag(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser(); createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String tag=""; String tag="";
List<String> response = new ArrayList<String>(); List<String> response = new ArrayList<String>();
try { try {
response = SUT.detectRecordElementWrapper(xmlParser, tag); response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) { } catch (ServletException e) {
Assert.fail(e.getMessage()); Assert.fail(e.getMessage());
} }
@ -234,7 +231,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try { try {
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup); SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
} catch (XMLStreamException e) { } catch (ServletException e) {
Assert.fail(); Assert.fail();
} }
@ -253,7 +250,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) { } catch (ServletException e) {
Assert.fail(); Assert.fail();
} }
log(project); log(project);
@ -274,7 +271,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) { } catch (ServletException e) {
Assert.fail(); Assert.fail();
} }
log(project); log(project);
@ -299,7 +296,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try { try {
SUT.processRecordWrapper(project, parser, columnGroup); SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) { } catch (ServletException e) {
Assert.fail(); Assert.fail();
} }
log(project); log(project);
@ -323,7 +320,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try { try {
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record); SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
} catch (XMLStreamException e) { } catch (ServletException e) {
Assert.fail(); Assert.fail();
} }
log(project); log(project);
@ -382,18 +379,12 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void ParserSkip(){ public void ParserSkip(){
try { try {
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
} catch (XMLStreamException e1) { } catch (ServletException e1) {
Assert.fail(); Assert.fail();
} }
} }
public void createParser(){ public void createParser(){
try { parser = new XmlParser(inputStream);
parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
} catch (XMLStreamException e1) {
Assert.fail();
} catch (FactoryConfigurationError e1) {
Assert.fail();
}
} }
} }