All methods in XmlImportUtilities now use the TreeParser interface.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1323 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-27 17:59:53 +00:00
parent d71c563831
commit e5ddfa6fdc
5 changed files with 74 additions and 61 deletions

View File

@ -9,15 +9,14 @@ import java.util.Map;
import java.util.Map.Entry;
import javax.servlet.ServletException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
@ -112,11 +111,12 @@ public class XmlImportUtilities extends TreeImporter {
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
//XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
RecordElementCandidate candidate =
detectRecordElement(
parser,
@ -141,7 +141,7 @@ public class XmlImportUtilities extends TreeImporter {
return null;
}
static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) {
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
logger.trace("detectRecordElement(XMLStreamReader, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
@ -152,13 +152,13 @@ public class XmlImportUtilities extends TreeImporter {
try {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.END_ELEMENT) {
if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
break;
} else if (eventType == XMLStreamConstants.CHARACTERS) {
} else if (eventType == XMLStreamConstants.CHARACTERS) {//FIXME uses Xml, and is not generic
if (parser.getText().trim().length() > 0) {
textNodeCount++;
}
} else if (eventType == XMLStreamConstants.START_ELEMENT) {
} else if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
childElementNodeCount++;
String tagName = parser.getLocalName();
@ -234,18 +234,18 @@ public class XmlImportUtilities extends TreeImporter {
static public void importXml(
static public void importXml( //FIXME could do with a name change to 'importTreeData' or similar
InputStream inputStream,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup
) {
try {
XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
findRecord(project, parser, recordPath, 0, rootColumnGroup);
}
}
@ -268,12 +268,13 @@ public class XmlImportUtilities extends TreeImporter {
*/
static protected void findRecord(
Project project,
XMLStreamReader parser,
//XMLStreamReader parser,
TreeParser parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup
) throws XMLStreamException {
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){
) throws ServletException {
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return;
}
@ -282,9 +283,9 @@ public class XmlImportUtilities extends TreeImporter {
if (pathIndex < recordPath.length - 1) {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
} else if (eventType == XMLStreamConstants.END_ELEMENT) {
} else if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
break;
}
}
@ -296,12 +297,12 @@ public class XmlImportUtilities extends TreeImporter {
}
}
static protected void skip(XMLStreamReader parser) throws XMLStreamException {
static protected void skip(TreeParser parser) throws ServletException {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
skip(parser);
} else if (eventType == XMLStreamConstants.END_ELEMENT) {
} else if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
return;
}
}
@ -317,9 +318,9 @@ public class XmlImportUtilities extends TreeImporter {
*/
static protected void processRecord(
Project project,
XMLStreamReader parser,
TreeParser parser,
ImportColumnGroup rootColumnGroup
) throws XMLStreamException {
) throws ServletException {
ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record);
@ -354,10 +355,10 @@ public class XmlImportUtilities extends TreeImporter {
*/
static protected void processSubRecord(
Project project,
XMLStreamReader parser,
TreeParser parser,
ImportColumnGroup columnGroup,
ImportRecord record
) throws XMLStreamException {
) throws ServletException {
ImportColumnGroup thisColumnGroup = getColumnGroup(
project,
columnGroup,

View File

@ -8,4 +8,9 @@ public interface TreeParser {
/**
 * Reports whether the parser has further events to deliver.
 * Callers loop on this method and advance with {@code next()} inside the loop.
 *
 * @return {@code true} while more events can be read
 * @throws ServletException presumably when the underlying parser fails - confirm against the implementations
 */
public boolean hasNext() throws ServletException;
/**
 * Returns the local name of the node the parser is currently positioned on.
 * XmlImportUtilities reads this as the tag name right after a start-element event.
 *
 * @return the local name of the current node
 */
public String getLocalName();
/**
 * Returns the prefix of the node the parser is currently positioned on.
 * NOTE(review): presumably the namespace prefix, as in XMLStreamReader - confirm for non-XML parsers.
 *
 * @return the prefix of the current node
 */
public String getPrefix();
/**
 * Returns the text of the current parse event.
 * XmlImportUtilities calls this on character events and trims the result
 * to decide whether a text node is non-empty.
 *
 * @return the text content of the current event
 */
public String getText();
/**
 * Returns how many attributes are available on the current node.
 * NOTE(review): presumably only meaningful on a start-element event, as in XMLStreamReader - confirm.
 *
 * @return the number of attributes
 */
public int getAttributeCount();
/**
 * Returns the value of the attribute at the given position on the current node.
 *
 * @param index position of the attribute; presumably in the range 0 to getAttributeCount() - 1 - confirm
 * @return the attribute's value
 */
public String getAttributeValue(int index);
/**
 * Returns the prefix of the attribute at the given position on the current node.
 *
 * @param index position of the attribute; presumably in the range 0 to getAttributeCount() - 1 - confirm
 * @return the attribute's prefix
 */
public String getAttributePrefix(int index);
/**
 * Returns the local name of the attribute at the given position on the current node.
 *
 * @param index position of the attribute; presumably in the range 0 to getAttributeCount() - 1 - confirm
 * @return the attribute's local name
 */
public String getAttributeLocalName(int index);
}

View File

@ -51,4 +51,22 @@ public class XmlParser implements TreeParser{
/**
 * Returns the prefix of the current node by delegating to the wrapped parser.
 * NOTE(review): the wrapped {@code parser} field is declared outside this view;
 * presumably an XMLStreamReader - confirm.
 *
 * @return whatever the wrapped parser's {@code getPrefix()} returns
 */
public String getPrefix(){
return parser.getPrefix();
}
/**
 * Returns the text of the current parse event by delegating to the wrapped parser.
 *
 * @return whatever the wrapped parser's {@code getText()} returns
 */
public String getText(){
return parser.getText();
}
/**
 * Returns the attribute count of the current node by delegating to the wrapped parser.
 *
 * @return whatever the wrapped parser's {@code getAttributeCount()} returns
 */
public int getAttributeCount(){
return parser.getAttributeCount();
}
/**
 * Returns the value of the attribute at the given position by delegating to the wrapped parser.
 *
 * @param index position of the attribute, passed through unchanged to the wrapped parser
 * @return whatever the wrapped parser's {@code getAttributeValue(index)} returns
 */
public String getAttributeValue(int index){
return parser.getAttributeValue(index);
}
/**
 * Returns the prefix of the attribute at the given position by delegating to the wrapped parser.
 *
 * @param index position of the attribute, passed through unchanged to the wrapped parser
 * @return whatever the wrapped parser's {@code getAttributePrefix(index)} returns
 */
public String getAttributePrefix(int index){
return parser.getAttributePrefix(index);
}
/**
 * Returns the local name of the attribute at the given position by delegating to the wrapped parser.
 *
 * @param index position of the attribute, passed through unchanged to the wrapped parser
 * @return whatever the wrapped parser's {@code getAttributeLocalName(index)} returns
 */
public String getAttributeLocalName(int index){
return parser.getAttributeLocalName(index);
}
}

View File

@ -3,8 +3,6 @@ package com.google.refine.tests.importers;
import java.util.List;
import javax.servlet.ServletException;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import com.google.refine.importers.XmlImportUtilities;
import com.google.refine.importers.parsers.TreeParser;
@ -16,15 +14,15 @@ public class XmlImportUtilitiesStub extends XmlImportUtilities {
return super.detectRecordElement(parser, tag);
}
public void ProcessSubRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws XMLStreamException{
public void ProcessSubRecordWrapper(Project project, TreeParser parser, ImportColumnGroup columnGroup, ImportRecord record) throws ServletException{
super.processSubRecord(project, parser, columnGroup, record);
}
public void findRecordWrapper(Project project, XMLStreamReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws XMLStreamException{
public void findRecordWrapper(Project project, TreeParser parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws ServletException{
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup);
}
public void processRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup rootColumnGroup) throws XMLStreamException{
public void processRecordWrapper(Project project, TreeParser parser, ImportColumnGroup rootColumnGroup) throws ServletException{
super.processRecord(project, parser, rootColumnGroup);
}

View File

@ -7,10 +7,6 @@ import java.util.ArrayList;
import java.util.List;
import javax.servlet.ServletException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -22,6 +18,7 @@ import org.testng.annotations.Test;
import com.google.refine.importers.TreeImporter.ImportColumn;
import com.google.refine.importers.TreeImporter.ImportColumnGroup;
import com.google.refine.importers.TreeImporter.ImportRecord;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
@ -37,7 +34,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
//dependencies
Project project;
XMLStreamReader parser;
TreeParser parser;
ImportColumnGroup columnGroup;
ImportRecord record;
ByteArrayInputStream inputStream;
@ -69,9 +66,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectPathFromTagTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "library";
//createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag);
createParser();
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response);
Assert.assertEquals(response.length, 1);
Assert.assertEquals(response[0], "library");
@ -82,9 +79,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book";
//createParser();
XmlParser xmlParser = new XmlParser(inputStream);
String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag);
createParser();
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response);
Assert.assertEquals(response.length, 2);
Assert.assertEquals(response[0], "library");
@ -94,13 +91,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test
public void detectRecordElementTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser();
XmlParser xmlParser = new XmlParser(inputStream);
createParser();
String tag="library";
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(xmlParser, tag);
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
Assert.fail(e.getMessage());
}
@ -112,13 +109,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test
public void detectRecordElementCanHandleWithNestedElements(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser();
XmlParser xmlParser = new XmlParser(inputStream);
createParser();
String tag="book";
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(xmlParser, tag);
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
Assert.fail(e.getMessage());
}
@ -131,13 +128,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test
public void detectRecordElementIsNullForUnfoundTag(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
//createParser();
XmlParser xmlParser = new XmlParser(inputStream);
createParser();
String tag="";
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(xmlParser, tag);
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
Assert.fail(e.getMessage());
}
@ -234,7 +231,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
} catch (XMLStreamException e) {
} catch (ServletException e) {
Assert.fail();
}
@ -253,7 +250,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) {
} catch (ServletException e) {
Assert.fail();
}
log(project);
@ -274,7 +271,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) {
} catch (ServletException e) {
Assert.fail();
}
log(project);
@ -299,7 +296,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (XMLStreamException e) {
} catch (ServletException e) {
Assert.fail();
}
log(project);
@ -323,7 +320,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
} catch (XMLStreamException e) {
} catch (ServletException e) {
Assert.fail();
}
log(project);
@ -382,18 +379,12 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void ParserSkip(){
try {
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
} catch (XMLStreamException e1) {
} catch (ServletException e1) {
Assert.fail();
}
}
public void createParser(){
try {
parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
} catch (XMLStreamException e1) {
Assert.fail();
} catch (FactoryConfigurationError e1) {
Assert.fail();
}
parser = new XmlParser(inputStream);
}
}