Tidy up the XmlImporter: reduce the number of generic TreeParser tokens to a minimum, which should also allow elements such as comments and CDATA to be ignored/skipped.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1422 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-10-04 15:02:09 +00:00
parent d3f223c196
commit ec9898ba92
4 changed files with 19 additions and 16 deletions

View File

@ -59,7 +59,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
*/ */
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException { static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
try{ try{
if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT) if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT)
parser.next(); parser.next();
String localName = parser.getLocalName(); String localName = parser.getLocalName();
@ -271,7 +271,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
) throws ServletException { ) throws ServletException {
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup"); logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){ if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){
logger.warn("Cannot use findRecord method for START_DOCUMENT event"); logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return; return;
} }
@ -360,7 +360,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
) throws ServletException { ) throws ServletException {
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)"); logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
if(parser.getEventType() == TreeParserToken.StartDocument) if(parser.getEventType() == TreeParserToken.Ignorable)
return; return;
ImportColumnGroup thisColumnGroup = getColumnGroup( ImportColumnGroup thisColumnGroup = getColumnGroup(

View File

@ -3,10 +3,8 @@ package com.google.refine.importers.parsers;
public enum TreeParserToken { public enum TreeParserToken {
Ignorable, Ignorable,
StartDocument,
EndDocument,
StartEntity, StartEntity,
EndEntity, EndEntity,
Value Value
//append additional tokens as necessary (most are just mapped to Value or Ignorable) //append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
} }

View File

@ -45,20 +45,25 @@ public class XmlParser implements TreeParser{
throw new ServletException(e); throw new ServletException(e);
} }
return convertToTreeParserToken(currentToken); return mapToTreeParserToken(currentToken);
} }
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException { protected TreeParserToken mapToTreeParserToken(int token) throws ServletException {
switch(token){ switch(token){
//Xml does not have StartArray element type
//Xml does not have EndArray element type
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity; case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity; case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value; case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument; case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.Ignorable;
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument; case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.Ignorable;
case XMLStreamConstants.SPACE: return TreeParserToken.Value;
//TODO case XMLStreamConstants.PROCESSING_INSTRUCTION: return TreeParserToken.Ignorable;
case XMLStreamConstants.NOTATION_DECLARATION: return TreeParserToken.Ignorable;
case XMLStreamConstants.NAMESPACE: return TreeParserToken.Ignorable;
case XMLStreamConstants.ENTITY_REFERENCE: return TreeParserToken.Ignorable;
case XMLStreamConstants.DTD: return TreeParserToken.Ignorable;
case XMLStreamConstants.COMMENT: return TreeParserToken.Ignorable;
case XMLStreamConstants.CDATA: return TreeParserToken.Ignorable;
case XMLStreamConstants.ATTRIBUTE: return TreeParserToken.Ignorable;
default: default:
return TreeParserToken.Ignorable; return TreeParserToken.Ignorable;
} }
@ -66,7 +71,7 @@ public class XmlParser implements TreeParser{
@Override @Override
public TreeParserToken getEventType() throws ServletException{ public TreeParserToken getEventType() throws ServletException{
return this.convertToTreeParserToken(parser.getEventType()); return this.mapToTreeParserToken(parser.getEventType());
} }
@Override @Override

View File

@ -396,7 +396,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void ParserSkip(){ public void ParserSkip(){
try { try {
if(parser.getEventType() == TreeParserToken.StartDocument){ if(parser.getEventType() == TreeParserToken.Ignorable){
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
} }
} catch (ServletException e1) { } catch (ServletException e1) {