Tidy up the XmlImporter: reduce the number of generic TreeParserToken values to a minimum, which should also allow elements such as comments and CDATA to be ignored/skipped.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1422 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
d3f223c196
commit
ec9898ba92
@ -59,7 +59,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
*/
|
*/
|
||||||
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
||||||
try{
|
try{
|
||||||
if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
|
if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT)
|
||||||
parser.next();
|
parser.next();
|
||||||
|
|
||||||
String localName = parser.getLocalName();
|
String localName = parser.getLocalName();
|
||||||
@ -271,7 +271,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
) throws ServletException {
|
) throws ServletException {
|
||||||
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
|
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
|
||||||
|
|
||||||
if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){
|
if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){
|
||||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -360,7 +360,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
) throws ServletException {
|
) throws ServletException {
|
||||||
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
|
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
|
||||||
|
|
||||||
if(parser.getEventType() == TreeParserToken.StartDocument)
|
if(parser.getEventType() == TreeParserToken.Ignorable)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
||||||
|
@ -3,10 +3,8 @@ package com.google.refine.importers.parsers;
|
|||||||
|
|
||||||
public enum TreeParserToken {
|
public enum TreeParserToken {
|
||||||
Ignorable,
|
Ignorable,
|
||||||
StartDocument,
|
|
||||||
EndDocument,
|
|
||||||
StartEntity,
|
StartEntity,
|
||||||
EndEntity,
|
EndEntity,
|
||||||
Value
|
Value
|
||||||
//append additional tokens as necessary (most are just mapped to Value or Ignorable)
|
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
|
||||||
}
|
}
|
||||||
|
@ -45,20 +45,25 @@ public class XmlParser implements TreeParser{
|
|||||||
throw new ServletException(e);
|
throw new ServletException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return convertToTreeParserToken(currentToken);
|
return mapToTreeParserToken(currentToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
|
protected TreeParserToken mapToTreeParserToken(int token) throws ServletException {
|
||||||
switch(token){
|
switch(token){
|
||||||
//Xml does not have StartArray element type
|
|
||||||
//Xml does not have EndArray element type
|
|
||||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||||
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
|
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.Ignorable;
|
||||||
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
|
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.SPACE: return TreeParserToken.Value;
|
||||||
//TODO
|
case XMLStreamConstants.PROCESSING_INSTRUCTION: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.NOTATION_DECLARATION: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.NAMESPACE: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.ENTITY_REFERENCE: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.DTD: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.COMMENT: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.CDATA: return TreeParserToken.Ignorable;
|
||||||
|
case XMLStreamConstants.ATTRIBUTE: return TreeParserToken.Ignorable;
|
||||||
default:
|
default:
|
||||||
return TreeParserToken.Ignorable;
|
return TreeParserToken.Ignorable;
|
||||||
}
|
}
|
||||||
@ -66,7 +71,7 @@ public class XmlParser implements TreeParser{
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TreeParserToken getEventType() throws ServletException{
|
public TreeParserToken getEventType() throws ServletException{
|
||||||
return this.convertToTreeParserToken(parser.getEventType());
|
return this.mapToTreeParserToken(parser.getEventType());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -396,7 +396,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
public void ParserSkip(){
|
public void ParserSkip(){
|
||||||
try {
|
try {
|
||||||
if(parser.getEventType() == TreeParserToken.StartDocument){
|
if(parser.getEventType() == TreeParserToken.Ignorable){
|
||||||
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
||||||
}
|
}
|
||||||
} catch (ServletException e1) {
|
} catch (ServletException e1) {
|
||||||
|
Loading…
Reference in New Issue
Block a user