From ec9898ba922cb13341ae294b31e987df75053592 Mon Sep 17 00:00:00 2001 From: Iain Sproat Date: Mon, 4 Oct 2010 15:02:09 +0000 Subject: [PATCH] Some tidying up of the XmlImporter which reduces the number of generic TreeParser tokens to a minimum - and should allow elements such as comments and CDATA to be ignored/skipped. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1422 7d457c2a-affb-35e4-300a-418c747d4874 --- .../refine/importers/XmlImportUtilities.java | 6 ++--- .../importers/parsers/TreeParserToken.java | 4 +--- .../refine/importers/parsers/XmlParser.java | 23 +++++++++++-------- .../importers/XmlImportUtilitiesTests.java | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/main/src/com/google/refine/importers/XmlImportUtilities.java b/main/src/com/google/refine/importers/XmlImportUtilities.java index 0a63d29e0..96738cc6b 100644 --- a/main/src/com/google/refine/importers/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/XmlImportUtilities.java @@ -59,7 +59,7 @@ public class XmlImportUtilities extends TreeImportUtilities { */ static protected List detectRecordElement(TreeParser parser, String tag) throws ServletException { try{ - if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT) + if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT) parser.next(); String localName = parser.getLocalName(); @@ -271,7 +271,7 @@ public class XmlImportUtilities extends TreeImportUtilities { ) throws ServletException { logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup"); - if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){ + if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){ logger.warn("Cannot use findRecord method for START_DOCUMENT event"); return; } @@ -360,7 +360,7 @@ public class XmlImportUtilities extends TreeImportUtilities { ) throws ServletException { logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)"); - if(parser.getEventType() == TreeParserToken.StartDocument) + if(parser.getEventType() == TreeParserToken.Ignorable) return; ImportColumnGroup thisColumnGroup = getColumnGroup( diff --git a/main/src/com/google/refine/importers/parsers/TreeParserToken.java b/main/src/com/google/refine/importers/parsers/TreeParserToken.java index 3ac86818f..1bcaaaaf8 100644 --- a/main/src/com/google/refine/importers/parsers/TreeParserToken.java +++ b/main/src/com/google/refine/importers/parsers/TreeParserToken.java @@ -3,10 +3,8 @@ package com.google.refine.importers.parsers; public enum TreeParserToken { Ignorable, - StartDocument, - EndDocument, StartEntity, EndEntity, Value - //append additional tokens as necessary (most are just mapped to Value or Ignorable) + //append additional tokens only if necessary (most should be just mapped to Value or Ignorable) } diff --git a/main/src/com/google/refine/importers/parsers/XmlParser.java b/main/src/com/google/refine/importers/parsers/XmlParser.java index 8ee2c2558..61f790bff 100644 --- a/main/src/com/google/refine/importers/parsers/XmlParser.java +++ b/main/src/com/google/refine/importers/parsers/XmlParser.java @@ -45,20 +45,25 @@ public class XmlParser implements TreeParser{ throw new ServletException(e); } - return convertToTreeParserToken(currentToken); + return mapToTreeParserToken(currentToken); } - protected TreeParserToken convertToTreeParserToken(int token) throws ServletException { + protected TreeParserToken mapToTreeParserToken(int token) throws ServletException { switch(token){ - //Xml does not have StartArray element type - //Xml does not have EndArray element type case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity; case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity; case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value; - case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument; - case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument; - - //TODO + case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.Ignorable; + case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.Ignorable; + case XMLStreamConstants.SPACE: return TreeParserToken.Value; + case XMLStreamConstants.PROCESSING_INSTRUCTION: return TreeParserToken.Ignorable; + case XMLStreamConstants.NOTATION_DECLARATION: return TreeParserToken.Ignorable; + case XMLStreamConstants.NAMESPACE: return TreeParserToken.Ignorable; + case XMLStreamConstants.ENTITY_REFERENCE: return TreeParserToken.Ignorable; + case XMLStreamConstants.DTD: return TreeParserToken.Ignorable; + case XMLStreamConstants.COMMENT: return TreeParserToken.Ignorable; + case XMLStreamConstants.CDATA: return TreeParserToken.Ignorable; + case XMLStreamConstants.ATTRIBUTE: return TreeParserToken.Ignorable; default: return TreeParserToken.Ignorable; } @@ -66,7 +71,7 @@ public class XmlParser implements TreeParser{ @Override public TreeParserToken getEventType() throws ServletException{ - return this.convertToTreeParserToken(parser.getEventType()); + return this.mapToTreeParserToken(parser.getEventType()); } @Override diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index cacc6d43b..f6970a753 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -396,7 +396,7 @@ public class XmlImportUtilitiesTests extends RefineTest { public void ParserSkip(){ try { - if(parser.getEventType() == TreeParserToken.StartDocument){ + if(parser.getEventType() == TreeParserToken.Ignorable){ parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event } } catch (ServletException e1) {