XmlImportUtilities no longer relies on XMLStreamConstants, and is now independent of any specific type of tree data (Xml or otherwise).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1378 7d457c2a-affb-35e4-300a-418c747d4874
2010-09-28 10:46:33 +00:00 · 2010-09-28 10:46:33 +00:00 · 855df20481
commit 855df20481
parent b21961be89
5 changed files with 27 additions and 24 deletions
--- a/main/src/com/google/refine/importers/XmlImportUtilities.java
+++ b/main/src/com/google/refine/importers/XmlImportUtilities.java
@ -8,8 +8,6 @@ import java.util.Map;
 import java.util.Map.Entry;

 import javax.servlet.ServletException;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -21,7 +19,7 @@ import com.google.refine.model.Project;
 import com.google.refine.model.Row;

 public class XmlImportUtilities extends TreeImportUtilities {
-    final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
+    final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");

    static public String[] detectPathFromTag(TreeParser parser, String tag) {
        try {
@ -47,7 +45,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
    }

    /**
-     * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
+     * Looks for an element with the given tag name in the Tree data being parsed, returning the path hierarchy to reach it.
     *
     * @param parser
     * @param tag
@ -56,12 +54,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
     *         If the tag is found, an array of strings is returned.
     *         If the tag is at the top level, the tag will be the only item in the array.
     *         If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
-     *         Null if the the tag is not found.
-     * @throws XMLStreamException
+     *         null if the tag is not found.
+     * @throws ServletException
     */
    static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
        try{
-            if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic
+            if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
                parser.next();

            String localName = parser.getLocalName();
@ -97,10 +95,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
    }

    /**
-     * Seeks for recurring XML element in an InputStream
+     * Seeks recurring elements in a parsed document
     * which are likely candidates for being data records
-     * @param inputStream
-     *              The XML data as a stream
+     * @param parser
+     *              The parser loaded with tree data
     * @return
     *              The path to the most numerous of the possible candidates.
     *              null if no candidates were found (less than 6 recurrences)
@ -133,12 +131,12 @@ public class XmlImportUtilities extends TreeImportUtilities {

            return candidates.get(0).path;
        }
-        logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
+        logger.info("No candidate elements were found in data - at least 6 similar elements are required");
        return null;
    }

    static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
-        logger.trace("detectRecordElement(XMLStreamReader, String[])");
+        logger.trace("detectRecordElement(TreeParser, String[])");
        List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();

        Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
@ -258,17 +256,16 @@ public class XmlImportUtilities extends TreeImportUtilities {
     * @param recordPath
     * @param pathIndex
     * @param rootColumnGroup
-     * @throws XMLStreamException
+     * @throws ServletException
     */
    static protected void findRecord(
        Project project,
-        //XMLStreamReader parser,
        TreeParser parser,
        String[] recordPath,
        int pathIndex,
        ImportColumnGroup rootColumnGroup
    ) throws ServletException {
-        if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
+        if(parser.getEventType() == TreeParserToken.StartDocument){ // generic equivalent of XMLStreamConstants.START_DOCUMENT
            logger.warn("Cannot use findRecord method for START_DOCUMENT event");
            return;
        }
@ -303,12 +300,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
    }

    /**
-     * processRecord parsesXml for a single element and it's sub-elements,
+     * processRecord parses Tree data for a single element and its sub-elements,
     * adding the parsed data as a row to the project
     * @param project
     * @param parser
     * @param rootColumnGroup
-     * @throws XMLStreamException
+     * @throws ServletException
     */
    static protected void processRecord(
        Project project,
@ -345,7 +342,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
     * @param parser
     * @param columnGroup
     * @param record
-     * @throws XMLStreamException
+     * @throws ServletException
     */
    static protected void processSubRecord(
        Project project,
--- a/main/src/com/google/refine/importers/parsers/JSONParser.java
+++ b/main/src/com/google/refine/importers/parsers/JSONParser.java
@ -49,9 +49,8 @@ public class JSONParser implements TreeParser{
 	}

 	@Override
-	public int getEventType() {
-		// TODO Auto-generated method stub
-		return 0;
+	public TreeParserToken getEventType() throws ServletException {
+		return this.convertToTreeParserToken(parser.getCurrentToken());
 	}

 	@Override
@ -102,6 +101,9 @@ public class JSONParser implements TreeParser{
            case START_OBJECT: return TreeParserToken.StartEntity;
            case END_OBJECT: return TreeParserToken.EndEntity;
            case VALUE_STRING: return TreeParserToken.Value;
+            //Json does not have START_DOCUMENT
+            //Json does not have END_DOCUMENT
+            
            //TODO finish the rest of the cases
            default: throw new ServletException("Not yet implemented");
        }
--- a/main/src/com/google/refine/importers/parsers/TreeParser.java
+++ b/main/src/com/google/refine/importers/parsers/TreeParser.java
@ -4,7 +4,7 @@ import javax.servlet.ServletException;

 public interface TreeParser {
    public TreeParserToken next() throws ServletException;
-    public int getEventType(); //aka getCurrentToken
+    public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
    public boolean hasNext() throws ServletException;
    public String getLocalName();
    public String getPrefix();
--- a/main/src/com/google/refine/importers/parsers/TreeParserToken.java
+++ b/main/src/com/google/refine/importers/parsers/TreeParserToken.java
@ -2,6 +2,8 @@ package com.google.refine.importers.parsers;


 public enum TreeParserToken {
+    StartDocument,
+    EndDocument,
    StartEntity,
    EndEntity,
    Value
--- a/main/src/com/google/refine/importers/parsers/XmlParser.java
+++ b/main/src/com/google/refine/importers/parsers/XmlParser.java
@ -47,13 +47,15 @@ public class XmlParser implements TreeParser{
            case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
            case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
            case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
+            case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
+            case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
            //TODO
            default: throw new ServletException("Not yet implemented");
        }
    }
    
-    public int getEventType(){
-        return parser.getEventType();
+    public TreeParserToken getEventType() throws ServletException{
+        return this.convertToTreeParserToken(parser.getEventType());
    }
    
    public boolean hasNext() throws ServletException{