XmlImportUtilities no longer relies on XMLStreamConstants, and is now independent of any specific type of tree data (Xml or otherwise).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1378 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-28 10:46:33 +00:00
parent b21961be89
commit 855df20481
5 changed files with 27 additions and 24 deletions

View File

@ -8,8 +8,6 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -21,7 +19,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImportUtilities { public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
static public String[] detectPathFromTag(TreeParser parser, String tag) { static public String[] detectPathFromTag(TreeParser parser, String tag) {
try { try {
@ -47,7 +45,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
/** /**
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. * Looks for an element with the given tag name in the Tree data being parsed, returning the path hierarchy to reach it.
* *
* @param parser * @param parser
* @param tag * @param tag
@ -56,12 +54,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
* If the tag is found, an array of strings is returned. * If the tag is found, an array of strings is returned.
* If the tag is at the top level, the tag will be the only item in the array. * If the tag is at the top level, the tag will be the only item in the array.
* If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index * If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
* Null if the tag is not found. * null if the tag is not found.
* @throws XMLStreamException * @throws ServletException
*/ */
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException { static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
try{ try{
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
parser.next(); parser.next();
String localName = parser.getLocalName(); String localName = parser.getLocalName();
@ -97,10 +95,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
/** /**
* Seeks for recurring XML element in an InputStream * Seeks for recurring element in a parsed document
* which are likely candidates for being data records * which are likely candidates for being data records
* @param inputStream * @param parser
* The XML data as a stream * The parser loaded with tree data
* @return * @return
* The path to the most numerous of the possible candidates. * The path to the most numerous of the possible candidates.
* null if no candidates were found (less than 6 recurrences) * null if no candidates were found (less than 6 recurrences)
@ -133,12 +131,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
return candidates.get(0).path; return candidates.get(0).path;
} }
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required"); logger.info("No candidate elements were found in data - at least 6 similar elements are required");
return null; return null;
} }
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) { static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
logger.trace("detectRecordElement(XMLStreamReader, String[])"); logger.trace("detectRecordElement(TreeParser, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>(); Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
@ -258,17 +256,16 @@ public class XmlImportUtilities extends TreeImportUtilities {
* @param recordPath * @param recordPath
* @param pathIndex * @param pathIndex
* @param rootColumnGroup * @param rootColumnGroup
* @throws XMLStreamException * @throws ServletException
*/ */
static protected void findRecord( static protected void findRecord(
Project project, Project project,
//XMLStreamReader parser,
TreeParser parser, TreeParser parser,
String[] recordPath, String[] recordPath,
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws ServletException { ) throws ServletException {
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){
logger.warn("Cannot use findRecord method for START_DOCUMENT event"); logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return; return;
} }
@ -303,12 +300,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
/** /**
* processRecord parsesXml for a single element and its sub-elements, * processRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project * adding the parsed data as a row to the project
* @param project * @param project
* @param parser * @param parser
* @param rootColumnGroup * @param rootColumnGroup
* @throws XMLStreamException * @throws ServletException
*/ */
static protected void processRecord( static protected void processRecord(
Project project, Project project,
@ -345,7 +342,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
* @param parser * @param parser
* @param columnGroup * @param columnGroup
* @param record * @param record
* @throws XMLStreamException * @throws ServletException
*/ */
static protected void processSubRecord( static protected void processSubRecord(
Project project, Project project,

View File

@ -49,9 +49,8 @@ public class JSONParser implements TreeParser{
} }
@Override @Override
public int getEventType() { public TreeParserToken getEventType() throws ServletException {
// TODO Auto-generated method stub return this.convertToTreeParserToken(parser.getCurrentToken());
return 0;
} }
@Override @Override
@ -102,6 +101,9 @@ public class JSONParser implements TreeParser{
case START_OBJECT: return TreeParserToken.StartEntity; case START_OBJECT: return TreeParserToken.StartEntity;
case END_OBJECT: return TreeParserToken.EndEntity; case END_OBJECT: return TreeParserToken.EndEntity;
case VALUE_STRING: return TreeParserToken.Value; case VALUE_STRING: return TreeParserToken.Value;
//Json does not have START_DOCUMENT
//Json does not have END_DOCUMENT
//TODO finish the rest of the cases //TODO finish the rest of the cases
default: throw new ServletException("Not yet implemented"); default: throw new ServletException("Not yet implemented");
} }

View File

@ -4,7 +4,7 @@ import javax.servlet.ServletException;
public interface TreeParser { public interface TreeParser {
public TreeParserToken next() throws ServletException; public TreeParserToken next() throws ServletException;
public int getEventType(); //aka getCurrentToken public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
public boolean hasNext() throws ServletException; public boolean hasNext() throws ServletException;
public String getLocalName(); public String getLocalName();
public String getPrefix(); public String getPrefix();

View File

@ -2,6 +2,8 @@ package com.google.refine.importers.parsers;
public enum TreeParserToken { public enum TreeParserToken {
StartDocument,
EndDocument,
StartEntity, StartEntity,
EndEntity, EndEntity,
Value Value

View File

@ -47,13 +47,15 @@ public class XmlParser implements TreeParser{
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity; case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity; case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value; case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
//TODO //TODO
default: throw new ServletException("Not yet implemented"); default: throw new ServletException("Not yet implemented");
} }
} }
public int getEventType(){ public TreeParserToken getEventType() throws ServletException{
return parser.getEventType(); return this.convertToTreeParserToken(parser.getEventType());
} }
public boolean hasNext() throws ServletException{ public boolean hasNext() throws ServletException{