XmlImportUtilities no longer relies on XMLStreamConstants, and is now independent of any specific type of tree data (Xml or otherwise).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1378 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-28 10:46:33 +00:00
parent b21961be89
commit 855df20481
5 changed files with 27 additions and 24 deletions

View File

@ -8,8 +8,6 @@ import java.util.Map;
import java.util.Map.Entry;
import javax.servlet.ServletException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -21,7 +19,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
static public String[] detectPathFromTag(TreeParser parser, String tag) {
try {
@ -47,7 +45,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
/**
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
* Looks for an element with the given tag name in the Tree data being parsed, returning the path hierarchy to reach it.
*
* @param parser
* @param tag
@ -56,12 +54,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
* If the tag is found, an array of strings is returned.
* If the tag is at the top level, the tag will be the only item in the array.
* If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
* Null if the the tag is not found.
* @throws XMLStreamException
* null if the tag is not found.
* @throws ServletException
*/
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
try{
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic
if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
parser.next();
String localName = parser.getLocalName();
@ -97,10 +95,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
/**
* Seeks for recurring XML element in an InputStream
* Seeks recurring elements in a parsed document
* which are likely candidates for being data records
* @param inputStream
* The XML data as a stream
* @param parser
* The parser loaded with tree data
* @return
* The path to the most numerous of the possible candidates.
* null if no candidates were found (less than 6 recurrences)
@ -133,12 +131,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
return candidates.get(0).path;
}
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
logger.info("No candidate elements were found in data - at least 6 similar elements are required");
return null;
}
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
logger.trace("detectRecordElement(XMLStreamReader, String[])");
logger.trace("detectRecordElement(TreeParser, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
@ -258,17 +256,16 @@ public class XmlImportUtilities extends TreeImportUtilities {
* @param recordPath
* @param pathIndex
* @param rootColumnGroup
* @throws XMLStreamException
* @throws ServletException
*/
static protected void findRecord(
Project project,
//XMLStreamReader parser,
TreeParser parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup
) throws ServletException {
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
// Guard against being invoked while the parser is still on the document-start
// token (the generic replacement for XMLStreamConstants.START_DOCUMENT).
// The check must use StartDocument: comparing against EndDocument would let a
// document-start token through and would instead bail out at end of document,
// contradicting the warning logged below.
if (parser.getEventType() == TreeParserToken.StartDocument) {
    logger.warn("Cannot use findRecord method for START_DOCUMENT event");
    return;
}
@ -303,12 +300,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
/**
* processRecord parsesXml for a single element and it's sub-elements,
* processRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project
* @param project
* @param parser
* @param rootColumnGroup
* @throws XMLStreamException
* @throws ServletException
*/
static protected void processRecord(
Project project,
@ -345,7 +342,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
* @param parser
* @param columnGroup
* @param record
* @throws XMLStreamException
* @throws ServletException
*/
static protected void processSubRecord(
Project project,

View File

@ -49,9 +49,8 @@ public class JSONParser implements TreeParser{
}
@Override
public int getEventType() {
// TODO Auto-generated method stub
return 0;
public TreeParserToken getEventType() throws ServletException {
return this.convertToTreeParserToken(parser.getCurrentToken());
}
@Override
@ -102,6 +101,9 @@ public class JSONParser implements TreeParser{
case START_OBJECT: return TreeParserToken.StartEntity;
case END_OBJECT: return TreeParserToken.EndEntity;
case VALUE_STRING: return TreeParserToken.Value;
//Json does not have START_DOCUMENT
//Json does not have END_DOCUMENT
//TODO finish the rest of the cases
default: throw new ServletException("Not yet implemented");
}

View File

@ -4,7 +4,7 @@ import javax.servlet.ServletException;
public interface TreeParser {
public TreeParserToken next() throws ServletException;
public int getEventType(); //aka getCurrentToken
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
public boolean hasNext() throws ServletException;
public String getLocalName();
public String getPrefix();

View File

@ -2,6 +2,8 @@ package com.google.refine.importers.parsers;
public enum TreeParserToken {
StartDocument,
EndDocument,
StartEntity,
EndEntity,
Value

View File

@ -47,13 +47,15 @@ public class XmlParser implements TreeParser{
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
//TODO
default: throw new ServletException("Not yet implemented");
}
}
public int getEventType(){
return parser.getEventType();
public TreeParserToken getEventType() throws ServletException{
return this.convertToTreeParserToken(parser.getEventType());
}
public boolean hasNext() throws ServletException{