XmlImportUtilities no longer relies on XMLStreamConstants, and is now independent of any specific type of tree data (Xml or otherwise).
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1378 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
b21961be89
commit
855df20481
@ -8,8 +8,6 @@ import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -21,7 +19,7 @@ import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
|
||||
public class XmlImportUtilities extends TreeImportUtilities {
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
|
||||
|
||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
||||
try {
|
||||
@ -47,7 +45,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
|
||||
* Looks for an element with the given tag name in the Tree data being parsed, returning the path hierarchy to reach it.
|
||||
*
|
||||
* @param parser
|
||||
* @param tag
|
||||
@ -56,12 +54,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
* If the tag is found, an array of strings is returned.
|
||||
* If the tag is at the top level, the tag will be the only item in the array.
|
||||
* If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
|
||||
* Null if the the tag is not found.
|
||||
* @throws XMLStreamException
|
||||
* null if the tag is not found.
|
||||
* @throws ServletException
|
||||
*/
|
||||
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
||||
try{
|
||||
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic
|
||||
if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
|
||||
parser.next();
|
||||
|
||||
String localName = parser.getLocalName();
|
||||
@ -97,10 +95,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
/**
|
||||
* Seeks for recurring XML element in an InputStream
|
||||
* Seeks recurring elements in a parsed document
|
||||
* which are likely candidates for being data records
|
||||
* @param inputStream
|
||||
* The XML data as a stream
|
||||
* @param parser
|
||||
* The parser loaded with tree data
|
||||
* @return
|
||||
* The path to the most numerous of the possible candidates.
|
||||
* null if no candidates were found (fewer than 6 recurrences)
|
||||
@ -133,12 +131,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
return candidates.get(0).path;
|
||||
}
|
||||
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
|
||||
logger.info("No candidate elements were found in data - at least 6 similar elements are required");
|
||||
return null;
|
||||
}
|
||||
|
||||
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
|
||||
logger.trace("detectRecordElement(XMLStreamReader, String[])");
|
||||
logger.trace("detectRecordElement(TreeParser, String[])");
|
||||
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
||||
|
||||
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
||||
@ -258,17 +256,16 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
* @param recordPath
|
||||
* @param pathIndex
|
||||
* @param rootColumnGroup
|
||||
* @throws XMLStreamException
|
||||
* @throws ServletException
|
||||
*/
|
||||
static protected void findRecord(
|
||||
Project project,
|
||||
//XMLStreamReader parser,
|
||||
TreeParser parser,
|
||||
String[] recordPath,
|
||||
int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws ServletException {
|
||||
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
|
||||
if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){
|
||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||
return;
|
||||
}
|
||||
@ -303,12 +300,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
/**
|
||||
* processRecord parsesXml for a single element and it's sub-elements,
|
||||
* processRecord parses Tree data for a single element and its sub-elements,
|
||||
* adding the parsed data as a row to the project
|
||||
* @param project
|
||||
* @param parser
|
||||
* @param rootColumnGroup
|
||||
* @throws XMLStreamException
|
||||
* @throws ServletException
|
||||
*/
|
||||
static protected void processRecord(
|
||||
Project project,
|
||||
@ -345,7 +342,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
* @param parser
|
||||
* @param columnGroup
|
||||
* @param record
|
||||
* @throws XMLStreamException
|
||||
* @throws ServletException
|
||||
*/
|
||||
static protected void processSubRecord(
|
||||
Project project,
|
||||
|
@ -49,9 +49,8 @@ public class JSONParser implements TreeParser{
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEventType() {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
public TreeParserToken getEventType() throws ServletException {
|
||||
return this.convertToTreeParserToken(parser.getCurrentToken());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -102,6 +101,9 @@ public class JSONParser implements TreeParser{
|
||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||
case VALUE_STRING: return TreeParserToken.Value;
|
||||
//Json does not have START_DOCUMENT
|
||||
//Json does not have END_DOCUMENT
|
||||
|
||||
//TODO finish the rest of the cases
|
||||
default: throw new ServletException("Not yet implemented");
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ import javax.servlet.ServletException;
|
||||
|
||||
public interface TreeParser {
|
||||
public TreeParserToken next() throws ServletException;
|
||||
public int getEventType(); //aka getCurrentToken
|
||||
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
|
||||
public boolean hasNext() throws ServletException;
|
||||
public String getLocalName();
|
||||
public String getPrefix();
|
||||
|
@ -2,6 +2,8 @@ package com.google.refine.importers.parsers;
|
||||
|
||||
|
||||
public enum TreeParserToken {
|
||||
StartDocument,
|
||||
EndDocument,
|
||||
StartEntity,
|
||||
EndEntity,
|
||||
Value
|
||||
|
@ -47,13 +47,15 @@ public class XmlParser implements TreeParser{
|
||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
|
||||
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
|
||||
//TODO
|
||||
default: throw new ServletException("Not yet implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public int getEventType(){
|
||||
return parser.getEventType();
|
||||
public TreeParserToken getEventType() throws ServletException{
|
||||
return this.convertToTreeParserToken(parser.getEventType());
|
||||
}
|
||||
|
||||
public boolean hasNext() throws ServletException{
|
||||
|
Loading…
Reference in New Issue
Block a user