XmlImportUtilities no longer relies on XMLStreamConstants, and is now independent of any specific type of tree data (Xml or otherwise).
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1378 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
b21961be89
commit
855df20481
@ -8,8 +8,6 @@ import java.util.Map;
|
|||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.xml.stream.XMLStreamConstants;
|
|
||||||
import javax.xml.stream.XMLStreamException;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -21,7 +19,7 @@ import com.google.refine.model.Project;
|
|||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
|
||||||
public class XmlImportUtilities extends TreeImportUtilities {
|
public class XmlImportUtilities extends TreeImportUtilities {
|
||||||
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
|
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
|
||||||
|
|
||||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
||||||
try {
|
try {
|
||||||
@ -47,7 +45,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
|
* Looks for an element with the given tag name in the Tree data being parsed, returning the path hierarchy to reach it.
|
||||||
*
|
*
|
||||||
* @param parser
|
* @param parser
|
||||||
* @param tag
|
* @param tag
|
||||||
@ -56,12 +54,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
* If the tag is found, an array of strings is returned.
|
* If the tag is found, an array of strings is returned.
|
||||||
* If the tag is at the top level, the tag will be the only item in the array.
|
* If the tag is at the top level, the tag will be the only item in the array.
|
||||||
* If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
|
* If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index
|
||||||
* Null if the the tag is not found.
|
* null if the tag is not found.
|
||||||
* @throws XMLStreamException
|
* @throws ServletException
|
||||||
*/
|
*/
|
||||||
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
||||||
try{
|
try{
|
||||||
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic
|
if(parser.getEventType() == TreeParserToken.StartDocument)//XMLStreamConstants.START_DOCUMENT)
|
||||||
parser.next();
|
parser.next();
|
||||||
|
|
||||||
String localName = parser.getLocalName();
|
String localName = parser.getLocalName();
|
||||||
@ -97,10 +95,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Seeks for recurring XML element in an InputStream
|
* Seeks recurring elements in a parsed document
|
||||||
* which are likely candidates for being data records
|
* which are likely candidates for being data records
|
||||||
* @param inputStream
|
* @param parser
|
||||||
* The XML data as a stream
|
* The parser loaded with tree data
|
||||||
* @return
|
* @return
|
||||||
* The path to the most numerous of the possible candidates.
|
* The path to the most numerous of the possible candidates.
|
||||||
* null if no candidates were found (less than 6 recurrences)
|
* null if no candidates were found (less than 6 recurrences)
|
||||||
@ -133,12 +131,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
return candidates.get(0).path;
|
return candidates.get(0).path;
|
||||||
}
|
}
|
||||||
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
|
logger.info("No candidate elements were found in data - at least 6 similar elements are required");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
|
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
|
||||||
logger.trace("detectRecordElement(XMLStreamReader, String[])");
|
logger.trace("detectRecordElement(TreeParser, String[])");
|
||||||
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
||||||
|
|
||||||
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
||||||
@ -258,17 +256,16 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
* @param recordPath
|
* @param recordPath
|
||||||
* @param pathIndex
|
* @param pathIndex
|
||||||
* @param rootColumnGroup
|
* @param rootColumnGroup
|
||||||
* @throws XMLStreamException
|
* @throws ServletException
|
||||||
*/
|
*/
|
||||||
static protected void findRecord(
|
static protected void findRecord(
|
||||||
Project project,
|
Project project,
|
||||||
//XMLStreamReader parser,
|
|
||||||
TreeParser parser,
|
TreeParser parser,
|
||||||
String[] recordPath,
|
String[] recordPath,
|
||||||
int pathIndex,
|
int pathIndex,
|
||||||
ImportColumnGroup rootColumnGroup
|
ImportColumnGroup rootColumnGroup
|
||||||
) throws ServletException {
|
) throws ServletException {
|
||||||
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){//FIXME uses Xml, and is not generic
|
if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){
|
||||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -303,12 +300,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* processRecord parsesXml for a single element and it's sub-elements,
|
* processRecord parses Tree data for a single element and its sub-elements,
|
||||||
* adding the parsed data as a row to the project
|
* adding the parsed data as a row to the project
|
||||||
* @param project
|
* @param project
|
||||||
* @param parser
|
* @param parser
|
||||||
* @param rootColumnGroup
|
* @param rootColumnGroup
|
||||||
* @throws XMLStreamException
|
* @throws ServletException
|
||||||
*/
|
*/
|
||||||
static protected void processRecord(
|
static protected void processRecord(
|
||||||
Project project,
|
Project project,
|
||||||
@ -345,7 +342,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
* @param parser
|
* @param parser
|
||||||
* @param columnGroup
|
* @param columnGroup
|
||||||
* @param record
|
* @param record
|
||||||
* @throws XMLStreamException
|
* @throws ServletException
|
||||||
*/
|
*/
|
||||||
static protected void processSubRecord(
|
static protected void processSubRecord(
|
||||||
Project project,
|
Project project,
|
||||||
|
@ -49,9 +49,8 @@ public class JSONParser implements TreeParser{
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getEventType() {
|
public TreeParserToken getEventType() throws ServletException {
|
||||||
// TODO Auto-generated method stub
|
return this.convertToTreeParserToken(parser.getCurrentToken());
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -102,6 +101,9 @@ public class JSONParser implements TreeParser{
|
|||||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||||
case VALUE_STRING: return TreeParserToken.Value;
|
case VALUE_STRING: return TreeParserToken.Value;
|
||||||
|
//Json does not have START_DOCUMENT
|
||||||
|
//Json does not have END_DOCUMENT
|
||||||
|
|
||||||
//TODO finish the rest of the cases
|
//TODO finish the rest of the cases
|
||||||
default: throw new ServletException("Not yet implemented");
|
default: throw new ServletException("Not yet implemented");
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ import javax.servlet.ServletException;
|
|||||||
|
|
||||||
public interface TreeParser {
|
public interface TreeParser {
|
||||||
public TreeParserToken next() throws ServletException;
|
public TreeParserToken next() throws ServletException;
|
||||||
public int getEventType(); //aka getCurrentToken
|
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
|
||||||
public boolean hasNext() throws ServletException;
|
public boolean hasNext() throws ServletException;
|
||||||
public String getLocalName();
|
public String getLocalName();
|
||||||
public String getPrefix();
|
public String getPrefix();
|
||||||
|
@ -2,6 +2,8 @@ package com.google.refine.importers.parsers;
|
|||||||
|
|
||||||
|
|
||||||
public enum TreeParserToken {
|
public enum TreeParserToken {
|
||||||
|
StartDocument,
|
||||||
|
EndDocument,
|
||||||
StartEntity,
|
StartEntity,
|
||||||
EndEntity,
|
EndEntity,
|
||||||
Value
|
Value
|
||||||
|
@ -47,13 +47,15 @@ public class XmlParser implements TreeParser{
|
|||||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||||
|
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.StartDocument;
|
||||||
|
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.EndDocument;
|
||||||
//TODO
|
//TODO
|
||||||
default: throw new ServletException("Not yet implemented");
|
default: throw new ServletException("Not yet implemented");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getEventType(){
|
public TreeParserToken getEventType() throws ServletException{
|
||||||
return parser.getEventType();
|
return this.convertToTreeParserToken(parser.getEventType());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() throws ServletException{
|
public boolean hasNext() throws ServletException{
|
||||||
|
Loading…
Reference in New Issue
Block a user