Another small step towards making XmlImportUtilities generic for all tree-structured data, and less XML-centric. Some uses of XMLStreamConstants in XmlImportUtilities now work with a generic TreeParserToken instead, with methods to convert between TreeParserToken and XMLStreamConstants/JsonToken in the respective parsers.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1377 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
740caedf46
commit
b21961be89
@ -15,6 +15,7 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
@ -25,8 +26,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
List<String> path = detectRecordElement(parser, tag);
|
||||
if (path != null) {
|
||||
String[] path2 = new String[path.size()];
|
||||
@ -73,10 +74,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
} else if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
||||
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
List<String> path = detectRecordElement(parser, tag);
|
||||
if (path != null) {
|
||||
path.add(0, localName);
|
||||
@ -110,8 +111,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) {
|
||||
RecordElementCandidate candidate =
|
||||
detectRecordElement(
|
||||
parser,
|
||||
@ -146,14 +147,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
} else if (eventType == XMLStreamConstants.CHARACTERS) {//FIXME uses Xml, and is not generic
|
||||
} else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) {
|
||||
if (parser.getText().trim().length() > 0) {
|
||||
textNodeCount++;
|
||||
}
|
||||
} else if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
||||
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
childElementNodeCount++;
|
||||
|
||||
String tagName = parser.getLocalName();
|
||||
@ -237,8 +238,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
) {
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
||||
}
|
||||
}
|
||||
@ -275,10 +276,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
if (tagName.equals(recordPath[pathIndex])) {
|
||||
if (pathIndex < recordPath.length - 1) {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
|
||||
} else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -292,10 +293,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
static protected void skip(TreeParser parser) throws ServletException {
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
skip(parser);
|
||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
|
||||
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -374,8 +375,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
while (parser.hasNext()) {
|
||||
int eventType = parser.next();
|
||||
if (eventType == XMLStreamConstants.START_ELEMENT) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
processSubRecord(
|
||||
project,
|
||||
parser,
|
||||
@ -383,7 +384,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
record
|
||||
);
|
||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||
eventType == XMLStreamConstants.CHARACTERS) {
|
||||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getText().trim();
|
||||
if (text.length() > 0) {
|
||||
addCell(
|
||||
@ -394,7 +395,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
parser.getText()
|
||||
);
|
||||
}
|
||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) {
|
||||
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ import javax.servlet.ServletException;
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
|
||||
public class JSONParser implements TreeParser{
|
||||
JsonFactory factory = new JsonFactory();
|
||||
@ -78,9 +79,32 @@ public class JSONParser implements TreeParser{
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() throws ServletException {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
public TreeParserToken next() throws ServletException {
|
||||
JsonToken next;
|
||||
try {
|
||||
next = parser.nextToken();
|
||||
} catch (JsonParseException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
} catch (IOException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
}
|
||||
|
||||
if(next == null)
|
||||
throw new ServletException("No more Json Tokens in stream");
|
||||
|
||||
return convertToTreeParserToken(next);
|
||||
}
|
||||
|
||||
protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException{
|
||||
switch(token){
|
||||
case START_ARRAY: return TreeParserToken.StartEntity;
|
||||
case END_ARRAY: return TreeParserToken.EndEntity;
|
||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||
case VALUE_STRING: return TreeParserToken.Value;
|
||||
//TODO finish the rest of the cases
|
||||
default: throw new ServletException("Not yet implemented");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,8 +3,8 @@ package com.google.refine.importers.parsers;
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
public interface TreeParser {
|
||||
public int next() throws ServletException;
|
||||
public int getEventType();
|
||||
public TreeParserToken next() throws ServletException;
|
||||
public int getEventType(); //aka getCurrentToken
|
||||
public boolean hasNext() throws ServletException;
|
||||
public String getLocalName();
|
||||
public String getPrefix();
|
||||
|
@ -0,0 +1,9 @@
|
||||
package com.google.refine.importers.parsers;
|
||||
|
||||
|
||||
/**
 * Parser-neutral token kinds returned by {@code TreeParser.next()}, so that
 * callers do not have to compare against XML- or JSON-specific constants.
 */
public enum TreeParserToken {
    /** Start of a nested entity (an XML start element, or a JSON array/object start). */
    StartEntity,

    /** End of a nested entity (an XML end element, or a JSON array/object end). */
    EndEntity,

    /** A value (XML character data, or a JSON string value). */
    Value
    //TODO
}
|
@ -5,6 +5,7 @@ import java.io.InputStream;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.xml.stream.FactoryConfigurationError;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
@ -23,13 +24,32 @@ public class XmlParser implements TreeParser{
|
||||
}
|
||||
}
|
||||
|
||||
public int next() throws ServletException{
|
||||
public TreeParserToken next() throws ServletException{
|
||||
try {
|
||||
return parser.next();
|
||||
if(!parser.hasNext())
|
||||
throw new ServletException("End of XML stream");
|
||||
} catch (XMLStreamException e) {
|
||||
//TODO log and return
|
||||
throw new ServletException(e.getMessage());
|
||||
}
|
||||
|
||||
int currentToken = -1;
|
||||
try {
|
||||
currentToken = parser.next();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
}
|
||||
|
||||
return convertToTreeParserToken(currentToken);
|
||||
}
|
||||
|
||||
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
|
||||
switch(token){
|
||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||
//TODO
|
||||
default: throw new ServletException("Not yet implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public int getEventType(){
|
||||
|
Loading…
Reference in New Issue
Block a user