Another small step towards making XmlImportUtilities generic for all tree-structured data, and less XML-centric. Some uses of XMLStreamConstants in XmlImportUtilities now work with a generic TreeParserToken, with methods to convert between TreeParserToken and XMLStreamConstants/JsonToken in the respective parsers.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1377 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-28 10:04:56 +00:00
parent 740caedf46
commit b21961be89
5 changed files with 86 additions and 32 deletions

View File

@ -15,6 +15,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
@ -25,8 +26,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
static public String[] detectPathFromTag(TreeParser parser, String tag) {
try {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
String[] path2 = new String[path.size()];
@ -73,10 +74,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
} else if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
path.add(0, localName);
@ -110,8 +111,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
try {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) {
RecordElementCandidate candidate =
detectRecordElement(
parser,
@ -146,14 +147,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
try {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
} else if (eventType == XMLStreamConstants.CHARACTERS) {//FIXME uses Xml, and is not generic
} else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) {
if (parser.getText().trim().length() > 0) {
textNodeCount++;
}
} else if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
childElementNodeCount++;
String tagName = parser.getLocalName();
@ -237,8 +238,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
) {
try {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
findRecord(project, parser, recordPath, 0, rootColumnGroup);
}
}
@ -275,10 +276,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
if (tagName.equals(recordPath[pathIndex])) {
if (pathIndex < recordPath.length - 1) {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
} else if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
} else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
}
}
@ -292,10 +293,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
static protected void skip(TreeParser parser) throws ServletException {
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
skip(parser);
} else if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
return;
}
}
@ -374,8 +375,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
while (parser.hasNext()) {
int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
processSubRecord(
project,
parser,
@ -383,7 +384,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
record
);
} else if (//eventType == XMLStreamConstants.CDATA ||
eventType == XMLStreamConstants.CHARACTERS) {
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getText().trim();
if (text.length() > 0) {
addCell(
@ -394,7 +395,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
parser.getText()
);
}
} else if (eventType == XMLStreamConstants.END_ELEMENT) {
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
break;
}
}

View File

@ -7,6 +7,7 @@ import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
public class JSONParser implements TreeParser{
JsonFactory factory = new JsonFactory();
@ -78,9 +79,32 @@ public class JSONParser implements TreeParser{
}
@Override
public int next() throws ServletException {
// TODO Auto-generated method stub
return 0;
public TreeParserToken next() throws ServletException {
JsonToken next;
try {
next = parser.nextToken();
} catch (JsonParseException e) {
throw new ServletException(e.getMessage());
} catch (IOException e) {
throw new ServletException(e.getMessage());
}
if(next == null)
throw new ServletException("No more Json Tokens in stream");
return convertToTreeParserToken(next);
}
/**
 * Translates a Jackson {@link JsonToken} into the parser-neutral {@link TreeParserToken}.
 *
 * <p>Array and object openers both become {@code StartEntity}, their closers both become
 * {@code EndEntity}, and string values become {@code Value}. Every other token is
 * currently rejected.
 *
 * @param token the Jackson token to translate
 * @return the generic token corresponding to {@code token}
 * @throws ServletException if {@code token} has no mapping yet
 */
protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException {
    switch (token) {
        case START_ARRAY:
        case START_OBJECT:
            return TreeParserToken.StartEntity;
        case END_ARRAY:
        case END_OBJECT:
            return TreeParserToken.EndEntity;
        case VALUE_STRING:
            return TreeParserToken.Value;
        default:
            //TODO finish the rest of the cases
            throw new ServletException("Not yet implemented");
    }
}
}

View File

@ -3,8 +3,8 @@ package com.google.refine.importers.parsers;
import javax.servlet.ServletException;
public interface TreeParser {
public int next() throws ServletException;
public int getEventType();
public TreeParserToken next() throws ServletException;
public int getEventType(); //aka getCurrentToken
public boolean hasNext() throws ServletException;
public String getLocalName();
public String getPrefix();

View File

@ -0,0 +1,9 @@
package com.google.refine.importers.parsers;
/**
 * Parser-neutral event kinds returned by {@code TreeParser.next()}, so that tree import
 * code does not have to compare against XML- or JSON-specific token constants.
 */
public enum TreeParserToken {
    /** Opening of a nested node, e.g. an XML start element or the start of a JSON array/object. */
    StartEntity,

    /** Closing of a nested node, e.g. an XML end element or the end of a JSON array/object. */
    EndEntity,

    /** Textual content, e.g. XML character data or a JSON string value. */
    Value,

    //TODO add the remaining token kinds
}

View File

@ -5,6 +5,7 @@ import java.io.InputStream;
import javax.servlet.ServletException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
@ -23,13 +24,32 @@ public class XmlParser implements TreeParser{
}
}
public int next() throws ServletException{
public TreeParserToken next() throws ServletException{
try {
return parser.next();
if(!parser.hasNext())
throw new ServletException("End of XML stream");
} catch (XMLStreamException e) {
//TODO log and return
throw new ServletException(e.getMessage());
}
int currentToken = -1;
try {
currentToken = parser.next();
} catch (XMLStreamException e) {
throw new ServletException(e.getMessage());
}
return convertToTreeParserToken(currentToken);
}
/**
 * Translates a StAX event code (one of the {@link XMLStreamConstants} values) into the
 * parser-neutral {@link TreeParserToken}.
 *
 * <p>Only {@code START_ELEMENT}, {@code END_ELEMENT} and {@code CHARACTERS} are mapped;
 * every other event code is currently rejected.
 *
 * @param token the StAX event code to translate
 * @return the generic token corresponding to {@code token}
 * @throws ServletException if {@code token} has no mapping yet
 */
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
    if (token == XMLStreamConstants.START_ELEMENT) {
        return TreeParserToken.StartEntity;
    }
    if (token == XMLStreamConstants.END_ELEMENT) {
        return TreeParserToken.EndEntity;
    }
    if (token == XMLStreamConstants.CHARACTERS) {
        return TreeParserToken.Value;
    }
    //TODO map the remaining event types
    throw new ServletException("Not yet implemented");
}
public int getEventType(){