Another small step towards making XmlImportUtilities generic for all tree-structured data, and less XML-centric. Some uses of XMLStreamConstants in XmlImportUtilities now work with a generic TreeParserToken instead, with methods to convert between TreeParserToken and XMLStreamConstants/JsonToken in the respective parsers.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1377 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
740caedf46
commit
b21961be89
@ -15,6 +15,7 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.parsers.TreeParser;
|
||||||
|
import com.google.refine.importers.parsers.TreeParserToken;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
@ -25,8 +26,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml
|
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
List<String> path = detectRecordElement(parser, tag);
|
List<String> path = detectRecordElement(parser, tag);
|
||||||
if (path != null) {
|
if (path != null) {
|
||||||
String[] path2 = new String[path.size()];
|
String[] path2 = new String[path.size()];
|
||||||
@ -73,10 +74,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||||
break;
|
break;
|
||||||
} else if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
List<String> path = detectRecordElement(parser, tag);
|
List<String> path = detectRecordElement(parser, tag);
|
||||||
if (path != null) {
|
if (path != null) {
|
||||||
path.add(0, localName);
|
path.add(0, localName);
|
||||||
@ -110,8 +111,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) {
|
||||||
RecordElementCandidate candidate =
|
RecordElementCandidate candidate =
|
||||||
detectRecordElement(
|
detectRecordElement(
|
||||||
parser,
|
parser,
|
||||||
@ -146,14 +147,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||||
break;
|
break;
|
||||||
} else if (eventType == XMLStreamConstants.CHARACTERS) {//FIXME uses Xml, and is not generic
|
} else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) {
|
||||||
if (parser.getText().trim().length() > 0) {
|
if (parser.getText().trim().length() > 0) {
|
||||||
textNodeCount++;
|
textNodeCount++;
|
||||||
}
|
}
|
||||||
} else if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
childElementNodeCount++;
|
childElementNodeCount++;
|
||||||
|
|
||||||
String tagName = parser.getLocalName();
|
String tagName = parser.getLocalName();
|
||||||
@ -237,8 +238,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -275,10 +276,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
if (tagName.equals(recordPath[pathIndex])) {
|
if (tagName.equals(recordPath[pathIndex])) {
|
||||||
if (pathIndex < recordPath.length - 1) {
|
if (pathIndex < recordPath.length - 1) {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
||||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) {//FIXME uses Xml, and is not generic
|
} else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -292,10 +293,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
static protected void skip(TreeParser parser) throws ServletException {
|
static protected void skip(TreeParser parser) throws ServletException {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
|
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
skip(parser);
|
skip(parser);
|
||||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic
|
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -374,8 +375,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
int eventType = parser.next();
|
TreeParserToken eventType = parser.next();
|
||||||
if (eventType == XMLStreamConstants.START_ELEMENT) {
|
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
processSubRecord(
|
processSubRecord(
|
||||||
project,
|
project,
|
||||||
parser,
|
parser,
|
||||||
@ -383,7 +384,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
record
|
record
|
||||||
);
|
);
|
||||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||||
eventType == XMLStreamConstants.CHARACTERS) {
|
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||||
String text = parser.getText().trim();
|
String text = parser.getText().trim();
|
||||||
if (text.length() > 0) {
|
if (text.length() > 0) {
|
||||||
addCell(
|
addCell(
|
||||||
@ -394,7 +395,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
parser.getText()
|
parser.getText()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else if (eventType == XMLStreamConstants.END_ELEMENT) {
|
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@ import javax.servlet.ServletException;
|
|||||||
import org.codehaus.jackson.JsonFactory;
|
import org.codehaus.jackson.JsonFactory;
|
||||||
import org.codehaus.jackson.JsonParseException;
|
import org.codehaus.jackson.JsonParseException;
|
||||||
import org.codehaus.jackson.JsonParser;
|
import org.codehaus.jackson.JsonParser;
|
||||||
|
import org.codehaus.jackson.JsonToken;
|
||||||
|
|
||||||
public class JSONParser implements TreeParser{
|
public class JSONParser implements TreeParser{
|
||||||
JsonFactory factory = new JsonFactory();
|
JsonFactory factory = new JsonFactory();
|
||||||
@ -78,9 +79,32 @@ public class JSONParser implements TreeParser{
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int next() throws ServletException {
|
public TreeParserToken next() throws ServletException {
|
||||||
// TODO Auto-generated method stub
|
JsonToken next;
|
||||||
return 0;
|
try {
|
||||||
|
next = parser.nextToken();
|
||||||
|
} catch (JsonParseException e) {
|
||||||
|
throw new ServletException(e.getMessage());
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServletException(e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
if(next == null)
|
||||||
|
throw new ServletException("No more Json Tokens in stream");
|
||||||
|
|
||||||
|
return convertToTreeParserToken(next);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException{
|
||||||
|
switch(token){
|
||||||
|
case START_ARRAY: return TreeParserToken.StartEntity;
|
||||||
|
case END_ARRAY: return TreeParserToken.EndEntity;
|
||||||
|
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||||
|
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||||
|
case VALUE_STRING: return TreeParserToken.Value;
|
||||||
|
//TODO finish the rest of the cases
|
||||||
|
default: throw new ServletException("Not yet implemented");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,8 @@ package com.google.refine.importers.parsers;
|
|||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
|
|
||||||
public interface TreeParser {
|
public interface TreeParser {
|
||||||
public int next() throws ServletException;
|
public TreeParserToken next() throws ServletException;
|
||||||
public int getEventType();
|
public int getEventType(); //aka getCurrentToken
|
||||||
public boolean hasNext() throws ServletException;
|
public boolean hasNext() throws ServletException;
|
||||||
public String getLocalName();
|
public String getLocalName();
|
||||||
public String getPrefix();
|
public String getPrefix();
|
||||||
|
@ -0,0 +1,9 @@
|
|||||||
|
package com.google.refine.importers.parsers;
|
||||||
|
|
||||||
|
|
||||||
|
public enum TreeParserToken {
|
||||||
|
StartEntity,
|
||||||
|
EndEntity,
|
||||||
|
Value
|
||||||
|
//TODO
|
||||||
|
}
|
@ -5,6 +5,7 @@ import java.io.InputStream;
|
|||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.xml.stream.FactoryConfigurationError;
|
import javax.xml.stream.FactoryConfigurationError;
|
||||||
import javax.xml.stream.XMLInputFactory;
|
import javax.xml.stream.XMLInputFactory;
|
||||||
|
import javax.xml.stream.XMLStreamConstants;
|
||||||
import javax.xml.stream.XMLStreamException;
|
import javax.xml.stream.XMLStreamException;
|
||||||
import javax.xml.stream.XMLStreamReader;
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
|
||||||
@ -23,13 +24,32 @@ public class XmlParser implements TreeParser{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int next() throws ServletException{
|
public TreeParserToken next() throws ServletException{
|
||||||
try {
|
try {
|
||||||
return parser.next();
|
if(!parser.hasNext())
|
||||||
|
throw new ServletException("End of XML stream");
|
||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
//TODO log and return
|
|
||||||
throw new ServletException(e.getMessage());
|
throw new ServletException(e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int currentToken = -1;
|
||||||
|
try {
|
||||||
|
currentToken = parser.next();
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
throw new ServletException(e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return convertToTreeParserToken(currentToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
|
||||||
|
switch(token){
|
||||||
|
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||||
|
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||||
|
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||||
|
//TODO
|
||||||
|
default: throw new ServletException("Not yet implemented");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getEventType(){
|
public int getEventType(){
|
||||||
|
Loading…
Reference in New Issue
Block a user