The JsonImporter now passes all current unit tests.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1421 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
ba442ec201
commit
d3f223c196
@ -62,7 +62,6 @@ public class JsonImporter implements StreamImporter{
|
||||
|
||||
if (recordPath == null)
|
||||
return;
|
||||
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup);
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
|
@ -168,8 +168,6 @@ public abstract class TreeImportUtilities {
|
||||
row.add(null);
|
||||
}
|
||||
|
||||
logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1));
|
||||
|
||||
row.set(cellIndex, new Cell(value, null));
|
||||
|
||||
column.nextRowIndex = rowIndex + 1;
|
||||
|
@ -110,7 +110,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) {
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
RecordElementCandidate candidate =
|
||||
detectRecordElement(
|
||||
parser,
|
||||
@ -146,13 +146,17 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
if (eventType == TreeParserToken.EndEntity ) {
|
||||
break;
|
||||
} else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) {
|
||||
if (parser.getText().trim().length() > 0) {
|
||||
textNodeCount++;
|
||||
} else if (eventType == TreeParserToken.Value) {
|
||||
try{
|
||||
if (parser.getText().trim().length() > 0) {
|
||||
textNodeCount++;
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
} else if (eventType == TreeParserToken.StartEntity) {
|
||||
childElementNodeCount++;
|
||||
|
||||
String tagName = parser.getLocalName();
|
||||
@ -234,15 +238,15 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
String[] recordPath,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) {
|
||||
logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)");
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
// silent
|
||||
}
|
||||
}
|
||||
@ -265,18 +269,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws ServletException {
|
||||
if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){
|
||||
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
|
||||
|
||||
if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){
|
||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||
return;
|
||||
}
|
||||
|
||||
String tagName = parser.getLocalName();
|
||||
if (tagName.equals(recordPath[pathIndex])) {
|
||||
if (pathIndex < recordPath.length - 1) {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
||||
} else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
} else if (eventType == TreeParserToken.EndEntity ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -312,6 +319,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
TreeParser parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws ServletException {
|
||||
logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)");
|
||||
ImportRecord record = new ImportRecord();
|
||||
|
||||
processSubRecord(project, parser, rootColumnGroup, record);
|
||||
@ -350,13 +358,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
ImportColumnGroup columnGroup,
|
||||
ImportRecord record
|
||||
) throws ServletException {
|
||||
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
|
||||
|
||||
if(parser.getEventType() == TreeParserToken.StartDocument)
|
||||
return;
|
||||
|
||||
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
||||
project,
|
||||
columnGroup,
|
||||
composeName(parser.getPrefix(), parser.getLocalName()));
|
||||
|
||||
project,
|
||||
columnGroup,
|
||||
composeName(parser.getPrefix(), parser.getLocalName()));
|
||||
|
||||
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
|
||||
|
||||
|
||||
int attributeCount = parser.getAttributeCount();
|
||||
for (int i = 0; i < attributeCount; i++) {
|
||||
String text = parser.getAttributeValue(i).trim();
|
||||
@ -373,7 +386,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
processSubRecord(
|
||||
project,
|
||||
parser,
|
||||
@ -382,17 +395,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
);
|
||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getText().trim();
|
||||
if (text.length() > 0) {
|
||||
addCell(
|
||||
project,
|
||||
thisColumnGroup,
|
||||
record,
|
||||
null,
|
||||
parser.getText()
|
||||
);
|
||||
String text = parser.getText();
|
||||
String colName = parser.getLocalName();
|
||||
if(text != null){
|
||||
text = text.trim();
|
||||
if (text.length() > 0) {
|
||||
addCell(
|
||||
project,
|
||||
thisColumnGroup,
|
||||
record,
|
||||
colName,
|
||||
parser.getText()
|
||||
);
|
||||
}
|
||||
}
|
||||
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||
} else if (eventType == TreeParserToken.EndEntity) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -8,40 +8,58 @@ import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class JSONParser implements TreeParser{
|
||||
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
||||
|
||||
JsonFactory factory = new JsonFactory();
|
||||
JsonParser parser = null;
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
Boolean thisTokenIsAFieldName = false;
|
||||
String lastFieldName = null;
|
||||
//end of workaround
|
||||
|
||||
public JSONParser(InputStream inputStream){
|
||||
try {
|
||||
parser = factory.createJsonParser(inputStream);
|
||||
} catch (JsonParseException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public int getAttributeCount() {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeLocalName(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributePrefix(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeValue(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
@ -50,31 +68,49 @@ public class JSONParser implements TreeParser{
|
||||
|
||||
@Override
|
||||
public TreeParserToken getEventType() throws ServletException {
|
||||
return this.convertToTreeParserToken(parser.getCurrentToken());
|
||||
return this.mapToTreeParserToken(parser.getCurrentToken());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLocalName() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
public String getLocalName() throws ServletException{
|
||||
try {
|
||||
String text = parser.getCurrentName();
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(text == null){
|
||||
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
|
||||
text = this.lastFieldName;
|
||||
else
|
||||
text = "__anonymous__";
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return text;
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. Json does not have prefixes
|
||||
*/
|
||||
@Override
|
||||
public String getPrefix() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
public String getText() throws ServletException {
|
||||
try {
|
||||
return parser.getText();
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -83,29 +119,58 @@ public class JSONParser implements TreeParser{
|
||||
try {
|
||||
next = parser.nextToken();
|
||||
} catch (JsonParseException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
throw new ServletException(e);
|
||||
} catch (IOException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
if(next == null)
|
||||
throw new ServletException("No more Json Tokens in stream");
|
||||
|
||||
return convertToTreeParserToken(next);
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(next == JsonToken.FIELD_NAME){
|
||||
try {
|
||||
this.thisTokenIsAFieldName = true;
|
||||
this.lastFieldName = parser.getCurrentName();
|
||||
} catch (Exception e) {
|
||||
//silent
|
||||
}
|
||||
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
|
||||
if(this.thisTokenIsAFieldName){
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}else{
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
}
|
||||
}else{
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return mapToTreeParserToken(next);
|
||||
}
|
||||
|
||||
protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException{
|
||||
protected TreeParserToken mapToTreeParserToken(JsonToken token){
|
||||
switch(token){
|
||||
case START_ARRAY: return TreeParserToken.StartEntity;
|
||||
case END_ARRAY: return TreeParserToken.EndEntity;
|
||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||
case VALUE_STRING: return TreeParserToken.Value;
|
||||
//Json does not have START_DOCUMENT
|
||||
//Json does not have END_DOCUMENT
|
||||
|
||||
//TODO finish the rest of the cases
|
||||
default: throw new ServletException("Not yet implemented");
|
||||
case FIELD_NAME: return TreeParserToken.Ignorable; //returned by the getLocalName function()
|
||||
case VALUE_NUMBER_INT: return TreeParserToken.Value;
|
||||
//Json does not have START_DOCUMENT token type (so ignored as default)
|
||||
//Json does not have END_DOCUMENT token type (so ignored as default)
|
||||
case VALUE_TRUE : return TreeParserToken.Value;
|
||||
case VALUE_NUMBER_FLOAT : return TreeParserToken.Value;
|
||||
case VALUE_NULL : return TreeParserToken.Value;
|
||||
case VALUE_FALSE : return TreeParserToken.Value;
|
||||
case VALUE_EMBEDDED_OBJECT : return TreeParserToken.Ignorable;
|
||||
case NOT_AVAILABLE : return TreeParserToken.Ignorable;
|
||||
default: return TreeParserToken.Ignorable;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,9 +6,9 @@ public interface TreeParser {
|
||||
public TreeParserToken next() throws ServletException;
|
||||
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
|
||||
public boolean hasNext() throws ServletException;
|
||||
public String getLocalName();
|
||||
public String getLocalName() throws ServletException; //aka getFieldName
|
||||
public String getPrefix();
|
||||
public String getText();
|
||||
public String getText() throws ServletException;
|
||||
public int getAttributeCount();
|
||||
public String getAttributeValue(int index);
|
||||
public String getAttributePrefix(int index);
|
||||
|
@ -8,5 +8,5 @@ public enum TreeParserToken {
|
||||
StartEntity,
|
||||
EndEntity,
|
||||
Value
|
||||
//TODO
|
||||
//append additional tokens as necessary (most are just mapped to Value or Ignorable)
|
||||
}
|
||||
|
@ -9,7 +9,12 @@ import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class XmlParser implements TreeParser{
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlParser");
|
||||
|
||||
XMLStreamReader parser = null;
|
||||
|
||||
public XmlParser(InputStream inputStream){
|
||||
@ -24,19 +29,20 @@ public class XmlParser implements TreeParser{
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken next() throws ServletException{
|
||||
try {
|
||||
if(!parser.hasNext())
|
||||
throw new ServletException("End of XML stream");
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
int currentToken = -1;
|
||||
try {
|
||||
currentToken = parser.next();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
return convertToTreeParserToken(currentToken);
|
||||
@ -44,6 +50,8 @@ public class XmlParser implements TreeParser{
|
||||
|
||||
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
|
||||
switch(token){
|
||||
//Xml does not have StartArray element type
|
||||
//Xml does not have EndArray element type
|
||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||
@ -53,44 +61,58 @@ public class XmlParser implements TreeParser{
|
||||
//TODO
|
||||
default:
|
||||
return TreeParserToken.Ignorable;
|
||||
//throw new ServletException("Not yet implemented");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken getEventType() throws ServletException{
|
||||
return this.convertToTreeParserToken(parser.getEventType());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException{
|
||||
try {
|
||||
return parser.hasNext();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e.getMessage());
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public String getLocalName(){
|
||||
return parser.getLocalName();
|
||||
@Override
|
||||
public String getLocalName() throws ServletException{
|
||||
try{
|
||||
return parser.getLocalName();
|
||||
}catch(IllegalStateException e){
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(){
|
||||
return parser.getPrefix();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(){
|
||||
return parser.getText();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAttributeCount(){
|
||||
return parser.getAttributeCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeValue(int index){
|
||||
return parser.getAttributeValue(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributePrefix(int index){
|
||||
return parser.getAttributePrefix(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeLocalName(int index){
|
||||
return parser.getAttributeLocalName(index);
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.JsonImporter;
|
||||
import com.google.refine.importers.parsers.JSONParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
@ -102,7 +104,7 @@ public class JsonImporterTests extends RefineTest {
|
||||
log(project);
|
||||
assertProjectCreated(project, 5, 6);
|
||||
|
||||
Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre");
|
||||
Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "__anonymous__ - genre");
|
||||
|
||||
Row row0 = project.rows.get(0);
|
||||
Assert.assertNotNull(row0);
|
||||
@ -120,12 +122,87 @@ public class JsonImporterTests extends RefineTest {
|
||||
assertProjectCreated(project, 5, 6);
|
||||
|
||||
Assert.assertEquals(project.columnModel.columnGroups.size(),1);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3);
|
||||
Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0);
|
||||
Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* org.codehaus.Jackson.JsonParser has an inconsistency when returning getLocalName
|
||||
* of an Entity_Start token which occurs after a Field_Name token
|
||||
*/
|
||||
@Test
|
||||
public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{
|
||||
String sampleJson = "{\"field\":\"value\"}";
|
||||
String sampleJson2 = "{\"field\":{}}";
|
||||
String sampleJson3 = "{\"field\":[{},{}]}";
|
||||
|
||||
JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) ));
|
||||
TreeParserToken token = TreeParserToken.Ignorable;
|
||||
int i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
token = parser.next();
|
||||
if(token == null)
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(TreeParserToken.Value, token);
|
||||
Assert.assertEquals("field", parser.getLocalName());
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
|
||||
|
||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) );
|
||||
token = TreeParserToken.Ignorable;
|
||||
i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
token = parser.next();
|
||||
if(token == null)
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(TreeParserToken.StartEntity, token);
|
||||
Assert.assertEquals(parser.getLocalName(), "field");
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
|
||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) );
|
||||
token = TreeParserToken.Ignorable;
|
||||
i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
token = parser.next();
|
||||
if(token == null)
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "field");
|
||||
}
|
||||
if(i == 4){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
||||
}
|
||||
if(i == 6){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
}
|
||||
|
||||
//------------helper methods---------------
|
||||
|
||||
@ -139,13 +216,13 @@ public class JsonImporterTests extends RefineTest {
|
||||
|
||||
public static String getElementWithDuplicateSubElement(int id){
|
||||
return "{ \"id\" : " + id + "," +
|
||||
"\"authors\":[" +
|
||||
"{\"author\" : \"Author " + id + ", The\"}," +
|
||||
"{\"author\" : \"Author " + id + ", Another\"}" +
|
||||
"]," +
|
||||
"\"title\" : \"Book title " + id + "\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"" +
|
||||
"}";
|
||||
"\"authors\":[" +
|
||||
"{\"name\" : \"Author " + id + ", The\"}," +
|
||||
"{\"name\" : \"Author " + id + ", Another\"}" +
|
||||
"]," +
|
||||
"\"title\" : \"Book title " + id + "\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"" +
|
||||
"}";
|
||||
}
|
||||
|
||||
public static String getSample(){
|
||||
@ -181,8 +258,8 @@ public class JsonImporterTests extends RefineTest {
|
||||
sb.append(",");
|
||||
}
|
||||
sb.append("{\"id\" : 4," +
|
||||
"\"author\" : \"With line\n break\"," +
|
||||
"\"title\" : \"Book title 4\"" +
|
||||
"\"author\" : \"With line\\n break\"," + //FIXME this line break is doubled - is this correct??
|
||||
"\"title\" : \"Book title 4\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"" +
|
||||
"},");
|
||||
sb.append(getTypicalElement(5));
|
||||
@ -197,12 +274,13 @@ public class JsonImporterTests extends RefineTest {
|
||||
sb.append("[");
|
||||
for(int i = 1; i < 6; i++){
|
||||
sb.append(getTypicalElement(i));
|
||||
sb.append(",");
|
||||
}
|
||||
sb.append("{\"id\" : 6," +
|
||||
"\"author\" : \"Author 6, The\"," +
|
||||
"\"title\" : \"Book title 6\"," +
|
||||
"\"genre\" : \"New element not seen in other records\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"" +
|
||||
"}");
|
||||
sb.append("]");
|
||||
return sb.toString();
|
||||
@ -217,7 +295,7 @@ public class JsonImporterTests extends RefineTest {
|
||||
"\"author-dob\" : \"1950-0" + i + "-15\"}," +
|
||||
"\"title\" : \"Book title " + i + "\"," +
|
||||
"\"publish_date\" : \"2010-05-26\"" +
|
||||
"},");
|
||||
"}");
|
||||
if(i < 6)
|
||||
sb.append(",");
|
||||
}
|
||||
|
@ -18,7 +18,9 @@ import org.testng.annotations.Test;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumn;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportRecord;
|
||||
import com.google.refine.importers.parsers.JSONParser;
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.importers.parsers.XmlParser;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
@ -63,10 +65,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectPathFromTagTest(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
public void detectPathFromTagXmlTest(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
|
||||
String tag = "library";
|
||||
createParser();
|
||||
createXmlParser();
|
||||
|
||||
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
|
||||
Assert.assertNotNull(response);
|
||||
@ -75,11 +78,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectPathFromTagWithNestedElement(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
public void detectPathFromTagWithNestedElementXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
String tag = "book";
|
||||
|
||||
createParser();
|
||||
createXmlParser();
|
||||
|
||||
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
|
||||
Assert.assertNotNull(response);
|
||||
@ -89,9 +92,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectRecordElementTest(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void detectRecordElementXmlTest(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
|
||||
String tag="library";
|
||||
|
||||
@ -107,9 +110,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectRecordElementCanHandleWithNestedElements(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void detectRecordElementCanHandleWithNestedElementsXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
|
||||
String tag="book";
|
||||
|
||||
@ -126,9 +129,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectRecordElementIsNullForUnfoundTag(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void detectRecordElementIsNullForUnfoundTagXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
|
||||
String tag="";
|
||||
|
||||
@ -142,7 +145,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectRecordElementRegressionTest(){
|
||||
public void detectRecordElementRegressionXmlTest(){
|
||||
loadSampleXml();
|
||||
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
|
||||
@ -151,9 +154,20 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
Assert.assertEquals(path[0], "library");
|
||||
Assert.assertEquals(path[1], "book");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void detectRecordElementRegressionJsonTest(){
|
||||
loadSampleJson();
|
||||
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream));
|
||||
Assert.assertNotNull(path);
|
||||
Assert.assertEquals(path.length, 2);
|
||||
Assert.assertEquals(path[0], "__anonymous__");
|
||||
Assert.assertEquals(path[1], "__anonymous__");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void importXmlTest(){
|
||||
public void importTreeDataXmlTest(){
|
||||
loadSampleXml();
|
||||
|
||||
String[] recordPath = new String[]{"library","book"};
|
||||
@ -174,7 +188,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
@Test
|
||||
public void importXmlWithVaryingStructureTest(){
|
||||
loadXml(XmlImporterTests.getSampleWithVaryingStructure());
|
||||
loadData(XmlImporterTests.getSampleWithVaryingStructure());
|
||||
|
||||
String[] recordPath = new String[]{"library", "book"};
|
||||
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
|
||||
@ -221,9 +235,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void findRecordTest(){
|
||||
public void findRecordTestXml(){
|
||||
loadSampleXml();
|
||||
createParser();
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
|
||||
String[] recordPath = new String[]{"library","book"};
|
||||
@ -243,9 +257,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordTest(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void processRecordTestXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
|
||||
try {
|
||||
@ -264,9 +278,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordTestDuplicateColumns(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><author>author2</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void processRecordTestDuplicateColumnsXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><authors><author>author1</author><author>author2</author></authors><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
|
||||
try {
|
||||
@ -289,9 +303,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordTestNestedElement(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name>author1</author-name><author-dob>a date</author-dob></author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void processRecordTestNestedElementXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name>author1</author-name><author-dob>a date</author-dob></author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
|
||||
try {
|
||||
@ -313,9 +327,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void processSubRecordTest(){
|
||||
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createParser();
|
||||
public void processSubRecordTestXml(){
|
||||
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
|
||||
try {
|
||||
@ -365,10 +379,14 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
//----------------helpers-------------
|
||||
public void loadSampleXml(){
|
||||
loadXml( XmlImporterTests.getSample() );
|
||||
loadData( XmlImporterTests.getSample() );
|
||||
}
|
||||
|
||||
public void loadSampleJson(){
|
||||
loadData( JsonImporterTests.getSample() );
|
||||
}
|
||||
|
||||
public void loadXml(String xml){
|
||||
public void loadData(String xml){
|
||||
try {
|
||||
inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) );
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
@ -378,13 +396,18 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
public void ParserSkip(){
|
||||
try {
|
||||
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
||||
if(parser.getEventType() == TreeParserToken.StartDocument){
|
||||
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
||||
}
|
||||
} catch (ServletException e1) {
|
||||
Assert.fail();
|
||||
}
|
||||
}
|
||||
|
||||
public void createParser(){
|
||||
parser = new XmlParser(inputStream);
|
||||
public void createXmlParser(){
|
||||
parser = new XmlParser(inputStream);
|
||||
}
|
||||
public void createJsonParser(){
|
||||
parser = new JSONParser(inputStream);
|
||||
}
|
||||
}
|
||||
|
@ -140,8 +140,10 @@ public class XmlImporterTests extends RefineTest {
|
||||
|
||||
public static String getElementWithDuplicateSubElement(int id){
|
||||
return "<book id=\"" + id + "\">" +
|
||||
"<authors>" +
|
||||
"<author>Author " + id + ", The</author>" +
|
||||
"<author>Author " + id + ", Another</author>" +
|
||||
"</authors>" +
|
||||
"<title>Book title " + id + "</title>" +
|
||||
"<publish_date>2010-05-26</publish_date>" +
|
||||
"</book>";
|
||||
|
Loading…
Reference in New Issue
Block a user