The JsonImporter now passes all current unit tests.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1421 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-10-04 10:02:50 +00:00
parent ba442ec201
commit d3f223c196
10 changed files with 319 additions and 115 deletions

View File

@ -62,7 +62,6 @@ public class JsonImporter implements StreamImporter{
if (recordPath == null) if (recordPath == null)
return; return;
ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup); XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup);
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);

View File

@ -168,8 +168,6 @@ public abstract class TreeImportUtilities {
row.add(null); row.add(null);
} }
logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1));
row.set(cellIndex, new Cell(value, null)); row.set(cellIndex, new Cell(value, null));
column.nextRowIndex = rowIndex + 1; column.nextRowIndex = rowIndex + 1;

View File

@ -110,7 +110,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
TreeParserToken eventType = parser.next(); TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity){ //XMLStreamConstants.START_ELEMENT) { if (eventType == TreeParserToken.StartEntity) {
RecordElementCandidate candidate = RecordElementCandidate candidate =
detectRecordElement( detectRecordElement(
parser, parser,
@ -146,13 +146,17 @@ public class XmlImportUtilities extends TreeImportUtilities {
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
TreeParserToken eventType = parser.next(); TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) { if (eventType == TreeParserToken.EndEntity ) {
break; break;
} else if (eventType == TreeParserToken.Value) {//XMLStreamConstants.CHARACTERS) { } else if (eventType == TreeParserToken.Value) {
if (parser.getText().trim().length() > 0) { try{
textNodeCount++; if (parser.getText().trim().length() > 0) {
textNodeCount++;
}
}catch(Exception e){
//silent
} }
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { } else if (eventType == TreeParserToken.StartEntity) {
childElementNodeCount++; childElementNodeCount++;
String tagName = parser.getLocalName(); String tagName = parser.getLocalName();
@ -234,15 +238,15 @@ public class XmlImportUtilities extends TreeImportUtilities {
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) { ) {
logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)");
try { try {
while (parser.hasNext()) { while (parser.hasNext()) {
TreeParserToken eventType = parser.next(); TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { if (eventType == TreeParserToken.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup); findRecord(project, parser, recordPath, 0, rootColumnGroup);
} }
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace();
// silent // silent
} }
} }
@ -265,18 +269,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws ServletException { ) throws ServletException {
if(parser.getEventType() == TreeParserToken.EndDocument){//XMLStreamConstants.START_DOCUMENT){ logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
if(parser.getEventType() == TreeParserToken.StartDocument){//XMLStreamConstants.START_DOCUMENT){
logger.warn("Cannot use findRecord method for START_DOCUMENT event"); logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return; return;
} }
String tagName = parser.getLocalName(); String tagName = parser.getLocalName();
if (tagName.equals(recordPath[pathIndex])) { if (tagName.equals(recordPath[pathIndex])) {
if (pathIndex < recordPath.length - 1) { if (pathIndex < recordPath.length - 1) {
while (parser.hasNext()) { while (parser.hasNext()) {
TreeParserToken eventType = parser.next(); TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { if (eventType == TreeParserToken.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
} else if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) { } else if (eventType == TreeParserToken.EndEntity ) {
break; break;
} }
} }
@ -312,6 +319,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
TreeParser parser, TreeParser parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws ServletException { ) throws ServletException {
logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)");
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record); processSubRecord(project, parser, rootColumnGroup, record);
@ -350,13 +358,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record ImportRecord record
) throws ServletException { ) throws ServletException {
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
if(parser.getEventType() == TreeParserToken.StartDocument)
return;
ImportColumnGroup thisColumnGroup = getColumnGroup( ImportColumnGroup thisColumnGroup = getColumnGroup(
project, project,
columnGroup, columnGroup,
composeName(parser.getPrefix(), parser.getLocalName())); composeName(parser.getPrefix(), parser.getLocalName()));
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
int attributeCount = parser.getAttributeCount(); int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) { for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i).trim(); String text = parser.getAttributeValue(i).trim();
@ -373,7 +386,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext()) { while (parser.hasNext()) {
TreeParserToken eventType = parser.next(); TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) { if (eventType == TreeParserToken.StartEntity) {
processSubRecord( processSubRecord(
project, project,
parser, parser,
@ -382,17 +395,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
); );
} else if (//eventType == XMLStreamConstants.CDATA || } else if (//eventType == XMLStreamConstants.CDATA ||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) { eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getText().trim(); String text = parser.getText();
if (text.length() > 0) { String colName = parser.getLocalName();
addCell( if(text != null){
project, text = text.trim();
thisColumnGroup, if (text.length() > 0) {
record, addCell(
null, project,
parser.getText() thisColumnGroup,
); record,
colName,
parser.getText()
);
}
} }
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) { } else if (eventType == TreeParserToken.EndEntity) {
break; break;
} }
} }

View File

@ -8,40 +8,58 @@ import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken; import org.codehaus.jackson.JsonToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class JSONParser implements TreeParser{ public class JSONParser implements TreeParser{
final static Logger logger = LoggerFactory.getLogger("JsonParser");
JsonFactory factory = new JsonFactory(); JsonFactory factory = new JsonFactory();
JsonParser parser = null; JsonParser parser = null;
//The following is a workaround for inconsistent Jackson JsonParser
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
Boolean thisTokenIsAFieldName = false;
String lastFieldName = null;
//end of workaround
public JSONParser(InputStream inputStream){ public JSONParser(InputStream inputStream){
try { try {
parser = factory.createJsonParser(inputStream); parser = factory.createJsonParser(inputStream);
} catch (JsonParseException e) { } catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
} }
} }
/**
* Does nothing. All Json is treated as elements
*/
@Override @Override
public int getAttributeCount() { public int getAttributeCount() {
// TODO Auto-generated method stub // TODO Auto-generated method stub
return 0; return 0;
} }
/**
* Does nothing. All Json is treated as elements
*/
@Override @Override
public String getAttributeLocalName(int index) { public String getAttributeLocalName(int index) {
// TODO Auto-generated method stub
return null; return null;
} }
/**
* Does nothing. All Json is treated as elements
*/
@Override @Override
public String getAttributePrefix(int index) { public String getAttributePrefix(int index) {
// TODO Auto-generated method stub // TODO Auto-generated method stub
return null; return null;
} }
/**
* Does nothing. All Json is treated as elements
*/
@Override @Override
public String getAttributeValue(int index) { public String getAttributeValue(int index) {
// TODO Auto-generated method stub // TODO Auto-generated method stub
@ -50,31 +68,49 @@ public class JSONParser implements TreeParser{
@Override @Override
public TreeParserToken getEventType() throws ServletException { public TreeParserToken getEventType() throws ServletException {
return this.convertToTreeParserToken(parser.getCurrentToken()); return this.mapToTreeParserToken(parser.getCurrentToken());
} }
@Override @Override
public String getLocalName() { public String getLocalName() throws ServletException{
// TODO Auto-generated method stub try {
return null; String text = parser.getCurrentName();
//The following is a workaround for inconsistent Jackson JsonParser
if(text == null){
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
text = this.lastFieldName;
else
text = "__anonymous__";
}
//end of workaround
return text;
} catch (Exception e) {
throw new ServletException(e);
}
} }
/**
* Does nothing. Json does not have prefixes
*/
@Override @Override
public String getPrefix() { public String getPrefix() {
// TODO Auto-generated method stub
return null; return null;
} }
@Override @Override
public String getText() { public String getText() throws ServletException {
// TODO Auto-generated method stub try {
return null; return parser.getText();
} catch (Exception e) {
throw new ServletException(e);
}
} }
@Override @Override
public boolean hasNext() throws ServletException { public boolean hasNext() throws ServletException {
// TODO Auto-generated method stub return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
return false;
} }
@Override @Override
@ -83,29 +119,58 @@ public class JSONParser implements TreeParser{
try { try {
next = parser.nextToken(); next = parser.nextToken();
} catch (JsonParseException e) { } catch (JsonParseException e) {
throw new ServletException(e.getMessage()); throw new ServletException(e);
} catch (IOException e) { } catch (IOException e) {
throw new ServletException(e.getMessage()); throw new ServletException(e);
} }
if(next == null) if(next == null)
throw new ServletException("No more Json Tokens in stream"); throw new ServletException("No more Json Tokens in stream");
return convertToTreeParserToken(next); //The following is a workaround for inconsistent Jackson JsonParser
if(next == JsonToken.FIELD_NAME){
try {
this.thisTokenIsAFieldName = true;
this.lastFieldName = parser.getCurrentName();
} catch (Exception e) {
//silent
}
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
if(this.thisTokenIsAFieldName){
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
this.thisTokenIsAFieldName = false;
}else{
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
this.lastFieldName = null;
}
}else{
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
this.lastFieldName = null;
this.thisTokenIsAFieldName = false;
}
//end of workaround
return mapToTreeParserToken(next);
} }
protected TreeParserToken convertToTreeParserToken(JsonToken token) throws ServletException{ protected TreeParserToken mapToTreeParserToken(JsonToken token){
switch(token){ switch(token){
case START_ARRAY: return TreeParserToken.StartEntity; case START_ARRAY: return TreeParserToken.StartEntity;
case END_ARRAY: return TreeParserToken.EndEntity; case END_ARRAY: return TreeParserToken.EndEntity;
case START_OBJECT: return TreeParserToken.StartEntity; case START_OBJECT: return TreeParserToken.StartEntity;
case END_OBJECT: return TreeParserToken.EndEntity; case END_OBJECT: return TreeParserToken.EndEntity;
case VALUE_STRING: return TreeParserToken.Value; case VALUE_STRING: return TreeParserToken.Value;
//Json does not have START_DOCUMENT case FIELD_NAME: return TreeParserToken.Ignorable; //returned by the getLocalName function()
//Json does not have END_DOCUMENT case VALUE_NUMBER_INT: return TreeParserToken.Value;
//Json does not have START_DOCUMENT token type (so ignored as default)
//TODO finish the rest of the cases //Json does not have END_DOCUMENT token type (so ignored as default)
default: throw new ServletException("Not yet implemented"); case VALUE_TRUE : return TreeParserToken.Value;
case VALUE_NUMBER_FLOAT : return TreeParserToken.Value;
case VALUE_NULL : return TreeParserToken.Value;
case VALUE_FALSE : return TreeParserToken.Value;
case VALUE_EMBEDDED_OBJECT : return TreeParserToken.Ignorable;
case NOT_AVAILABLE : return TreeParserToken.Ignorable;
default: return TreeParserToken.Ignorable;
} }
} }

View File

@ -6,9 +6,9 @@ public interface TreeParser {
public TreeParserToken next() throws ServletException; public TreeParserToken next() throws ServletException;
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
public boolean hasNext() throws ServletException; public boolean hasNext() throws ServletException;
public String getLocalName(); public String getLocalName() throws ServletException; //aka getFieldName
public String getPrefix(); public String getPrefix();
public String getText(); public String getText() throws ServletException;
public int getAttributeCount(); public int getAttributeCount();
public String getAttributeValue(int index); public String getAttributeValue(int index);
public String getAttributePrefix(int index); public String getAttributePrefix(int index);

View File

@ -8,5 +8,5 @@ public enum TreeParserToken {
StartEntity, StartEntity,
EndEntity, EndEntity,
Value Value
//TODO //append additional tokens as necessary (most are just mapped to Value or Ignorable)
} }

View File

@ -9,7 +9,12 @@ import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader; import javax.xml.stream.XMLStreamReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class XmlParser implements TreeParser{ public class XmlParser implements TreeParser{
final static Logger logger = LoggerFactory.getLogger("XmlParser");
XMLStreamReader parser = null; XMLStreamReader parser = null;
public XmlParser(InputStream inputStream){ public XmlParser(InputStream inputStream){
@ -24,19 +29,20 @@ public class XmlParser implements TreeParser{
} }
} }
@Override
public TreeParserToken next() throws ServletException{ public TreeParserToken next() throws ServletException{
try { try {
if(!parser.hasNext()) if(!parser.hasNext())
throw new ServletException("End of XML stream"); throw new ServletException("End of XML stream");
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e.getMessage()); throw new ServletException(e);
} }
int currentToken = -1; int currentToken = -1;
try { try {
currentToken = parser.next(); currentToken = parser.next();
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e.getMessage()); throw new ServletException(e);
} }
return convertToTreeParserToken(currentToken); return convertToTreeParserToken(currentToken);
@ -44,6 +50,8 @@ public class XmlParser implements TreeParser{
protected TreeParserToken convertToTreeParserToken(int token) throws ServletException { protected TreeParserToken convertToTreeParserToken(int token) throws ServletException {
switch(token){ switch(token){
//Xml does not have StartArray element type
//Xml does not have EndArray element type
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity; case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity; case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value; case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
@ -53,44 +61,58 @@ public class XmlParser implements TreeParser{
//TODO //TODO
default: default:
return TreeParserToken.Ignorable; return TreeParserToken.Ignorable;
//throw new ServletException("Not yet implemented");
} }
} }
@Override
public TreeParserToken getEventType() throws ServletException{ public TreeParserToken getEventType() throws ServletException{
return this.convertToTreeParserToken(parser.getEventType()); return this.convertToTreeParserToken(parser.getEventType());
} }
@Override
public boolean hasNext() throws ServletException{ public boolean hasNext() throws ServletException{
try { try {
return parser.hasNext(); return parser.hasNext();
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e.getMessage()); throw new ServletException(e);
} }
} }
public String getLocalName(){ @Override
return parser.getLocalName(); public String getLocalName() throws ServletException{
try{
return parser.getLocalName();
}catch(IllegalStateException e){
return null;
}
} }
@Override
public String getPrefix(){ public String getPrefix(){
return parser.getPrefix(); return parser.getPrefix();
} }
@Override
public String getText(){ public String getText(){
return parser.getText(); return parser.getText();
} }
@Override
public int getAttributeCount(){ public int getAttributeCount(){
return parser.getAttributeCount(); return parser.getAttributeCount();
} }
@Override
public String getAttributeValue(int index){ public String getAttributeValue(int index){
return parser.getAttributeValue(index); return parser.getAttributeValue(index);
} }
@Override
public String getAttributePrefix(int index){ public String getAttributePrefix(int index){
return parser.getAttributePrefix(index); return parser.getAttributePrefix(index);
} }
@Override
public String getAttributeLocalName(int index){ public String getAttributeLocalName(int index){
return parser.getAttributeLocalName(index); return parser.getAttributeLocalName(index);
} }

View File

@ -16,6 +16,8 @@ import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata; import com.google.refine.ProjectMetadata;
import com.google.refine.importers.JsonImporter; import com.google.refine.importers.JsonImporter;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest; import com.google.refine.tests.RefineTest;
@ -102,7 +104,7 @@ public class JsonImporterTests extends RefineTest {
log(project); log(project);
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "__anonymous__ - genre");
Row row0 = project.rows.get(0); Row row0 = project.rows.get(0);
Assert.assertNotNull(row0); Assert.assertNotNull(row0);
@ -120,12 +122,87 @@ public class JsonImporterTests extends RefineTest {
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals(project.columnModel.columnGroups.size(),1); Assert.assertEquals(project.columnModel.columnGroups.size(),1);
Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3);
Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3);
Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup);
Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0);
Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2);
} }
/**
* org.codehaus.Jackson.JsonParser has an inconsistency when returning getLocalName
* of an Entity_Start token which occurs after a Field_Name token.
*
* This test walks three small JSON documents token by token through
* JSONParser.next() and checks, at fixed token positions, both the mapped
* TreeParserToken and the name reported by JSONParser.getLocalName():
* a named string value, a named object, and a named array holding two
* unnamed objects (which are expected to report "__anonymous__").
*
* NOTE(review): JSONParser.next() throws a ServletException when the underlying
* stream has no more tokens rather than returning null, so each loop below is
* ended by that exception being caught, not by the null checks.
* NOTE(review): the catch blocks only catch Exception; assertion failures are
* presumably raised as AssertionError and so still fail the test - confirm.
*/
@Test
public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{
// Case 1: a field holding a plain string value.
String sampleJson = "{\"field\":\"value\"}";
// Case 2: a field holding an (empty) object.
String sampleJson2 = "{\"field\":{}}";
// Case 3: a field holding an array of two unnamed (empty) objects.
String sampleJson3 = "{\"field\":[{},{}]}";
JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) ));
// Any non-null starting value works; it only has to let the loop begin.
TreeParserToken token = TreeParserToken.Ignorable;
// 1-based count of the tokens read so far.
int i = 0;
try{
while(token != null){
token = parser.next();
if(token == null)
break;
i++;
// Token 3 is the string value that follows the opening brace and the field name.
if(i == 3){
// NOTE(review): argument order here is the reverse of the two blocks below - confirm which is (actual, expected).
Assert.assertEquals(TreeParserToken.Value, token);
Assert.assertEquals("field", parser.getLocalName());
}
}
}catch(Exception e){
//silent
// Reached when next() throws at the end of the token stream.
}
parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) );
token = TreeParserToken.Ignorable;
i = 0;
try{
while(token != null){
token = parser.next();
if(token == null)
break;
i++;
// Token 3 is the nested object's start; it should take the name of the preceding field.
if(i == 3){
Assert.assertEquals(TreeParserToken.StartEntity, token);
Assert.assertEquals(parser.getLocalName(), "field");
}
}
}catch(Exception e){
//silent
// Reached when next() throws at the end of the token stream.
}
parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) );
token = TreeParserToken.Ignorable;
i = 0;
try{
while(token != null){
token = parser.next();
if(token == null)
break;
i++;
// Token 3 is the array start; it should take the name of the preceding field.
if(i == 3){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "field");
}
// Token 4 is the first object inside the array; it has no field name of its own.
if(i == 4){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
}
// Token 6 is the second object inside the array (token 5 closes the first).
if(i == 6){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
}
}
}catch(Exception e){
//silent
// Reached when next() throws at the end of the token stream.
}
}
//------------helper methods--------------- //------------helper methods---------------
@ -139,13 +216,13 @@ public class JsonImporterTests extends RefineTest {
public static String getElementWithDuplicateSubElement(int id){ public static String getElementWithDuplicateSubElement(int id){
return "{ \"id\" : " + id + "," + return "{ \"id\" : " + id + "," +
"\"authors\":[" + "\"authors\":[" +
"{\"author\" : \"Author " + id + ", The\"}," + "{\"name\" : \"Author " + id + ", The\"}," +
"{\"author\" : \"Author " + id + ", Another\"}" + "{\"name\" : \"Author " + id + ", Another\"}" +
"]," + "]," +
"\"title\" : \"Book title " + id + "\"," + "\"title\" : \"Book title " + id + "\"," +
"\"publish_date\" : \"2010-05-26\"" + "\"publish_date\" : \"2010-05-26\"" +
"}"; "}";
} }
public static String getSample(){ public static String getSample(){
@ -181,8 +258,8 @@ public class JsonImporterTests extends RefineTest {
sb.append(","); sb.append(",");
} }
sb.append("{\"id\" : 4," + sb.append("{\"id\" : 4," +
"\"author\" : \"With line\n break\"," + "\"author\" : \"With line\\n break\"," + //FIXME this line break is doubled - is this correct??
"\"title\" : \"Book title 4\"" + "\"title\" : \"Book title 4\"," +
"\"publish_date\" : \"2010-05-26\"" + "\"publish_date\" : \"2010-05-26\"" +
"},"); "},");
sb.append(getTypicalElement(5)); sb.append(getTypicalElement(5));
@ -197,12 +274,13 @@ public class JsonImporterTests extends RefineTest {
sb.append("["); sb.append("[");
for(int i = 1; i < 6; i++){ for(int i = 1; i < 6; i++){
sb.append(getTypicalElement(i)); sb.append(getTypicalElement(i));
sb.append(",");
} }
sb.append("{\"id\" : 6," + sb.append("{\"id\" : 6," +
"\"author\" : \"Author 6, The\"," + "\"author\" : \"Author 6, The\"," +
"\"title\" : \"Book title 6\"," + "\"title\" : \"Book title 6\"," +
"\"genre\" : \"New element not seen in other records\"," + "\"genre\" : \"New element not seen in other records\"," +
"\"publish_date\" : \"2010-05-26\"," + "\"publish_date\" : \"2010-05-26\"" +
"}"); "}");
sb.append("]"); sb.append("]");
return sb.toString(); return sb.toString();
@ -217,7 +295,7 @@ public class JsonImporterTests extends RefineTest {
"\"author-dob\" : \"1950-0" + i + "-15\"}," + "\"author-dob\" : \"1950-0" + i + "-15\"}," +
"\"title\" : \"Book title " + i + "\"," + "\"title\" : \"Book title " + i + "\"," +
"\"publish_date\" : \"2010-05-26\"" + "\"publish_date\" : \"2010-05-26\"" +
"},"); "}");
if(i < 6) if(i < 6)
sb.append(","); sb.append(",");
} }

View File

@ -18,7 +18,9 @@ import org.testng.annotations.Test;
import com.google.refine.importers.TreeImportUtilities.ImportColumn; import com.google.refine.importers.TreeImportUtilities.ImportColumn;
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup; import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.TreeImportUtilities.ImportRecord; import com.google.refine.importers.TreeImportUtilities.ImportRecord;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.importers.parsers.XmlParser; import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
@ -63,10 +65,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectPathFromTagTest(){ public void detectPathFromTagXmlTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "library"; String tag = "library";
createParser(); createXmlParser();
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response); Assert.assertNotNull(response);
@ -75,11 +78,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectPathFromTagWithNestedElement(){ public void detectPathFromTagWithNestedElementXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book"; String tag = "book";
createParser(); createXmlParser();
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
Assert.assertNotNull(response); Assert.assertNotNull(response);
@ -89,9 +92,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectRecordElementTest(){ public void detectRecordElementXmlTest(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
String tag="library"; String tag="library";
@ -107,9 +110,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectRecordElementCanHandleWithNestedElements(){ public void detectRecordElementCanHandleWithNestedElementsXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
String tag="book"; String tag="book";
@ -126,9 +129,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectRecordElementIsNullForUnfoundTag(){ public void detectRecordElementIsNullForUnfoundTagXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
String tag=""; String tag="";
@ -142,7 +145,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectRecordElementRegressionTest(){ public void detectRecordElementRegressionXmlTest(){
loadSampleXml(); loadSampleXml();
String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream)); String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
@ -151,9 +154,20 @@ public class XmlImportUtilitiesTests extends RefineTest {
Assert.assertEquals(path[0], "library"); Assert.assertEquals(path[0], "library");
Assert.assertEquals(path[1], "book"); Assert.assertEquals(path[1], "book");
} }
@Test
public void detectRecordElementRegressionJsonTest(){
loadSampleJson();
String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream));
Assert.assertNotNull(path);
Assert.assertEquals(path.length, 2);
Assert.assertEquals(path[0], "__anonymous__");
Assert.assertEquals(path[1], "__anonymous__");
}
@Test @Test
public void importXmlTest(){ public void importTreeDataXmlTest(){
loadSampleXml(); loadSampleXml();
String[] recordPath = new String[]{"library","book"}; String[] recordPath = new String[]{"library","book"};
@ -174,7 +188,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test @Test
public void importXmlWithVaryingStructureTest(){ public void importXmlWithVaryingStructureTest(){
loadXml(XmlImporterTests.getSampleWithVaryingStructure()); loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"}; String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup); XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
@ -221,9 +235,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void findRecordTest(){ public void findRecordTestXml(){
loadSampleXml(); loadSampleXml();
createParser(); createXmlParser();
ParserSkip(); ParserSkip();
String[] recordPath = new String[]{"library","book"}; String[] recordPath = new String[]{"library","book"};
@ -243,9 +257,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void processRecordTest(){ public void processRecordTestXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
ParserSkip(); ParserSkip();
try { try {
@ -264,9 +278,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void processRecordTestDuplicateColumns(){ public void processRecordTestDuplicateColumnsXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><author>author2</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><authors><author>author1</author><author>author2</author></authors><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
ParserSkip(); ParserSkip();
try { try {
@ -289,9 +303,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void processRecordTestNestedElement(){ public void processRecordTestNestedElementXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name>author1</author-name><author-dob>a date</author-dob></author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author><author-name>author1</author-name><author-dob>a date</author-dob></author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
ParserSkip(); ParserSkip();
try { try {
@ -313,9 +327,9 @@ public class XmlImportUtilitiesTests extends RefineTest {
@Test @Test
public void processSubRecordTest(){ public void processSubRecordTestXml(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
createParser(); createXmlParser();
ParserSkip(); ParserSkip();
try { try {
@ -365,10 +379,14 @@ public class XmlImportUtilitiesTests extends RefineTest {
//----------------helpers------------- //----------------helpers-------------
// Sample-loading helpers. Shared lines show the pre-change text followed by the post-change text.
// loadSampleXml() passes XmlImporterTests.getSample() to the loader (loadXml before, loadData after).
// loadSampleJson() appears only on the post-change side and passes JsonImporterTests.getSample() to loadData.
public void loadSampleXml(){ public void loadSampleXml(){
loadXml( XmlImporterTests.getSample() ); loadData( XmlImporterTests.getSample() );
}
public void loadSampleJson(){
loadData( JsonImporterTests.getSample() );
} }
public void loadXml(String xml){ public void loadData(String xml){
try { try {
inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) ); inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) );
} catch (UnsupportedEncodingException e1) { } catch (UnsupportedEncodingException e1) {
@ -378,13 +396,18 @@ public class XmlImportUtilitiesTests extends RefineTest {
// Test helper that steps the parser forward once; a ServletException fails the test via Assert.fail().
// Shared lines show the pre-change text followed by the post-change text: the old side calls
// parser.next() unconditionally, the new side calls it only when parser.getEventType() is
// TreeParserToken.StartDocument.
public void ParserSkip(){ public void ParserSkip(){
try { try {
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event if(parser.getEventType() == TreeParserToken.StartDocument){
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
}
} catch (ServletException e1) { } catch (ServletException e1) {
Assert.fail(); Assert.fail();
} }
} }
// Parser-creation helpers. Shared lines show the pre-change text followed by the post-change text.
// createParser() is renamed createXmlParser(); both assign an XmlParser built on inputStream to `parser`.
// createJsonParser() appears only on the post-change side and assigns a JSONParser built on inputStream.
public void createParser(){ public void createXmlParser(){
parser = new XmlParser(inputStream); parser = new XmlParser(inputStream);
}
public void createJsonParser(){
parser = new JSONParser(inputStream);
} }
} }

View File

@ -140,8 +140,10 @@ public class XmlImporterTests extends RefineTest {
public static String getElementWithDuplicateSubElement(int id){ public static String getElementWithDuplicateSubElement(int id){
return "<book id=\"" + id + "\">" + return "<book id=\"" + id + "\">" +
"<authors>" +
"<author>Author " + id + ", The</author>" + "<author>Author " + id + ", The</author>" +
"<author>Author " + id + ", Another</author>" + "<author>Author " + id + ", Another</author>" +
"</authors>" +
"<title>Book title " + id + "</title>" + "<title>Book title " + id + "</title>" +
"<publish_date>2010-05-26</publish_date>" + "<publish_date>2010-05-26</publish_date>" +
"</book>"; "</book>";