New JsonImporter, JSONParser and JsonImporterTests (the tests are a copy of XmlImporterTests with the syntax of the example data altered for JSON).

Renaming of TreeImporter to TreeImportUtilities (as per the current convention with the XmlImporter and XmlImportUtilities).

NB: the new JSONParser class does not work yet, and 5 of the new unit tests for JsonImporter currently fail. To be fixed in due course.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1361 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-09-27 22:53:17 +00:00
parent 8cd6476fa4
commit d285999da8
7 changed files with 699 additions and 44 deletions

View File

@ -0,0 +1,96 @@
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project;
public class JsonImporter implements StreamImporter{
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
public static final int BUFFER_SIZE = 64 * 1024;
@Override
public void read(InputStream inputStream, Project project,
ProjectMetadata metadata, Properties options)
throws ImportException {
//FIXME the below is a close duplicate of the XmlImporter code.
//Should wrap a lot of the below into methods and put them in a common superclass
logger.trace("JsonImporter.read");
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
String[] recordPath = null;
{
byte[] buffer = new byte[BUFFER_SIZE];
int bytes_read = 0;
try {//fill the buffer with data
while (bytes_read < BUFFER_SIZE) {
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
if (c == -1) break;
bytes_read +=c ;
}
pis.unread(buffer, 0, bytes_read);
} catch (IOException e) {
throw new ImportException("Read error",e);
}
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
TreeParser parser = new JSONParser(iStream);
if (options.containsKey("importer-record-tag")) {
try{
recordPath = XmlImportUtilities.detectPathFromTag(
parser,
options.getProperty("importer-record-tag"));
}catch(Exception e){
// silent
// e.printStackTrace();
}
} else {
recordPath = XmlImportUtilities.detectRecordElement(parser);
}
}
if (recordPath == null)
return;
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup);
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
project.columnModel.update();
}
@Override
public boolean canImportData(String contentType, String fileName) {
if (contentType != null) {
contentType = contentType.toLowerCase().trim();
if("application/json".equals(contentType) ||
"text/json".equals(contentType)) {
return true;
}
} else if (fileName != null) {
fileName = fileName.toLowerCase();
if (
fileName.endsWith(".json") ||
fileName.endsWith(".js")
) {
return true;
}
}
return false;
}
}

View File

@ -0,0 +1,244 @@
package com.google.refine.importers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
/**
 * Data structures and static helpers shared by importers of tree-structured data:
 * column groups, columns, records, and the routines that turn them into project
 * columns and cells.
 */
public abstract class TreeImportUtilities {
    final static Logger logger = LoggerFactory.getLogger("TreeImporter");

    /**
     * An element which holds sub-elements we
     * shall import as records
     */
    static protected class RecordElementCandidate {
        String[] path;
        int count;
    }

    /**
     * Common base of {@link ImportColumn} and {@link ImportColumnGroup}:
     * a name, a count of non-blank values, and a hook to tabulate that count.
     */
    static protected abstract class ImportVertical {
        public String name = "";
        public int nonBlankCount;

        abstract void tabulate();
    }

    /**
     * A column group describes a branch in tree structured data
     */
    static public class ImportColumnGroup extends ImportVertical {
        public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
        public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
        public int nextRowIndex;

        /**
         * Tabulates every column and subgroup, leaving this group's non-blank count
         * at the maximum found among them (or unchanged if it was already larger).
         */
        @Override
        void tabulate() {
            for (ImportColumn c : columns.values()) {
                c.tabulate();
                nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
            }
            for (ImportColumnGroup g : subgroups.values()) {
                g.tabulate();
                nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
            }
        }
    }

    /**
     * A column is used to describe a branch-terminating element in a tree structure
     */
    static public class ImportColumn extends ImportVertical {
        public int cellIndex;
        public int nextRowIndex;
        public boolean blankOnFirstRow;

        public ImportColumn() {}

        public ImportColumn(String name) { //required for testing
            super.name = name;
        }

        @Override
        void tabulate() {
            // already done the tabulation elsewhere
        }
    }

    /**
     * A record describes a data element in a tree-structure
     */
    static public class ImportRecord {
        // Rows are fetched by index in addCell, so a random-access list is used.
        public List<List<Cell>> rows = new ArrayList<List<Cell>>();
    }

    /**
     * Sorts the candidates in place, highest count first.
     *
     * @param list the candidates to sort
     */
    static protected void sortRecordElementCandidates(List<RecordElementCandidate> list) {
        Collections.sort(list, new Comparator<RecordElementCandidate>() {
            public int compare(RecordElementCandidate o1, RecordElementCandidate o2) {
                return o2.count - o1.count;
            }
        });
    }

    /**
     * Appends project columns for the given column group and, recursively, its subgroups.
     *
     * Direct columns come first: those not blank on the first row lead, then the higher
     * non-blank count, then the shorter name. Subgroups follow, ordered by non-blank
     * count (descending) and then name length. If the columns added span more than one
     * column but not the whole column model, a column group is registered for them.
     *
     * @param project     the project whose column model receives the columns
     * @param columnGroup the imported column group to convert
     */
    static public void createColumnsFromImport(
        Project project,
        ImportColumnGroup columnGroup
    ) {
        int startColumnIndex = project.columnModel.columns.size();

        List<ImportColumn> columns = new ArrayList<ImportColumn>(columnGroup.columns.values());
        Collections.sort(columns, new Comparator<ImportColumn>() {
            public int compare(ImportColumn o1, ImportColumn o2) {
                if (o1.blankOnFirstRow != o2.blankOnFirstRow) {
                    return o1.blankOnFirstRow ? 1 : -1;
                }

                int c = o2.nonBlankCount - o1.nonBlankCount;
                return c != 0 ? c : (o1.name.length() - o2.name.length());
            }
        });

        for (ImportColumn c : columns) {
            Column column = new com.google.refine.model.Column(c.cellIndex, c.name);
            project.columnModel.columns.add(column);
        }

        List<ImportColumnGroup> subgroups = new ArrayList<ImportColumnGroup>(columnGroup.subgroups.values());
        Collections.sort(subgroups, new Comparator<ImportColumnGroup>() {
            public int compare(ImportColumnGroup o1, ImportColumnGroup o2) {
                int c = o2.nonBlankCount - o1.nonBlankCount;
                return c != 0 ? c : (o1.name.length() - o2.name.length());
            }
        });

        for (ImportColumnGroup g : subgroups) {
            createColumnsFromImport(project, g);
        }

        int endColumnIndex = project.columnModel.columns.size();
        int span = endColumnIndex - startColumnIndex;
        // No group is registered for a single column or for a span covering every column.
        if (span > 1 && span < project.columnModel.columns.size()) {
            project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex);
        }
    }

    /**
     * Stores a value in the record under the named column of the given column group.
     *
     * Null or empty text is ignored. Otherwise the text is parsed into a cell value, the
     * column is looked up (and created if missing), and the cell is written at the later
     * of the group's and the column's next row index; the record's rows and the row's
     * cells are padded as needed. The column's next row index and non-blank count are
     * then advanced.
     *
     * @param project         the project, used to allocate cell indices for new columns
     * @param columnGroup     the group that owns the column
     * @param record          the record receiving the cell
     * @param columnLocalName the column's local name, may be null
     * @param text            the raw cell text, may be null
     */
    static protected void addCell(
        Project project,
        ImportColumnGroup columnGroup,
        ImportRecord record,
        String columnLocalName,
        String text
    ) {
        if (text == null || text.isEmpty()) {
            return;
        }

        Serializable value = ImporterUtilities.parseCellValue(text);

        ImportColumn column = getColumn(project, columnGroup, columnLocalName);
        int cellIndex = column.cellIndex;

        int rowIndex = Math.max(columnGroup.nextRowIndex, column.nextRowIndex);
        while (rowIndex >= record.rows.size()) {
            record.rows.add(new ArrayList<Cell>());
        }

        List<Cell> row = record.rows.get(rowIndex);
        while (cellIndex >= row.size()) {
            row.add(null);
        }

        // Guarded so the message is only built when trace logging is on; this runs per cell.
        if (logger.isTraceEnabled()) {
            logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1));
        }

        row.set(cellIndex, new Cell(value, null));

        column.nextRowIndex = rowIndex + 1;
        column.nonBlankCount++;
    }

    /**
     * Returns the column registered under {@code localName} in the group,
     * creating and registering it first if the group has none.
     *
     * @param project     the project, used when a new column must be created
     * @param columnGroup the group to search
     * @param localName   the column's local name, may be null
     * @return the existing or newly created column
     */
    static protected ImportColumn getColumn(
        Project project,
        ImportColumnGroup columnGroup,
        String localName
    ) {
        ImportColumn column = columnGroup.columns.get(localName);
        if (column == null) {
            column = createColumn(project, columnGroup, localName);
            columnGroup.columns.put(localName, column);
        }
        return column;
    }

    /**
     * Builds a new column for the group without registering it in the group.
     * The column gets a freshly allocated cell index and starts at the group's next row index.
     *
     * @param project     the project whose column model allocates the cell index
     * @param columnGroup the group the column belongs to
     * @param localName   the column's local name, may be null
     * @return the new column
     */
    static protected ImportColumn createColumn(
        Project project,
        ImportColumnGroup columnGroup,
        String localName
    ) {
        ImportColumn newColumn = new ImportColumn();

        newColumn.name = composeVerticalName(columnGroup.name, localName);
        newColumn.cellIndex = project.columnModel.allocateNewCellIndex();
        newColumn.nextRowIndex = columnGroup.nextRowIndex;

        return newColumn;
    }

    /**
     * Returns the subgroup registered under {@code localName} in the group,
     * creating and registering it first if the group has none.
     *
     * @param project     the project, passed on when a new subgroup must be created
     * @param columnGroup the group to search
     * @param localName   the subgroup's local name, may be null
     * @return the existing or newly created subgroup
     */
    static protected ImportColumnGroup getColumnGroup(
        Project project,
        ImportColumnGroup columnGroup,
        String localName
    ) {
        ImportColumnGroup subgroup = columnGroup.subgroups.get(localName);
        if (subgroup == null) {
            subgroup = createColumnGroup(project, columnGroup, localName);
            columnGroup.subgroups.put(localName, subgroup);
        }
        return subgroup;
    }

    /**
     * Builds a new subgroup for the group without registering it in the group.
     * The subgroup starts at the parent group's next row index.
     *
     * @param project     the project (not used by this method)
     * @param columnGroup the parent group
     * @param localName   the subgroup's local name, may be null
     * @return the new subgroup
     */
    static protected ImportColumnGroup createColumnGroup(
        Project project,
        ImportColumnGroup columnGroup,
        String localName
    ) {
        ImportColumnGroup newGroup = new ImportColumnGroup();

        newGroup.name = composeVerticalName(columnGroup.name, localName);
        newGroup.nextRowIndex = columnGroup.nextRowIndex;

        return newGroup;
    }

    /**
     * Derives the display name of a column or subgroup from its parent group's name.
     * With an unnamed parent the local name is used (or "Text" if it is null); otherwise
     * the parent name is used, followed by " - " and the local name when one is given.
     */
    private static String composeVerticalName(String groupName, String localName) {
        if (groupName.length() == 0) {
            return localName == null ? "Text" : localName;
        }
        return localName == null ? groupName : (groupName + " - " + localName);
    }
}

View File

@ -1,6 +1,5 @@
package com.google.refine.importers; package com.google.refine.importers;
import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
@ -16,12 +15,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImporter { public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
static public String[] detectPathFromTag(TreeParser parser, String tag) { static public String[] detectPathFromTag(TreeParser parser, String tag) {
@ -46,7 +44,7 @@ public class XmlImportUtilities extends TreeImporter {
return null; return null;
} }
/** /**
* Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it.
* *
@ -64,7 +62,7 @@ public class XmlImportUtilities extends TreeImporter {
try{ try{
if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic
parser.next(); parser.next();
String localName = parser.getLocalName(); String localName = parser.getLocalName();
String fullName = composeName(parser.getPrefix(), localName); String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) {
@ -92,11 +90,11 @@ public class XmlImportUtilities extends TreeImporter {
} }
return null; return null;
} }
static protected String composeName(String prefix, String localName) { static protected String composeName(String prefix, String localName) {
return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName; return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName;
} }
/** /**
* Seeks for recurring XML element in an InputStream * Seeks for recurring XML element in an InputStream
* which are likely candidates for being data records * which are likely candidates for being data records
@ -106,14 +104,11 @@ public class XmlImportUtilities extends TreeImporter {
* The path to the most numerous of the possible candidates. * The path to the most numerous of the possible candidates.
* null if no candidates were found (less than 6 recurrences) * null if no candidates were found (less than 6 recurrences)
*/ */
static public String[] detectRecordElement(InputStream inputStream) { static public String[] detectRecordElement(TreeParser parser) {
logger.trace("detectRecordElement(inputStream)"); logger.trace("detectRecordElement(inputStream)");
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>(); List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try { try {
//XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic if (eventType == XMLStreamConstants.START_ELEMENT) {//FIXME uses Xml, and is not generic
@ -232,17 +227,15 @@ public class XmlImportUtilities extends TreeImporter {
return null; return null;
} }
static public void importXml( //FIXME could do with a name change to 'importTreeData' or similar
InputStream inputStream, static public void importTreeData(
TreeParser parser,
Project project, Project project,
String[] recordPath, String[] recordPath,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) { ) {
try { try {
TreeParser parser = new XmlParser(inputStream);
while (parser.hasNext()) { while (parser.hasNext()) {
int eventType = parser.next(); int eventType = parser.next();
if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic
@ -337,7 +330,7 @@ public class XmlImportUtilities extends TreeImporter {
cellCount++; cellCount++;
} }
} }
if (cellCount > 0) { if (cellCount > 0) {
project.rows.add(realRow); project.rows.add(realRow);
} }
@ -363,9 +356,9 @@ public class XmlImportUtilities extends TreeImporter {
project, project,
columnGroup, columnGroup,
composeName(parser.getPrefix(), parser.getLocalName())); composeName(parser.getPrefix(), parser.getLocalName()));
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
int attributeCount = parser.getAttributeCount(); int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) { for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i).trim(); String text = parser.getAttributeValue(i).trim();
@ -405,7 +398,7 @@ public class XmlImportUtilities extends TreeImporter {
break; break;
} }
} }
int nextRowIndex = thisColumnGroup.nextRowIndex; int nextRowIndex = thisColumnGroup.nextRowIndex;
for (ImportColumn column2 : thisColumnGroup.columns.values()) { for (ImportColumn column2 : thisColumnGroup.columns.values()) {
nextRowIndex = Math.max(nextRowIndex, column2.nextRowIndex); nextRowIndex = Math.max(nextRowIndex, column2.nextRowIndex);
@ -418,5 +411,5 @@ public class XmlImportUtilities extends TreeImporter {
} }

View File

@ -10,7 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata; import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TreeImporter.ImportColumnGroup; import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser; import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -34,7 +34,7 @@ public class XmlImporter implements StreamImporter {
{ {
byte[] buffer = new byte[BUFFER_SIZE]; byte[] buffer = new byte[BUFFER_SIZE];
int bytes_read = 0; int bytes_read = 0;
try { try {//fill the buffer with data
while (bytes_read < BUFFER_SIZE) { while (bytes_read < BUFFER_SIZE) {
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
if (c == -1) break; if (c == -1) break;
@ -44,13 +44,11 @@ public class XmlImporter implements StreamImporter {
} catch (IOException e) { } catch (IOException e) {
throw new ImportException("Read error",e); throw new ImportException("Read error",e);
} }
if (options.containsKey("importer-record-tag")) {
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
TreeParser parser = null; InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
TreeParser parser = new XmlParser(iStream);
if (options.containsKey("importer-record-tag")) {
try{ try{
parser = new XmlParser(iStream);
recordPath = XmlImportUtilities.detectPathFromTag( recordPath = XmlImportUtilities.detectPathFromTag(
parser, parser,
options.getProperty("importer-record-tag")); options.getProperty("importer-record-tag"));
@ -58,21 +56,18 @@ public class XmlImporter implements StreamImporter {
// silent // silent
// e.printStackTrace(); // e.printStackTrace();
} }
} else { } else {
recordPath = XmlImportUtilities.detectRecordElement( recordPath = XmlImportUtilities.detectRecordElement(parser);
new ByteArrayInputStream(buffer, 0, bytes_read));
} }
} }
if (recordPath == null) if (recordPath == null)
return; return;
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup); ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup);
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
project.columnModel.update(); project.columnModel.update();
} }

View File

@ -0,0 +1,86 @@
package com.google.refine.importers.parsers;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
/**
 * Placeholder {@link TreeParser} backed by a Jackson {@link JsonParser}.
 *
 * Only the construction of the underlying Jackson parser is in place. Every
 * {@link TreeParser} method is still a stub returning a fixed value
 * ({@code 0}, {@code null} or {@code false}), so no events are ever produced.
 */
public class JSONParser implements TreeParser{
    JsonFactory factory = new JsonFactory();
    JsonParser parser = null;

    /**
     * Creates the underlying Jackson parser for the stream.
     * If creation fails, the stack trace is printed and {@code parser} stays null.
     *
     * @param inputStream the JSON data to parse
     */
    public JSONParser(InputStream inputStream){
        try {
            this.parser = this.factory.createJsonParser(inputStream);
        } catch (JsonParseException parseFailure) {
            parseFailure.printStackTrace();
        } catch (IOException readFailure) {
            readFailure.printStackTrace();
        }
    }

    /** Stub: always reports zero attributes. */
    @Override
    public int getAttributeCount() {
        return 0;
    }

    /** Stub: always returns null, whatever the index. */
    @Override
    public String getAttributeLocalName(int index) {
        return null;
    }

    /** Stub: always returns null, whatever the index. */
    @Override
    public String getAttributePrefix(int index) {
        return null;
    }

    /** Stub: always returns null, whatever the index. */
    @Override
    public String getAttributeValue(int index) {
        return null;
    }

    /** Stub: always returns 0. */
    @Override
    public int getEventType() {
        return 0;
    }

    /** Stub: always returns null. */
    @Override
    public String getLocalName() {
        return null;
    }

    /** Stub: always returns null. */
    @Override
    public String getPrefix() {
        return null;
    }

    /** Stub: always returns null. */
    @Override
    public String getText() {
        return null;
    }

    /** Stub: always reports that there is nothing more to read. */
    @Override
    public boolean hasNext() throws ServletException {
        return false;
    }

    /** Stub: never advances the underlying parser; always returns 0. */
    @Override
    public int next() throws ServletException {
        return 0;
    }
}

View File

@ -0,0 +1,241 @@
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.JsonImporter;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest;
/**
 * Tests for {@link JsonImporter}, fed with JSON sample documents built by the
 * static helper methods at the bottom of this class.
 */
public class JsonImporterTests extends RefineTest {

    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    //dependencies
    Project project = null;
    Properties options = null;
    ByteArrayInputStream inputStream = null;

    //System Under Test
    JsonImporter SUT = null;

    @BeforeMethod
    public void SetUp(){
        SUT = new JsonImporter();
        project = new Project();
        options = mock(Properties.class);
    }

    @AfterMethod
    public void TearDown() throws IOException{
        SUT = null;
        project = null;
        options = null;
        if (inputStream != null) inputStream.close();
        inputStream = null;
    }

    @Test
    public void canParseSample(){
        RunTest(getSample());

        log(project);
        assertProjectCreated(project, 4, 6);

        Row row = project.rows.get(0);
        Assert.assertNotNull(row);
        Assert.assertNotNull(row.getCell(2));
        Assert.assertEquals(row.getCell(2).value, "Author 1, The");
    }

    @Test
    public void canParseSampleWithDuplicateNestedElements(){
        RunTest(getSampleWithDuplicateNestedElements());

        log(project);
        assertProjectCreated(project, 4, 12);

        Row row = project.rows.get(0);
        Assert.assertNotNull(row);
        Assert.assertEquals(row.cells.size(), 5);
        Assert.assertNotNull(row.getCell(2));
        Assert.assertEquals(row.getCell(2).value, "Author 1, The");
        Assert.assertEquals(project.rows.get(1).getCell(2).value, "Author 1, Another");
    }

    @Test
    public void testCanParseLineBreak(){
        RunTest(getSampleWithLineBreak());

        log(project);
        assertProjectCreated(project, 4, 6);

        Row row = project.rows.get(3);
        Assert.assertNotNull(row);
        Assert.assertEquals(row.cells.size(), 5);
        Assert.assertNotNull(row.getCell(2));
        Assert.assertEquals(row.getCell(2).value, "With line\n break");
    }

    @Test
    public void testElementsWithVaryingStructure(){
        RunTest(getSampleWithVaryingStructure());

        log(project);
        assertProjectCreated(project, 5, 6);

        // NOTE(review): this expected column name looks XML-specific ("book" element);
        // confirm it is what the JSON importer should produce.
        Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre");

        Row row0 = project.rows.get(0);
        Assert.assertNotNull(row0);
        Assert.assertEquals(row0.cells.size(),5);

        Row row5 = project.rows.get(5);
        Assert.assertNotNull(row5);
        Assert.assertEquals(row5.cells.size(),6);
    }

    @Test
    public void testElementWithNestedTree(){
        RunTest(getSampleWithTreeStructure());

        log(project);
        assertProjectCreated(project, 5, 6);

        Assert.assertEquals(project.columnModel.columnGroups.size(),1);
        Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2);
        Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2);
        Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup);
        Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0);
        Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2);
    }

    //------------helper methods---------------

    /**
     * Builds one JSON object with the fields id, author, title and publish_date.
     *
     * @param id number used for the id field and embedded in the author and title
     */
    public static String getTypicalElement(int id){
        return "{ \"id\" : " + id + "," +
        "\"author\" : \"Author " + id + ", The\"," +
        "\"title\" : \"Book title " + id + "\"," +
        "\"publish_date\" : \"2010-05-26\"" +
        "}";
    }

    /**
     * Builds one JSON object whose "authors" field is an array of two author objects.
     *
     * @param id number used for the id field and embedded in the authors and title
     */
    public static String getElementWithDuplicateSubElement(int id){
        return "{ \"id\" : " + id + "," +
        "\"authors\":[" +
        "{\"author\" : \"Author " + id + ", The\"}," +
        "{\"author\" : \"Author " + id + ", Another\"}" +
        "]," +
        "\"title\" : \"Book title " + id + "\"," +
        "\"publish_date\" : \"2010-05-26\"" +
        "}";
    }

    /** Builds a JSON array of six typical elements (ids 1 to 6). */
    public static String getSample(){
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for(int i = 1; i < 7; i++){
            sb.append(getTypicalElement(i));
            if(i < 6)
                sb.append(",");
        }
        sb.append("]");
        return sb.toString();
    }

    /** Builds a JSON array of six elements that each carry two nested authors. */
    public static String getSampleWithDuplicateNestedElements(){
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for(int i = 1; i < 7; i++){
            sb.append(getElementWithDuplicateSubElement(i));
            if(i < 6)
                sb.append(",");
        }
        sb.append("]");
        return sb.toString();
    }

    /**
     * Builds a JSON array of six elements where the fourth author contains a line break.
     * The break is written as the JSON escape sequence (backslash, n), because a raw
     * newline character is not allowed inside a JSON string.
     */
    public static String getSampleWithLineBreak(){
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for(int i = 1; i < 4; i++){
            sb.append(getTypicalElement(i));
            sb.append(",");
        }
        sb.append("{\"id\" : 4," +
                "\"author\" : \"With line\\n break\"," +
                "\"title\" : \"Book title 4\"," +
                "\"publish_date\" : \"2010-05-26\"" +
                "},");
        sb.append(getTypicalElement(5));
        sb.append(",");
        sb.append(getTypicalElement(6));
        sb.append("]");
        return sb.toString();
    }

    /**
     * Builds a JSON array of five typical elements followed by a sixth element
     * that has an extra "genre" field.
     */
    public static String getSampleWithVaryingStructure(){
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for(int i = 1; i < 6; i++){
            sb.append(getTypicalElement(i));
            // every one of these is followed by the sixth element, so always separate
            sb.append(",");
        }
        sb.append("{\"id\" : 6," +
                "\"author\" : \"Author 6, The\"," +
                "\"title\" : \"Book title 6\"," +
                "\"genre\" : \"New element not seen in other records\"," +
                "\"publish_date\" : \"2010-05-26\"" +
                "}");
        sb.append("]");
        return sb.toString();
    }

    /**
     * Builds a JSON array of six elements whose "author" field is a nested object
     * with the fields author-name and author-dob.
     */
    public static String getSampleWithTreeStructure(){
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for(int i = 1; i < 7; i++){
            sb.append("{\"id\" : " + i + "," +
                    "\"author\" : {\"author-name\" : \"Author " + i + ", The\"," +
                    "\"author-dob\" : \"1950-0" + i + "-15\"}," +
                    "\"title\" : \"Book title " + i + "\"," +
                    "\"publish_date\" : \"2010-05-26\"" +
                    "}");
            // a single separator between elements, none after the last
            if(i < 6)
                sb.append(",");
        }
        sb.append("]");
        return sb.toString();
    }

    /**
     * Feeds the given text, encoded as UTF-8, to the importer under test.
     * Any exception fails the test with the exception attached as the cause.
     */
    private void RunTest(String testString){
        try {
            inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) );
        } catch (UnsupportedEncodingException e1) {
            Assert.fail("UTF-8 encoding is not supported", e1);
        }

        try {
            SUT.read(inputStream, project, new ProjectMetadata(), options);
        } catch (Exception e) {
            Assert.fail("JsonImporter.read threw an exception: " + e, e);
        }
    }
}

View File

@ -15,9 +15,9 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest; import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.google.refine.importers.TreeImporter.ImportColumn; import com.google.refine.importers.TreeImportUtilities.ImportColumn;
import com.google.refine.importers.TreeImporter.ImportColumnGroup; import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.TreeImporter.ImportRecord; import com.google.refine.importers.TreeImportUtilities.ImportRecord;
import com.google.refine.importers.parsers.TreeParser; import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser; import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -78,7 +78,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectPathFromTagWithNestedElement(){ public void detectPathFromTagWithNestedElement(){
loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadXml("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book"; String tag = "book";
createParser(); createParser();
String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag); String[] response = XmlImportUtilitiesStub.detectPathFromTag(parser, tag);
@ -145,7 +145,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectRecordElementRegressionTest(){ public void detectRecordElementRegressionTest(){
loadSampleXml(); loadSampleXml();
String[] path = XmlImportUtilitiesStub.detectRecordElement(inputStream); String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
Assert.assertNotNull(path); Assert.assertNotNull(path);
Assert.assertEquals(path.length, 2); Assert.assertEquals(path.length, 2);
Assert.assertEquals(path[0], "library"); Assert.assertEquals(path[0], "library");
@ -157,7 +157,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml(); loadSampleXml();
String[] recordPath = new String[]{"library","book"}; String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup ); XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup );
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);
@ -177,7 +177,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadXml(XmlImporterTests.getSampleWithVaryingStructure()); loadXml(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"}; String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup); XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
log(project); log(project);
assertProjectCreated(project, 0, 6); assertProjectCreated(project, 0, 6);
@ -277,13 +277,13 @@ public class XmlImportUtilitiesTests extends RefineTest {
log(project); log(project);
Assert.assertNotNull(project.rows); Assert.assertNotNull(project.rows);
Assert.assertEquals(project.rows.size(), 2); Assert.assertEquals(project.rows.size(), 2);
Row row = project.rows.get(0); Row row = project.rows.get(0);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(), 4); Assert.assertEquals(row.cells.size(), 4);
Assert.assertNotNull(row.getCell(2)); Assert.assertNotNull(row.getCell(2));
Assert.assertEquals(row.getCell(2).value, "author1"); Assert.assertEquals(row.getCell(2).value, "author1");
row = project.rows.get(1); row = project.rows.get(1);
Assert.assertEquals(row.getCell(2).value, "author2"); Assert.assertEquals(row.getCell(2).value, "author2");
} }