XmlImporter is now partially unit tested. One test is marked as broken: the case where record elements contain varying numbers of nested elements. (This test covers Issue 61, which is still open at the time of this commit.)
XmlImportUtilities now logs a message when no RecordElementCandidate is found (too few similar XML elements). git-svn-id: http://google-refine.googlecode.com/svn/trunk@862 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
1c47ff476b
commit
34cb1c4d07
@ -16,12 +16,17 @@ import javax.xml.stream.XMLStreamConstants;
|
|||||||
import javax.xml.stream.XMLStreamException;
|
import javax.xml.stream.XMLStreamException;
|
||||||
import javax.xml.stream.XMLStreamReader;
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.metaweb.gridworks.model.Cell;
|
import com.metaweb.gridworks.model.Cell;
|
||||||
import com.metaweb.gridworks.model.Column;
|
import com.metaweb.gridworks.model.Column;
|
||||||
import com.metaweb.gridworks.model.Project;
|
import com.metaweb.gridworks.model.Project;
|
||||||
import com.metaweb.gridworks.model.Row;
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
public class XmlImportUtilities {
|
public class XmlImportUtilities {
|
||||||
|
final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities");
|
||||||
|
|
||||||
static protected class RecordElementCandidate {
|
static protected class RecordElementCandidate {
|
||||||
String[] path;
|
String[] path;
|
||||||
int count;
|
int count;
|
||||||
@ -119,6 +124,7 @@ public class XmlImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static public String[] detectRecordElement(InputStream inputStream) {
|
static public String[] detectRecordElement(InputStream inputStream) {
|
||||||
|
logger.trace("detectRecordElement(inputStream)");
|
||||||
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -147,10 +153,12 @@ public class XmlImportUtilities {
|
|||||||
|
|
||||||
return candidates.get(0).path;
|
return candidates.get(0).path;
|
||||||
}
|
}
|
||||||
|
logger.info("No candidate elements were found in Xml - at least 6 similar elements are required");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) {
|
static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) {
|
||||||
|
logger.trace("detectRecordElement(XMLStreamReader, String[])");
|
||||||
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
||||||
|
|
||||||
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
||||||
@ -219,6 +227,7 @@ public class XmlImportUtilities {
|
|||||||
sortRecordElementCandidates(immediateChildCandidates);
|
sortRecordElementCandidates(immediateChildCandidates);
|
||||||
|
|
||||||
RecordElementCandidate ourCandidate = immediateChildCandidates.get(0);
|
RecordElementCandidate ourCandidate = immediateChildCandidates.get(0);
|
||||||
|
logger.trace("ourCandidate.count : " + ourCandidate.count + "; immediateChildCandidates.size() : " + immediateChildCandidates.size());
|
||||||
if (ourCandidate.count / immediateChildCandidates.size() > 5) {
|
if (ourCandidate.count / immediateChildCandidates.size() > 5) {
|
||||||
return ourCandidate;
|
return ourCandidate;
|
||||||
}
|
}
|
||||||
|
@ -6,11 +6,16 @@ import java.io.PushbackInputStream;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.metaweb.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
|
import com.metaweb.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
|
||||||
import com.metaweb.gridworks.model.Project;
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
|
||||||
public class XmlImporter implements Importer {
|
public class XmlImporter implements Importer {
|
||||||
|
|
||||||
|
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
||||||
|
|
||||||
public static final int BUFFER_SIZE = 64 * 1024;
|
public static final int BUFFER_SIZE = 64 * 1024;
|
||||||
|
|
||||||
public boolean takesReader() {
|
public boolean takesReader() {
|
||||||
@ -28,6 +33,7 @@ public class XmlImporter implements Importer {
|
|||||||
Project project,
|
Project project,
|
||||||
Properties options
|
Properties options
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
|
logger.trace("XmlImporter.read");
|
||||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||||
|
|
||||||
String[] recordPath = null;
|
String[] recordPath = null;
|
||||||
|
@ -0,0 +1,207 @@
|
|||||||
|
package com.metaweb.gridworks.tests.importers;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.importers.XmlImporter;
|
||||||
|
import com.metaweb.gridworks.model.Cell;
|
||||||
|
import com.metaweb.gridworks.model.Column;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
|
|
||||||
|
public class XmlImporterTests {
|
||||||
|
final static Logger logger = LoggerFactory.getLogger("XmlImporterTests");
|
||||||
|
|
||||||
|
//dependencies
|
||||||
|
Project project = null;
|
||||||
|
Properties options = null;
|
||||||
|
ByteArrayInputStream inputStream = null;
|
||||||
|
|
||||||
|
//System Under Test
|
||||||
|
XmlImporter SUT = null;
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void SetUp(){
|
||||||
|
SUT = new XmlImporter();
|
||||||
|
project = new Project();
|
||||||
|
options = mock(Properties.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void TearDown(){
|
||||||
|
SUT = null;
|
||||||
|
project = null;
|
||||||
|
options = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void canParseSample(){
|
||||||
|
|
||||||
|
RunTest(getSample());
|
||||||
|
|
||||||
|
AssertGridCreate(project, 4, 6);
|
||||||
|
PrintProject(project);
|
||||||
|
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row);
|
||||||
|
Assert.assertNotNull(row.cells);
|
||||||
|
Assert.assertNotNull(row.cells.get(2));
|
||||||
|
Assert.assertEquals(row.cells.get(2).value, "Author 1, The");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCanParseLineBreak(){
|
||||||
|
|
||||||
|
RunTest(getSampleWithLineBreak());
|
||||||
|
|
||||||
|
AssertGridCreate(project, 4, 6);
|
||||||
|
PrintProject(project);
|
||||||
|
|
||||||
|
Row row = project.rows.get(3);
|
||||||
|
Assert.assertNotNull(row);
|
||||||
|
Assert.assertNotNull(row.cells);
|
||||||
|
Assert.assertNotNull(row.cells.get(2));
|
||||||
|
Assert.assertEquals(row.cells.get(2).value, "With line\n break");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(groups={"broken"})
|
||||||
|
public void testElementsWithVaryingStructure(){
|
||||||
|
|
||||||
|
|
||||||
|
RunTest(getSampleWithVaryingStructure());
|
||||||
|
|
||||||
|
AssertGridCreate(project, 5, 6);
|
||||||
|
PrintProject(project);
|
||||||
|
|
||||||
|
Row row0 = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row0);
|
||||||
|
Assert.assertNotNull(row0.cells);
|
||||||
|
Assert.assertEquals(row0.cells.size(),6);
|
||||||
|
|
||||||
|
Row row5 = project.rows.get(5);
|
||||||
|
Assert.assertNotNull(row5);
|
||||||
|
Assert.assertNotNull(row5.cells);
|
||||||
|
Assert.assertEquals(row5.cells.size(),6);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//------------helper methods---------------
|
||||||
|
|
||||||
|
protected String getTypicalElement(int id){
|
||||||
|
return "<book id=\"" + id + "\">" +
|
||||||
|
"<author>Author " + id + ", The</author>" +
|
||||||
|
"<title>Book title " + id + "</title>" +
|
||||||
|
"<publish_date>2010-05-26</publish_date>" +
|
||||||
|
"</book>";
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getSample(){
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("<?xml version=\"1.0\"?><library>");
|
||||||
|
for(int i = 1; i < 7; i++){
|
||||||
|
sb.append(getTypicalElement(i));
|
||||||
|
}
|
||||||
|
sb.append("</library>");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getSampleWithLineBreak(){
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("<?xml version=\"1.0\"?><library>");
|
||||||
|
for(int i = 1; i < 4; i++){
|
||||||
|
sb.append(getTypicalElement(i));
|
||||||
|
}
|
||||||
|
sb.append("<book id=\"4\">" +
|
||||||
|
"<author>With line\n break</author>" +
|
||||||
|
"<title>Book title 4</title>" +
|
||||||
|
"<publish_date>2010-05-26</publish_date>" +
|
||||||
|
"</book>");
|
||||||
|
sb.append(getTypicalElement(5));
|
||||||
|
sb.append(getTypicalElement(6));
|
||||||
|
sb.append("</library>");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getSampleWithVaryingStructure(){
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("<?xml version=\"1.0\"?><library>");
|
||||||
|
for(int i = 1; i < 6; i++){
|
||||||
|
sb.append(getTypicalElement(i));
|
||||||
|
}
|
||||||
|
sb.append("<book id=\"6\">" +
|
||||||
|
"<author>With line\n break</author>" +
|
||||||
|
"<title>Book title 6</title>" +
|
||||||
|
"<genre>New element not seen in other records</genre>" +
|
||||||
|
"<publish_date>2010-05-26</publish_date>" +
|
||||||
|
"</book>");
|
||||||
|
sb.append("</library>");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void RunTest(String testString){
|
||||||
|
try {
|
||||||
|
inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) );
|
||||||
|
} catch (UnsupportedEncodingException e1) {
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
SUT.read(inputStream, project, options);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
inputStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void AssertGridCreate(Project project, int numCols, int numRows){
|
||||||
|
Assert.assertNotNull(project);
|
||||||
|
Assert.assertNotNull(project.columnModel);
|
||||||
|
Assert.assertNotNull(project.columnModel.columns);
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), numCols);
|
||||||
|
Assert.assertNotNull(project.rows);
|
||||||
|
Assert.assertEquals(project.rows.size(), numRows);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void PrintProject(Project project){
|
||||||
|
//some quick and dirty debugging
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for(Column c : project.columnModel.columns){
|
||||||
|
sb.append(c.getName());
|
||||||
|
sb.append("; ");
|
||||||
|
}
|
||||||
|
logger.info(sb.toString());
|
||||||
|
for(Row r : project.rows){
|
||||||
|
sb = new StringBuilder();
|
||||||
|
for(Cell c : r.cells){
|
||||||
|
if(c != null){
|
||||||
|
sb.append(c.value);
|
||||||
|
sb.append("; ");
|
||||||
|
}else{
|
||||||
|
sb.append("null; ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger.info(sb.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user