fixes #3462 (#3921)

Co-authored-by: Antonin Delpeuch <antonin@delpeuch.eu>
This commit is contained in:
Warpeas 2021-05-31 04:24:06 +08:00 committed by GitHub
parent 7dd779e674
commit fed23ec7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 284 additions and 174 deletions

View File

@ -224,7 +224,7 @@ abstract public class ImportingParserBase implements ImportingParser {
return addColumn(project, fileNameColumnName, columnId);
}
private static int addArchiveColumn(Project project) {
protected static int addArchiveColumn(Project project) {
String columnName = "Archive"; // TODO: Localize?
return addColumn(project, columnName, 0);
}

View File

@ -39,6 +39,8 @@ import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import com.google.refine.model.Cell;
import com.google.refine.model.Row;
import org.apache.commons.lang.NotImplementedException;
import com.fasterxml.jackson.databind.node.ObjectNode;
@ -111,11 +113,24 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
) throws IOException {
final File file = ImportingUtilities.getFile(job, fileRecord);
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
int filenameColumnIndex = -1;
int archiveColumnIndex = -1;
int startingRowCount = project.rows.size();
progress.startFile(fileSource);
try {
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
try {
if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false)
&& archiveFileName != null) {
archiveColumnIndex = addArchiveColumn(project);
}
if (JSONUtilities.getBoolean(options, "includeFileSources", false)) {
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
}
if (useInputStream) {
parseOneFile(project, metadata, job, fileSource, inputStream,
rootColumnGroup, limit, options, exceptions);
@ -129,6 +144,18 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
parseOneFile(project, metadata, job, fileSource, reader,
rootColumnGroup, limit, options, exceptions);
}
// Fill in filename and archive name column for all rows added from this file
int endingRowCount = project.rows.size();
for (int i = startingRowCount; i < endingRowCount; i++) {
Row row = project.rows.get(i);
if (archiveColumnIndex >= 0) {
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
}
if (filenameColumnIndex >= 0) {
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
}
}
} finally {
inputStream.close();
}

View File

@ -0,0 +1,13 @@
{
"library": [
{
"book1": {
"author": {
"author-name": "author1",
"author-dob": "date"
},
"genre": "genre1"
}
}
]
}

View File

@ -33,15 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.*;
import java.util.ArrayList;
import java.lang.reflect.Method;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.ITestResult;
@ -77,7 +75,7 @@ public class JsonImporterTests extends ImporterTest {
JsonImporter SUT = null;
@BeforeMethod
public void setUp(Method method){
public void setUp(Method method) {
super.setUp();
SUT = new JsonImporter();
logger.debug("About to run test method: " + method.getName());
@ -99,7 +97,7 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void canParseSample(){
public void canParseSample() {
RunTest(getSample());
assertProjectCreated(project, 4, 6);
@ -109,8 +107,8 @@ public class JsonImporterTests extends ImporterTest {
Assert.assertEquals(row.getCell(1).value, "Author 1, The");
}
@Test
public void canThrowError(){
@Test
public void canThrowError() {
String errJSON = getSampleWithError();
ObjectNode options = SUT.createParserUIInitializationData(
job, new LinkedList<>(), "text/json");
@ -122,7 +120,7 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "guessCellValueTypes", false);
try {
inputStream = new ByteArrayInputStream(errJSON.getBytes( "UTF-8" ) );
inputStream = new ByteArrayInputStream(errJSON.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
Assert.fail();
}
@ -146,16 +144,16 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void trimLeadingTrailingWhitespaceOnTrimStrings(){
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \" University of Cambridge \",\n" +
" \"name\": \" Amy Zhang \",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\"\n" +
" }\n" +
"]\n";
public void trimLeadingTrailingWhitespaceOnTrimStrings() {
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \" University of Cambridge \",\n" +
" \"name\": \" Amy Zhang \",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\"\n" +
" }\n" +
"]\n";
RunTest(ScraperwikiOutput, true);
assertProjectCreated(project, 4, 1);
Row row = project.rows.get(0);
@ -166,16 +164,16 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(){
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \" University of Cambridge \",\n" +
" \"name\": \" Amy Zhang \",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\"\n" +
" }\n" +
"]\n";
public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings() {
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \" University of Cambridge \",\n" +
" \"name\": \" Amy Zhang \",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\"\n" +
" }\n" +
"]\n";
RunTest(ScraperwikiOutput);
assertProjectCreated(project, 4, 1);
Row row = project.rows.get(0);
@ -186,7 +184,7 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void canParseSampleWithDuplicateNestedElements(){
public void canParseSampleWithDuplicateNestedElements() {
RunTest(getSampleWithDuplicateNestedElements());
assertProjectCreated(project, 4, 12);
@ -199,7 +197,7 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void testCanParseLineBreak(){
public void testCanParseLineBreak() {
RunTest(getSampleWithLineBreak());
assertProjectCreated(project, 4, 6);
@ -211,38 +209,38 @@ public class JsonImporterTests extends ImporterTest {
}
@Test
public void testElementsWithVaryingStructure(){
public void testElementsWithVaryingStructure() {
RunTest(getSampleWithVaryingStructure());
assertProjectCreated(project, 5, 6);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre");
Assert.assertEquals(project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre");
Row row0 = project.rows.get(0);
Assert.assertNotNull(row0);
Assert.assertEquals(row0.cells.size(),4);
Assert.assertEquals(row0.cells.size(), 4);
Row row5 = project.rows.get(5);
Row row5 = project.rows.get(5);
Assert.assertNotNull(row5);
Assert.assertEquals(row5.cells.size(),5);
Assert.assertEquals(row5.cells.size(), 5);
}
@Test
public void testElementWithNestedTree(){
public void testElementWithNestedTree() {
RunTest(getSampleWithTreeStructure());
assertProjectCreated(project, 5, 6);
Assert.assertEquals(project.columnModel.columnGroups.size(),1);
Assert.assertEquals(project.columnModel.columnGroups.size(), 1);
Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3);
Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3);
Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup);
Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0);
Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2);
Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(), 0);
Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan, 2);
}
@Test
public void testElementWithMqlReadOutput(){
public void testElementWithMqlReadOutput() {
String mqlOutput = "{\"code\":\"/api/status/ok\",\"result\":[{\"armed_force\":{\"id\":\"/en/wehrmacht\"},\"id\":\"/en/afrika_korps\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/en/sacred_band_of_thebes\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/british_16_air_assault_brigade\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/pathfinder_platoon\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0ch7qgz\"},\"id\":\"/en/sacred_band\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/polish_navy\"},\"id\":\"/en/3rd_ship_flotilla\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxn9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxq9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqh\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqw\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxl3\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxlp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0ck96kz\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cm3j23\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cw8hb4\",\"type\":\"/military/military_unit\"}],\"status\":\"200 OK\",\"transaction_id\":\"cache;cache01.p01.sjc1:8101;2010-10-04T15:04:33Z;0007\"}";
ObjectNode options = SUT.createParserUIInitializationData(
job, new LinkedList<>(), "text/json");
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
@ -252,115 +250,115 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "recordPath", path);
RunTest(mqlOutput, options);
assertProjectCreated(project,3,16);
assertProjectCreated(project, 3, 16);
}
@Test
public void testJSONMinimumArray(){
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \"University of Cambridge\\n" +
" United Kingdom\",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\",\n" +
" \"intl-faculty-score\": \"96\",\n" +
" \"rank\": \"#1\",\n" +
" \"peer-review-score\": \"100\",\n" +
" \"emp-review-score\": \"100\",\n" +
" \"score\": \"100.0\",\n" +
" \"citations-score\": \"93\"\n" +
" },\n" +
" {\n" +
" \"school\": \"Harvard University\\n" +
" United States\",\n" +
" \"student-faculty-score\": \"97\",\n" +
" \"intl-student-score\": \"87\",\n" +
" \"intl-faculty-score\": \"71\",\n" +
" \"rank\": \"#2\",\n" +
" \"peer-review-score\": \"100\",\n" +
" \"emp-review-score\": \"100\",\n" +
" \"score\": \"99.2\",\n" +
" \"citations-score\": \"100\"\n" +
" }\n" +
"]\n";
public void testJSONMinimumArray() {
String ScraperwikiOutput =
"[\n" +
"{\n" +
" \"school\": \"University of Cambridge\\n" +
" United Kingdom\",\n" +
" \"student-faculty-score\": \"100\",\n" +
" \"intl-student-score\": \"95\",\n" +
" \"intl-faculty-score\": \"96\",\n" +
" \"rank\": \"#1\",\n" +
" \"peer-review-score\": \"100\",\n" +
" \"emp-review-score\": \"100\",\n" +
" \"score\": \"100.0\",\n" +
" \"citations-score\": \"93\"\n" +
" },\n" +
" {\n" +
" \"school\": \"Harvard University\\n" +
" United States\",\n" +
" \"student-faculty-score\": \"97\",\n" +
" \"intl-student-score\": \"87\",\n" +
" \"intl-faculty-score\": \"71\",\n" +
" \"rank\": \"#2\",\n" +
" \"peer-review-score\": \"100\",\n" +
" \"emp-review-score\": \"100\",\n" +
" \"score\": \"99.2\",\n" +
" \"citations-score\": \"100\"\n" +
" }\n" +
"]\n";
RunTest(ScraperwikiOutput);
assertProjectCreated(project,9,2);
assertProjectCreated(project, 9, 2);
}
/**
* org.codehaus.Jackson.JsonParser has an inconsistency when returning getLocalName
* of an Entity_Start token which occurs after a Field_Name token
*/
@Test
public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{
public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception {
String sampleJson = "{\"field\":\"value\"}";
String sampleJson2 = "{\"field\":{}}";
String sampleJson3 = "{\"field\":[{},{}]}";
JSONTreeReader parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson.getBytes("UTF-8")));
Token token = Token.Ignorable;
int i = 0;
try{
while(token != null){
try {
while (token != null) {
token = parser.next();
if(token == null) {
if (token == null) {
break;
}
i++;
if(i == 3){
if (i == 3) {
Assert.assertEquals(Token.Value, token);
Assert.assertEquals("field", parser.getFieldName());
}
}
}catch(Exception e){
} catch (Exception e) {
//silent
}
parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson2.getBytes("UTF-8")));
token = Token.Ignorable;
i = 0;
try{
while(token != null){
try {
while (token != null) {
token = parser.next();
if(token == null) {
if (token == null) {
break;
}
i++;
if(i == 3){
if (i == 3) {
Assert.assertEquals(Token.StartEntity, token);
Assert.assertEquals(parser.getFieldName(), "field");
}
}
}catch(Exception e){
} catch (Exception e) {
//silent
}
parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson3.getBytes("UTF-8")));
token = Token.Ignorable;
i = 0;
try{
while(token != null){
try {
while (token != null) {
token = parser.next();
if(token == null) {
if (token == null) {
break;
}
i++;
if(i == 3){
if (i == 3) {
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), "field");
}
if(i == 4){
if (i == 4) {
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS);
}
if(i == 6){
if (i == 6) {
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS);
}
}
}catch(Exception e){
} catch (Exception e) {
//silent
}
}
@ -386,73 +384,73 @@ public class JsonImporterTests extends ImporterTest {
Assert.assertEquals("\tvalue", parser.getFieldValue());
}
}
}catch(Exception e){
} catch (Exception e) {
Assert.fail();
}
}
@Test
public void testJsonDatatypes(){
public void testJsonDatatypes() {
RunTest(getSampleWithDataTypes());
assertProjectCreated(project, 2, 21,4);
assertProjectCreated(project, 2, 21, 4);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");
Assert.assertEquals(project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
Assert.assertEquals(project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");
Row row = project.rows.get(8);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,""); // Make sure empty strings are preserved
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, ""); // Make sure empty strings are preserved
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
row = project.rows.get(12);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertNull(row.cells.get(1).value);
Assert.assertEquals(row.cells.size(), 2);
Assert.assertNull(row.cells.get(1).value);
row = project.rows.get(13);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Boolean.TRUE);
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Boolean.TRUE);
row = project.rows.get(14);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Boolean.FALSE);
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Boolean.FALSE);
row = project.rows.get(15);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(0));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Long.valueOf(0));
row = project.rows.get(16);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(1));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Long.valueOf(1));
row = project.rows.get(17);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Double.parseDouble("-2.1"));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Double.parseDouble("-2.1"));
row = project.rows.get(18);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)0.23));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 0.23));
row = project.rows.get(19);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)-0.24));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) -0.24));
row = project.rows.get(20);
Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2);
Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value));
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)3.14e100));
Assert.assertEquals(row.cells.size(), 2);
Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value));
Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 3.14e100));
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
@ -461,59 +459,94 @@ public class JsonImporterTests extends ImporterTest {
@Test
public void testComplexJsonStructure() throws IOException{
public void testComplexJsonStructure() throws IOException {
String fileName = "grid_small.json";
RunComplexJSONTest(getComplexJSON(fileName));
logger.debug("************************ columnu number:" + project.columnModel.columns.size() +
". \tcolumn groups number:" + project.columnModel.columnGroups.size() +
".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()) ;
".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount());
assertProjectCreated(project, 63, 63, 8);
}
/**
 * Verifies that enabling the "includeFileSources" option makes the importer
 * add a "File" column and fill it with the source file name for imported rows.
 *
 * @throws Exception if the sample resource cannot be located, copied or parsed
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "json-sample-format-1.json";
    Assert.assertNotNull(ClassLoader.getSystemResource(FILE), "Missing test resource: " + FILE);
    // Resolve through a URI rather than URL.getPath(): getPath() keeps percent-encoding,
    // which yields a non-existent path when the checkout directory contains spaces
    // or other characters that get escaped in URLs.
    File resourceFile = new File(ClassLoader.getSystemResource(FILE).toURI());
    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(resourceFile, new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), "text/json");
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, JsonImporter.ANONYMOUS);
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    // The option under test: request a column holding the source file name.
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            "text/json",
            -1,
            options,
            exceptions
    );

    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods---------------
private static String getTypicalElement(int id){
private static String getTypicalElement(int id) {
return "{ \"id\" : " + id + "," +
"\"author\" : \"Author " + id + ", The\"," +
"\"title\" : \"Book title " + id + "\"," +
"\"publish_date\" : \"2010-05-26\"" +
"}";
"\"author\" : \"Author " + id + ", The\"," +
"\"title\" : \"Book title " + id + "\"," +
"\"publish_date\" : \"2010-05-26\"" +
"}";
}
private static String getElementWithDuplicateSubElement(int id){
private static String getElementWithDuplicateSubElement(int id) {
return "{ \"id\" : " + id + "," +
"\"authors\":[" +
"{\"name\" : \"Author " + id + ", The\"}," +
"{\"name\" : \"Author " + id + ", Another\"}" +
"]," +
"\"title\" : \"Book title " + id + "\"," +
"\"publish_date\" : \"2010-05-26\"" +
"}";
"\"authors\":[" +
"{\"name\" : \"Author " + id + ", The\"}," +
"{\"name\" : \"Author " + id + ", Another\"}" +
"]," +
"\"title\" : \"Book title " + id + "\"," +
"\"publish_date\" : \"2010-05-26\"" +
"}";
}
static String getSample(){
static String getSample() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
for (int i = 1; i < 7; i++) {
sb.append(getTypicalElement(i));
if(i < 6) {
if (i < 6) {
sb.append(",");
}
}
sb.append("]");
return sb.toString();
}
private static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser, String pathSelector, boolean trimStrings) {
ObjectNode options = parser.createParserUIInitializationData(
job, new LinkedList<>(), "text/json");
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
JSONUtilities.append(path, JsonImporter.ANONYMOUS);
JSONUtilities.append(path, pathSelector);
JSONUtilities.safePut(options, "recordPath", path);
JSONUtilities.safePut(options, "trimStrings", trimStrings);
JSONUtilities.safePut(options, "storeEmptyStrings", true);
@ -522,12 +555,12 @@ public class JsonImporterTests extends ImporterTest {
return options;
}
private static String getSampleWithDuplicateNestedElements(){
private static String getSampleWithDuplicateNestedElements() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
for (int i = 1; i < 7; i++) {
sb.append(getElementWithDuplicateSubElement(i));
if(i < 6) {
if (i < 6) {
sb.append(",");
}
}
@ -535,10 +568,10 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
private static String getSampleWithLineBreak(){
private static String getSampleWithLineBreak() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 4; i++){
for (int i = 1; i < 4; i++) {
sb.append(getTypicalElement(i));
sb.append(",");
}
@ -554,10 +587,10 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
private static String getSampleWithVaryingStructure(){
private static String getSampleWithVaryingStructure() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 6; i++){
for (int i = 1; i < 6; i++) {
sb.append(getTypicalElement(i));
sb.append(",");
}
@ -571,36 +604,36 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString();
}
private static String getSampleWithTreeStructure(){
private static String getSampleWithTreeStructure() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for(int i = 1; i < 7; i++){
for (int i = 1; i < 7; i++) {
sb.append("{\"id\" : " + i + "," +
"\"author\" : {\"author-name\" : \"Author " + i + ", The\"," +
"\"author-dob\" : \"1950-0" + i + "-15\"}," +
"\"title\" : \"Book title " + i + "\"," +
"\"publish_date\" : \"2010-05-26\"" +
"}");
if(i < 6) {
if (i < 6) {
sb.append(",");
}
}
sb.append("]");
return sb.toString();
}
private static String getSampleWithDataTypes() {
StringBuilder sb = new StringBuilder();
sb.append("[");
int i = 1;
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n");
sb.append("]");
return sb.toString();
}
private static String getSampleWithError(){
private static String getSampleWithError() {
StringBuilder sb = new StringBuilder();
sb.append("[");
sb.append("{\"id\":" + "\"\n\";");
@ -611,7 +644,7 @@ public class JsonImporterTests extends ImporterTest {
/**
 * Runs the importer on the given JSON text using the options built by
 * {@code getOptions} for the anonymous record path, with string trimming disabled.
 *
 * @param testString JSON document to import
 */
private void RunTest(String testString) {
    ObjectNode defaultOptions = getOptions(job, SUT, JsonImporter.ANONYMOUS, false);
    RunTest(testString, defaultOptions);
}
/**
 * Runs the importer on the given JSON text using "institutes" as the
 * record path selector, with string trimming disabled.
 *
 * @param testString JSON document to import
 */
private void RunComplexJSONTest(String testString) {
    ObjectNode institutesOptions = getOptions(job, SUT, "institutes", false);
    RunTest(testString, institutesOptions);
}
@ -619,10 +652,10 @@ public class JsonImporterTests extends ImporterTest {
/**
 * Runs the importer on the given JSON text using the anonymous record path
 * and the requested string-trimming setting.
 *
 * @param testString  JSON document to import
 * @param trimStrings value passed through as the "trimStrings" option
 */
private void RunTest(String testString, boolean trimStrings) {
    ObjectNode trimOptions = getOptions(job, SUT, JsonImporter.ANONYMOUS, trimStrings);
    RunTest(testString, trimOptions);
}
private void RunTest(String testString, ObjectNode options) {
try {
inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) );
inputStream = new ByteArrayInputStream(testString.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
Assert.fail();
}
@ -633,12 +666,12 @@ public class JsonImporterTests extends ImporterTest {
Assert.fail();
}
}
/**
 * Reads a classpath resource fully into a string, decoding it as UTF-8.
 *
 * @param fileName name of the resource, resolved through this class's class loader
 * @return the complete content of the resource
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getComplexJSON(String fileName) throws IOException {
    // try-with-resources so the stream is released even when reading fails
    try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream(fileName)) {
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null; fail with a
            // clear message instead of an NPE from inside the copy utility.
            throw new IOException("Test resource not found on classpath: " + fileName);
        }
        return org.apache.commons.io.IOUtils.toString(stream, "UTF-8");
    }
}
}

View File

@ -33,12 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
@ -199,6 +201,41 @@ public class XmlImporterTests extends ImporterTest {
Assert.assertEquals(cg0.columnSpan,2);
}
/**
 * Verifies that enabling the "includeFileSources" option makes the importer
 * add a "File" column and fill it with the source file name for imported rows.
 *
 * @throws Exception if the sample resource cannot be located, copied or parsed
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "xml-sample-format-1.xml";
    Assert.assertNotNull(ClassLoader.getSystemResource(FILE), "Missing test resource: " + FILE);
    // Resolve through a URI rather than URL.getPath(): getPath() keeps percent-encoding,
    // which yields a non-existent path when the checkout directory contains spaces
    // or other characters that get escaped in URLs.
    File resourceFile = new File(ClassLoader.getSystemResource(FILE).toURI());
    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(resourceFile, new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    // NOTE(review): "text/json" is passed as the format here and to parse() below even
    // though this exercises the XML importer — confirm whether the format is consulted.
    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), "text/json");
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, "library");
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    // The option under test: request a column holding the source file name.
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            "text/json",
            -1,
            options,
            exceptions
    );

    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods---------------
public static String getTypicalElement(int id){