fixes #3462 (#3921)

Co-authored-by: Antonin Delpeuch <antonin@delpeuch.eu>
This commit is contained in:
Warpeas 2021-05-31 04:24:06 +08:00 committed by GitHub
parent 7dd779e674
commit fed23ec7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 284 additions and 174 deletions

View File

@ -224,7 +224,7 @@ abstract public class ImportingParserBase implements ImportingParser {
return addColumn(project, fileNameColumnName, columnId); return addColumn(project, fileNameColumnName, columnId);
} }
private static int addArchiveColumn(Project project) { protected static int addArchiveColumn(Project project) {
String columnName = "Archive"; // TODO: Localize? String columnName = "Archive"; // TODO: Localize?
return addColumn(project, columnName, 0); return addColumn(project, columnName, 0);
} }

View File

@ -39,6 +39,8 @@ import java.io.InputStream;
import java.io.Reader; import java.io.Reader;
import java.util.List; import java.util.List;
import com.google.refine.model.Cell;
import com.google.refine.model.Row;
import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang.NotImplementedException;
import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.ObjectNode;
@ -111,11 +113,24 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
) throws IOException { ) throws IOException {
final File file = ImportingUtilities.getFile(job, fileRecord); final File file = ImportingUtilities.getFile(job, fileRecord);
final String fileSource = ImportingUtilities.getFileSource(fileRecord); final String fileSource = ImportingUtilities.getFileSource(fileRecord);
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
int filenameColumnIndex = -1;
int archiveColumnIndex = -1;
int startingRowCount = project.rows.size();
progress.startFile(fileSource); progress.startFile(fileSource);
try { try {
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress); InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
try { try {
if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false)
&& archiveFileName != null) {
archiveColumnIndex = addArchiveColumn(project);
}
if (JSONUtilities.getBoolean(options, "includeFileSources", false)) {
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
}
if (useInputStream) { if (useInputStream) {
parseOneFile(project, metadata, job, fileSource, inputStream, parseOneFile(project, metadata, job, fileSource, inputStream,
rootColumnGroup, limit, options, exceptions); rootColumnGroup, limit, options, exceptions);
@ -129,6 +144,18 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
parseOneFile(project, metadata, job, fileSource, reader, parseOneFile(project, metadata, job, fileSource, reader,
rootColumnGroup, limit, options, exceptions); rootColumnGroup, limit, options, exceptions);
} }
// Fill in filename and archive name column for all rows added from this file
int endingRowCount = project.rows.size();
for (int i = startingRowCount; i < endingRowCount; i++) {
Row row = project.rows.get(i);
if (archiveColumnIndex >= 0) {
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
}
if (filenameColumnIndex >= 0) {
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
}
}
} finally { } finally {
inputStream.close(); inputStream.close();
} }

View File

@ -0,0 +1,13 @@
{
"library": [
{
"book1": {
"author": {
"author-name": "author1",
"author-dob": "date"
},
"genre": "genre1"
}
}
]
}

View File

@ -33,15 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers; package com.google.refine.importers;
import java.io.ByteArrayInputStream; import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList; import java.util.ArrayList;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
import org.testng.ITestResult; import org.testng.ITestResult;
@ -77,7 +75,7 @@ public class JsonImporterTests extends ImporterTest {
JsonImporter SUT = null; JsonImporter SUT = null;
@BeforeMethod @BeforeMethod
public void setUp(Method method){ public void setUp(Method method) {
super.setUp(); super.setUp();
SUT = new JsonImporter(); SUT = new JsonImporter();
logger.debug("About to run test method: " + method.getName()); logger.debug("About to run test method: " + method.getName());
@ -99,7 +97,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void canParseSample(){ public void canParseSample() {
RunTest(getSample()); RunTest(getSample());
assertProjectCreated(project, 4, 6); assertProjectCreated(project, 4, 6);
@ -110,7 +108,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void canThrowError(){ public void canThrowError() {
String errJSON = getSampleWithError(); String errJSON = getSampleWithError();
ObjectNode options = SUT.createParserUIInitializationData( ObjectNode options = SUT.createParserUIInitializationData(
job, new LinkedList<>(), "text/json"); job, new LinkedList<>(), "text/json");
@ -122,7 +120,7 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "guessCellValueTypes", false);
try { try {
inputStream = new ByteArrayInputStream(errJSON.getBytes( "UTF-8" ) ); inputStream = new ByteArrayInputStream(errJSON.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) { } catch (UnsupportedEncodingException e1) {
Assert.fail(); Assert.fail();
} }
@ -146,7 +144,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void trimLeadingTrailingWhitespaceOnTrimStrings(){ public void trimLeadingTrailingWhitespaceOnTrimStrings() {
String ScraperwikiOutput = String ScraperwikiOutput =
"[\n" + "[\n" +
"{\n" + "{\n" +
@ -166,7 +164,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(){ public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings() {
String ScraperwikiOutput = String ScraperwikiOutput =
"[\n" + "[\n" +
"{\n" + "{\n" +
@ -186,7 +184,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void canParseSampleWithDuplicateNestedElements(){ public void canParseSampleWithDuplicateNestedElements() {
RunTest(getSampleWithDuplicateNestedElements()); RunTest(getSampleWithDuplicateNestedElements());
assertProjectCreated(project, 4, 12); assertProjectCreated(project, 4, 12);
@ -199,7 +197,7 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void testCanParseLineBreak(){ public void testCanParseLineBreak() {
RunTest(getSampleWithLineBreak()); RunTest(getSampleWithLineBreak());
assertProjectCreated(project, 4, 6); assertProjectCreated(project, 4, 6);
@ -211,36 +209,36 @@ public class JsonImporterTests extends ImporterTest {
} }
@Test @Test
public void testElementsWithVaryingStructure(){ public void testElementsWithVaryingStructure() {
RunTest(getSampleWithVaryingStructure()); RunTest(getSampleWithVaryingStructure());
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre"); Assert.assertEquals(project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre");
Row row0 = project.rows.get(0); Row row0 = project.rows.get(0);
Assert.assertNotNull(row0); Assert.assertNotNull(row0);
Assert.assertEquals(row0.cells.size(),4); Assert.assertEquals(row0.cells.size(), 4);
Row row5 = project.rows.get(5); Row row5 = project.rows.get(5);
Assert.assertNotNull(row5); Assert.assertNotNull(row5);
Assert.assertEquals(row5.cells.size(),5); Assert.assertEquals(row5.cells.size(), 5);
} }
@Test @Test
public void testElementWithNestedTree(){ public void testElementWithNestedTree() {
RunTest(getSampleWithTreeStructure()); RunTest(getSampleWithTreeStructure());
assertProjectCreated(project, 5, 6); assertProjectCreated(project, 5, 6);
Assert.assertEquals(project.columnModel.columnGroups.size(),1); Assert.assertEquals(project.columnModel.columnGroups.size(), 1);
Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3); Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3);
Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3); Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3);
Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup);
Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(), 0);
Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan, 2);
} }
@Test @Test
public void testElementWithMqlReadOutput(){ public void testElementWithMqlReadOutput() {
String mqlOutput = "{\"code\":\"/api/status/ok\",\"result\":[{\"armed_force\":{\"id\":\"/en/wehrmacht\"},\"id\":\"/en/afrika_korps\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/en/sacred_band_of_thebes\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/british_16_air_assault_brigade\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/pathfinder_platoon\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0ch7qgz\"},\"id\":\"/en/sacred_band\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/polish_navy\"},\"id\":\"/en/3rd_ship_flotilla\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxn9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxq9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqh\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqw\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxl3\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxlp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0ck96kz\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cm3j23\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cw8hb4\",\"type\":\"/military/military_unit\"}],\"status\":\"200 OK\",\"transaction_id\":\"cache;cache01.p01.sjc1:8101;2010-10-04T15:04:33Z;0007\"}"; String mqlOutput = 
"{\"code\":\"/api/status/ok\",\"result\":[{\"armed_force\":{\"id\":\"/en/wehrmacht\"},\"id\":\"/en/afrika_korps\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/en/sacred_band_of_thebes\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/british_16_air_assault_brigade\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/pathfinder_platoon\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0ch7qgz\"},\"id\":\"/en/sacred_band\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/polish_navy\"},\"id\":\"/en/3rd_ship_flotilla\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxn9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxq9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqh\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqw\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxl3\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxlp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0ck96kz\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cm3j23\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cw8hb4\",\"type\":\"/military/military_unit\"}],\"status\":\"200 OK\",\"transaction_id\":\"cache;cache01.p01.sjc1:8101;2010-10-04T15:04:33Z;0007\"}";
ObjectNode options = SUT.createParserUIInitializationData( ObjectNode options = SUT.createParserUIInitializationData(
@ -252,11 +250,11 @@ public class JsonImporterTests extends ImporterTest {
JSONUtilities.safePut(options, "recordPath", path); JSONUtilities.safePut(options, "recordPath", path);
RunTest(mqlOutput, options); RunTest(mqlOutput, options);
assertProjectCreated(project,3,16); assertProjectCreated(project, 3, 16);
} }
@Test @Test
public void testJSONMinimumArray(){ public void testJSONMinimumArray() {
String ScraperwikiOutput = String ScraperwikiOutput =
"[\n" + "[\n" +
"{\n" + "{\n" +
@ -285,7 +283,7 @@ public class JsonImporterTests extends ImporterTest {
" }\n" + " }\n" +
"]\n"; "]\n";
RunTest(ScraperwikiOutput); RunTest(ScraperwikiOutput);
assertProjectCreated(project,9,2); assertProjectCreated(project, 9, 2);
} }
/** /**
@ -293,7 +291,7 @@ public class JsonImporterTests extends ImporterTest {
* of an Entity_Start token which occurs after a Field_Name token * of an Entity_Start token which occurs after a Field_Name token
*/ */
@Test @Test
public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{ public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception {
String sampleJson = "{\"field\":\"value\"}"; String sampleJson = "{\"field\":\"value\"}";
String sampleJson2 = "{\"field\":{}}"; String sampleJson2 = "{\"field\":{}}";
String sampleJson3 = "{\"field\":[{},{}]}"; String sampleJson3 = "{\"field\":[{},{}]}";
@ -301,19 +299,19 @@ public class JsonImporterTests extends ImporterTest {
JSONTreeReader parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson.getBytes("UTF-8"))); JSONTreeReader parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson.getBytes("UTF-8")));
Token token = Token.Ignorable; Token token = Token.Ignorable;
int i = 0; int i = 0;
try{ try {
while(token != null){ while (token != null) {
token = parser.next(); token = parser.next();
if(token == null) { if (token == null) {
break; break;
} }
i++; i++;
if(i == 3){ if (i == 3) {
Assert.assertEquals(Token.Value, token); Assert.assertEquals(Token.Value, token);
Assert.assertEquals("field", parser.getFieldName()); Assert.assertEquals("field", parser.getFieldName());
} }
} }
}catch(Exception e){ } catch (Exception e) {
//silent //silent
} }
@ -321,46 +319,46 @@ public class JsonImporterTests extends ImporterTest {
parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson2.getBytes("UTF-8"))); parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson2.getBytes("UTF-8")));
token = Token.Ignorable; token = Token.Ignorable;
i = 0; i = 0;
try{ try {
while(token != null){ while (token != null) {
token = parser.next(); token = parser.next();
if(token == null) { if (token == null) {
break; break;
} }
i++; i++;
if(i == 3){ if (i == 3) {
Assert.assertEquals(Token.StartEntity, token); Assert.assertEquals(Token.StartEntity, token);
Assert.assertEquals(parser.getFieldName(), "field"); Assert.assertEquals(parser.getFieldName(), "field");
} }
} }
}catch(Exception e){ } catch (Exception e) {
//silent //silent
} }
parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson3.getBytes("UTF-8"))); parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson3.getBytes("UTF-8")));
token = Token.Ignorable; token = Token.Ignorable;
i = 0; i = 0;
try{ try {
while(token != null){ while (token != null) {
token = parser.next(); token = parser.next();
if(token == null) { if (token == null) {
break; break;
} }
i++; i++;
if(i == 3){ if (i == 3) {
Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), "field"); Assert.assertEquals(parser.getFieldName(), "field");
} }
if(i == 4){ if (i == 4) {
Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS); Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS);
} }
if(i == 6){ if (i == 6) {
Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS); Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS);
} }
} }
}catch(Exception e){ } catch (Exception e) {
//silent //silent
} }
} }
@ -386,72 +384,72 @@ public class JsonImporterTests extends ImporterTest {
Assert.assertEquals("\tvalue", parser.getFieldValue()); Assert.assertEquals("\tvalue", parser.getFieldValue());
} }
} }
}catch(Exception e){ } catch (Exception e) {
Assert.fail(); Assert.fail();
} }
} }
@Test @Test
public void testJsonDatatypes(){ public void testJsonDatatypes() {
RunTest(getSampleWithDataTypes()); RunTest(getSampleWithDataTypes());
assertProjectCreated(project, 2, 21,4); assertProjectCreated(project, 2, 21, 4);
Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id"); Assert.assertEquals(project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id");
Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell"); Assert.assertEquals(project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell");
Row row = project.rows.get(8); Row row = project.rows.get(8);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,""); // Make sure empty strings are preserved Assert.assertEquals(row.cells.get(1).value, ""); // Make sure empty strings are preserved
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 // null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
row = project.rows.get(12); row = project.rows.get(12);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertNull(row.cells.get(1).value); Assert.assertNull(row.cells.get(1).value);
row = project.rows.get(13); row = project.rows.get(13);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Boolean.TRUE); Assert.assertEquals(row.cells.get(1).value, Boolean.TRUE);
row = project.rows.get(14); row = project.rows.get(14);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Boolean.FALSE); Assert.assertEquals(row.cells.get(1).value, Boolean.FALSE);
row = project.rows.get(15); row = project.rows.get(15);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(0)); Assert.assertEquals(row.cells.get(1).value, Long.valueOf(0));
row = project.rows.get(16); row = project.rows.get(16);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Long.valueOf(1)); Assert.assertEquals(row.cells.get(1).value, Long.valueOf(1));
row = project.rows.get(17); row = project.rows.get(17);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Double.parseDouble("-2.1")); Assert.assertEquals(row.cells.get(1).value, Double.parseDouble("-2.1"));
row = project.rows.get(18); row = project.rows.get(18);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)0.23)); Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 0.23));
row = project.rows.get(19); row = project.rows.get(19);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)-0.24)); Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) -0.24));
row = project.rows.get(20); row = project.rows.get(20);
Assert.assertNotNull(row); Assert.assertNotNull(row);
Assert.assertEquals(row.cells.size(),2); Assert.assertEquals(row.cells.size(), 2);
Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value)); Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value));
Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)3.14e100)); Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 3.14e100));
// null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 // null, true, false 0,1,-2.1,0.23,-0.24,3.14e100
@ -461,20 +459,55 @@ public class JsonImporterTests extends ImporterTest {
@Test @Test
public void testComplexJsonStructure() throws IOException{ public void testComplexJsonStructure() throws IOException {
String fileName = "grid_small.json"; String fileName = "grid_small.json";
RunComplexJSONTest(getComplexJSON(fileName)); RunComplexJSONTest(getComplexJSON(fileName));
logger.debug("************************ columnu number:" + project.columnModel.columns.size() + logger.debug("************************ columnu number:" + project.columnModel.columns.size() +
". \tcolumn groups number:" + project.columnModel.columnGroups.size() + ". \tcolumn groups number:" + project.columnModel.columnGroups.size() +
".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()) ; ".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount());
assertProjectCreated(project, 63, 63, 8); assertProjectCreated(project, 63, 63, 8);
} }
/**
 * Checks that importing a file with the {@code includeFileSources} option enabled
 * creates a "File" column and that cell 0 of the first row holds the file name.
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "json-sample-format-1.json";

    // Resolve the fixture through a URI rather than URL.getPath(): getPath() returns the
    // percent-encoded form, which does not name a real file when the path contains
    // spaces or other escaped characters.
    java.net.URL resource = ClassLoader.getSystemResource(FILE);
    Assert.assertNotNull(resource, "Test resource not found on classpath: " + FILE);

    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(new File(resource.toURI()), new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), "text/json");
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, JsonImporter.ANONYMOUS);
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    // The option under test: ask the importer to record the source file of each row.
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            "text/json",
            -1,
            options,
            exceptions
    );

    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods--------------- //------------helper methods---------------
private static String getTypicalElement(int id){ private static String getTypicalElement(int id) {
return "{ \"id\" : " + id + "," + return "{ \"id\" : " + id + "," +
"\"author\" : \"Author " + id + ", The\"," + "\"author\" : \"Author " + id + ", The\"," +
"\"title\" : \"Book title " + id + "\"," + "\"title\" : \"Book title " + id + "\"," +
@ -482,7 +515,7 @@ public class JsonImporterTests extends ImporterTest {
"}"; "}";
} }
private static String getElementWithDuplicateSubElement(int id){ private static String getElementWithDuplicateSubElement(int id) {
return "{ \"id\" : " + id + "," + return "{ \"id\" : " + id + "," +
"\"authors\":[" + "\"authors\":[" +
"{\"name\" : \"Author " + id + ", The\"}," + "{\"name\" : \"Author " + id + ", The\"}," +
@ -493,12 +526,12 @@ public class JsonImporterTests extends ImporterTest {
"}"; "}";
} }
static String getSample(){ static String getSample() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for (int i = 1; i < 7; i++) {
sb.append(getTypicalElement(i)); sb.append(getTypicalElement(i));
if(i < 6) { if (i < 6) {
sb.append(","); sb.append(",");
} }
} }
@ -522,12 +555,12 @@ public class JsonImporterTests extends ImporterTest {
return options; return options;
} }
private static String getSampleWithDuplicateNestedElements(){ private static String getSampleWithDuplicateNestedElements() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for (int i = 1; i < 7; i++) {
sb.append(getElementWithDuplicateSubElement(i)); sb.append(getElementWithDuplicateSubElement(i));
if(i < 6) { if (i < 6) {
sb.append(","); sb.append(",");
} }
} }
@ -535,10 +568,10 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
private static String getSampleWithLineBreak(){ private static String getSampleWithLineBreak() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 4; i++){ for (int i = 1; i < 4; i++) {
sb.append(getTypicalElement(i)); sb.append(getTypicalElement(i));
sb.append(","); sb.append(",");
} }
@ -554,10 +587,10 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
private static String getSampleWithVaryingStructure(){ private static String getSampleWithVaryingStructure() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 6; i++){ for (int i = 1; i < 6; i++) {
sb.append(getTypicalElement(i)); sb.append(getTypicalElement(i));
sb.append(","); sb.append(",");
} }
@ -571,17 +604,17 @@ public class JsonImporterTests extends ImporterTest {
return sb.toString(); return sb.toString();
} }
private static String getSampleWithTreeStructure(){ private static String getSampleWithTreeStructure() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
for(int i = 1; i < 7; i++){ for (int i = 1; i < 7; i++) {
sb.append("{\"id\" : " + i + "," + sb.append("{\"id\" : " + i + "," +
"\"author\" : {\"author-name\" : \"Author " + i + ", The\"," + "\"author\" : {\"author-name\" : \"Author " + i + ", The\"," +
"\"author-dob\" : \"1950-0" + i + "-15\"}," + "\"author-dob\" : \"1950-0" + i + "-15\"}," +
"\"title\" : \"Book title " + i + "\"," + "\"title\" : \"Book title " + i + "\"," +
"\"publish_date\" : \"2010-05-26\"" + "\"publish_date\" : \"2010-05-26\"" +
"}"); "}");
if(i < 6) { if (i < 6) {
sb.append(","); sb.append(",");
} }
} }
@ -593,14 +626,14 @@ public class JsonImporterTests extends ImporterTest {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
int i = 1; int i = 1;
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n"); sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n"); sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n");
sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n"); sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n");
sb.append("]"); sb.append("]");
return sb.toString(); return sb.toString();
} }
private static String getSampleWithError(){ private static String getSampleWithError() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("["); sb.append("[");
sb.append("{\"id\":" + "\"\n\";"); sb.append("{\"id\":" + "\"\n\";");
@ -622,7 +655,7 @@ public class JsonImporterTests extends ImporterTest {
private void RunTest(String testString, ObjectNode options) { private void RunTest(String testString, ObjectNode options) {
try { try {
inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) ); inputStream = new ByteArrayInputStream(testString.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) { } catch (UnsupportedEncodingException e1) {
Assert.fail(); Assert.fail();
} }

View File

@ -33,12 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers; package com.google.refine.importers;
import java.io.ByteArrayInputStream; import java.io.*;
import java.io.IOException; import java.util.ArrayList;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Collections; import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
@ -199,6 +201,41 @@ public class XmlImporterTests extends ImporterTest {
Assert.assertEquals(cg0.columnSpan,2); Assert.assertEquals(cg0.columnSpan,2);
} }
/**
 * Checks that importing a file with the {@code includeFileSources} option enabled
 * creates a "File" column and that cell 0 of the first row holds the file name.
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "xml-sample-format-1.xml";

    // Resolve the fixture through a URI rather than URL.getPath(): getPath() returns the
    // percent-encoded form, which does not name a real file when the path contains
    // spaces or other escaped characters.
    java.net.URL resource = ClassLoader.getSystemResource(FILE);
    Assert.assertNotNull(resource, "Test resource not found on classpath: " + FILE);

    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(new File(resource.toURI()), new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    // NOTE(review): the format string is "text/json" here and in parse() below even though
    // the fixture is XML — looks carried over from the JSON test; confirm whether it matters.
    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), "text/json");
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, "library");
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    // The option under test: ask the importer to record the source file of each row.
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            "text/json",
            -1,
            options,
            exceptions
    );

    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods--------------- //------------helper methods---------------
public static String getTypicalElement(int id){ public static String getTypicalElement(int id){