diff --git a/main/src/com/google/refine/importers/ImportingParserBase.java b/main/src/com/google/refine/importers/ImportingParserBase.java index 1448dcb7c..dba1e448b 100644 --- a/main/src/com/google/refine/importers/ImportingParserBase.java +++ b/main/src/com/google/refine/importers/ImportingParserBase.java @@ -224,7 +224,7 @@ abstract public class ImportingParserBase implements ImportingParser { return addColumn(project, fileNameColumnName, columnId); } - private static int addArchiveColumn(Project project) { + protected static int addArchiveColumn(Project project) { String columnName = "Archive"; // TODO: Localize? return addColumn(project, columnName, 0); } diff --git a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java index 4896fc2f8..755c08567 100644 --- a/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java +++ b/main/src/com/google/refine/importers/tree/TreeImportingParserBase.java @@ -39,6 +39,8 @@ import java.io.InputStream; import java.io.Reader; import java.util.List; +import com.google.refine.model.Cell; +import com.google.refine.model.Row; import org.apache.commons.lang.NotImplementedException; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -111,11 +113,24 @@ abstract public class TreeImportingParserBase extends ImportingParserBase { ) throws IOException { final File file = ImportingUtilities.getFile(job, fileRecord); final String fileSource = ImportingUtilities.getFileSource(fileRecord); + final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord); + int filenameColumnIndex = -1; + int archiveColumnIndex = -1; + int startingRowCount = project.rows.size(); progress.startFile(fileSource); try { InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress); try { + + if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false) + && archiveFileName != null) { + archiveColumnIndex = addArchiveColumn(project); + } + if (JSONUtilities.getBoolean(options, "includeFileSources", false)) { + filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0); + } + if (useInputStream) { parseOneFile(project, metadata, job, fileSource, inputStream, rootColumnGroup, limit, options, exceptions); @@ -129,6 +144,18 @@ abstract public class TreeImportingParserBase extends ImportingParserBase { parseOneFile(project, metadata, job, fileSource, reader, rootColumnGroup, limit, options, exceptions); } + +// Fill in filename and archive name column for all rows added from this file + int endingRowCount = project.rows.size(); + for (int i = startingRowCount; i < endingRowCount; i++) { + Row row = project.rows.get(i); + if (archiveColumnIndex >= 0) { + row.setCell(archiveColumnIndex, new Cell(archiveFileName, null)); + } + if (filenameColumnIndex >= 0) { + row.setCell(filenameColumnIndex, new Cell(fileSource, null)); + } + } } finally { inputStream.close(); } diff --git a/main/tests/data/json-sample-format-1.json b/main/tests/data/json-sample-format-1.json new file mode 100644 index 000000000..3db560a3c --- /dev/null +++ b/main/tests/data/json-sample-format-1.json @@ -0,0 +1,13 @@ +{ + "library": [ + { + "book1": { + "author": { + "author-name": "author1", + "author-dob": "date" + }, + "genre": "genre1" + } + } + ] +} diff --git a/main/tests/server/src/com/google/refine/importers/JsonImporterTests.java b/main/tests/server/src/com/google/refine/importers/JsonImporterTests.java index a634c8061..943d0af5f 100644 --- a/main/tests/server/src/com/google/refine/importers/JsonImporterTests.java +++ b/main/tests/server/src/com/google/refine/importers/JsonImporterTests.java @@ -33,15 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.importers; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.util.ArrayList; import java.lang.reflect.Method; import java.util.LinkedList; import java.util.List; +import org.apache.commons.io.FileUtils; import org.slf4j.LoggerFactory; import org.testng.Assert; import org.testng.ITestResult; @@ -77,7 +75,7 @@ public class JsonImporterTests extends ImporterTest { JsonImporter SUT = null; @BeforeMethod - public void setUp(Method method){ + public void setUp(Method method) { super.setUp(); SUT = new JsonImporter(); logger.debug("About to run test method: " + method.getName()); @@ -99,7 +97,7 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void canParseSample(){ + public void canParseSample() { RunTest(getSample()); assertProjectCreated(project, 4, 6); @@ -109,8 +107,8 @@ public class JsonImporterTests extends ImporterTest { Assert.assertEquals(row.getCell(1).value, "Author 1, The"); } - @Test - public void canThrowError(){ + @Test + public void canThrowError() { String errJSON = getSampleWithError(); ObjectNode options = SUT.createParserUIInitializationData( job, new LinkedList<>(), "text/json"); @@ -122,7 +120,7 @@ public class JsonImporterTests extends ImporterTest { JSONUtilities.safePut(options, "guessCellValueTypes", false); try { - inputStream = new ByteArrayInputStream(errJSON.getBytes( "UTF-8" ) ); + inputStream = new ByteArrayInputStream(errJSON.getBytes("UTF-8")); } catch (UnsupportedEncodingException e1) { Assert.fail(); } @@ -146,16 +144,16 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void trimLeadingTrailingWhitespaceOnTrimStrings(){ - String ScraperwikiOutput = - "[\n" + - "{\n" + - " \"school\": \" University of Cambridge \",\n" + - " \"name\": \" Amy Zhang \",\n" + - " \"student-faculty-score\": \"100\",\n" + - " \"intl-student-score\": \"95\"\n" + - " }\n" + - "]\n"; + public void trimLeadingTrailingWhitespaceOnTrimStrings() { + String ScraperwikiOutput = + "[\n" + + "{\n" + + " \"school\": \" University of Cambridge \",\n" + + " \"name\": \" Amy Zhang \",\n" + + " \"student-faculty-score\": \"100\",\n" + + " \"intl-student-score\": \"95\"\n" + + " }\n" + + "]\n"; RunTest(ScraperwikiOutput, true); assertProjectCreated(project, 4, 1); Row row = project.rows.get(0); @@ -166,16 +164,16 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(){ - String ScraperwikiOutput = - "[\n" + - "{\n" + - " \"school\": \" University of Cambridge \",\n" + - " \"name\": \" Amy Zhang \",\n" + - " \"student-faculty-score\": \"100\",\n" + - " \"intl-student-score\": \"95\"\n" + - " }\n" + - "]\n"; + public void doesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings() { + String ScraperwikiOutput = + "[\n" + + "{\n" + + " \"school\": \" University of Cambridge \",\n" + + " \"name\": \" Amy Zhang \",\n" + + " \"student-faculty-score\": \"100\",\n" + + " \"intl-student-score\": \"95\"\n" + + " }\n" + + "]\n"; RunTest(ScraperwikiOutput); assertProjectCreated(project, 4, 1); Row row = project.rows.get(0); @@ -186,7 +184,7 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void canParseSampleWithDuplicateNestedElements(){ + public void canParseSampleWithDuplicateNestedElements() { RunTest(getSampleWithDuplicateNestedElements()); assertProjectCreated(project, 4, 12); @@ -199,7 +197,7 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void testCanParseLineBreak(){ + public void testCanParseLineBreak() { RunTest(getSampleWithLineBreak()); assertProjectCreated(project, 4, 6); @@ -211,38 +209,38 @@ public class JsonImporterTests extends ImporterTest { } @Test - public void testElementsWithVaryingStructure(){ + public void testElementsWithVaryingStructure() { RunTest(getSampleWithVaryingStructure()); assertProjectCreated(project, 5, 6); - Assert.assertEquals( project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre"); + Assert.assertEquals(project.columnModel.getColumnByCellIndex(4).getName(), JsonImporter.ANONYMOUS + " - genre"); Row row0 = project.rows.get(0); Assert.assertNotNull(row0); - Assert.assertEquals(row0.cells.size(),4); + Assert.assertEquals(row0.cells.size(), 4); - Row row5 = project.rows.get(5); + Row row5 = project.rows.get(5); Assert.assertNotNull(row5); - Assert.assertEquals(row5.cells.size(),5); + Assert.assertEquals(row5.cells.size(), 5); } @Test - public void testElementWithNestedTree(){ + public void testElementWithNestedTree() { RunTest(getSampleWithTreeStructure()); assertProjectCreated(project, 5, 6); - Assert.assertEquals(project.columnModel.columnGroups.size(),1); + Assert.assertEquals(project.columnModel.columnGroups.size(), 1); Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 3); Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 3); Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); - Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); - Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(), 0); + Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan, 2); } - + @Test - public void testElementWithMqlReadOutput(){ + public void testElementWithMqlReadOutput() { String mqlOutput = "{\"code\":\"/api/status/ok\",\"result\":[{\"armed_force\":{\"id\":\"/en/wehrmacht\"},\"id\":\"/en/afrika_korps\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/en/sacred_band_of_thebes\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/british_16_air_assault_brigade\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/british_army\"},\"id\":\"/en/pathfinder_platoon\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0ch7qgz\"},\"id\":\"/en/sacred_band\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/en/polish_navy\"},\"id\":\"/en/3rd_ship_flotilla\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxn9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxq9\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqh\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c0kxqw\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxl3\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0c1wxlp\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0ck96kz\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cm3j23\",\"type\":\"/military/military_unit\"},{\"armed_force\":{\"id\":\"/m/0chtrwn\"},\"id\":\"/m/0cw8hb4\",\"type\":\"/military/military_unit\"}],\"status\":\"200 OK\",\"transaction_id\":\"cache;cache01.p01.sjc1:8101;2010-10-04T15:04:33Z;0007\"}"; - + ObjectNode options = SUT.createParserUIInitializationData( job, new LinkedList<>(), "text/json"); ArrayNode path = ParsingUtilities.mapper.createArrayNode(); @@ -252,115 +250,115 @@ public class JsonImporterTests extends ImporterTest { JSONUtilities.safePut(options, "recordPath", path); RunTest(mqlOutput, options); - assertProjectCreated(project,3,16); + assertProjectCreated(project, 3, 16); } - + @Test - public void testJSONMinimumArray(){ - String ScraperwikiOutput = - "[\n" + - "{\n" + - " \"school\": \"University of Cambridge\\n" + - " United Kingdom\",\n" + - " \"student-faculty-score\": \"100\",\n" + - " \"intl-student-score\": \"95\",\n" + - " \"intl-faculty-score\": \"96\",\n" + - " \"rank\": \"#1\",\n" + - " \"peer-review-score\": \"100\",\n" + - " \"emp-review-score\": \"100\",\n" + - " \"score\": \"100.0\",\n" + - " \"citations-score\": \"93\"\n" + - " },\n" + - " {\n" + - " \"school\": \"Harvard University\\n" + - " United States\",\n" + - " \"student-faculty-score\": \"97\",\n" + - " \"intl-student-score\": \"87\",\n" + - " \"intl-faculty-score\": \"71\",\n" + - " \"rank\": \"#2\",\n" + - " \"peer-review-score\": \"100\",\n" + - " \"emp-review-score\": \"100\",\n" + - " \"score\": \"99.2\",\n" + - " \"citations-score\": \"100\"\n" + - " }\n" + - "]\n"; + public void testJSONMinimumArray() { + String ScraperwikiOutput = + "[\n" + + "{\n" + + " \"school\": \"University of Cambridge\\n" + + " United Kingdom\",\n" + + " \"student-faculty-score\": \"100\",\n" + + " \"intl-student-score\": \"95\",\n" + + " \"intl-faculty-score\": \"96\",\n" + + " \"rank\": \"#1\",\n" + + " \"peer-review-score\": \"100\",\n" + + " \"emp-review-score\": \"100\",\n" + + " \"score\": \"100.0\",\n" + + " \"citations-score\": \"93\"\n" + + " },\n" + + " {\n" + + " \"school\": \"Harvard University\\n" + + " United States\",\n" + + " \"student-faculty-score\": \"97\",\n" + + " \"intl-student-score\": \"87\",\n" + + " \"intl-faculty-score\": \"71\",\n" + + " \"rank\": \"#2\",\n" + + " \"peer-review-score\": \"100\",\n" + + " \"emp-review-score\": \"100\",\n" + + " \"score\": \"99.2\",\n" + + " \"citations-score\": \"100\"\n" + + " }\n" + + "]\n"; RunTest(ScraperwikiOutput); - assertProjectCreated(project,9,2); + assertProjectCreated(project, 9, 2); } - + /** * org.codehaus.Jackson.JsonParser has an inconsistency when returning getLocalName * of an Entity_Start token which occurs after a Field_Name token */ @Test - public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception{ + public void EnsureJSONParserHandlesgetLocalNameCorrectly() throws Exception { String sampleJson = "{\"field\":\"value\"}"; String sampleJson2 = "{\"field\":{}}"; String sampleJson3 = "{\"field\":[{},{}]}"; - + JSONTreeReader parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson.getBytes("UTF-8"))); Token token = Token.Ignorable; int i = 0; - try{ - while(token != null){ + try { + while (token != null) { token = parser.next(); - if(token == null) { + if (token == null) { break; } i++; - if(i == 3){ + if (i == 3) { Assert.assertEquals(Token.Value, token); Assert.assertEquals("field", parser.getFieldName()); } } - }catch(Exception e){ + } catch (Exception e) { //silent } - - + + parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson2.getBytes("UTF-8"))); token = Token.Ignorable; i = 0; - try{ - while(token != null){ + try { + while (token != null) { token = parser.next(); - if(token == null) { + if (token == null) { break; } i++; - if(i == 3){ + if (i == 3) { Assert.assertEquals(Token.StartEntity, token); Assert.assertEquals(parser.getFieldName(), "field"); } } - }catch(Exception e){ + } catch (Exception e) { //silent } - + parser = new JSONTreeReader(new ByteArrayInputStream(sampleJson3.getBytes("UTF-8"))); token = Token.Ignorable; i = 0; - try{ - while(token != null){ + try { + while (token != null) { token = parser.next(); - if(token == null) { + if (token == null) { break; } i++; - if(i == 3){ + if (i == 3) { Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(parser.getFieldName(), "field"); } - if(i == 4){ + if (i == 4) { Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS); } - if(i == 6){ + if (i == 6) { Assert.assertEquals(token, Token.StartEntity); Assert.assertEquals(parser.getFieldName(), JsonImporter.ANONYMOUS); } } - }catch(Exception e){ + } catch (Exception e) { //silent } } @@ -386,73 +384,73 @@ public class JsonImporterTests extends ImporterTest { Assert.assertEquals("\tvalue", parser.getFieldValue()); } } - }catch(Exception e){ + } catch (Exception e) { Assert.fail(); } } @Test - public void testJsonDatatypes(){ + public void testJsonDatatypes() { RunTest(getSampleWithDataTypes()); - assertProjectCreated(project, 2, 21,4); + assertProjectCreated(project, 2, 21, 4); - Assert.assertEquals( project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id"); - Assert.assertEquals( project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell"); + Assert.assertEquals(project.columnModel.getColumnByCellIndex(0).getName(), JsonImporter.ANONYMOUS + " - id"); + Assert.assertEquals(project.columnModel.getColumnByCellIndex(1).getName(), JsonImporter.ANONYMOUS + " - cell - cell"); Row row = project.rows.get(8); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,""); // Make sure empty strings are preserved + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, ""); // Make sure empty strings are preserved // null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 row = project.rows.get(12); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertNull(row.cells.get(1).value); + Assert.assertEquals(row.cells.size(), 2); + Assert.assertNull(row.cells.get(1).value); row = project.rows.get(13); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Boolean.TRUE); - + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Boolean.TRUE); + row = project.rows.get(14); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Boolean.FALSE); - + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Boolean.FALSE); + row = project.rows.get(15); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Long.valueOf(0)); + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Long.valueOf(0)); row = project.rows.get(16); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Long.valueOf(1)); + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Long.valueOf(1)); row = project.rows.get(17); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Double.parseDouble("-2.1")); + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Double.parseDouble("-2.1")); row = project.rows.get(18); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)0.23)); - + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 0.23)); + row = project.rows.get(19); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)-0.24)); - + Assert.assertEquals(row.cells.size(), 2); + Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) -0.24)); + row = project.rows.get(20); Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(),2); - Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value)); - Assert.assertEquals(row.cells.get(1).value,Double.valueOf((double)3.14e100)); - + Assert.assertEquals(row.cells.size(), 2); + Assert.assertFalse(Double.isNaN((Double) row.cells.get(1).value)); + Assert.assertEquals(row.cells.get(1).value, Double.valueOf((double) 3.14e100)); + // null, true, false 0,1,-2.1,0.23,-0.24,3.14e100 @@ -461,59 +459,94 @@ public class JsonImporterTests extends ImporterTest { @Test - public void testComplexJsonStructure() throws IOException{ + public void testComplexJsonStructure() throws IOException { String fileName = "grid_small.json"; RunComplexJSONTest(getComplexJSON(fileName)); logger.debug("************************ columnu number:" + project.columnModel.columns.size() + ". \tcolumn groups number:" + project.columnModel.columnGroups.size() + - ".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()) ; + ".\trow number:" + project.rows.size() + ".\trecord number:" + project.recordModel.getRecordCount()); assertProjectCreated(project, 63, 63, 8); } + @Test + public void testAddFileColumn() throws Exception { + final String FILE = "json-sample-format-1.json"; + String filename = ClassLoader.getSystemResource(FILE).getPath(); + // File is assumed to be in job.getRawDataDir(), so copy it there + FileUtils.copyFile(new File(filename), new File(job.getRawDataDir(), FILE)); + List fileRecords = new ArrayList<>(); + fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, "json-sample-format-1.json"))); + + ObjectNode options = SUT.createParserUIInitializationData( + job, new LinkedList<>(), "text/json"); + ArrayNode path = ParsingUtilities.mapper.createArrayNode(); + JSONUtilities.append(path, JsonImporter.ANONYMOUS); + JSONUtilities.safePut(options, "recordPath", path); + JSONUtilities.safePut(options, "trimStrings", false); + JSONUtilities.safePut(options, "storeEmptyStrings", true); + JSONUtilities.safePut(options, "guessCellValueTypes", false); + JSONUtilities.safePut(options,"includeFileSources",true); + + List exceptions = new ArrayList(); + + SUT.parse( + project, + metadata, + job, + fileRecords, + "text/json", + -1, + options, + exceptions + ); + Assert.assertNotNull(project.columnModel.getColumnByName("File")); + Assert.assertEquals(project.rows.get(0).getCell(0).value,"json-sample-format-1.json"); + } + //------------helper methods--------------- - private static String getTypicalElement(int id){ + private static String getTypicalElement(int id) { return "{ \"id\" : " + id + "," + - "\"author\" : \"Author " + id + ", The\"," + - "\"title\" : \"Book title " + id + "\"," + - "\"publish_date\" : \"2010-05-26\"" + - "}"; + "\"author\" : \"Author " + id + ", The\"," + + "\"title\" : \"Book title " + id + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "}"; } - private static String getElementWithDuplicateSubElement(int id){ + private static String getElementWithDuplicateSubElement(int id) { return "{ \"id\" : " + id + "," + - "\"authors\":[" + - "{\"name\" : \"Author " + id + ", The\"}," + - "{\"name\" : \"Author " + id + ", Another\"}" + - "]," + - "\"title\" : \"Book title " + id + "\"," + - "\"publish_date\" : \"2010-05-26\"" + - "}"; + "\"authors\":[" + + "{\"name\" : \"Author " + id + ", The\"}," + + "{\"name\" : \"Author " + id + ", Another\"}" + + "]," + + "\"title\" : \"Book title " + id + "\"," + + "\"publish_date\" : \"2010-05-26\"" + + "}"; } - static String getSample(){ + static String getSample() { StringBuilder sb = new StringBuilder(); sb.append("["); - for(int i = 1; i < 7; i++){ + for (int i = 1; i < 7; i++) { sb.append(getTypicalElement(i)); - if(i < 6) { + if (i < 6) { sb.append(","); } } sb.append("]"); return sb.toString(); } - + private static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser, String pathSelector, boolean trimStrings) { ObjectNode options = parser.createParserUIInitializationData( job, new LinkedList<>(), "text/json"); - + ArrayNode path = ParsingUtilities.mapper.createArrayNode(); JSONUtilities.append(path, JsonImporter.ANONYMOUS); JSONUtilities.append(path, pathSelector); - + JSONUtilities.safePut(options, "recordPath", path); JSONUtilities.safePut(options, "trimStrings", trimStrings); JSONUtilities.safePut(options, "storeEmptyStrings", true); @@ -522,12 +555,12 @@ public class JsonImporterTests extends ImporterTest { return options; } - private static String getSampleWithDuplicateNestedElements(){ + private static String getSampleWithDuplicateNestedElements() { StringBuilder sb = new StringBuilder(); sb.append("["); - for(int i = 1; i < 7; i++){ + for (int i = 1; i < 7; i++) { sb.append(getElementWithDuplicateSubElement(i)); - if(i < 6) { + if (i < 6) { sb.append(","); } } @@ -535,10 +568,10 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - private static String getSampleWithLineBreak(){ + private static String getSampleWithLineBreak() { StringBuilder sb = new StringBuilder(); sb.append("["); - for(int i = 1; i < 4; i++){ + for (int i = 1; i < 4; i++) { sb.append(getTypicalElement(i)); sb.append(","); } @@ -554,10 +587,10 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - private static String getSampleWithVaryingStructure(){ + private static String getSampleWithVaryingStructure() { StringBuilder sb = new StringBuilder(); sb.append("["); - for(int i = 1; i < 6; i++){ + for (int i = 1; i < 6; i++) { sb.append(getTypicalElement(i)); sb.append(","); } @@ -571,36 +604,36 @@ public class JsonImporterTests extends ImporterTest { return sb.toString(); } - private static String getSampleWithTreeStructure(){ + private static String getSampleWithTreeStructure() { StringBuilder sb = new StringBuilder(); sb.append("["); - for(int i = 1; i < 7; i++){ + for (int i = 1; i < 7; i++) { sb.append("{\"id\" : " + i + "," + "\"author\" : {\"author-name\" : \"Author " + i + ", The\"," + "\"author-dob\" : \"1950-0" + i + "-15\"}," + "\"title\" : \"Book title " + i + "\"," + "\"publish_date\" : \"2010-05-26\"" + "}"); - if(i < 6) { + if (i < 6) { sb.append(","); } } sb.append("]"); return sb.toString(); } - + private static String getSampleWithDataTypes() { StringBuilder sb = new StringBuilder(); sb.append("["); int i = 1; - sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n"); - sb.append("{\"id\":"+ i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n"); + sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"foo\",\"DEU\",\"19\",\"01:49\"]},\n"); + sb.append("{\"id\":" + i++ + ",\"cell\":[\"39766\",\"T1009\",\"\",\"DEU\",\"19\",\"01:49\"]},\n"); sb.append("{\"id\":null,\"cell\":[null,true,false,0,1,-2.1,0.23,-0.24,3.14e100]}\n"); sb.append("]"); return sb.toString(); } - - private static String getSampleWithError(){ + + private static String getSampleWithError() { StringBuilder sb = new StringBuilder(); sb.append("["); sb.append("{\"id\":" + "\"\n\";"); @@ -611,7 +644,7 @@ public class JsonImporterTests extends ImporterTest { private void RunTest(String testString) { RunTest(testString, getOptions(job, SUT, JsonImporter.ANONYMOUS, false)); } - + private void RunComplexJSONTest(String testString) { RunTest(testString, getOptions(job, SUT, "institutes", false)); } @@ -619,10 +652,10 @@ public class JsonImporterTests extends ImporterTest { private void RunTest(String testString, boolean trimStrings) { RunTest(testString, getOptions(job, SUT, JsonImporter.ANONYMOUS, trimStrings)); } - + private void RunTest(String testString, ObjectNode options) { try { - inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) ); + inputStream = new ByteArrayInputStream(testString.getBytes("UTF-8")); } catch (UnsupportedEncodingException e1) { Assert.fail(); } @@ -633,12 +666,12 @@ public class JsonImporterTests extends ImporterTest { Assert.fail(); } } - + private String getComplexJSON(String fileName) throws IOException { InputStream in = this.getClass().getClassLoader() .getResourceAsStream(fileName); String content = org.apache.commons.io.IOUtils.toString(in, "UTF-8"); - + return content; } } diff --git a/main/tests/server/src/com/google/refine/importers/XmlImporterTests.java b/main/tests/server/src/com/google/refine/importers/XmlImporterTests.java index d2d568feb..2b6db9952 100644 --- a/main/tests/server/src/com/google/refine/importers/XmlImporterTests.java +++ b/main/tests/server/src/com/google/refine/importers/XmlImporterTests.java @@ -33,12 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.importers; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.io.*; +import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import com.google.refine.importers.tree.ImportColumnGroup; +import org.apache.commons.io.FileUtils; import org.slf4j.LoggerFactory; import org.testng.Assert; import org.testng.annotations.AfterMethod; @@ -199,6 +201,41 @@ public class XmlImporterTests extends ImporterTest { Assert.assertEquals(cg0.columnSpan,2); } + @Test + public void testAddFileColumn() throws Exception { + final String FILE = "xml-sample-format-1.xml"; + String filename = ClassLoader.getSystemResource(FILE).getPath(); + // File is assumed to be in job.getRawDataDir(), so copy it there + FileUtils.copyFile(new File(filename), new File(job.getRawDataDir(), FILE)); + List fileRecords = new ArrayList<>(); + fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, "xml-sample-format-1.xml"))); + + ObjectNode options = SUT.createParserUIInitializationData( + job, new LinkedList<>(), "text/json"); + ArrayNode path = ParsingUtilities.mapper.createArrayNode(); + JSONUtilities.append(path, "library"); + JSONUtilities.safePut(options, "recordPath", path); + JSONUtilities.safePut(options, "trimStrings", false); + JSONUtilities.safePut(options, "storeEmptyStrings", true); + JSONUtilities.safePut(options, "guessCellValueTypes", false); + JSONUtilities.safePut(options,"includeFileSources",true); + + List exceptions = new ArrayList(); + + SUT.parse( + project, + metadata, + job, + fileRecords, + "text/json", + -1, + options, + exceptions + ); + Assert.assertNotNull(project.columnModel.getColumnByName("File")); + Assert.assertEquals(project.rows.get(0).getCell(0).value,"xml-sample-format-1.xml"); + } + //------------helper methods--------------- public static String getTypicalElement(int id){