fixes #3462 (#3921)

Co-authored-by: Antonin Delpeuch <antonin@delpeuch.eu>
This commit is contained in:
Warpeas 2021-05-31 04:24:06 +08:00 committed by GitHub
parent 7dd779e674
commit fed23ec7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 284 additions and 174 deletions

View File

@ -224,7 +224,7 @@ abstract public class ImportingParserBase implements ImportingParser {
return addColumn(project, fileNameColumnName, columnId);
}
private static int addArchiveColumn(Project project) {
/**
 * Adds the column named "Archive" to the given project by delegating to
 * {@code addColumn}, passing 0 as the third argument (presumably the column
 * position -- confirm against {@code addColumn}).
 *
 * @param project the project to add the column to
 * @return whatever {@code addColumn} returns for the new column
 */
protected static int addArchiveColumn(Project project) {
    // TODO: Localize?
    return addColumn(project, "Archive", 0);
}

View File

@ -39,6 +39,8 @@ import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import com.google.refine.model.Cell;
import com.google.refine.model.Row;
import org.apache.commons.lang.NotImplementedException;
import com.fasterxml.jackson.databind.node.ObjectNode;
@ -111,11 +113,24 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
) throws IOException {
final File file = ImportingUtilities.getFile(job, fileRecord);
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
int filenameColumnIndex = -1;
int archiveColumnIndex = -1;
int startingRowCount = project.rows.size();
progress.startFile(fileSource);
try {
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
try {
if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false)
&& archiveFileName != null) {
archiveColumnIndex = addArchiveColumn(project);
}
if (JSONUtilities.getBoolean(options, "includeFileSources", false)) {
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
}
if (useInputStream) {
parseOneFile(project, metadata, job, fileSource, inputStream,
rootColumnGroup, limit, options, exceptions);
@ -129,6 +144,18 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
parseOneFile(project, metadata, job, fileSource, reader,
rootColumnGroup, limit, options, exceptions);
}
// Fill in filename and archive name column for all rows added from this file
int endingRowCount = project.rows.size();
for (int i = startingRowCount; i < endingRowCount; i++) {
Row row = project.rows.get(i);
if (archiveColumnIndex >= 0) {
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
}
if (filenameColumnIndex >= 0) {
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
}
}
} finally {
inputStream.close();
}

View File

@ -0,0 +1,13 @@
{
"library": [
{
"book1": {
"author": {
"author-name": "author1",
"author-dob": "date"
},
"genre": "genre1"
}
}
]
}

View File

@ -33,15 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.*;
import java.util.ArrayList;
import java.lang.reflect.Method;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.ITestResult;
@ -472,6 +470,41 @@ public class JsonImporterTests extends ImporterTest {
assertProjectCreated(project, 63, 63, 8);
}
/**
 * Checks that importing a JSON file with the "includeFileSources" option
 * enabled creates a "File" column and stores the source file name in the
 * first cell of the first imported row.
 *
 * @throws Exception if the fixture cannot be located, copied or parsed
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "json-sample-format-1.json";

    java.net.URL resource = ClassLoader.getSystemResource(FILE);
    Assert.assertNotNull(resource, "Test resource not found on classpath: " + FILE);
    // URL#getPath() keeps percent-escapes (e.g. %20 for a space), which yields a
    // non-existent path when the checkout directory contains such characters.
    // Converting through a URI decodes them properly.
    File source = new File(resource.toURI());

    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(source, new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), "text/json");
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, JsonImporter.ANONYMOUS);
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            "text/json",
            -1,
            options,
            exceptions
    );

    // Import errors are collected in the list rather than thrown; surface them
    // here instead of letting them pass unnoticed.
    Assert.assertTrue(exceptions.isEmpty(), "Unexpected import exceptions: " + exceptions);
    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods---------------
private static String getTypicalElement(int id) {

View File

@ -33,12 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.google.refine.importers.tree.ImportColumnGroup;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
@ -199,6 +201,41 @@ public class XmlImporterTests extends ImporterTest {
Assert.assertEquals(cg0.columnSpan,2);
}
/**
 * Checks that importing an XML file with the "includeFileSources" option
 * enabled creates a "File" column and stores the source file name in the
 * first cell of the first imported row.
 *
 * @throws Exception if the fixture cannot be located, copied or parsed
 */
@Test
public void testAddFileColumn() throws Exception {
    final String FILE = "xml-sample-format-1.xml";
    // This test drives the XML importer, so the format passed along should be
    // the XML one rather than the "text/json" value copied from the JSON test.
    final String FORMAT = "text/xml";

    java.net.URL resource = ClassLoader.getSystemResource(FILE);
    Assert.assertNotNull(resource, "Test resource not found on classpath: " + FILE);
    // URL#getPath() keeps percent-escapes (e.g. %20 for a space), which yields a
    // non-existent path when the checkout directory contains such characters.
    // Converting through a URI decodes them properly.
    File source = new File(resource.toURI());

    // File is assumed to be in job.getRawDataDir(), so copy it there
    FileUtils.copyFile(source, new File(job.getRawDataDir(), FILE));

    List<ObjectNode> fileRecords = new ArrayList<>();
    fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
            String.format("{\"location\": \"%s\",\"fileName\": \"%s\"}", FILE, FILE)));

    ObjectNode options = SUT.createParserUIInitializationData(
            job, new LinkedList<>(), FORMAT);
    ArrayNode path = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.append(path, "library");
    JSONUtilities.safePut(options, "recordPath", path);
    JSONUtilities.safePut(options, "trimStrings", false);
    JSONUtilities.safePut(options, "storeEmptyStrings", true);
    JSONUtilities.safePut(options, "guessCellValueTypes", false);
    JSONUtilities.safePut(options, "includeFileSources", true);

    List<Exception> exceptions = new ArrayList<>();

    SUT.parse(
            project,
            metadata,
            job,
            fileRecords,
            FORMAT,
            -1,
            options,
            exceptions
    );

    // Import errors are collected in the list rather than thrown; surface them
    // here instead of letting them pass unnoticed.
    Assert.assertTrue(exceptions.isEmpty(), "Unexpected import exceptions: " + exceptions);
    Assert.assertNotNull(project.columnModel.getColumnByName("File"));
    Assert.assertEquals(project.rows.get(0).getCell(0).value, FILE);
}
//------------helper methods---------------
public static String getTypicalElement(int id){