* Make sure data directory is directory, not a file * Add a test for zip archive import Also tests the saving of the archive file name and source filename * Add TODOs - no functional changes * Cosmetic cleanups * Revert importer API changes for archive file name parameter Fixes #2963 - restore binary compatibility to the API - hoist the handling of both fileSource and archiveFileName from TabularImportingParserBase and TreeImportingParserBase to ImportingParserBase so that there's only one copy. These 3 classes are all part of the internal implementation, so there should be no compatibility issue. * Revert weird flow of control for import options metadata This reverts the very convoluted control flow that was introduced when adding the input options to the project metadata. Instead the metadata is all handled in the importer framework rather than having to change APIs are have individual importers worry about it. The feature never had test coverage, so that is still to be added. * Add test for import options in project metadata & fix bug Fixes bug where same options object was being reused and overwritten, so all copies in the list ended up the same.
This commit is contained in:
parent
d5abaac6df
commit
83ed9ffdaf
@ -285,7 +285,6 @@ public class DatabaseImportController implements ImportingController {
|
||||
job,
|
||||
new DBQueryResultPreviewReader(job, databaseService, querySource, columns, dbQueryInfo, 100),
|
||||
querySource,
|
||||
"", // archivefilename
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
@ -415,7 +414,6 @@ public class DatabaseImportController implements ImportingController {
|
||||
job,
|
||||
new DBQueryResultImportReader(job, databaseService, querySource, columns, dbQueryInfo, getCreateBatchSize()),
|
||||
querySource,
|
||||
"", //archivefilename,
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
|
@ -142,7 +142,6 @@ public class GDataImporter {
|
||||
job,
|
||||
new WorksheetBatchRowReader(job, fileSource, service, spreadsheetId, worksheetEntry),
|
||||
fileSource,
|
||||
"", //archivefilename
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
|
@ -71,7 +71,6 @@ public class PCAxisImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -89,8 +88,6 @@ public class PCAxisImporter extends TabularImportingParserBase {
|
||||
|
||||
TabularImportingParserBase.readTable(
|
||||
project, metadata, job, dataReader,
|
||||
fileSource, archiveFileName, limit, options, exceptions);
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
||||
fileSource, limit, options, exceptions);
|
||||
}
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ public class WikidataRefineTest extends PowerMockTestCase {
|
||||
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
||||
|
||||
List<Exception> exceptions = new ArrayList<Exception>();
|
||||
importer.parseOneFile(project, metadata, job, "filesource", "archivefile", new StringReader(input), -1, options, exceptions);
|
||||
importer.parseOneFile(project, metadata, job, "filesource", new StringReader(input), -1, options, exceptions);
|
||||
project.update();
|
||||
ProjectManager.singleton.registerProject(project, metadata);
|
||||
|
||||
|
@ -128,7 +128,6 @@ public class ExcelImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
InputStream inputStream,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -216,19 +215,19 @@ public class ExcelImporter extends TabularImportingParserBase {
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: Do we need to preserve the original filename? Take first piece before #?
|
||||
// JSONUtilities.safePut(options, "fileSource", fileSource + "#" + sheet.getSheetName());
|
||||
TabularImportingParserBase.readTable(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
dataReader,
|
||||
fileSource + "#" + sheet.getSheetName(), archiveFileName,
|
||||
fileSource + "#" + sheet.getSheetName(),
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
);
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||
|
@ -83,7 +83,6 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -133,9 +132,7 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
||||
}
|
||||
};
|
||||
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
Copyright 2012,2020 OpenRefine contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -39,6 +40,7 @@ import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -49,9 +51,11 @@ import com.google.refine.importing.EncodingGuesser;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingParser;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
@ -99,6 +103,7 @@ abstract public class ImportingParserBase implements ImportingParser {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Make private? At least protected?
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
@ -112,24 +117,55 @@ abstract public class ImportingParserBase implements ImportingParser {
|
||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
|
||||
int filenameColumnIndex = -1;
|
||||
int archiveColumnIndex = -1;
|
||||
int startingRowCount = project.rows.size();
|
||||
|
||||
progress.startFile(fileSource);
|
||||
try {
|
||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||
try {
|
||||
|
||||
if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false)
|
||||
&& archiveFileName != null) {
|
||||
archiveColumnIndex = addArchiveColumn(project);
|
||||
}
|
||||
if (JSONUtilities.getBoolean(options, "includeFileSources", false)) {
|
||||
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
|
||||
}
|
||||
|
||||
if (useInputStream) {
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
||||
parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions);
|
||||
} else {
|
||||
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
||||
if (commonEncoding != null && commonEncoding.isEmpty()) {
|
||||
commonEncoding = null;
|
||||
}
|
||||
|
||||
|
||||
Reader reader = ImportingUtilities.getReaderFromStream(
|
||||
inputStream, fileRecord, commonEncoding);
|
||||
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
||||
inputStream, fileRecord, commonEncoding);
|
||||
|
||||
parseOneFile(project, metadata, job, fileSource, reader, limit, options, exceptions);
|
||||
}
|
||||
|
||||
// Fill in filename and archive name column for all rows added from this file
|
||||
int endingRowCount = project.rows.size();
|
||||
for (int i = startingRowCount; i < endingRowCount; i++) {
|
||||
Row row = project.rows.get(i);
|
||||
if (archiveColumnIndex >= 0) {
|
||||
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
|
||||
}
|
||||
if (filenameColumnIndex >= 0) {
|
||||
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
|
||||
}
|
||||
}
|
||||
|
||||
ObjectNode fileOptions = options.deepCopy();
|
||||
JSONUtilities.safePut(fileOptions, "fileSource", fileSource);
|
||||
JSONUtilities.safePut(fileOptions, "archiveFileName", archiveFileName);
|
||||
// TODO: This will save a separate copy for each file in the import, but they're
|
||||
// going to be mostly the same
|
||||
metadata.appendImportOptionMetadata(fileOptions);
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
@ -138,51 +174,66 @@ abstract public class ImportingParserBase implements ImportingParser {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parsing method to be implemented by Reader-based parsers.
|
||||
* ie those initialized with useInputStream == false
|
||||
*
|
||||
* @param project
|
||||
* @param metadata
|
||||
* @param job
|
||||
* @param fileSource
|
||||
* @param reader
|
||||
* @param limit
|
||||
* @param options
|
||||
* @param exceptions
|
||||
*/
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
pushImportingOptions(metadata, "fileSource", fileSource, options);
|
||||
pushImportingOptions(metadata, "archiveFileName", archiveFileName, options);
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
private void pushImportingOptions(ProjectMetadata metadata, String key, String value, ObjectNode options) {
|
||||
options.put(key, value);
|
||||
// set the import options to metadata:
|
||||
metadata.appendImportOptionMetadata(options);
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
InputStream inputStream,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
pushImportingOptions(metadata, "fileSource", fileSource, options);
|
||||
pushImportingOptions(metadata, "archiveFileName", archiveFileName, options);
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated 2020-07-21 by tfmorris. This will become private in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
protected static int addFilenameColumn(Project project, boolean archiveColumnAdded) {
|
||||
String fileNameColumnName = "File";
|
||||
String fileNameColumnName = "File"; // TODO: Localize?
|
||||
int columnId = archiveColumnAdded? 1 : 0;
|
||||
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
|
||||
return addColumn(project, fileNameColumnName, columnId);
|
||||
}
|
||||
|
||||
private static int addArchiveColumn(Project project) {
|
||||
String columnName = "Archive"; // TODO: Localize?
|
||||
return addColumn(project, columnName, 0);
|
||||
}
|
||||
|
||||
private static int addColumn(Project project, String columnName, int columnId) {
|
||||
if (project.columnModel.getColumnByName(columnName) == null) {
|
||||
try {
|
||||
project.columnModel.addColumn(
|
||||
columnId, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
|
||||
|
||||
columnId, new Column(project.columnModel.allocateNewCellIndex(), columnName), false);
|
||||
return columnId;
|
||||
} catch (ModelException e) {
|
||||
// Shouldn't happen: We already checked for duplicate name.
|
||||
@ -194,22 +245,5 @@ abstract public class ImportingParserBase implements ImportingParser {
|
||||
}
|
||||
}
|
||||
|
||||
protected static int addArchiveColumn(Project project) {
|
||||
String ArchiveColumnName = "Archive";
|
||||
if (project.columnModel.getColumnByName(ArchiveColumnName) == null) {
|
||||
try {
|
||||
project.columnModel.addColumn(
|
||||
0, new Column(project.columnModel.allocateNewCellIndex(), ArchiveColumnName), false);
|
||||
|
||||
return 0;
|
||||
} catch (ModelException e) {
|
||||
// Shouldn't happen: We already checked for duplicate name.
|
||||
logger.error("ModelException adding Filename column",e);
|
||||
}
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -201,13 +201,11 @@ public class JsonImporter extends TreeImportingParserBase {
|
||||
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, String archiveFileName, InputStream is,
|
||||
ImportingJob job, String fileSource, InputStream is,
|
||||
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options, List<Exception> exceptions) {
|
||||
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName,
|
||||
parseOneFile(project, metadata, job, fileSource,
|
||||
new JSONTreeReader(is), rootColumnGroup, limit, options, exceptions);
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, is, rootColumnGroup, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static public class JSONTreeReader implements TreeReader {
|
||||
|
@ -66,7 +66,6 @@ public class LineBasedImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -130,8 +129,6 @@ public class LineBasedImporter extends TabularImportingParserBase {
|
||||
}
|
||||
};
|
||||
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.marc4j.MarcPermissiveStreamReader;
|
||||
import org.marc4j.MarcWriter;
|
||||
@ -57,7 +58,7 @@ public class MarcImporter extends XmlImporter {
|
||||
}
|
||||
|
||||
@Override
|
||||
public ObjectNode createParserUIInitializationData(ImportingJob job, java.util.List<ObjectNode> fileRecords, String format) {
|
||||
public ObjectNode createParserUIInitializationData(ImportingJob job, List<ObjectNode> fileRecords, String format) {
|
||||
if (fileRecords.size() > 0) {
|
||||
ObjectNode firstFileRecord = fileRecords.get(0);
|
||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||
|
@ -39,7 +39,6 @@ import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@ -66,6 +65,7 @@ import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
public class OdsImporter extends TabularImportingParserBase {
|
||||
final static Logger logger = LoggerFactory.getLogger("open office");
|
||||
|
||||
@ -127,7 +127,6 @@ public class OdsImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
InputStream inputStream,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -195,14 +194,11 @@ public class OdsImporter extends TabularImportingParserBase {
|
||||
job,
|
||||
dataReader,
|
||||
fileSource + "#" + table.getTableName(),
|
||||
archiveFileName,
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
);
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static protected Serializable extractCell(OdfTableCell cell) {
|
||||
|
@ -76,7 +76,8 @@ public class RdfTripleImporter extends ImportingParserBase {
|
||||
this.mode = mode;
|
||||
}
|
||||
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, String archiveFileName,
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource,
|
||||
InputStream input, int limit, ObjectNode options, List<Exception> exceptions) {
|
||||
// create an empty model
|
||||
Model model = ModelFactory.createDefaultModel();
|
||||
@ -164,7 +165,5 @@ public class RdfTripleImporter extends ImportingParserBase {
|
||||
} catch (ModelException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, input, limit, options, exceptions);
|
||||
}
|
||||
}
|
||||
|
@ -87,7 +87,6 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -148,18 +147,17 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||
usedColumnNames = true;
|
||||
return columnNames;
|
||||
} else {
|
||||
String line = lnReader.readLine();
|
||||
if (line == null) {
|
||||
return null;
|
||||
} else {
|
||||
return getCells(line, parser, lnReader);
|
||||
}
|
||||
String line = lnReader.readLine();
|
||||
if (line == null) {
|
||||
return null;
|
||||
} else {
|
||||
return getCells(line, parser, lnReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, lnReader, limit, options, exceptions);
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
|
||||
|
@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -83,7 +82,6 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
ImportingJob job,
|
||||
TableDataReader reader,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
List<Exception> exceptions
|
||||
@ -104,18 +102,8 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
|
||||
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
|
||||
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
|
||||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
||||
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false);
|
||||
boolean includeArchiveFileName = JSONUtilities.getBoolean(options, "includeArchiveFileName", false);
|
||||
|
||||
int filenameColumnIndex = -1, archiveColumnIndex = -1;
|
||||
if (includeArchiveFileName && archiveFileName != null) {
|
||||
archiveColumnIndex = addArchiveColumn(project);
|
||||
}
|
||||
if (includeFileSources) {
|
||||
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
|
||||
}
|
||||
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
boolean hasOurOwnColumnNames = headerLines > 0;
|
||||
|
||||
@ -153,9 +141,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
} else { // data lines
|
||||
Row row = new Row(cells.size());
|
||||
|
||||
if (storeBlankRows) {
|
||||
rowsWithData++;
|
||||
} else if (cells.size() > 0) {
|
||||
if (storeBlankRows || cells.size() > 0) {
|
||||
rowsWithData++;
|
||||
}
|
||||
|
||||
@ -172,7 +158,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
} else if (ExpressionUtils.isNonBlankData(value)) {
|
||||
Serializable storedValue;
|
||||
if (value instanceof String) {
|
||||
if(trimStrings) {
|
||||
if(trimStrings) {
|
||||
value = ((String) value).trim();
|
||||
}
|
||||
storedValue = guessCellValueTypes ?
|
||||
@ -190,17 +176,11 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
row.setCell(column.getCellIndex(), null);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (rowHasData || storeBlankRows) {
|
||||
if (archiveColumnIndex >= 0) {
|
||||
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
|
||||
}
|
||||
if (filenameColumnIndex >= 0) {
|
||||
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
|
||||
}
|
||||
project.rows.add(row);
|
||||
}
|
||||
|
||||
|
||||
if (limit2 > 0 && project.rows.size() >= limit2) {
|
||||
break;
|
||||
}
|
||||
@ -211,9 +191,4 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
exceptions.add(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, String archiveFileName,
|
||||
Reader dataReader, int limit, ObjectNode options, List<Exception> exceptions) {
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, dataReader, limit, options, exceptions);
|
||||
}
|
||||
}
|
||||
|
@ -691,7 +691,6 @@ public class WikitextImporter extends TabularImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
@ -750,7 +749,7 @@ public class WikitextImporter extends TabularImportingParserBase {
|
||||
// TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
|
||||
}
|
||||
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
|
||||
// Add reconciliation statistics
|
||||
if (dataReader.columnReconciled != null) {
|
||||
@ -768,8 +767,6 @@ public class WikitextImporter extends TabularImportingParserBase {
|
||||
exceptions.add(e1);
|
||||
e1.printStackTrace();
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
||||
}
|
||||
|
||||
private StandardReconConfig getReconConfig(String url) {
|
||||
|
@ -193,15 +193,13 @@ public class XmlImporter extends TreeImportingParserBase {
|
||||
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, String archiveFileName, InputStream inputStream,
|
||||
ImportingJob job, String fileSource, InputStream inputStream,
|
||||
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options,
|
||||
List<Exception> exceptions) {
|
||||
|
||||
try {
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName,
|
||||
parseOneFile(project, metadata, job, fileSource,
|
||||
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, rootColumnGroup, limit, options, exceptions);
|
||||
} catch (XMLStreamException e) {
|
||||
exceptions.add(e);
|
||||
} catch (IOException e) {
|
||||
|
@ -31,10 +31,11 @@ public class ImportParameters {
|
||||
protected boolean trimStrings;
|
||||
protected boolean storeEmptyStrings;
|
||||
protected boolean guessDataType;
|
||||
protected boolean includeFileSources;
|
||||
protected String fileSource;
|
||||
protected boolean includeArchiveFileName;
|
||||
protected String archiveFileName;
|
||||
protected boolean includeFileSources = false;
|
||||
protected String fileSource = null;
|
||||
// TODO: What is the compatibility impact of including new fields
|
||||
protected boolean includeArchiveFileName = false;
|
||||
protected String archiveFileName = null;
|
||||
|
||||
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
|
||||
boolean includeFileSources, String fileSource, boolean includeArchiveFileName, String archiveFileName) {
|
||||
@ -47,8 +48,13 @@ public class ImportParameters {
|
||||
this.archiveFileName = archiveFileName;
|
||||
}
|
||||
|
||||
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
|
||||
boolean includeFileSources, String fileSource) {
|
||||
this(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources, fileSource, false, "");
|
||||
}
|
||||
|
||||
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
|
||||
this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "", false, "");
|
||||
this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -40,7 +40,6 @@ import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
import com.google.refine.importers.tree.TreeReaderException;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
@ -112,14 +111,13 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
) throws IOException {
|
||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
|
||||
|
||||
progress.startFile(fileSource);
|
||||
try {
|
||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||
try {
|
||||
if (useInputStream) {
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream,
|
||||
parseOneFile(project, metadata, job, fileSource, inputStream,
|
||||
rootColumnGroup, limit, options, exceptions);
|
||||
} else {
|
||||
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
||||
@ -128,7 +126,7 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
}
|
||||
|
||||
Reader reader = ImportingUtilities.getFileReader(file, fileRecord, commonEncoding);
|
||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, reader,
|
||||
parseOneFile(project, metadata, job, fileSource, reader,
|
||||
rootColumnGroup, limit, options, exceptions);
|
||||
}
|
||||
} finally {
|
||||
@ -150,7 +148,6 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
Reader reader,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
@ -171,15 +168,13 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
InputStream inputStream,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
ObjectNode options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
// throw new NotImplementedException();
|
||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -191,7 +186,6 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
String archiveFileName,
|
||||
TreeReader treeParser,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
@ -216,28 +210,12 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
||||
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
|
||||
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
|
||||
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
||||
|
||||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
||||
boolean includeArchiveFileName = JSONUtilities.getBoolean(options, "includeArchiveFileName", false);
|
||||
int filenameColumnIndex = -1, archiveColumnIndex = -1;
|
||||
if (includeArchiveFileName && archiveFileName != null) {
|
||||
archiveColumnIndex = addArchiveColumn(project);
|
||||
assert archiveColumnIndex == 0;
|
||||
}
|
||||
if (includeFileSources) {
|
||||
filenameColumnIndex = addFilenameColumn(project, includeArchiveFileName);
|
||||
// If the column add fails for any reason, we'll end up overwriting data in the first column
|
||||
if (includeArchiveFileName) {
|
||||
assert filenameColumnIndex == 1;
|
||||
} else {
|
||||
assert filenameColumnIndex == 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
|
||||
new ImportParameters(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources,
|
||||
fileSource, includeArchiveFileName, archiveFileName));
|
||||
fileSource));
|
||||
} catch (Exception e){
|
||||
exceptions.add(e);
|
||||
}
|
||||
|
@ -393,6 +393,7 @@ public class ImportingUtilities {
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||
// TODO: This needs to be refactored to be able to test import from archives
|
||||
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
|
||||
archiveCount++;
|
||||
}
|
||||
@ -641,6 +642,7 @@ public class ImportingUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
// FIXME: This is wasteful of space and time. We should try to process on the fly
|
||||
static public boolean explodeArchive(
|
||||
File rawDataDir,
|
||||
InputStream archiveIS,
|
||||
|
BIN
main/tests/data/movies.zip
Normal file
BIN
main/tests/data/movies.zip
Normal file
Binary file not shown.
@ -57,9 +57,6 @@ import com.fasterxml.jackson.databind.node.BooleanNode;
|
||||
import com.fasterxml.jackson.databind.node.IntNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.fasterxml.jackson.databind.node.TextNode;
|
||||
import com.google.refine.ProjectManager;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.importers.SeparatorBasedImporter;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
@ -176,7 +173,7 @@ public class RefineTest extends PowerMockTestCase {
|
||||
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
||||
|
||||
List<Exception> exceptions = new ArrayList<Exception>();
|
||||
importer.parseOneFile(project, metadata, job, "filesource", "archivefile", new StringReader(input), -1, options, exceptions);
|
||||
importer.parseOneFile(project, metadata, job, "filesource", new StringReader(input), -1, options, exceptions);
|
||||
project.update();
|
||||
ProjectManager.singleton.registerProject(project, metadata);
|
||||
|
||||
|
@ -63,7 +63,6 @@ import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.google.refine.importers.ExcelImporter;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class ExcelImporterTests extends ImporterTest {
|
||||
@ -146,35 +145,6 @@ public class ExcelImporterTests extends ImporterTest {
|
||||
verify(options, times(1)).get("storeBlankCellsAsNulls");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readXlsFromArchiveFile() throws FileNotFoundException, IOException{
|
||||
|
||||
ArrayNode sheets = ParsingUtilities.mapper.createArrayNode();
|
||||
sheets.add(ParsingUtilities.mapper.readTree("{name: \"file-source#Test Sheet 0\", fileNameAndSheetIndex: \"file-source#0\", rows: 31, selected: true}"));
|
||||
whenGetArrayOption("sheets", options, sheets);
|
||||
|
||||
whenGetIntegerOption("ignoreLines", options, 0);
|
||||
whenGetIntegerOption("headerLines", options, 0);
|
||||
whenGetIntegerOption("skipDataLines", options, 0);
|
||||
whenGetIntegerOption("limit", options, -1);
|
||||
whenGetBooleanOption("storeBlankCellsAsNulls",options,true);
|
||||
whenGetBooleanOption("includeArchiveFileName", options, true);
|
||||
|
||||
InputStream stream = new FileInputStream(xlsFile);
|
||||
|
||||
try {
|
||||
parseOneFile(SUT, stream);
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), COLUMNS + 1);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
||||
|
||||
verify(options, times(1)).get("includeArchiveFileName");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readXlsx() throws FileNotFoundException, IOException{
|
||||
|
||||
|
@ -37,7 +37,6 @@ import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.google.refine.importers.FixedWidthImporter;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
@ -108,37 +107,4 @@ public class FixedWidthImporterTests extends ImporterTest {
|
||||
Assert.assertNull(project.rows.get(2).getCellValue(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFixedWidthFromArchiveFile(){
|
||||
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
|
||||
|
||||
ArrayNode columnWidths = ParsingUtilities.mapper.createArrayNode();
|
||||
JSONUtilities.append(columnWidths, 6);
|
||||
JSONUtilities.append(columnWidths, 9);
|
||||
JSONUtilities.append(columnWidths, 5);
|
||||
whenGetArrayOption("columnWidths", options, columnWidths);
|
||||
|
||||
ArrayNode columnNames = ParsingUtilities.mapper.createArrayNode();
|
||||
columnNames.add("Col 1");
|
||||
columnNames.add("Col 2");
|
||||
columnNames.add("Col 3");
|
||||
whenGetArrayOption("columnNames", options, columnNames);
|
||||
|
||||
whenGetIntegerOption("ignoreLines", options, 0);
|
||||
whenGetIntegerOption("headerLines", options, 0);
|
||||
whenGetIntegerOption("skipDataLines", options, 0);
|
||||
whenGetIntegerOption("limit", options, -1);
|
||||
whenGetBooleanOption("storeBlankCellsAsNulls",options,true);
|
||||
whenGetBooleanOption("includeArchiveFileName", options, true);
|
||||
|
||||
try {
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +43,6 @@ import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.RefineServletStub;
|
||||
import com.google.refine.RefineTest;
|
||||
import com.google.refine.importers.ImportingParserBase;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||
@ -58,9 +57,8 @@ public abstract class ImporterTest extends RefineTest {
|
||||
protected ProjectMetadata metadata;
|
||||
protected ImportingJob job;
|
||||
protected RefineServlet servlet;
|
||||
|
||||
protected ObjectNode options;
|
||||
|
||||
|
||||
public void setUp(){
|
||||
//FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||
|
||||
@ -92,7 +90,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
reader,
|
||||
-1,
|
||||
options,
|
||||
@ -109,7 +106,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
inputStream,
|
||||
-1,
|
||||
options,
|
||||
@ -126,7 +122,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
inputStream,
|
||||
-1,
|
||||
options,
|
||||
@ -144,7 +139,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
reader,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
@ -170,7 +164,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
inputStream,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
@ -191,7 +184,6 @@ public abstract class ImporterTest extends RefineTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
reader,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
|
@ -52,7 +52,6 @@ import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.refine.importers.JsonImporter;
|
||||
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importers.tree.TreeReader.Token;
|
||||
@ -135,7 +134,6 @@ public class JsonImporterTests extends ImporterTest {
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
"archive-file",
|
||||
inputStream,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
|
@ -44,7 +44,6 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.importers.RdfTripleImporter;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class RdfTripleImporterTests extends ImporterTest {
|
||||
|
@ -43,7 +43,6 @@ import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.importers.SeparatorBasedImporter;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class TsvCsvImporterTests extends ImporterTest {
|
||||
@ -118,27 +117,6 @@ public class TsvCsvImporterTests extends ImporterTest {
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
||||
}
|
||||
|
||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||
public void readSimpleData_CSV_fromArchiveFileName(String sep){
|
||||
//create input to test with
|
||||
String inputSeparator = sep == null ? "\t" : sep;
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
|
||||
try {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, false,"\"","[]", true);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Exception during file parse",e);
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
||||
}
|
||||
|
||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||
public void readSimpleData_CSV_1Header_1Row_GuessValues(String sep){
|
||||
//create input to test with
|
||||
|
@ -37,7 +37,7 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -48,7 +48,6 @@ import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.refine.importers.XmlImporter;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.ColumnGroup;
|
||||
@ -248,7 +247,7 @@ public class XmlImporterTests extends ImporterTest {
|
||||
|
||||
public static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
ObjectNode options = parser.createParserUIInitializationData(
|
||||
job, new LinkedList<>(), "text/json");
|
||||
job, Collections.emptyList(), "text/xml");
|
||||
|
||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
||||
JSONUtilities.append(path, "library");
|
||||
@ -260,7 +259,7 @@ public class XmlImporterTests extends ImporterTest {
|
||||
|
||||
public static ObjectNode getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
ObjectNode options = parser.createParserUIInitializationData(
|
||||
job, new LinkedList<>(), "text/json");
|
||||
job, Collections.emptyList(), "text/xml");
|
||||
|
||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
||||
JSONUtilities.append(path, "nest");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (C) 2018, OpenRefine contributors
|
||||
* Copyright (C) 2018, 2020 OpenRefine contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -26,17 +26,25 @@
|
||||
******************************************************************************/
|
||||
package com.google.refine.importing;
|
||||
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import okhttp3.HttpUrl;
|
||||
import okhttp3.mockwebserver.MockResponse;
|
||||
import okhttp3.mockwebserver.MockWebServer;
|
||||
|
||||
import org.apache.commons.collections.IteratorUtils;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.entity.mime.MultipartEntityBuilder;
|
||||
@ -50,9 +58,9 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.ImporterTest;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.importers.ImportingParserBase;
|
||||
import com.google.refine.importers.SeparatorBasedImporter;
|
||||
import com.google.refine.importing.ImportingUtilities.Progress;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.google.refine.util.TestUtils;
|
||||
@ -60,8 +68,6 @@ import com.google.refine.util.TestUtils;
|
||||
import javax.servlet.ServletInputStream;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
public class ImportingUtilitiesTests extends ImporterTest {
|
||||
|
||||
@Override
|
||||
@ -80,24 +86,12 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
||||
Assert.assertEquals(pm.getEncoding(), "UTF-8");
|
||||
Assert.assertTrue(pm.getTags().length == 0);
|
||||
}
|
||||
|
||||
|
||||
@Test(expectedExceptions=IllegalArgumentException.class)
|
||||
public void testZipSlip() throws IOException {
|
||||
File tempDir = TestUtils.createTempDirectory("openrefine-zip-slip-test");
|
||||
File tempDir = TestUtils.createTempDirectory("openrefine-zip-slip-test");
|
||||
// For CVE-2018-19859, issue #1840
|
||||
ImportingUtilities.allocateFile(tempDir, "../../tmp/script.sh");
|
||||
}
|
||||
|
||||
private ObjectNode getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||
ObjectNode options = parser.createParserUIInitializationData(
|
||||
job, new LinkedList<>(), "text/json");
|
||||
|
||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
||||
path.add("results");
|
||||
path.add("result");
|
||||
|
||||
JSONUtilities.safePut(options, "recordPath", path);
|
||||
return options;
|
||||
ImportingUtilities.allocateFile(tempDir, "../../tmp/script.sh");
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -154,6 +148,8 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
||||
Assert.fail("No Exception was thrown");
|
||||
} catch (Exception exception) {
|
||||
Assert.assertEquals(MESSAGE, exception.getMessage());
|
||||
} finally {
|
||||
server.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,4 +167,91 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* This tests both exploding a zip archive into it's constituent files
|
||||
* as well as importing them all (both) and making sure that the
|
||||
* recording of archive names and file names works correctly.
|
||||
*
|
||||
* It's kind of a lot to have in one test, but it's a sequence
|
||||
* of steps that need to be done in order.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test
|
||||
public void importArchive() throws IOException{
|
||||
String filename = "movies.zip";
|
||||
String filepath = ClassLoader.getSystemResource(filename).getPath();
|
||||
// Make a copy in our data directory where it's expected
|
||||
File tmp = File.createTempFile("openrefine-test-movies", ".zip", job.getRawDataDir());
|
||||
tmp.deleteOnExit();
|
||||
FileUtils.copyFile(new File(filepath), tmp);
|
||||
|
||||
Progress dummyProgress = new Progress() {
|
||||
@Override
|
||||
public void setProgress(String message, int percent) {}
|
||||
|
||||
@Override
|
||||
public boolean isCanceled() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
ArrayNode fileRecords = ParsingUtilities.mapper.createArrayNode();
|
||||
ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
|
||||
JSONUtilities.safePut(fileRecord, "origin", "upload");
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", "UTF-8");
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", "application/x-zip-compressed");
|
||||
JSONUtilities.safePut(fileRecord, "fileName", filename);
|
||||
JSONUtilities.safePut(fileRecord, "location", tmp.getName());
|
||||
|
||||
assertTrue(ImportingUtilities.postProcessRetrievedFile(job.getRawDataDir(), tmp, fileRecord, fileRecords, dummyProgress));
|
||||
assertEquals(fileRecords.size(), 2);
|
||||
assertEquals(fileRecords.get(0).get("fileName").asText(), "movies-condensed.tsv");
|
||||
assertEquals(fileRecords.get(0).get("archiveFileName").asText(), "movies.zip");
|
||||
assertEquals(fileRecords.get(1).get("fileName").asText(), "movies.tsv");
|
||||
|
||||
ObjectNode options = ParsingUtilities.mapper.createObjectNode();
|
||||
JSONUtilities.safePut(options, "includeArchiveFileName", true);
|
||||
JSONUtilities.safePut(options, "includeFileSources", true);
|
||||
|
||||
ImportingParserBase parser = new SeparatorBasedImporter();
|
||||
List<Exception> exceptions = new ArrayList<Exception>();
|
||||
parser.parse(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
IteratorUtils.toList(fileRecords.iterator()),
|
||||
"tsv",
|
||||
-1,
|
||||
options,
|
||||
exceptions
|
||||
);
|
||||
assertEquals(exceptions.size(), 0);
|
||||
project.update();
|
||||
|
||||
assertEquals(project.columnModel.columns.get(0).getName(),"Archive");
|
||||
assertEquals(project.rows.get(0).getCell(0).getValue(),"movies.zip");
|
||||
assertEquals(project.columnModel.columns.get(1).getName(),"File");
|
||||
assertEquals(project.rows.get(0).getCell(1).getValue(),"movies-condensed.tsv");
|
||||
assertEquals(project.columnModel.columns.get(2).getName(),"name");
|
||||
assertEquals(project.rows.get(0).getCell(2).getValue(),"Wayne's World");
|
||||
|
||||
// Make sure we imported both files contained in the zip file
|
||||
assertEquals(project.rows.size(), 252);
|
||||
|
||||
ArrayNode importOptionsArray = metadata.getImportOptionMetadata();
|
||||
assertEquals(importOptionsArray.size(), 2);
|
||||
ObjectNode importOptions = (ObjectNode)importOptionsArray.get(0);
|
||||
assertEquals(importOptions.get("archiveFileName").asText(), "movies.zip");
|
||||
assertEquals(importOptions.get("fileSource").asText(), "movies-condensed.tsv");
|
||||
assertTrue(importOptions.get("includeFileSources").asBoolean());
|
||||
assertTrue(importOptions.get("includeArchiveFileName").asBoolean());
|
||||
|
||||
importOptions = (ObjectNode)importOptionsArray.get(1);
|
||||
assertEquals(importOptions.get("fileSource").asText(), "movies.tsv");
|
||||
assertEquals(importOptions.get("archiveFileName").asText(), "movies.zip");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -63,7 +63,6 @@ import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.operations.OperationRegistry;
|
||||
import com.google.refine.operations.cell.KeyValueColumnizeOperation;
|
||||
import com.google.refine.process.Process;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.google.refine.util.TestUtils;
|
||||
@ -202,7 +201,7 @@ public class KeyValueColumnizeTests extends RefineTest {
|
||||
+ "price,3.1\n";
|
||||
prepareOptions(",", 20, 0, 0, 1, false, false);
|
||||
List<Exception> exceptions = new ArrayList<Exception>();
|
||||
importer.parseOneFile(project, pm, job, "filesource", "archivefile", new StringReader(csv), -1, options, exceptions);
|
||||
importer.parseOneFile(project, pm, job, "filesource", new StringReader(csv), -1, options, exceptions);
|
||||
project.update();
|
||||
ProjectManager.singleton.registerProject(project, pm);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user