* Make sure data directory is a directory, not a file * Add a test for zip archive import Also tests the saving of the archive file name and source filename * Add TODOs - no functional changes * Cosmetic cleanups * Revert importer API changes for archive file name parameter Fixes #2963 - restore binary compatibility to the API - hoist the handling of both fileSource and archiveFileName from TabularImportingParserBase and TreeImportingParserBase to ImportingParserBase so that there's only one copy. These 3 classes are all part of the internal implementation, so there should be no compatibility issue. * Revert weird flow of control for import options metadata This reverts the very convoluted control flow that was introduced when adding the input options to the project metadata. Instead, the metadata is all handled in the importer framework rather than having to change APIs or have individual importers worry about it. The feature never had test coverage, so that is still to be added. * Add test for import options in project metadata & fix bug Fixes bug where the same options object was being reused and overwritten, so all copies in the list ended up the same.
This commit is contained in:
parent
d5abaac6df
commit
83ed9ffdaf
@ -285,7 +285,6 @@ public class DatabaseImportController implements ImportingController {
|
|||||||
job,
|
job,
|
||||||
new DBQueryResultPreviewReader(job, databaseService, querySource, columns, dbQueryInfo, 100),
|
new DBQueryResultPreviewReader(job, databaseService, querySource, columns, dbQueryInfo, 100),
|
||||||
querySource,
|
querySource,
|
||||||
"", // archivefilename
|
|
||||||
limit,
|
limit,
|
||||||
options,
|
options,
|
||||||
exceptions
|
exceptions
|
||||||
@ -415,7 +414,6 @@ public class DatabaseImportController implements ImportingController {
|
|||||||
job,
|
job,
|
||||||
new DBQueryResultImportReader(job, databaseService, querySource, columns, dbQueryInfo, getCreateBatchSize()),
|
new DBQueryResultImportReader(job, databaseService, querySource, columns, dbQueryInfo, getCreateBatchSize()),
|
||||||
querySource,
|
querySource,
|
||||||
"", //archivefilename,
|
|
||||||
limit,
|
limit,
|
||||||
options,
|
options,
|
||||||
exceptions
|
exceptions
|
||||||
|
@ -142,7 +142,6 @@ public class GDataImporter {
|
|||||||
job,
|
job,
|
||||||
new WorksheetBatchRowReader(job, fileSource, service, spreadsheetId, worksheetEntry),
|
new WorksheetBatchRowReader(job, fileSource, service, spreadsheetId, worksheetEntry),
|
||||||
fileSource,
|
fileSource,
|
||||||
"", //archivefilename
|
|
||||||
limit,
|
limit,
|
||||||
options,
|
options,
|
||||||
exceptions
|
exceptions
|
||||||
|
@ -71,7 +71,6 @@ public class PCAxisImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -89,8 +88,6 @@ public class PCAxisImporter extends TabularImportingParserBase {
|
|||||||
|
|
||||||
TabularImportingParserBase.readTable(
|
TabularImportingParserBase.readTable(
|
||||||
project, metadata, job, dataReader,
|
project, metadata, job, dataReader,
|
||||||
fileSource, archiveFileName, limit, options, exceptions);
|
fileSource, limit, options, exceptions);
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ public class WikidataRefineTest extends PowerMockTestCase {
|
|||||||
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
||||||
|
|
||||||
List<Exception> exceptions = new ArrayList<Exception>();
|
List<Exception> exceptions = new ArrayList<Exception>();
|
||||||
importer.parseOneFile(project, metadata, job, "filesource", "archivefile", new StringReader(input), -1, options, exceptions);
|
importer.parseOneFile(project, metadata, job, "filesource", new StringReader(input), -1, options, exceptions);
|
||||||
project.update();
|
project.update();
|
||||||
ProjectManager.singleton.registerProject(project, metadata);
|
ProjectManager.singleton.registerProject(project, metadata);
|
||||||
|
|
||||||
|
@ -128,7 +128,6 @@ public class ExcelImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -216,19 +215,19 @@ public class ExcelImporter extends TabularImportingParserBase {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// TODO: Do we need to preserve the original filename? Take first piece before #?
|
||||||
|
// JSONUtilities.safePut(options, "fileSource", fileSource + "#" + sheet.getSheetName());
|
||||||
TabularImportingParserBase.readTable(
|
TabularImportingParserBase.readTable(
|
||||||
project,
|
project,
|
||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
dataReader,
|
dataReader,
|
||||||
fileSource + "#" + sheet.getSheetName(), archiveFileName,
|
fileSource + "#" + sheet.getSheetName(),
|
||||||
limit,
|
limit,
|
||||||
options,
|
options,
|
||||||
exceptions
|
exceptions
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||||
|
@ -83,7 +83,6 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -133,9 +132,7 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2011, Google Inc.
|
Copyright 2011, Google Inc.
|
||||||
|
Copyright 2012,2020 OpenRefine contributors
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -39,6 +40,7 @@ import java.io.InputStream;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -49,9 +51,11 @@ import com.google.refine.importing.EncodingGuesser;
|
|||||||
import com.google.refine.importing.ImportingJob;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.importing.ImportingParser;
|
import com.google.refine.importing.ImportingParser;
|
||||||
import com.google.refine.importing.ImportingUtilities;
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.ModelException;
|
import com.google.refine.model.ModelException;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
@ -99,6 +103,7 @@ abstract public class ImportingParserBase implements ImportingParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Make private? At least protected?
|
||||||
public void parseOneFile(
|
public void parseOneFile(
|
||||||
Project project,
|
Project project,
|
||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
@ -112,24 +117,55 @@ abstract public class ImportingParserBase implements ImportingParser {
|
|||||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||||
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
|
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
|
||||||
|
int filenameColumnIndex = -1;
|
||||||
|
int archiveColumnIndex = -1;
|
||||||
|
int startingRowCount = project.rows.size();
|
||||||
|
|
||||||
progress.startFile(fileSource);
|
progress.startFile(fileSource);
|
||||||
try {
|
try {
|
||||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
if (JSONUtilities.getBoolean(options, "includeArchiveFileName", false)
|
||||||
|
&& archiveFileName != null) {
|
||||||
|
archiveColumnIndex = addArchiveColumn(project);
|
||||||
|
}
|
||||||
|
if (JSONUtilities.getBoolean(options, "includeFileSources", false)) {
|
||||||
|
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
|
||||||
|
}
|
||||||
|
|
||||||
if (useInputStream) {
|
if (useInputStream) {
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions);
|
||||||
} else {
|
} else {
|
||||||
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
||||||
if (commonEncoding != null && commonEncoding.isEmpty()) {
|
if (commonEncoding != null && commonEncoding.isEmpty()) {
|
||||||
commonEncoding = null;
|
commonEncoding = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
Reader reader = ImportingUtilities.getReaderFromStream(
|
Reader reader = ImportingUtilities.getReaderFromStream(
|
||||||
inputStream, fileRecord, commonEncoding);
|
inputStream, fileRecord, commonEncoding);
|
||||||
|
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
parseOneFile(project, metadata, job, fileSource, reader, limit, options, exceptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fill in filename and archive name column for all rows added from this file
|
||||||
|
int endingRowCount = project.rows.size();
|
||||||
|
for (int i = startingRowCount; i < endingRowCount; i++) {
|
||||||
|
Row row = project.rows.get(i);
|
||||||
|
if (archiveColumnIndex >= 0) {
|
||||||
|
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
|
||||||
|
}
|
||||||
|
if (filenameColumnIndex >= 0) {
|
||||||
|
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ObjectNode fileOptions = options.deepCopy();
|
||||||
|
JSONUtilities.safePut(fileOptions, "fileSource", fileSource);
|
||||||
|
JSONUtilities.safePut(fileOptions, "archiveFileName", archiveFileName);
|
||||||
|
// TODO: This will save a separate copy for each file in the import, but they're
|
||||||
|
// going to be mostly the same
|
||||||
|
metadata.appendImportOptionMetadata(fileOptions);
|
||||||
} finally {
|
} finally {
|
||||||
inputStream.close();
|
inputStream.close();
|
||||||
}
|
}
|
||||||
@ -138,51 +174,66 @@ abstract public class ImportingParserBase implements ImportingParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parsing method to be implemented by Reader-based parsers.
|
||||||
|
* ie those initialized with useInputStream == false
|
||||||
|
*
|
||||||
|
* @param project
|
||||||
|
* @param metadata
|
||||||
|
* @param job
|
||||||
|
* @param fileSource
|
||||||
|
* @param reader
|
||||||
|
* @param limit
|
||||||
|
* @param options
|
||||||
|
* @param exceptions
|
||||||
|
*/
|
||||||
public void parseOneFile(
|
public void parseOneFile(
|
||||||
Project project,
|
Project project,
|
||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
) {
|
) {
|
||||||
pushImportingOptions(metadata, "fileSource", fileSource, options);
|
throw new NotImplementedException();
|
||||||
pushImportingOptions(metadata, "archiveFileName", archiveFileName, options);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pushImportingOptions(ProjectMetadata metadata, String key, String value, ObjectNode options) {
|
|
||||||
options.put(key, value);
|
|
||||||
// set the import options to metadata:
|
|
||||||
metadata.appendImportOptionMetadata(options);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void parseOneFile(
|
public void parseOneFile(
|
||||||
Project project,
|
Project project,
|
||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
) {
|
) {
|
||||||
pushImportingOptions(metadata, "fileSource", fileSource, options);
|
throw new NotImplementedException();
|
||||||
pushImportingOptions(metadata, "archiveFileName", archiveFileName, options);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated 2020-07-21 by tfmorris. This will become private in a future release.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
protected static int addFilenameColumn(Project project, boolean archiveColumnAdded) {
|
protected static int addFilenameColumn(Project project, boolean archiveColumnAdded) {
|
||||||
String fileNameColumnName = "File";
|
String fileNameColumnName = "File"; // TODO: Localize?
|
||||||
int columnId = archiveColumnAdded? 1 : 0;
|
int columnId = archiveColumnAdded? 1 : 0;
|
||||||
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
|
return addColumn(project, fileNameColumnName, columnId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int addArchiveColumn(Project project) {
|
||||||
|
String columnName = "Archive"; // TODO: Localize?
|
||||||
|
return addColumn(project, columnName, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int addColumn(Project project, String columnName, int columnId) {
|
||||||
|
if (project.columnModel.getColumnByName(columnName) == null) {
|
||||||
try {
|
try {
|
||||||
project.columnModel.addColumn(
|
project.columnModel.addColumn(
|
||||||
columnId, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
|
columnId, new Column(project.columnModel.allocateNewCellIndex(), columnName), false);
|
||||||
|
|
||||||
return columnId;
|
return columnId;
|
||||||
} catch (ModelException e) {
|
} catch (ModelException e) {
|
||||||
// Shouldn't happen: We already checked for duplicate name.
|
// Shouldn't happen: We already checked for duplicate name.
|
||||||
@ -194,22 +245,5 @@ abstract public class ImportingParserBase implements ImportingParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static int addArchiveColumn(Project project) {
|
|
||||||
String ArchiveColumnName = "Archive";
|
|
||||||
if (project.columnModel.getColumnByName(ArchiveColumnName) == null) {
|
|
||||||
try {
|
|
||||||
project.columnModel.addColumn(
|
|
||||||
0, new Column(project.columnModel.allocateNewCellIndex(), ArchiveColumnName), false);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
} catch (ModelException e) {
|
|
||||||
// Shouldn't happen: We already checked for duplicate name.
|
|
||||||
logger.error("ModelException adding Filename column",e);
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -201,13 +201,11 @@ public class JsonImporter extends TreeImportingParserBase {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
ImportingJob job, String fileSource, String archiveFileName, InputStream is,
|
ImportingJob job, String fileSource, InputStream is,
|
||||||
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options, List<Exception> exceptions) {
|
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options, List<Exception> exceptions) {
|
||||||
|
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName,
|
parseOneFile(project, metadata, job, fileSource,
|
||||||
new JSONTreeReader(is), rootColumnGroup, limit, options, exceptions);
|
new JSONTreeReader(is), rootColumnGroup, limit, options, exceptions);
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, is, rootColumnGroup, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static public class JSONTreeReader implements TreeReader {
|
static public class JSONTreeReader implements TreeReader {
|
||||||
|
@ -66,7 +66,6 @@ public class LineBasedImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -130,8 +129,6 @@ public class LineBasedImporter extends TabularImportingParserBase {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -39,6 +39,7 @@ import java.io.FileOutputStream;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.marc4j.MarcPermissiveStreamReader;
|
import org.marc4j.MarcPermissiveStreamReader;
|
||||||
import org.marc4j.MarcWriter;
|
import org.marc4j.MarcWriter;
|
||||||
@ -57,7 +58,7 @@ public class MarcImporter extends XmlImporter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ObjectNode createParserUIInitializationData(ImportingJob job, java.util.List<ObjectNode> fileRecords, String format) {
|
public ObjectNode createParserUIInitializationData(ImportingJob job, List<ObjectNode> fileRecords, String format) {
|
||||||
if (fileRecords.size() > 0) {
|
if (fileRecords.size() > 0) {
|
||||||
ObjectNode firstFileRecord = fileRecords.get(0);
|
ObjectNode firstFileRecord = fileRecords.get(0);
|
||||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||||
|
@ -39,7 +39,6 @@ import java.io.FileNotFoundException;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.time.ZoneOffset;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -66,6 +65,7 @@ import com.google.refine.util.JSONUtilities;
|
|||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public class OdsImporter extends TabularImportingParserBase {
|
public class OdsImporter extends TabularImportingParserBase {
|
||||||
final static Logger logger = LoggerFactory.getLogger("open office");
|
final static Logger logger = LoggerFactory.getLogger("open office");
|
||||||
|
|
||||||
@ -127,7 +127,6 @@ public class OdsImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -195,14 +194,11 @@ public class OdsImporter extends TabularImportingParserBase {
|
|||||||
job,
|
job,
|
||||||
dataReader,
|
dataReader,
|
||||||
fileSource + "#" + table.getTableName(),
|
fileSource + "#" + table.getTableName(),
|
||||||
archiveFileName,
|
|
||||||
limit,
|
limit,
|
||||||
options,
|
options,
|
||||||
exceptions
|
exceptions
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected Serializable extractCell(OdfTableCell cell) {
|
static protected Serializable extractCell(OdfTableCell cell) {
|
||||||
|
@ -76,7 +76,8 @@ public class RdfTripleImporter extends ImportingParserBase {
|
|||||||
this.mode = mode;
|
this.mode = mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, String archiveFileName,
|
@Override
|
||||||
|
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource,
|
||||||
InputStream input, int limit, ObjectNode options, List<Exception> exceptions) {
|
InputStream input, int limit, ObjectNode options, List<Exception> exceptions) {
|
||||||
// create an empty model
|
// create an empty model
|
||||||
Model model = ModelFactory.createDefaultModel();
|
Model model = ModelFactory.createDefaultModel();
|
||||||
@ -164,7 +165,5 @@ public class RdfTripleImporter extends ImportingParserBase {
|
|||||||
} catch (ModelException e) {
|
} catch (ModelException e) {
|
||||||
exceptions.add(e);
|
exceptions.add(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, input, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -87,7 +87,6 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -148,18 +147,17 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
usedColumnNames = true;
|
usedColumnNames = true;
|
||||||
return columnNames;
|
return columnNames;
|
||||||
} else {
|
} else {
|
||||||
String line = lnReader.readLine();
|
String line = lnReader.readLine();
|
||||||
if (line == null) {
|
if (line == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return getCells(line, parser, lnReader);
|
return getCells(line, parser, lnReader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, lnReader, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
|
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
|
||||||
|
@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -83,7 +82,6 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
TableDataReader reader,
|
TableDataReader reader,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
@ -104,18 +102,8 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
|
|
||||||
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
|
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
|
||||||
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
|
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
|
||||||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
|
||||||
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false);
|
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false);
|
||||||
boolean includeArchiveFileName = JSONUtilities.getBoolean(options, "includeArchiveFileName", false);
|
|
||||||
|
|
||||||
int filenameColumnIndex = -1, archiveColumnIndex = -1;
|
|
||||||
if (includeArchiveFileName && archiveFileName != null) {
|
|
||||||
archiveColumnIndex = addArchiveColumn(project);
|
|
||||||
}
|
|
||||||
if (includeFileSources) {
|
|
||||||
filenameColumnIndex = addFilenameColumn(project, archiveColumnIndex >=0);
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> columnNames = new ArrayList<String>();
|
List<String> columnNames = new ArrayList<String>();
|
||||||
boolean hasOurOwnColumnNames = headerLines > 0;
|
boolean hasOurOwnColumnNames = headerLines > 0;
|
||||||
|
|
||||||
@ -153,9 +141,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
} else { // data lines
|
} else { // data lines
|
||||||
Row row = new Row(cells.size());
|
Row row = new Row(cells.size());
|
||||||
|
|
||||||
if (storeBlankRows) {
|
if (storeBlankRows || cells.size() > 0) {
|
||||||
rowsWithData++;
|
|
||||||
} else if (cells.size() > 0) {
|
|
||||||
rowsWithData++;
|
rowsWithData++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,7 +158,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
} else if (ExpressionUtils.isNonBlankData(value)) {
|
} else if (ExpressionUtils.isNonBlankData(value)) {
|
||||||
Serializable storedValue;
|
Serializable storedValue;
|
||||||
if (value instanceof String) {
|
if (value instanceof String) {
|
||||||
if(trimStrings) {
|
if(trimStrings) {
|
||||||
value = ((String) value).trim();
|
value = ((String) value).trim();
|
||||||
}
|
}
|
||||||
storedValue = guessCellValueTypes ?
|
storedValue = guessCellValueTypes ?
|
||||||
@ -190,17 +176,11 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
row.setCell(column.getCellIndex(), null);
|
row.setCell(column.getCellIndex(), null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rowHasData || storeBlankRows) {
|
if (rowHasData || storeBlankRows) {
|
||||||
if (archiveColumnIndex >= 0) {
|
|
||||||
row.setCell(archiveColumnIndex, new Cell(archiveFileName, null));
|
|
||||||
}
|
|
||||||
if (filenameColumnIndex >= 0) {
|
|
||||||
row.setCell(filenameColumnIndex, new Cell(fileSource, null));
|
|
||||||
}
|
|
||||||
project.rows.add(row);
|
project.rows.add(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (limit2 > 0 && project.rows.size() >= limit2) {
|
if (limit2 > 0 && project.rows.size() >= limit2) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -211,9 +191,4 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
|
|||||||
exceptions.add(e);
|
exceptions.add(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, String archiveFileName,
|
|
||||||
Reader dataReader, int limit, ObjectNode options, List<Exception> exceptions) {
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, dataReader, limit, options, exceptions);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -691,7 +691,6 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
@ -750,7 +749,7 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
// TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
|
// TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
|
||||||
}
|
}
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, archiveFileName, limit, options, exceptions);
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
|
||||||
// Add reconciliation statistics
|
// Add reconciliation statistics
|
||||||
if (dataReader.columnReconciled != null) {
|
if (dataReader.columnReconciled != null) {
|
||||||
@ -768,8 +767,6 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
exceptions.add(e1);
|
exceptions.add(e1);
|
||||||
e1.printStackTrace();
|
e1.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, reader, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private StandardReconConfig getReconConfig(String url) {
|
private StandardReconConfig getReconConfig(String url) {
|
||||||
|
@ -193,15 +193,13 @@ public class XmlImporter extends TreeImportingParserBase {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
ImportingJob job, String fileSource, String archiveFileName, InputStream inputStream,
|
ImportingJob job, String fileSource, InputStream inputStream,
|
||||||
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options,
|
ImportColumnGroup rootColumnGroup, int limit, ObjectNode options,
|
||||||
List<Exception> exceptions) {
|
List<Exception> exceptions) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName,
|
parseOneFile(project, metadata, job, fileSource,
|
||||||
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
|
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
|
||||||
|
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, rootColumnGroup, limit, options, exceptions);
|
|
||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
exceptions.add(e);
|
exceptions.add(e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -31,10 +31,11 @@ public class ImportParameters {
|
|||||||
protected boolean trimStrings;
|
protected boolean trimStrings;
|
||||||
protected boolean storeEmptyStrings;
|
protected boolean storeEmptyStrings;
|
||||||
protected boolean guessDataType;
|
protected boolean guessDataType;
|
||||||
protected boolean includeFileSources;
|
protected boolean includeFileSources = false;
|
||||||
protected String fileSource;
|
protected String fileSource = null;
|
||||||
protected boolean includeArchiveFileName;
|
// TODO: What is the compatibility impact of including new fields
|
||||||
protected String archiveFileName;
|
protected boolean includeArchiveFileName = false;
|
||||||
|
protected String archiveFileName = null;
|
||||||
|
|
||||||
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
|
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
|
||||||
boolean includeFileSources, String fileSource, boolean includeArchiveFileName, String archiveFileName) {
|
boolean includeFileSources, String fileSource, boolean includeArchiveFileName, String archiveFileName) {
|
||||||
@ -47,8 +48,13 @@ public class ImportParameters {
|
|||||||
this.archiveFileName = archiveFileName;
|
this.archiveFileName = archiveFileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
|
||||||
|
boolean includeFileSources, String fileSource) {
|
||||||
|
this(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources, fileSource, false, "");
|
||||||
|
}
|
||||||
|
|
||||||
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
|
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
|
||||||
this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "", false, "");
|
this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,6 @@ import java.io.Reader;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang.NotImplementedException;
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
import com.google.refine.importers.tree.TreeReaderException;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
@ -112,14 +111,13 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
) throws IOException {
|
) throws IOException {
|
||||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||||
final String archiveFileName = ImportingUtilities.getArchiveFileName(fileRecord);
|
|
||||||
|
|
||||||
progress.startFile(fileSource);
|
progress.startFile(fileSource);
|
||||||
try {
|
try {
|
||||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||||
try {
|
try {
|
||||||
if (useInputStream) {
|
if (useInputStream) {
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream,
|
parseOneFile(project, metadata, job, fileSource, inputStream,
|
||||||
rootColumnGroup, limit, options, exceptions);
|
rootColumnGroup, limit, options, exceptions);
|
||||||
} else {
|
} else {
|
||||||
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
String commonEncoding = JSONUtilities.getString(options, "encoding", null);
|
||||||
@ -128,7 +126,7 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Reader reader = ImportingUtilities.getFileReader(file, fileRecord, commonEncoding);
|
Reader reader = ImportingUtilities.getFileReader(file, fileRecord, commonEncoding);
|
||||||
parseOneFile(project, metadata, job, fileSource, archiveFileName, reader,
|
parseOneFile(project, metadata, job, fileSource, reader,
|
||||||
rootColumnGroup, limit, options, exceptions);
|
rootColumnGroup, limit, options, exceptions);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
@ -150,7 +148,6 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
Reader reader,
|
Reader reader,
|
||||||
ImportColumnGroup rootColumnGroup,
|
ImportColumnGroup rootColumnGroup,
|
||||||
int limit,
|
int limit,
|
||||||
@ -171,15 +168,13 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
ImportColumnGroup rootColumnGroup,
|
ImportColumnGroup rootColumnGroup,
|
||||||
int limit,
|
int limit,
|
||||||
ObjectNode options,
|
ObjectNode options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
) {
|
) {
|
||||||
// throw new NotImplementedException();
|
throw new NotImplementedException();
|
||||||
super.parseOneFile(project, metadata, job, fileSource, archiveFileName, inputStream, limit, options, exceptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -191,7 +186,6 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
ProjectMetadata metadata,
|
ProjectMetadata metadata,
|
||||||
ImportingJob job,
|
ImportingJob job,
|
||||||
String fileSource,
|
String fileSource,
|
||||||
String archiveFileName,
|
|
||||||
TreeReader treeParser,
|
TreeReader treeParser,
|
||||||
ImportColumnGroup rootColumnGroup,
|
ImportColumnGroup rootColumnGroup,
|
||||||
int limit,
|
int limit,
|
||||||
@ -216,28 +210,12 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
|
|||||||
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
|
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
|
||||||
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
|
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
|
||||||
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
||||||
|
|
||||||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
||||||
boolean includeArchiveFileName = JSONUtilities.getBoolean(options, "includeArchiveFileName", false);
|
|
||||||
int filenameColumnIndex = -1, archiveColumnIndex = -1;
|
|
||||||
if (includeArchiveFileName && archiveFileName != null) {
|
|
||||||
archiveColumnIndex = addArchiveColumn(project);
|
|
||||||
assert archiveColumnIndex == 0;
|
|
||||||
}
|
|
||||||
if (includeFileSources) {
|
|
||||||
filenameColumnIndex = addFilenameColumn(project, includeArchiveFileName);
|
|
||||||
// If the column add fails for any reason, we'll end up overwriting data in the first column
|
|
||||||
if (includeArchiveFileName) {
|
|
||||||
assert filenameColumnIndex == 1;
|
|
||||||
} else {
|
|
||||||
assert filenameColumnIndex == 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
|
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
|
||||||
new ImportParameters(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources,
|
new ImportParameters(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources,
|
||||||
fileSource, includeArchiveFileName, archiveFileName));
|
fileSource));
|
||||||
} catch (Exception e){
|
} catch (Exception e){
|
||||||
exceptions.add(e);
|
exceptions.add(e);
|
||||||
}
|
}
|
||||||
|
@ -393,6 +393,7 @@ public class ImportingUtilities {
|
|||||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||||
|
// TODO: This needs to be refactored to be able to test import from archives
|
||||||
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
|
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
|
||||||
archiveCount++;
|
archiveCount++;
|
||||||
}
|
}
|
||||||
@ -641,6 +642,7 @@ public class ImportingUtilities {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: This is wasteful of space and time. We should try to process on the fly
|
||||||
static public boolean explodeArchive(
|
static public boolean explodeArchive(
|
||||||
File rawDataDir,
|
File rawDataDir,
|
||||||
InputStream archiveIS,
|
InputStream archiveIS,
|
||||||
|
BIN
main/tests/data/movies.zip
Normal file
BIN
main/tests/data/movies.zip
Normal file
Binary file not shown.
@ -57,9 +57,6 @@ import com.fasterxml.jackson.databind.node.BooleanNode;
|
|||||||
import com.fasterxml.jackson.databind.node.IntNode;
|
import com.fasterxml.jackson.databind.node.IntNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.fasterxml.jackson.databind.node.TextNode;
|
import com.fasterxml.jackson.databind.node.TextNode;
|
||||||
import com.google.refine.ProjectManager;
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.importers.SeparatorBasedImporter;
|
import com.google.refine.importers.SeparatorBasedImporter;
|
||||||
import com.google.refine.importing.ImportingJob;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.importing.ImportingManager;
|
import com.google.refine.importing.ImportingManager;
|
||||||
@ -176,7 +173,7 @@ public class RefineTest extends PowerMockTestCase {
|
|||||||
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
SeparatorBasedImporter importer = new SeparatorBasedImporter();
|
||||||
|
|
||||||
List<Exception> exceptions = new ArrayList<Exception>();
|
List<Exception> exceptions = new ArrayList<Exception>();
|
||||||
importer.parseOneFile(project, metadata, job, "filesource", "archivefile", new StringReader(input), -1, options, exceptions);
|
importer.parseOneFile(project, metadata, job, "filesource", new StringReader(input), -1, options, exceptions);
|
||||||
project.update();
|
project.update();
|
||||||
ProjectManager.singleton.registerProject(project, metadata);
|
ProjectManager.singleton.registerProject(project, metadata);
|
||||||
|
|
||||||
|
@ -63,7 +63,6 @@ import org.testng.annotations.BeforeTest;
|
|||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.google.refine.importers.ExcelImporter;
|
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class ExcelImporterTests extends ImporterTest {
|
public class ExcelImporterTests extends ImporterTest {
|
||||||
@ -146,35 +145,6 @@ public class ExcelImporterTests extends ImporterTest {
|
|||||||
verify(options, times(1)).get("storeBlankCellsAsNulls");
|
verify(options, times(1)).get("storeBlankCellsAsNulls");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void readXlsFromArchiveFile() throws FileNotFoundException, IOException{
|
|
||||||
|
|
||||||
ArrayNode sheets = ParsingUtilities.mapper.createArrayNode();
|
|
||||||
sheets.add(ParsingUtilities.mapper.readTree("{name: \"file-source#Test Sheet 0\", fileNameAndSheetIndex: \"file-source#0\", rows: 31, selected: true}"));
|
|
||||||
whenGetArrayOption("sheets", options, sheets);
|
|
||||||
|
|
||||||
whenGetIntegerOption("ignoreLines", options, 0);
|
|
||||||
whenGetIntegerOption("headerLines", options, 0);
|
|
||||||
whenGetIntegerOption("skipDataLines", options, 0);
|
|
||||||
whenGetIntegerOption("limit", options, -1);
|
|
||||||
whenGetBooleanOption("storeBlankCellsAsNulls",options,true);
|
|
||||||
whenGetBooleanOption("includeArchiveFileName", options, true);
|
|
||||||
|
|
||||||
InputStream stream = new FileInputStream(xlsFile);
|
|
||||||
|
|
||||||
try {
|
|
||||||
parseOneFile(SUT, stream);
|
|
||||||
} catch (Exception e) {
|
|
||||||
Assert.fail(e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), COLUMNS + 1);
|
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
|
||||||
|
|
||||||
verify(options, times(1)).get("includeArchiveFileName");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void readXlsx() throws FileNotFoundException, IOException{
|
public void readXlsx() throws FileNotFoundException, IOException{
|
||||||
|
|
||||||
|
@ -37,7 +37,6 @@ import org.testng.annotations.BeforeTest;
|
|||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.google.refine.importers.FixedWidthImporter;
|
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
@ -108,37 +107,4 @@ public class FixedWidthImporterTests extends ImporterTest {
|
|||||||
Assert.assertNull(project.rows.get(2).getCellValue(2));
|
Assert.assertNull(project.rows.get(2).getCellValue(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void readFixedWidthFromArchiveFile(){
|
|
||||||
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
|
|
||||||
|
|
||||||
ArrayNode columnWidths = ParsingUtilities.mapper.createArrayNode();
|
|
||||||
JSONUtilities.append(columnWidths, 6);
|
|
||||||
JSONUtilities.append(columnWidths, 9);
|
|
||||||
JSONUtilities.append(columnWidths, 5);
|
|
||||||
whenGetArrayOption("columnWidths", options, columnWidths);
|
|
||||||
|
|
||||||
ArrayNode columnNames = ParsingUtilities.mapper.createArrayNode();
|
|
||||||
columnNames.add("Col 1");
|
|
||||||
columnNames.add("Col 2");
|
|
||||||
columnNames.add("Col 3");
|
|
||||||
whenGetArrayOption("columnNames", options, columnNames);
|
|
||||||
|
|
||||||
whenGetIntegerOption("ignoreLines", options, 0);
|
|
||||||
whenGetIntegerOption("headerLines", options, 0);
|
|
||||||
whenGetIntegerOption("skipDataLines", options, 0);
|
|
||||||
whenGetIntegerOption("limit", options, -1);
|
|
||||||
whenGetBooleanOption("storeBlankCellsAsNulls",options,true);
|
|
||||||
whenGetBooleanOption("includeArchiveFileName", options, true);
|
|
||||||
|
|
||||||
try {
|
|
||||||
parseOneFile(SUT, reader);
|
|
||||||
} catch (Exception e) {
|
|
||||||
Assert.fail(e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,6 @@ import com.google.refine.ProjectMetadata;
|
|||||||
import com.google.refine.RefineServlet;
|
import com.google.refine.RefineServlet;
|
||||||
import com.google.refine.RefineServletStub;
|
import com.google.refine.RefineServletStub;
|
||||||
import com.google.refine.RefineTest;
|
import com.google.refine.RefineTest;
|
||||||
import com.google.refine.importers.ImportingParserBase;
|
|
||||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
import com.google.refine.importers.tree.XmlImportUtilities;
|
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||||
@ -58,9 +57,8 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
protected ProjectMetadata metadata;
|
protected ProjectMetadata metadata;
|
||||||
protected ImportingJob job;
|
protected ImportingJob job;
|
||||||
protected RefineServlet servlet;
|
protected RefineServlet servlet;
|
||||||
|
|
||||||
protected ObjectNode options;
|
protected ObjectNode options;
|
||||||
|
|
||||||
public void setUp(){
|
public void setUp(){
|
||||||
//FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
//FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||||
|
|
||||||
@ -92,7 +90,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
reader,
|
reader,
|
||||||
-1,
|
-1,
|
||||||
options,
|
options,
|
||||||
@ -109,7 +106,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
inputStream,
|
inputStream,
|
||||||
-1,
|
-1,
|
||||||
options,
|
options,
|
||||||
@ -126,7 +122,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
inputStream,
|
inputStream,
|
||||||
-1,
|
-1,
|
||||||
options,
|
options,
|
||||||
@ -144,7 +139,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
reader,
|
reader,
|
||||||
rootColumnGroup,
|
rootColumnGroup,
|
||||||
-1,
|
-1,
|
||||||
@ -170,7 +164,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
inputStream,
|
inputStream,
|
||||||
rootColumnGroup,
|
rootColumnGroup,
|
||||||
-1,
|
-1,
|
||||||
@ -191,7 +184,6 @@ public abstract class ImporterTest extends RefineTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
reader,
|
reader,
|
||||||
rootColumnGroup,
|
rootColumnGroup,
|
||||||
-1,
|
-1,
|
||||||
|
@ -52,7 +52,6 @@ import org.testng.annotations.Test;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.importers.JsonImporter;
|
|
||||||
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
import com.google.refine.importers.tree.TreeReader.Token;
|
import com.google.refine.importers.tree.TreeReader.Token;
|
||||||
@ -135,7 +134,6 @@ public class JsonImporterTests extends ImporterTest {
|
|||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
"file-source",
|
"file-source",
|
||||||
"archive-file",
|
|
||||||
inputStream,
|
inputStream,
|
||||||
rootColumnGroup,
|
rootColumnGroup,
|
||||||
-1,
|
-1,
|
||||||
|
@ -44,7 +44,6 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.importers.RdfTripleImporter;
|
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class RdfTripleImporterTests extends ImporterTest {
|
public class RdfTripleImporterTests extends ImporterTest {
|
||||||
|
@ -43,7 +43,6 @@ import org.testng.annotations.BeforeTest;
|
|||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.importers.SeparatorBasedImporter;
|
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class TsvCsvImporterTests extends ImporterTest {
|
public class TsvCsvImporterTests extends ImporterTest {
|
||||||
@ -118,27 +117,6 @@ public class TsvCsvImporterTests extends ImporterTest {
|
|||||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
|
||||||
public void readSimpleData_CSV_fromArchiveFileName(String sep){
|
|
||||||
//create input to test with
|
|
||||||
String inputSeparator = sep == null ? "\t" : sep;
|
|
||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
|
||||||
|
|
||||||
|
|
||||||
try {
|
|
||||||
prepareOptions(sep, -1, 0, 0, 1, false, false,"\"","[]", true);
|
|
||||||
parseOneFile(SUT, new StringReader(input));
|
|
||||||
} catch (Exception e) {
|
|
||||||
Assert.fail("Exception during file parse",e);
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "archive-file");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||||
public void readSimpleData_CSV_1Header_1Row_GuessValues(String sep){
|
public void readSimpleData_CSV_1Header_1Row_GuessValues(String sep){
|
||||||
//create input to test with
|
//create input to test with
|
||||||
|
@ -37,7 +37,7 @@ import java.io.ByteArrayInputStream;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.LinkedList;
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -48,7 +48,6 @@ import org.testng.annotations.Test;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.importers.XmlImporter;
|
|
||||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
import com.google.refine.importing.ImportingJob;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.model.ColumnGroup;
|
import com.google.refine.model.ColumnGroup;
|
||||||
@ -248,7 +247,7 @@ public class XmlImporterTests extends ImporterTest {
|
|||||||
|
|
||||||
public static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser) {
|
public static ObjectNode getOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||||
ObjectNode options = parser.createParserUIInitializationData(
|
ObjectNode options = parser.createParserUIInitializationData(
|
||||||
job, new LinkedList<>(), "text/json");
|
job, Collections.emptyList(), "text/xml");
|
||||||
|
|
||||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
||||||
JSONUtilities.append(path, "library");
|
JSONUtilities.append(path, "library");
|
||||||
@ -260,7 +259,7 @@ public class XmlImporterTests extends ImporterTest {
|
|||||||
|
|
||||||
public static ObjectNode getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
public static ObjectNode getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
||||||
ObjectNode options = parser.createParserUIInitializationData(
|
ObjectNode options = parser.createParserUIInitializationData(
|
||||||
job, new LinkedList<>(), "text/json");
|
job, Collections.emptyList(), "text/xml");
|
||||||
|
|
||||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
||||||
JSONUtilities.append(path, "nest");
|
JSONUtilities.append(path, "nest");
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* Copyright (C) 2018, OpenRefine contributors
|
* Copyright (C) 2018, 2020 OpenRefine contributors
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
@ -26,17 +26,25 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package com.google.refine.importing;
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.LinkedList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import okhttp3.HttpUrl;
|
import okhttp3.HttpUrl;
|
||||||
import okhttp3.mockwebserver.MockResponse;
|
import okhttp3.mockwebserver.MockResponse;
|
||||||
import okhttp3.mockwebserver.MockWebServer;
|
import okhttp3.mockwebserver.MockWebServer;
|
||||||
|
|
||||||
|
import org.apache.commons.collections.IteratorUtils;
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.http.HttpEntity;
|
||||||
import org.apache.http.entity.ContentType;
|
import org.apache.http.entity.ContentType;
|
||||||
import org.apache.http.entity.mime.MultipartEntityBuilder;
|
import org.apache.http.entity.mime.MultipartEntityBuilder;
|
||||||
@ -50,9 +58,9 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
|
|||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.importers.ImporterTest;
|
import com.google.refine.importers.ImporterTest;
|
||||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
import com.google.refine.importers.ImportingParserBase;
|
||||||
import com.google.refine.importing.ImportingJob;
|
import com.google.refine.importers.SeparatorBasedImporter;
|
||||||
import com.google.refine.importing.ImportingUtilities;
|
import com.google.refine.importing.ImportingUtilities.Progress;
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import com.google.refine.util.TestUtils;
|
import com.google.refine.util.TestUtils;
|
||||||
@ -60,8 +68,6 @@ import com.google.refine.util.TestUtils;
|
|||||||
import javax.servlet.ServletInputStream;
|
import javax.servlet.ServletInputStream;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
public class ImportingUtilitiesTests extends ImporterTest {
|
public class ImportingUtilitiesTests extends ImporterTest {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -80,24 +86,12 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
Assert.assertEquals(pm.getEncoding(), "UTF-8");
|
Assert.assertEquals(pm.getEncoding(), "UTF-8");
|
||||||
Assert.assertTrue(pm.getTags().length == 0);
|
Assert.assertTrue(pm.getTags().length == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions=IllegalArgumentException.class)
|
@Test(expectedExceptions=IllegalArgumentException.class)
|
||||||
public void testZipSlip() throws IOException {
|
public void testZipSlip() throws IOException {
|
||||||
File tempDir = TestUtils.createTempDirectory("openrefine-zip-slip-test");
|
File tempDir = TestUtils.createTempDirectory("openrefine-zip-slip-test");
|
||||||
// For CVE-2018-19859, issue #1840
|
// For CVE-2018-19859, issue #1840
|
||||||
ImportingUtilities.allocateFile(tempDir, "../../tmp/script.sh");
|
ImportingUtilities.allocateFile(tempDir, "../../tmp/script.sh");
|
||||||
}
|
|
||||||
|
|
||||||
private ObjectNode getNestedOptions(ImportingJob job, TreeImportingParserBase parser) {
|
|
||||||
ObjectNode options = parser.createParserUIInitializationData(
|
|
||||||
job, new LinkedList<>(), "text/json");
|
|
||||||
|
|
||||||
ArrayNode path = ParsingUtilities.mapper.createArrayNode();
|
|
||||||
path.add("results");
|
|
||||||
path.add("result");
|
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "recordPath", path);
|
|
||||||
return options;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -154,6 +148,8 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
Assert.fail("No Exception was thrown");
|
Assert.fail("No Exception was thrown");
|
||||||
} catch (Exception exception) {
|
} catch (Exception exception) {
|
||||||
Assert.assertEquals(MESSAGE, exception.getMessage());
|
Assert.assertEquals(MESSAGE, exception.getMessage());
|
||||||
|
} finally {
|
||||||
|
server.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,4 +167,91 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests both exploding a zip archive into its constituent files
|
||||||
|
* as well as importing them all (both) and making sure that the
|
||||||
|
* recording of archive names and file names works correctly.
|
||||||
|
*
|
||||||
|
* It's kind of a lot to have in one test, but it's a sequence
|
||||||
|
* of steps that need to be done in order.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
@Test
|
||||||
|
public void importArchive() throws IOException{
|
||||||
|
String filename = "movies.zip";
|
||||||
|
String filepath = ClassLoader.getSystemResource(filename).getPath();
|
||||||
|
// Make a copy in our data directory where it's expected
|
||||||
|
File tmp = File.createTempFile("openrefine-test-movies", ".zip", job.getRawDataDir());
|
||||||
|
tmp.deleteOnExit();
|
||||||
|
FileUtils.copyFile(new File(filepath), tmp);
|
||||||
|
|
||||||
|
Progress dummyProgress = new Progress() {
|
||||||
|
@Override
|
||||||
|
public void setProgress(String message, int percent) {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isCanceled() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ArrayNode fileRecords = ParsingUtilities.mapper.createArrayNode();
|
||||||
|
ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
|
||||||
|
JSONUtilities.safePut(fileRecord, "origin", "upload");
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", "UTF-8");
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", "application/x-zip-compressed");
|
||||||
|
JSONUtilities.safePut(fileRecord, "fileName", filename);
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", tmp.getName());
|
||||||
|
|
||||||
|
assertTrue(ImportingUtilities.postProcessRetrievedFile(job.getRawDataDir(), tmp, fileRecord, fileRecords, dummyProgress));
|
||||||
|
assertEquals(fileRecords.size(), 2);
|
||||||
|
assertEquals(fileRecords.get(0).get("fileName").asText(), "movies-condensed.tsv");
|
||||||
|
assertEquals(fileRecords.get(0).get("archiveFileName").asText(), "movies.zip");
|
||||||
|
assertEquals(fileRecords.get(1).get("fileName").asText(), "movies.tsv");
|
||||||
|
|
||||||
|
ObjectNode options = ParsingUtilities.mapper.createObjectNode();
|
||||||
|
JSONUtilities.safePut(options, "includeArchiveFileName", true);
|
||||||
|
JSONUtilities.safePut(options, "includeFileSources", true);
|
||||||
|
|
||||||
|
ImportingParserBase parser = new SeparatorBasedImporter();
|
||||||
|
List<Exception> exceptions = new ArrayList<Exception>();
|
||||||
|
parser.parse(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
IteratorUtils.toList(fileRecords.iterator()),
|
||||||
|
"tsv",
|
||||||
|
-1,
|
||||||
|
options,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
assertEquals(exceptions.size(), 0);
|
||||||
|
project.update();
|
||||||
|
|
||||||
|
assertEquals(project.columnModel.columns.get(0).getName(),"Archive");
|
||||||
|
assertEquals(project.rows.get(0).getCell(0).getValue(),"movies.zip");
|
||||||
|
assertEquals(project.columnModel.columns.get(1).getName(),"File");
|
||||||
|
assertEquals(project.rows.get(0).getCell(1).getValue(),"movies-condensed.tsv");
|
||||||
|
assertEquals(project.columnModel.columns.get(2).getName(),"name");
|
||||||
|
assertEquals(project.rows.get(0).getCell(2).getValue(),"Wayne's World");
|
||||||
|
|
||||||
|
// Make sure we imported both files contained in the zip file
|
||||||
|
assertEquals(project.rows.size(), 252);
|
||||||
|
|
||||||
|
ArrayNode importOptionsArray = metadata.getImportOptionMetadata();
|
||||||
|
assertEquals(importOptionsArray.size(), 2);
|
||||||
|
ObjectNode importOptions = (ObjectNode)importOptionsArray.get(0);
|
||||||
|
assertEquals(importOptions.get("archiveFileName").asText(), "movies.zip");
|
||||||
|
assertEquals(importOptions.get("fileSource").asText(), "movies-condensed.tsv");
|
||||||
|
assertTrue(importOptions.get("includeFileSources").asBoolean());
|
||||||
|
assertTrue(importOptions.get("includeArchiveFileName").asBoolean());
|
||||||
|
|
||||||
|
importOptions = (ObjectNode)importOptionsArray.get(1);
|
||||||
|
assertEquals(importOptions.get("fileSource").asText(), "movies.tsv");
|
||||||
|
assertEquals(importOptions.get("archiveFileName").asText(), "movies.zip");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,6 @@ import com.google.refine.model.AbstractOperation;
|
|||||||
import com.google.refine.model.ModelException;
|
import com.google.refine.model.ModelException;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.operations.OperationRegistry;
|
import com.google.refine.operations.OperationRegistry;
|
||||||
import com.google.refine.operations.cell.KeyValueColumnizeOperation;
|
|
||||||
import com.google.refine.process.Process;
|
import com.google.refine.process.Process;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import com.google.refine.util.TestUtils;
|
import com.google.refine.util.TestUtils;
|
||||||
@ -202,7 +201,7 @@ public class KeyValueColumnizeTests extends RefineTest {
|
|||||||
+ "price,3.1\n";
|
+ "price,3.1\n";
|
||||||
prepareOptions(",", 20, 0, 0, 1, false, false);
|
prepareOptions(",", 20, 0, 0, 1, false, false);
|
||||||
List<Exception> exceptions = new ArrayList<Exception>();
|
List<Exception> exceptions = new ArrayList<Exception>();
|
||||||
importer.parseOneFile(project, pm, job, "filesource", "archivefile", new StringReader(csv), -1, options, exceptions);
|
importer.parseOneFile(project, pm, job, "filesource", new StringReader(csv), -1, options, exceptions);
|
||||||
project.update();
|
project.update();
|
||||||
ProjectManager.singleton.registerProject(project, pm);
|
ProjectManager.singleton.registerProject(project, pm);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user