This commit is contained in:
Tom Morris 2015-09-25 19:01:16 -04:00
parent 8511c6c30d
commit be936a86eb
4 changed files with 35 additions and 38 deletions

View File

@ -41,16 +41,22 @@ import java.util.List;
import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang.NotImplementedException;
import org.json.JSONObject; import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata; import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress; import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser; import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
abstract public class ImportingParserBase implements ImportingParser { abstract public class ImportingParserBase implements ImportingParser {
final static Logger logger = LoggerFactory.getLogger("ImportingParserBase");
final protected boolean useInputStream; final protected boolean useInputStream;
/** /**
@ -154,4 +160,20 @@ abstract public class ImportingParserBase implements ImportingParser {
) { ) {
throw new NotImplementedException(); throw new NotImplementedException();
} }
/**
 * Makes sure the project has a column named "File" for recording the source
 * file of each row, inserting it at column position 0 when it is missing.
 *
 * @param project the project whose column model should contain the column
 * @return 0 when the "File" column is available (either it was just inserted
 *         at position 0 or a column with that name already existed), or -1
 *         when inserting the column failed
 */
protected static int addFilenameColumn(Project project) {
    String fileNameColumnName = "File";

    // Use a real check rather than an assert: assertions are disabled by default,
    // and a pre-existing column (presumably created while importing an earlier
    // file into the same project) is a legitimate state, not a programming error.
    // NOTE(review): callers treat the returned value as the index to write the
    // file source into; this assumes an existing "File" column lives at index 0
    // -- confirm against the column model.
    if (project.columnModel.getColumnByName(fileNameColumnName) != null) {
        return 0;
    }

    try {
        project.columnModel.addColumn(
                0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
        return 0;
    } catch (ModelException e) {
        // Shouldn't happen: We already checked for duplicate name.
        logger.error("ModelException adding Filename column", e);
        return -1;
    }
}
} }

View File

@ -39,21 +39,17 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.json.JSONObject; import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata; import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils; import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Column; import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
abstract public class TabularImportingParserBase extends ImportingParserBase { abstract public class TabularImportingParserBase extends ImportingParserBase {
private final static Logger logger = LoggerFactory.getLogger("ImportingParserBase");
static public interface TableDataReader { static public interface TableDataReader {
public List<Object> getNextRowOfCells() throws IOException; public List<Object> getNextRowOfCells() throws IOException;
} }
@ -109,17 +105,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true); boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File"; int filenameColumnIndex = -1;
if (includeFileSources) { if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) { filenameColumnIndex = addFilenameColumn(project);
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
logger.info("ModelException",e);
}
}
} }
List<String> columnNames = new ArrayList<String>(); List<String> columnNames = new ArrayList<String>();
@ -194,9 +182,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
} }
if (rowHasData || storeBlankRows) { if (rowHasData || storeBlankRows) {
if (includeFileSources) { if (includeFileSources && filenameColumnIndex >= 0) {
row.setCell( row.setCell(
project.columnModel.getColumnByName(fileNameColumnName).getCellIndex(), filenameColumnIndex,
new Cell(fileSource, null)); new Cell(fileSource, null));
} }
project.rows.add(row); project.rows.add(row);

View File

@ -18,11 +18,7 @@ public class ImportParameters {
} }
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) { public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
this.trimStrings = trimStrings; this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
this.storeEmptyStrings = storeEmptyStrings;
this.guessDataType = guessCellValueTypes;
this.includeFileSources = false;
this.fileSource = "";
} }
} }

View File

@ -48,11 +48,7 @@ import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase; import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
/** /**
@ -215,21 +211,16 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false); boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true); boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
// copied from TabularImportingParserBase
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File"; int filenameColumnIndex = -1;
if (includeFileSources) { if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) { filenameColumnIndex = addFilenameColumn(project);
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
}
}
} }
// If the column add fails for any reason, we'll end up overwriting data in the first column
assert filenameColumnIndex == 0;
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource)); new ImportParameters(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources,
fileSource));
} }
} }