This commit is contained in:
Tom Morris 2015-09-25 19:01:16 -04:00
parent 8511c6c30d
commit be936a86eb
4 changed files with 35 additions and 38 deletions

View File

@ -41,16 +41,22 @@ import java.util.List;
import org.apache.commons.lang.NotImplementedException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
abstract public class ImportingParserBase implements ImportingParser {
final static Logger logger = LoggerFactory.getLogger("ImportingParserBase");
final protected boolean useInputStream;
/**
@ -154,4 +160,20 @@ abstract public class ImportingParserBase implements ImportingParser {
) {
throw new NotImplementedException();
}
/**
 * Ensures the project has a "File" column for recording each row's source
 * file, and reports which cell slot rows should write the file name into.
 * <p>
 * If a column named "File" already exists it is reused rather than treated
 * as an error. Otherwise a new column is inserted at position 0 of the
 * column model, backed by a freshly allocated cell index.
 *
 * @param project the project whose column model is inspected and, if
 *                necessary, extended
 * @return the cell index of the "File" column (suitable for
 *         {@code Row.setCell}), or -1 if the column could not be added
 */
protected static int addFilenameColumn(Project project) {
    String fileNameColumnName = "File";

    // A runtime check rather than an assert: assertions are normally disabled,
    // and an already-present column is a legitimate state to reuse, not a
    // failure.
    Column existingColumn = project.columnModel.getColumnByName(fileNameColumnName);
    if (existingColumn != null) {
        return existingColumn.getCellIndex();
    }

    try {
        Column fileNameColumn =
                new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName);
        project.columnModel.addColumn(0, fileNameColumn, false);
        // Return the cell index, not the insert position: callers pass this
        // value to Row.setCell, which addresses cells rather than columns.
        return fileNameColumn.getCellIndex();
    } catch (ModelException e) {
        // Shouldn't happen: We already checked for duplicate name.
        logger.error("ModelException adding Filename column", e);
    }
    return -1;
}
}

View File

@ -39,21 +39,17 @@ import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
abstract public class TabularImportingParserBase extends ImportingParserBase {
private final static Logger logger = LoggerFactory.getLogger("ImportingParserBase");
static public interface TableDataReader {
public List<Object> getNextRowOfCells() throws IOException;
}
@ -108,18 +104,10 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File";
int filenameColumnIndex = -1;
if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
logger.info("ModelException",e);
}
}
filenameColumnIndex = addFilenameColumn(project);
}
List<String> columnNames = new ArrayList<String>();
@ -194,9 +182,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
}
if (rowHasData || storeBlankRows) {
if (includeFileSources) {
if (includeFileSources && filenameColumnIndex >= 0) {
row.setCell(
project.columnModel.getColumnByName(fileNameColumnName).getCellIndex(),
filenameColumnIndex,
new Cell(fileSource, null));
}
project.rows.add(row);

View File

@ -18,11 +18,7 @@ public class ImportParameters {
}
public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
this.trimStrings = trimStrings;
this.storeEmptyStrings = storeEmptyStrings;
this.guessDataType = guessCellValueTypes;
this.includeFileSources = false;
this.fileSource = "";
this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
}
}
}

View File

@ -48,11 +48,7 @@ import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
/**
@ -215,21 +211,16 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
// copied from TabularImportingParserBase
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File";
int filenameColumnIndex = -1;
if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
}
}
filenameColumnIndex = addFilenameColumn(project);
}
// If the column add fails for any reason, we'll end up overwriting data in the first column
assert filenameColumnIndex == 0;
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource));
new ImportParameters(trimStrings, storeEmptyStrings, guessCellValueTypes, includeFileSources,
fileSource));
}
}