Fixed Issue 19 ("CSV import is too basic"): quoted cells in a CSV file can now contain line breaks. Added a CSV file to test that.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@717 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
c0e006861a
commit
7ad438078f
@ -64,7 +64,7 @@ public class TsvCsvImporter implements Importer {
|
|||||||
if (headerLines > 0) {
|
if (headerLines > 0) {
|
||||||
headerLines--;
|
headerLines--;
|
||||||
|
|
||||||
List<String> cells = parser.split(line);
|
List<String> cells = parser.split(line, lnReader);
|
||||||
for (int c = 0; c < cells.size(); c++) {
|
for (int c = 0; c < cells.size(); c++) {
|
||||||
String cell = cells.get(c).trim();
|
String cell = cells.get(c).trim();
|
||||||
|
|
||||||
@ -73,7 +73,7 @@ public class TsvCsvImporter implements Importer {
|
|||||||
} else {
|
} else {
|
||||||
Row row = new Row(columnNames.size());
|
Row row = new Row(columnNames.size());
|
||||||
|
|
||||||
if (parser.parseRow(row, line, guessValueType)) {
|
if (parser.parseRow(row, line, guessValueType, lnReader)) {
|
||||||
rowsWithData++;
|
rowsWithData++;
|
||||||
|
|
||||||
if (skip <= 0 || rowsWithData > skip) {
|
if (skip <= 0 || rowsWithData > skip) {
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package com.metaweb.gridworks.importers.parsers;
|
package com.metaweb.gridworks.importers.parsers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -10,7 +12,7 @@ import com.metaweb.gridworks.model.Cell;
|
|||||||
import com.metaweb.gridworks.model.Row;
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
public class CSVRowParser extends RowParser {
|
public class CSVRowParser extends RowParser {
|
||||||
public List<String> split(String line) {
|
public List<String> split(String line, LineNumberReader lineReader) {
|
||||||
List<String> results = new ArrayList<String>();
|
List<String> results = new ArrayList<String>();
|
||||||
|
|
||||||
int start = 0;
|
int start = 0;
|
||||||
@ -25,8 +27,14 @@ public class CSVRowParser extends RowParser {
|
|||||||
int quote = line.indexOf('"', start);
|
int quote = line.indexOf('"', start);
|
||||||
if (quote < 0) {
|
if (quote < 0) {
|
||||||
sb.append(line.substring(start));
|
sb.append(line.substring(start));
|
||||||
start = line.length();
|
|
||||||
break;
|
start = 0;
|
||||||
|
try {
|
||||||
|
line = lineReader.readLine();
|
||||||
|
} catch (IOException e) {
|
||||||
|
line = "";
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (quote < line.length() - 1 && line.charAt(quote + 1) == '"') {
|
if (quote < line.length() - 1 && line.charAt(quote + 1) == '"') {
|
||||||
sb.append(line.substring(start, quote + 1)); // include " as well
|
sb.append(line.substring(start, quote + 1)); // include " as well
|
||||||
@ -60,10 +68,10 @@ public class CSVRowParser extends RowParser {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean parseRow(Row row, String line, boolean guessValueType) {
|
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
||||||
boolean hasData = false;
|
boolean hasData = false;
|
||||||
|
|
||||||
List<String> strings = split(line);
|
List<String> strings = split(line, lineReader);
|
||||||
for (String s : strings) {
|
for (String s : strings) {
|
||||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package com.metaweb.gridworks.importers.parsers;
|
package com.metaweb.gridworks.importers.parsers;
|
||||||
|
|
||||||
|
import java.io.LineNumberReader;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -11,7 +12,7 @@ import com.metaweb.gridworks.model.Row;
|
|||||||
|
|
||||||
public class NonSplitRowParser extends RowParser {
|
public class NonSplitRowParser extends RowParser {
|
||||||
|
|
||||||
public List<String> split(String line) {
|
public List<String> split(String line, LineNumberReader lineReader) {
|
||||||
List<String> results = new ArrayList<String>(1);
|
List<String> results = new ArrayList<String>(1);
|
||||||
|
|
||||||
results.add(line.trim());
|
results.add(line.trim());
|
||||||
@ -19,7 +20,7 @@ public class NonSplitRowParser extends RowParser {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean parseRow(Row row, String line, boolean guessValueType) {
|
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
||||||
line = line.trim();
|
line = line.trim();
|
||||||
if (line.isEmpty()) {
|
if (line.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
package com.metaweb.gridworks.importers.parsers;
|
package com.metaweb.gridworks.importers.parsers;
|
||||||
|
|
||||||
|
import java.io.LineNumberReader;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.metaweb.gridworks.model.Row;
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
public abstract class RowParser {
|
public abstract class RowParser {
|
||||||
public abstract List<String> split(String line);
|
public abstract List<String> split(String line, LineNumberReader lineReader);
|
||||||
|
|
||||||
public abstract boolean parseRow(Row row, String line, boolean guessValueType);
|
public abstract boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package com.metaweb.gridworks.importers.parsers;
|
package com.metaweb.gridworks.importers.parsers;
|
||||||
|
|
||||||
|
import java.io.LineNumberReader;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -19,7 +20,7 @@ public class SeparatorRowParser extends RowParser {
|
|||||||
this.sep = sep;
|
this.sep = sep;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> split(String line) {
|
public List<String> split(String line, LineNumberReader lineReader) {
|
||||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
||||||
|
|
||||||
List<String> results = new ArrayList<String>();
|
List<String> results = new ArrayList<String>();
|
||||||
@ -30,7 +31,7 @@ public class SeparatorRowParser extends RowParser {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean parseRow(Row row, String line, boolean guessValueType) {
|
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
||||||
boolean hasData = false;
|
boolean hasData = false;
|
||||||
|
|
||||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
||||||
|
Loading…
Reference in New Issue
Block a user