From 2bac6844e2e38576156a829d7d938181a3244927 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Tue, 9 Mar 2010 19:10:55 +0000 Subject: [PATCH] Fixed csv importer to handle escaped quotation marks (""). git-svn-id: http://google-refine.googlecode.com/svn/trunk@257 7d457c2a-affb-35e4-300a-418c747d4874 --- .../importers/ImporterUtilities.java | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java index ca88c4319..d4e4387dc 100644 --- a/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java +++ b/src/main/java/com/metaweb/gridworks/importers/ImporterUtilities.java @@ -35,14 +35,31 @@ public class ImporterUtilities { String text = null; if (line.charAt(start) == '"') { - int next = line.indexOf('"', start + 1); - if (next < 0) { - text = line.substring(start); - start = line.length(); - } else { - text = line.substring(start, next + 1); - start = next + 2; - } + StringBuffer sb = new StringBuffer(); + + start++; // skip over " + while (start < line.length()) { + int quote = line.indexOf('"', start); + if (quote < 0) { + sb.append(line.substring(start)); + start = line.length(); + break; + } else { + if (quote < line.length() - 1 && line.charAt(quote + 1) == '"') { + sb.append(line.substring(start, quote + 1)); // include " as well + start = quote + 2; + } else { + sb.append(line.substring(start, quote)); + start = quote + 1; + if (start < line.length() && line.charAt(start) == ',') { + start++; // skip , + } + break; + } + } + } + + text = sb.toString(); } else { int next = line.indexOf(',', start); if (next < 0) {