From 554b75fa7b57868f0e3af663796c6b54c74cd3d8 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Thu, 17 Aug 2017 19:18:50 +0100 Subject: [PATCH] Fix parsing of newlines in cells --- .../refine/importers/WikitextImporter.java | 36 +++++++++++++++++-- .../importers/WikitextImporterTests.java | 8 ++--- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/main/src/com/google/refine/importers/WikitextImporter.java b/main/src/com/google/refine/importers/WikitextImporter.java index 8d9b8d133..31df95e4f 100644 --- a/main/src/com/google/refine/importers/WikitextImporter.java +++ b/main/src/com/google/refine/importers/WikitextImporter.java @@ -16,6 +16,7 @@ import org.sweble.wikitext.parser.utils.SimpleParserConfig; import org.sweble.wikitext.parser.WikitextParser; import org.sweble.wikitext.parser.nodes.WtBold; import org.sweble.wikitext.parser.nodes.WtItalics; +import org.sweble.wikitext.parser.nodes.WtNewline; import org.sweble.wikitext.parser.nodes.WtNode; import org.sweble.wikitext.parser.nodes.WtSection; import org.sweble.wikitext.parser.nodes.WtText; @@ -35,6 +36,9 @@ import org.sweble.wikitext.parser.nodes.WtName; import org.sweble.wikitext.parser.nodes.WtValue; import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage; import org.sweble.wikitext.parser.nodes.WtBody; +import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag; +import org.sweble.wikitext.parser.nodes.WtXmlEndTag; +import org.sweble.wikitext.parser.nodes.WtXmlStartTag; import org.sweble.wikitext.parser.WikitextEncodingValidator; import org.sweble.wikitext.parser.WikitextPreprocessor; @@ -60,7 +64,7 @@ import com.google.refine.model.recon.ReconJob; public class WikitextImporter extends TabularImportingParserBase { - static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class); + // static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class); public WikitextImporter() { super(false); @@ -249,10 +253,36 @@ public class WikitextImporter extends 
TabularImportingParserBase { } public void visit(WtText text) { + writeText(text.getContent()); + } + + public void visit(WtNewline e) { + writeText("\n"); + } + + public void visit(WtXmlEmptyTag tag) { + if("br".equals(tag.getName())) { + writeText("\n"); + } + } + + public void visit(WtXmlStartTag tag) { + if("br".equals(tag.getName())) { + writeText("\n"); + } + } + + public void visit(WtXmlEndTag tag) { + if("br".equals(tag.getName())) { + writeText("\n"); + } + } + + public void writeText(String text) { if (xmlAttrStringBuilder != null) { - xmlAttrStringBuilder.append(text.getContent()); + xmlAttrStringBuilder.append(text); } else if (cellStringBuilder != null) { - cellStringBuilder.append(text.getContent()); + cellStringBuilder.append(text); } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java index e5689fdd1..97853bfc6 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java @@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.tests.importers; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import java.io.StringReader; -import org.json.JSONException; import org.slf4j.LoggerFactory; import org.testng.Assert; import org.testng.annotations.AfterMethod; @@ -77,9 +74,9 @@ public class WikitextImporterTests extends ImporterTest { String input = "\n" + "{|\n" + "|-\n" - + "| a || b || c \n" + + "| a || b
<br/>2 || c \n" + "|-\n" - + "| d || e || f\n" + + "| d || e || f<br>
\n" + "|-\n" + "|}\n"; try { @@ -92,6 +89,7 @@ public class WikitextImporterTests extends ImporterTest { Assert.assertEquals(project.rows.size(), 2); Assert.assertEquals(project.rows.get(0).cells.size(), 3); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b\n2"); Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f"); }