Fix parsing of newlines in cells

This commit is contained in:
Antonin Delpeuch 2017-08-17 19:18:50 +01:00
parent 2a3c158696
commit 554b75fa7b
2 changed files with 36 additions and 8 deletions

View File

@ -16,6 +16,7 @@ import org.sweble.wikitext.parser.utils.SimpleParserConfig;
import org.sweble.wikitext.parser.WikitextParser;
import org.sweble.wikitext.parser.nodes.WtBold;
import org.sweble.wikitext.parser.nodes.WtItalics;
import org.sweble.wikitext.parser.nodes.WtNewline;
import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtSection;
import org.sweble.wikitext.parser.nodes.WtText;
@ -35,6 +36,9 @@ import org.sweble.wikitext.parser.nodes.WtName;
import org.sweble.wikitext.parser.nodes.WtValue;
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
import org.sweble.wikitext.parser.nodes.WtBody;
import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag;
import org.sweble.wikitext.parser.nodes.WtXmlEndTag;
import org.sweble.wikitext.parser.nodes.WtXmlStartTag;
import org.sweble.wikitext.parser.WikitextEncodingValidator;
import org.sweble.wikitext.parser.WikitextPreprocessor;
@ -60,7 +64,7 @@ import com.google.refine.model.recon.ReconJob;
public class WikitextImporter extends TabularImportingParserBase {
static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
// static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
public WikitextImporter() {
super(false);
@ -249,10 +253,36 @@ public class WikitextImporter extends TabularImportingParserBase {
}
/**
 * Handles a plain-text node by forwarding its content to {@code writeText}.
 *
 * @param text the text node being visited
 */
public void visit(WtText text) {
    final String content = text.getContent();
    writeText(content);
}
/**
 * Handles a newline node by writing a single line-feed character
 * through {@code writeText}.
 *
 * @param e the newline node being visited (its contents are not inspected)
 */
public void visit(WtNewline e) {
    final String lineFeed = "\n";
    writeText(lineFeed);
}
/**
 * Handles a self-closing XML tag: a {@code <br/>} is written out as a
 * line feed, every other empty tag is ignored.
 *
 * @param tag the self-closing tag being visited
 */
public void visit(WtXmlEmptyTag tag) {
    // HTML tag names are case-insensitive, so <BR/> must be treated like <br/>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
/**
 * Handles an opening XML tag: a {@code <br>} is written out as a
 * line feed, every other start tag is ignored.
 *
 * @param tag the opening tag being visited
 */
public void visit(WtXmlStartTag tag) {
    // HTML tag names are case-insensitive, so <BR> must be treated like <br>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
/**
 * Handles a closing XML tag: a {@code </br>} is written out as a
 * line feed (same treatment as the opening and self-closing forms),
 * every other end tag is ignored.
 *
 * @param tag the closing tag being visited
 */
public void visit(WtXmlEndTag tag) {
    // HTML tag names are case-insensitive, so </BR> must be treated like </br>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
/**
 * Appends the given text to the buffer that is currently active.
 * The XML-attribute buffer takes precedence; otherwise the text goes to
 * the cell buffer. When neither buffer is set the text is dropped.
 *
 * @param text the text to append; {@code null} is ignored
 */
public void writeText(String text) {
    if (text == null) {
        // Assuming the buffers are StringBuilders, appending a null String
        // would insert the literal "null" into the output.
        return;
    }
    if (xmlAttrStringBuilder != null) {
        xmlAttrStringBuilder.append(text);
    } else if (cellStringBuilder != null) {
        cellStringBuilder.append(text);
    }
}

View File

@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.StringReader;
import org.json.JSONException;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
@ -77,9 +74,9 @@ public class WikitextImporterTests extends ImporterTest {
String input = "\n"
+ "{|\n"
+ "|-\n"
+ "| a || b || c \n"
+ "| a || b<br/>2 || c \n"
+ "|-\n"
+ "| d || e || f\n"
+ "| d || e || f<br>\n"
+ "|-\n"
+ "|}\n";
try {
@ -92,6 +89,7 @@ public class WikitextImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b\n2");
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f");
}