Fix parsing of newlines in cells

This commit is contained in:
Antonin Delpeuch 2017-08-17 19:18:50 +01:00
parent 2a3c158696
commit 554b75fa7b
2 changed files with 36 additions and 8 deletions

View File

@ -16,6 +16,7 @@ import org.sweble.wikitext.parser.utils.SimpleParserConfig;
import org.sweble.wikitext.parser.WikitextParser; import org.sweble.wikitext.parser.WikitextParser;
import org.sweble.wikitext.parser.nodes.WtBold; import org.sweble.wikitext.parser.nodes.WtBold;
import org.sweble.wikitext.parser.nodes.WtItalics; import org.sweble.wikitext.parser.nodes.WtItalics;
import org.sweble.wikitext.parser.nodes.WtNewline;
import org.sweble.wikitext.parser.nodes.WtNode; import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtSection; import org.sweble.wikitext.parser.nodes.WtSection;
import org.sweble.wikitext.parser.nodes.WtText; import org.sweble.wikitext.parser.nodes.WtText;
@ -35,6 +36,9 @@ import org.sweble.wikitext.parser.nodes.WtName;
import org.sweble.wikitext.parser.nodes.WtValue; import org.sweble.wikitext.parser.nodes.WtValue;
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage; import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
import org.sweble.wikitext.parser.nodes.WtBody; import org.sweble.wikitext.parser.nodes.WtBody;
import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag;
import org.sweble.wikitext.parser.nodes.WtXmlEndTag;
import org.sweble.wikitext.parser.nodes.WtXmlStartTag;
import org.sweble.wikitext.parser.WikitextEncodingValidator; import org.sweble.wikitext.parser.WikitextEncodingValidator;
import org.sweble.wikitext.parser.WikitextPreprocessor; import org.sweble.wikitext.parser.WikitextPreprocessor;
@ -60,7 +64,7 @@ import com.google.refine.model.recon.ReconJob;
public class WikitextImporter extends TabularImportingParserBase { public class WikitextImporter extends TabularImportingParserBase {
static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class); // static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
public WikitextImporter() { public WikitextImporter() {
super(false); super(false);
@ -249,10 +253,36 @@ public class WikitextImporter extends TabularImportingParserBase {
} }
public void visit(WtText text) { public void visit(WtText text) {
writeText(text.getContent());
}
/**
 * Handles a newline node by emitting a single line-feed character
 * through {@code writeText}.
 *
 * @param e the newline node being visited (its content is not inspected)
 */
public void visit(WtNewline e) {
    final String lineFeed = "\n";
    writeText(lineFeed);
}
/**
 * Handles a self-closing XML tag. A {@code <br/>} tag is turned into a
 * line feed written through {@code writeText}; every other empty tag is
 * ignored.
 *
 * @param tag the empty (self-closing) tag being visited
 */
public void visit(WtXmlEmptyTag tag) {
    // HTML tag names are case-insensitive, so <BR/> and <Br/> must be
    // treated exactly like <br/>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
/**
 * Handles an opening XML tag. An unclosed {@code <br>} tag is turned into
 * a line feed written through {@code writeText}; every other start tag is
 * ignored.
 *
 * @param tag the start tag being visited
 */
public void visit(WtXmlStartTag tag) {
    // HTML tag names are case-insensitive, so <BR> and <Br> must be
    // treated exactly like <br>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
/**
 * Handles a closing XML tag. A stray {@code </br>} tag is turned into a
 * line feed written through {@code writeText}; every other end tag is
 * ignored.
 *
 * NOTE(review): if the parser delivers both the start and the end node of
 * a {@code <br></br>} pair, this yields two line feeds - confirm that is
 * the intended result.
 *
 * @param tag the end tag being visited
 */
public void visit(WtXmlEndTag tag) {
    // HTML tag names are case-insensitive, so </BR> must be treated
    // exactly like </br>.
    if ("br".equalsIgnoreCase(tag.getName())) {
        writeText("\n");
    }
}
public void writeText(String text) {
if (xmlAttrStringBuilder != null) { if (xmlAttrStringBuilder != null) {
xmlAttrStringBuilder.append(text.getContent()); xmlAttrStringBuilder.append(text);
} else if (cellStringBuilder != null) { } else if (cellStringBuilder != null) {
cellStringBuilder.append(text.getContent()); cellStringBuilder.append(text);
} }
} }

View File

@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers; package com.google.refine.tests.importers;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.StringReader; import java.io.StringReader;
import org.json.JSONException;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
@ -77,9 +74,9 @@ public class WikitextImporterTests extends ImporterTest {
String input = "\n" String input = "\n"
+ "{|\n" + "{|\n"
+ "|-\n" + "|-\n"
+ "| a || b || c \n" + "| a || b<br/>2 || c \n"
+ "|-\n" + "|-\n"
+ "| d || e || f\n" + "| d || e || f<br>\n"
+ "|-\n" + "|-\n"
+ "|}\n"; + "|}\n";
try { try {
@ -92,6 +89,7 @@ public class WikitextImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.size(), 2); Assert.assertEquals(project.rows.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.size(), 3); Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a"); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b\n2");
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f"); Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f");
} }