Merge pull request #1237.
Conflicts: .classpath main/webapp/modules/core/langs/translation-en.json main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js Closes #363 and #56.
This commit is contained in:
commit
00f8e4fc6b
@ -7,7 +7,6 @@
|
||||
<classpathentry kind="src" path="extensions/pc-axis/src"/>
|
||||
<classpathentry kind="src" path="extensions/sample/src"/>
|
||||
<classpathentry kind="src" path="main/tests/server/src"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/ant-tools-1.8.0.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/arithcode-1.1.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/butterfly-1.0.1.jar" sourcepath="main/webapp/WEB-INF/lib-src/butterfly-1.0.1-sources.jar"/>
|
||||
@ -83,7 +82,9 @@
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-3.13-20150929.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-3.13-20150929.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-schemas-3.13-20150929.jar"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||
<classpathentry kind="lib" path="extensions/jython/module/MOD-INF/lib/jython-standalone-2.7.1.jar"/>
|
||||
<classpathentry kind="lib" path="main/tests/data"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/swc-parser-lazy-3.1.5-jar-with-dependencies.jar" sourcepath="main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar" />
|
||||
<classpathentry kind="output" path="build"/>
|
||||
</classpath>
|
||||
|
@ -60,6 +60,7 @@ licenses/apache2.0.LICENSE.txt
|
||||
signpost
|
||||
opencsv
|
||||
textng
|
||||
swc-parser-lazy
|
||||
|
||||
Apache License 1.1
|
||||
------------------
|
||||
|
@ -24,6 +24,8 @@ public class TextFormatGuesser implements FormatGuesser {
|
||||
int closeBraces = 0;
|
||||
int openAngleBrackets = 0;
|
||||
int closeAngleBrackets = 0;
|
||||
int wikiTableBegin = 0;
|
||||
int wikiTableRow = 0;
|
||||
int trailingPeriods = 0;
|
||||
|
||||
char firstChar = ' ';
|
||||
@ -37,6 +39,8 @@ public class TextFormatGuesser implements FormatGuesser {
|
||||
closeBraces += countSubstrings(chunk, "}");
|
||||
openAngleBrackets += countSubstrings(chunk, "<");
|
||||
closeAngleBrackets += countSubstrings(chunk, ">");
|
||||
wikiTableBegin += countSubstrings(chunk, "{|");
|
||||
wikiTableRow += countSubstrings(chunk, "|-");
|
||||
trailingPeriods += countLineSuffix(chunk, ".");
|
||||
|
||||
if (!foundFirstChar) {
|
||||
@ -50,7 +54,9 @@ public class TextFormatGuesser implements FormatGuesser {
|
||||
}
|
||||
|
||||
if (foundFirstChar) {
|
||||
if ((firstChar == '{' || firstChar == '[') &&
|
||||
if (wikiTableBegin >= 1 && wikiTableRow >= 2) {
|
||||
return "text/wiki";
|
||||
} if ((firstChar == '{' || firstChar == '[') &&
|
||||
openBraces >= 5 && closeBraces >= 5) {
|
||||
return "text/json";
|
||||
} else if (openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
||||
|
638
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
638
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
@ -0,0 +1,638 @@
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import com.google.common.io.CharStreams;
|
||||
import de.fau.cs.osr.ptk.common.AstVisitor;
|
||||
|
||||
import org.sweble.wikitext.parser.ParserConfig;
|
||||
import org.sweble.wikitext.parser.utils.SimpleParserConfig;
|
||||
import org.sweble.wikitext.parser.WikitextParser;
|
||||
import org.sweble.wikitext.parser.nodes.WtBold;
|
||||
import org.sweble.wikitext.parser.nodes.WtItalics;
|
||||
import org.sweble.wikitext.parser.nodes.WtNewline;
|
||||
import org.sweble.wikitext.parser.nodes.WtNode;
|
||||
import org.sweble.wikitext.parser.nodes.WtSection;
|
||||
import org.sweble.wikitext.parser.nodes.WtTemplate;
|
||||
import org.sweble.wikitext.parser.nodes.WtTemplateArgument;
|
||||
import org.sweble.wikitext.parser.nodes.WtTemplateArguments;
|
||||
import org.sweble.wikitext.parser.nodes.WtText;
|
||||
import org.sweble.wikitext.parser.nodes.WtInternalLink;
|
||||
import org.sweble.wikitext.parser.nodes.WtExternalLink;
|
||||
import org.sweble.wikitext.parser.nodes.WtLinkTitle;
|
||||
import org.sweble.wikitext.parser.nodes.WtLinkTitle.WtNoLinkTitle;
|
||||
import org.sweble.wikitext.parser.nodes.WtUrl;
|
||||
import org.sweble.wikitext.parser.nodes.WtTable;
|
||||
import org.sweble.wikitext.parser.nodes.WtTableHeader;
|
||||
import org.sweble.wikitext.parser.nodes.WtTableRow;
|
||||
import org.sweble.wikitext.parser.nodes.WtTableCell;
|
||||
import org.sweble.wikitext.parser.nodes.WtTableCaption;
|
||||
import org.sweble.wikitext.parser.nodes.WtXmlAttributes;
|
||||
import org.sweble.wikitext.parser.nodes.WtXmlAttribute;
|
||||
import org.sweble.wikitext.parser.nodes.WtName;
|
||||
import org.sweble.wikitext.parser.nodes.WtValue;
|
||||
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||
import org.sweble.wikitext.parser.nodes.WtBody;
|
||||
import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag;
|
||||
import org.sweble.wikitext.parser.nodes.WtXmlEndTag;
|
||||
import org.sweble.wikitext.parser.nodes.WtXmlStartTag;
|
||||
|
||||
import org.sweble.wikitext.parser.WikitextEncodingValidator;
|
||||
import org.sweble.wikitext.parser.WikitextPreprocessor;
|
||||
import org.sweble.wikitext.parser.encval.ValidatedWikitext;
|
||||
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||
import org.sweble.wikitext.parser.nodes.WtPreproWikitextPage;
|
||||
import org.sweble.wikitext.parser.parser.PreprocessorToParserTransformer;
|
||||
import org.sweble.wikitext.parser.preprocessor.PreprocessedWikitext;
|
||||
|
||||
import xtc.parser.ParseException;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.ReconStats;
|
||||
import com.google.refine.model.recon.StandardReconConfig.ColumnDetail;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.model.recon.StandardReconConfig;
|
||||
import com.google.refine.model.recon.ReconJob;
|
||||
|
||||
|
||||
public class WikitextImporter extends TabularImportingParserBase {
|
||||
// static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
|
||||
|
||||
/**
 * Creates a wikitext table importer.
 *
 * NOTE(review): the {@code false} argument is forwarded unchanged to the
 * {@code TabularImportingParserBase} constructor; presumably it selects
 * reader-based (rather than stream-based) input — confirm against the base class.
 */
public WikitextImporter() {
    super(false);
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||
JSONUtilities.safePut(options, "blankSpanningCells", true);
|
||||
JSONUtilities.safePut(options, "includeRawTemplates", false);
|
||||
JSONUtilities.safePut(options, "wikiUrl", "https://en.wikipedia.org/wiki/");
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
private class SpanningCell {
|
||||
public String value;
|
||||
public String reconciled;
|
||||
public int colspan;
|
||||
public int rowspan;
|
||||
public int row;
|
||||
public int col;
|
||||
|
||||
SpanningCell(String value, String reconciled, int row, int col, int rowspan, int colspan) {
|
||||
this.value = value;
|
||||
this.reconciled = reconciled;
|
||||
this.row = row;
|
||||
this.col = col;
|
||||
this.rowspan = rowspan;
|
||||
this.colspan = colspan;
|
||||
}
|
||||
}
|
||||
|
||||
private class WikilinkedCell {
|
||||
public String internalLink;
|
||||
public int row;
|
||||
public int col;
|
||||
|
||||
WikilinkedCell(String internalLink, int row, int col) {
|
||||
this.internalLink = internalLink;
|
||||
this.row = row;
|
||||
this.col = col;
|
||||
}
|
||||
|
||||
public String toURL(String wikiBaseUrl) {
|
||||
return wikiBaseUrl + internalLink;
|
||||
}
|
||||
}
|
||||
|
||||
public class WikitextTableVisitor extends AstVisitor<WtNode> {
|
||||
|
||||
public String caption;
|
||||
public List<String> header;
|
||||
public List<List<String>> rows;
|
||||
public List<WikilinkedCell> wikilinkedCells;
|
||||
private List<String> currentRow;
|
||||
|
||||
private boolean blankSpanningCells;
|
||||
private boolean includeRawTemplates;
|
||||
|
||||
private int rowId;
|
||||
private List<SpanningCell> spanningCells;
|
||||
private StringBuilder cellStringBuilder;
|
||||
private StringBuilder xmlAttrStringBuilder;
|
||||
private String currentXmlAttr;
|
||||
private String currentInternalLink;
|
||||
private String currentExternalLink;
|
||||
private int colspan;
|
||||
private int rowspan;
|
||||
private int spanningCellIdx;
|
||||
private List<String> internalLinksInCell;
|
||||
|
||||
public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) {
|
||||
this.blankSpanningCells = blankSpanningCells;
|
||||
this.includeRawTemplates = includeRawTemplates;
|
||||
caption = null;
|
||||
header = new ArrayList<String>();
|
||||
rows = new ArrayList<List<String>>();
|
||||
wikilinkedCells = new ArrayList<WikilinkedCell>();
|
||||
spanningCells = new ArrayList<SpanningCell>();
|
||||
cellStringBuilder = null;
|
||||
xmlAttrStringBuilder = null;
|
||||
currentInternalLink = null;
|
||||
currentExternalLink = null;
|
||||
colspan = 0;
|
||||
rowspan = 0;
|
||||
rowId = -1;
|
||||
spanningCellIdx = 0;
|
||||
internalLinksInCell = new ArrayList<String>();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected WtNode before(WtNode node) {
|
||||
return super.before(node);
|
||||
}
|
||||
|
||||
/* Default handler */
|
||||
|
||||
public void visit(WtNode e) {
|
||||
// Ignore other nodes
|
||||
System.out.println(e.getNodeName());
|
||||
}
|
||||
|
||||
/* Table handling */
|
||||
|
||||
public void visit(WtTable e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtTableHeader e) {
|
||||
String columnName = renderCellAsString(e);
|
||||
header.add(columnName);
|
||||
// For the header, we ignore rowspan and manually add cells for colspan
|
||||
if (colspan > 1) {
|
||||
for (int i = 0; i < colspan-1; i++) {
|
||||
header.add(columnName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtTableCaption e) {
|
||||
caption = renderCellAsString(e);
|
||||
}
|
||||
|
||||
public void visit(WtTableRow e)
|
||||
{
|
||||
if (currentRow == null) {
|
||||
if (rowId == -1) {
|
||||
// no header was found, start on the first row
|
||||
rowId = 0;
|
||||
}
|
||||
currentRow = new ArrayList<String>();
|
||||
spanningCellIdx = 0;
|
||||
addSpanningCells();
|
||||
iterate(e);
|
||||
if(currentRow.size() > 0) {
|
||||
rows.add(currentRow);
|
||||
rowId++;
|
||||
}
|
||||
currentRow = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtTableCell e)
|
||||
{
|
||||
if (currentRow != null) {
|
||||
rowspan = 1;
|
||||
colspan = 1;
|
||||
internalLinksInCell.clear();
|
||||
String value = renderCellAsString(e);
|
||||
|
||||
int colId = currentRow.size();
|
||||
|
||||
// Add the cell to the row we are currently building
|
||||
currentRow.add(value);
|
||||
|
||||
// Reconcile it if we found exactly one link in the cell
|
||||
String reconciled = null;
|
||||
if (internalLinksInCell.size() == 1) {
|
||||
reconciled = internalLinksInCell.get(0);
|
||||
wikilinkedCells.add(new WikilinkedCell(reconciled, rowId, colId));
|
||||
}
|
||||
|
||||
// Mark it as spanning if we found the tags
|
||||
if (colspan > 1 || rowspan > 1) {
|
||||
SpanningCell spanningCell = new SpanningCell(
|
||||
value, reconciled, rowId, colId, rowspan, colspan);
|
||||
spanningCells.add(spanningCellIdx, spanningCell);
|
||||
}
|
||||
|
||||
// Add all spanning cells that need to be inserted after this one.
|
||||
addSpanningCells();
|
||||
}
|
||||
}
|
||||
|
||||
public String renderCellAsString(WtNode e) {
|
||||
cellStringBuilder = new StringBuilder();
|
||||
iterate(e);
|
||||
String value = cellStringBuilder.toString();
|
||||
if (value == null) {
|
||||
value = "";
|
||||
}
|
||||
value = value.trim();
|
||||
cellStringBuilder = null;
|
||||
return value;
|
||||
}
|
||||
|
||||
public void visit(WtText text) {
|
||||
writeText(text.getContent());
|
||||
}
|
||||
|
||||
public void visit(WtNewline e) {
|
||||
writeText("\n");
|
||||
}
|
||||
|
||||
public void visit(WtXmlEmptyTag tag) {
|
||||
if("br".equals(tag.getName())) {
|
||||
writeText("\n");
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtXmlStartTag tag) {
|
||||
if("br".equals(tag.getName())) {
|
||||
writeText("\n");
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtXmlEndTag tag) {
|
||||
if("br".equals(tag.getName())) {
|
||||
writeText("\n");
|
||||
}
|
||||
}
|
||||
|
||||
public void writeText(String text) {
|
||||
if (xmlAttrStringBuilder != null) {
|
||||
xmlAttrStringBuilder.append(text);
|
||||
} else if (cellStringBuilder != null) {
|
||||
cellStringBuilder.append(text);
|
||||
}
|
||||
}
|
||||
|
||||
/* Spanning cell helpers */
|
||||
|
||||
private SpanningCell spanningCell() {
|
||||
return spanningCells.get(spanningCellIdx);
|
||||
}
|
||||
|
||||
private void addSpanningCells() {
|
||||
while (spanningCellIdx < spanningCells.size() &&
|
||||
currentRow.size() >= spanningCell().col) {
|
||||
// Add blank cells to represent the current spanning cell
|
||||
SpanningCell cell = spanningCell();
|
||||
if (cell.row + cell.rowspan >= rowId + 1) {
|
||||
while(currentRow.size() < cell.col + cell.colspan) {
|
||||
if (blankSpanningCells) {
|
||||
currentRow.add(null);
|
||||
} else {
|
||||
currentRow.add(cell.value);
|
||||
if (cell.reconciled != null) {
|
||||
wikilinkedCells.add(new WikilinkedCell(cell.reconciled, rowId, currentRow.size()-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check if this spanning cell has been fully represented
|
||||
if(cell.row + cell.rowspan <= rowId + 1) {
|
||||
spanningCells.remove(spanningCellIdx);
|
||||
} else {
|
||||
spanningCellIdx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* XML attributes : useful for colspan and rowspan */
|
||||
|
||||
public void visit(WtXmlAttributes e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtXmlAttribute e) {
|
||||
if (currentXmlAttr == null) {
|
||||
xmlAttrStringBuilder = new StringBuilder();
|
||||
iterate(e);
|
||||
try {
|
||||
int attrValue = Integer.parseInt(xmlAttrStringBuilder.toString());
|
||||
if ("colspan".equals(currentXmlAttr)) {
|
||||
colspan = attrValue;
|
||||
} else if ("rowspan".equals(currentXmlAttr)) {
|
||||
rowspan = attrValue;
|
||||
}
|
||||
} catch (NumberFormatException _) {
|
||||
}
|
||||
currentXmlAttr = null;
|
||||
xmlAttrStringBuilder = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtName e) {
|
||||
try {
|
||||
currentXmlAttr = e.getAsString();
|
||||
} catch (UnsupportedOperationException _) {
|
||||
currentXmlAttr = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtValue e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
/* Link management */
|
||||
|
||||
public void visit(WtInternalLink e) {
|
||||
currentInternalLink = e.getTarget().getAsString();
|
||||
internalLinksInCell.add(currentInternalLink);
|
||||
iterate(e);
|
||||
currentInternalLink = null;
|
||||
}
|
||||
|
||||
public void visit(WtExternalLink e) {
|
||||
WtUrl url = e.getTarget();
|
||||
String externalLink = url.getProtocol() + ":" + url.getPath();
|
||||
if (cellStringBuilder != null) {
|
||||
if(rowId >= 0) {
|
||||
// We are inside the table: all hyperlinks
|
||||
// should be converted to their URLs regardless of
|
||||
// their label.
|
||||
cellStringBuilder.append(externalLink);
|
||||
} else {
|
||||
// We are in the header: keep the labels instead
|
||||
currentExternalLink = externalLink;
|
||||
iterate(e);
|
||||
currentExternalLink = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtNoLinkTitle e) {
|
||||
if (cellStringBuilder != null) {
|
||||
if (currentInternalLink != null) {
|
||||
cellStringBuilder.append(currentInternalLink);
|
||||
} else if (currentExternalLink != null) {
|
||||
cellStringBuilder.append(currentExternalLink);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtLinkTitle e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtUrl e) {
|
||||
// already handled, in WtExternalLink, added here for clarity
|
||||
}
|
||||
|
||||
/* Templates */
|
||||
|
||||
public void visit(WtTemplate e) {
|
||||
if (includeRawTemplates) {
|
||||
writeText("{{"+e.getName().getAsString());
|
||||
WtTemplateArguments args = e.getArgs();
|
||||
for (int i = 0; i != args.size(); i++) {
|
||||
writeText("|");
|
||||
iterate(args.get(i));
|
||||
}
|
||||
writeText("}}");
|
||||
}
|
||||
}
|
||||
|
||||
public void visit(WtTemplateArgument e) {
|
||||
writeText("|");
|
||||
if(e.hasName()) {
|
||||
writeText(e.getName().getAsString());
|
||||
writeText("=");
|
||||
}
|
||||
iterate(e.getValue());
|
||||
}
|
||||
|
||||
/* Content blocks */
|
||||
|
||||
public void visit(WtParsedWikitextPage e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtSection e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtBody e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtItalics e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
public void visit(WtBold e) {
|
||||
iterate(e);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Object after(WtNode node, Object result)
|
||||
{
|
||||
return rows;
|
||||
}
|
||||
}
|
||||
|
||||
public class WikiTableDataReader implements TableDataReader {
|
||||
private int currentRow = -1;
|
||||
private WikitextTableVisitor visitor = null;
|
||||
private List<List<Recon>> reconList = null;
|
||||
private List<Boolean> columnReconciled = null;
|
||||
|
||||
public WikiTableDataReader(WikitextTableVisitor visitor) {
|
||||
this.visitor = visitor;
|
||||
currentRow = -1;
|
||||
reconList = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
List<Object> row = null;
|
||||
List<String> origRow = null;
|
||||
if (currentRow == -1) {
|
||||
origRow = this.visitor.header;
|
||||
} else if(currentRow < this.visitor.rows.size()) {
|
||||
origRow = this.visitor.rows.get(currentRow);
|
||||
}
|
||||
|
||||
if (origRow != null) {
|
||||
row = new ArrayList<Object>();
|
||||
for (int i = 0; i < origRow.size(); i++) {
|
||||
Recon recon = null;
|
||||
if (currentRow >= 0 && reconList != null) {
|
||||
recon = reconList.get(currentRow).get(i);
|
||||
}
|
||||
row.add(new Cell(origRow.get(i), recon));
|
||||
}
|
||||
}
|
||||
currentRow++;
|
||||
return row;
|
||||
}
|
||||
|
||||
private void reconcileToQids(String wikiBaseUrl, StandardReconConfig cfg) {
|
||||
if("null".equals(wikiBaseUrl)) {
|
||||
return; // TODO: more thorough URL validation instead
|
||||
}
|
||||
|
||||
// Init the list of recons
|
||||
reconList = new ArrayList<List<Recon>>();
|
||||
columnReconciled = new ArrayList<Boolean>();
|
||||
for (int i = 0; i < this.visitor.rows.size(); i++) {
|
||||
int rowSize = this.visitor.rows.get(i).size();
|
||||
List<Recon> recons = new ArrayList<Recon>(rowSize);
|
||||
for (int j = 0; j < rowSize; j++) {
|
||||
recons.add(null);
|
||||
if (i == 0)
|
||||
columnReconciled.add(false);
|
||||
}
|
||||
reconList.add(recons);
|
||||
|
||||
}
|
||||
|
||||
int batchSize = 50;
|
||||
int i = 0;
|
||||
int totalSize = this.visitor.wikilinkedCells.size();
|
||||
while (i < totalSize) {
|
||||
List<ReconJob> jobs = new ArrayList<ReconJob>();
|
||||
int batchStart = i;
|
||||
while (i < batchStart + batchSize && i < totalSize) {
|
||||
WikilinkedCell cell = this.visitor.wikilinkedCells.get(i);
|
||||
jobs.add(cfg.createSimpleJob(cell.toURL(wikiBaseUrl)));
|
||||
i++;
|
||||
}
|
||||
|
||||
List<Recon> recons = cfg.batchRecon(jobs, 0);
|
||||
for (int j = batchStart; j < batchStart + batchSize && j < totalSize; j++) {
|
||||
WikilinkedCell cell = this.visitor.wikilinkedCells.get(j);
|
||||
Recon recon = recons.get(j - batchStart);
|
||||
if (recon != null) {
|
||||
reconList.get(cell.row).set(cell.col, recon);
|
||||
columnReconciled.set(cell.col, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
// Set-up a simple wiki configuration
|
||||
ParserConfig parserConfig = new SimpleParserConfig();
|
||||
|
||||
try {
|
||||
// Encoding validation
|
||||
|
||||
WikitextEncodingValidator v = new WikitextEncodingValidator();
|
||||
|
||||
String wikitext = CharStreams.toString(reader);
|
||||
String title = "Page title";
|
||||
ValidatedWikitext validated = v.validate(parserConfig, wikitext, title);
|
||||
|
||||
// Pre-processing
|
||||
WikitextPreprocessor prep = new WikitextPreprocessor(parserConfig);
|
||||
|
||||
WtPreproWikitextPage prepArticle =
|
||||
(WtPreproWikitextPage) prep.parseArticle(validated, title, false);
|
||||
|
||||
// Parsing
|
||||
PreprocessedWikitext ppw = PreprocessorToParserTransformer
|
||||
.transform(prepArticle);
|
||||
|
||||
WikitextParser parser = new WikitextParser(parserConfig);
|
||||
|
||||
WtParsedWikitextPage parsedArticle;
|
||||
parsedArticle = (WtParsedWikitextPage) parser.parseArticle(ppw, title);
|
||||
|
||||
// Compile the retrieved page
|
||||
boolean blankSpanningCells = JSONUtilities.getBoolean(options, "blankSpanningCells", true);
|
||||
boolean includeRawTemplates = JSONUtilities.getBoolean(options, "includeRawTemplates", false);
|
||||
final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates);
|
||||
vs.go(parsedArticle);
|
||||
|
||||
WikiTableDataReader dataReader = new WikiTableDataReader(vs);
|
||||
|
||||
// Reconcile if needed
|
||||
String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
|
||||
// Wikidata reconciliation endpoint, hardcoded because the user might not have it in its services
|
||||
String reconUrl = JSONUtilities.getString(options, "reconService",
|
||||
"https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||
StandardReconConfig cfg = getReconConfig(reconUrl);
|
||||
|
||||
if (wikiUrl != null) {
|
||||
dataReader.reconcileToQids(wikiUrl, cfg);
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(options, "headerLines", 1);
|
||||
|
||||
// Set metadata
|
||||
if (vs.caption != null && vs.caption.length() > 0) {
|
||||
metadata.setName(vs.caption);
|
||||
// TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
|
||||
}
|
||||
|
||||
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
|
||||
// Add reconciliation statistics
|
||||
if (dataReader.columnReconciled != null) {
|
||||
for(int i = 0; i != dataReader.columnReconciled.size(); i++) {
|
||||
if (dataReader.columnReconciled.get(i)) {
|
||||
Column col = project.columnModel.columns.get(i);
|
||||
col.setReconStats(ReconStats.create(project, i));
|
||||
col.setReconConfig(cfg);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e1) {
|
||||
e1.printStackTrace();
|
||||
} catch (ParseException e1) {
|
||||
exceptions.add(e1);
|
||||
e1.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private StandardReconConfig getReconConfig(String url) {
|
||||
StandardReconConfig cfg = new StandardReconConfig(
|
||||
url,
|
||||
"http://www.wikidata.org/entity/",
|
||||
"http://www.wikidata.org/prop/direct/",
|
||||
"",
|
||||
"entity",
|
||||
true,
|
||||
new ArrayList<ColumnDetail>(),
|
||||
1
|
||||
);
|
||||
return cfg;
|
||||
}
|
||||
|
||||
}
|
@ -233,6 +233,30 @@ public class StandardReconConfig extends ReconConfig {
|
||||
return "Reconcile cells in column " + columnName + " to type " + typeID;
|
||||
}
|
||||
|
||||
public ReconJob createSimpleJob(String query) {
|
||||
/* Same as createJob, but for simpler queries
|
||||
* without any properties. This is much easier
|
||||
* to generate as there is no need for a Project,
|
||||
* Row and Cell: this means the job can be created
|
||||
* outside the usual context of reconciliation (e.g.
|
||||
* in an importer).
|
||||
*/
|
||||
StandardReconJob job = new StandardReconJob();
|
||||
try {
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("query");
|
||||
jsonWriter.value(query);
|
||||
jsonWriter.endObject();
|
||||
job.text = query;
|
||||
job.code = stringWriter.toString();
|
||||
return job;
|
||||
} catch (JSONException _) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReconJob createJob(Project project, int rowIndex, Row row,
|
||||
String columnName, Cell cell) {
|
||||
|
@ -0,0 +1,217 @@
|
||||
/*
|
||||
|
||||
Copyright 2010,2011 Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.importers.WikitextImporter;
|
||||
|
||||
public class WikitextImporterTests extends ImporterTest {
|
||||
|
||||
private WikitextImporter importer = null;
|
||||
|
||||
@Override
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
}
|
||||
|
||||
@Override
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
super.setUp();
|
||||
importer = new WikitextImporter();
|
||||
}
|
||||
|
||||
@Override
|
||||
@AfterMethod
|
||||
public void tearDown(){
|
||||
importer = null;
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSimpleData() {
|
||||
String input = "\n"
|
||||
+ "{|\n"
|
||||
+ "|-\n"
|
||||
+ "| a || b<br/>2 || c \n"
|
||||
+ "|-\n"
|
||||
+ "| d || e || f<br>\n"
|
||||
+ "|-\n"
|
||||
+ "|}\n";
|
||||
try {
|
||||
prepareOptions(0, true, true, null);
|
||||
parse(input);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Parsing failed", e);
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.rows.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b\n2");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTableWithLinks() {
|
||||
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||
String input = "\n"
|
||||
+"{|\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung|Cedefop]] || Cedefop || http://www.cedefop.europa.eu/\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || EUROFOUND || [http://www.eurofound.europa.eu/]\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/ europa.eu]\n"
|
||||
+"|-\n"
|
||||
+"|}\n";
|
||||
|
||||
try {
|
||||
prepareOptions(0, true, true, "https://de.wikipedia.org/wiki/");
|
||||
parse(input);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Parsing failed", e);
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.rows.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
|
||||
// Reconciled cells
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Cedefop");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).recon, null);
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(0).value, "Europäische Beobachtungsstelle für Drogen und Drogensucht");
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(0).recon.getBestCandidate().id, "Q1377256");
|
||||
|
||||
// various ways to input external links
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://www.eurofound.europa.eu/");
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(2).value, "http://www.emcdda.europa.eu/");
|
||||
// Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://www.cedefop.europa.eu/");
|
||||
// unfortunately the above does not seem to be supported by the parser (parsed as blank instead)
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readStyledTableWithHeader() {
|
||||
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||
String input = "\n"
|
||||
+"==Agenturen==\n"
|
||||
+"{| class=\"wikitable sortable\"\n"
|
||||
+"! style=\"text-align:left; width: 60em\" | Offizieller Name\n"
|
||||
+"! style=\"text-align:left; width: 9em\" | Abkürzung\n"
|
||||
+"! style=\"text-align:left; width: 6em\" | Website\n"
|
||||
+"! style=\"text-align:left; width: 15em\" | Standort\n"
|
||||
+"! style=\"text-align:left; width: 18em\" | Staat\n"
|
||||
+"! style=\"text-align:left; width: 6em\" | Gründung\n"
|
||||
+"! style=\"text-align:left; width: 50em\" | Anmerkungen\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung]] || '''Cedefop''' || [http://www.cedefop.europa.eu/] || [[Thessaloniki]] || {{Griechenland}} || 1975 ||\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || ''EUROFOUND'' || [http://www.eurofound.europa.eu/] || [[Dublin]] || {{Irland}} || 1975 ||\n"
|
||||
+"|-\n"
|
||||
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/] || [[Lissabon]] || {{Portugal}} || 1993 ||\n"
|
||||
+"|-\n"
|
||||
+"|}\n";
|
||||
|
||||
try {
|
||||
prepareOptions(-1, true, true, null);
|
||||
parse(input);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Parsing failed", e);
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 7);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "Europäisches Zentrum für die Förderung der Berufsbildung");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Cedefop");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "EUROFOUND");
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Offizieller Name");
|
||||
Assert.assertEquals(project.columnModel.columns.get(6).getName(), "Anmerkungen");
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 7);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTableWithSpanningCells() {
|
||||
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||
String input = "{| class=\"wikitable\"\n"
|
||||
+"!colspan=\"6\"|Shopping List\n"
|
||||
+"|-\n"
|
||||
+"|Bread & Butter\n"
|
||||
+"|Pie\n"
|
||||
+"|Buns\n"
|
||||
+"|rowspan=\"2\"|Danish\n"
|
||||
+"|colspan=\"2\"|Croissant\n"
|
||||
+"|-\n"
|
||||
+"|Cheese\n"
|
||||
+"|colspan=\"2\"|Ice cream\n"
|
||||
+"|Butter\n"
|
||||
+"|Yogurt\n"
|
||||
+"|}\n";
|
||||
|
||||
try {
|
||||
prepareOptions(-1, true, true, null);
|
||||
parse(input);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Parsing failed", e);
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 6);
|
||||
Assert.assertNull(project.rows.get(1).cells.get(2).value);
|
||||
Assert.assertNull(project.rows.get(1).cells.get(3).value);
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "Butter");
|
||||
}
|
||||
//--helpers--
|
||||
|
||||
private void parse(String wikitext) {
|
||||
parseOneFile(importer, new StringReader(wikitext));
|
||||
}
|
||||
|
||||
private void prepareOptions(
|
||||
int limit, boolean blankSpanningCells,
|
||||
boolean guessValueType, String wikiUrl) {
|
||||
|
||||
whenGetIntegerOption("limit", options, limit);
|
||||
whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
|
||||
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
|
||||
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
||||
whenGetStringOption("wikiUrl", options, wikiUrl);
|
||||
whenGetIntegerOption("headerLines", options, 1);
|
||||
whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||
}
|
||||
}
|
BIN
main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar
Normal file
BIN
main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar
Normal file
Binary file not shown.
Binary file not shown.
@ -212,6 +212,7 @@ function registerImporting() {
|
||||
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
||||
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
||||
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
||||
IM.registerFormat("text/wiki", "Wikitext", "WikitextParserUI", new Packages.com.google.refine.importers.WikitextImporter());
|
||||
|
||||
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
||||
|
||||
@ -344,7 +345,10 @@ function init() {
|
||||
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/json-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js"
|
||||
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/wikitext-parser-ui.js",
|
||||
|
||||
"scripts/reconciliation/recon-manager.js" // so that reconciliation functions are available to importers
|
||||
]
|
||||
);
|
||||
|
||||
@ -372,7 +376,8 @@ function init() {
|
||||
"styles/views/data-table-view.less", // for the preview table's styles
|
||||
"styles/index/fixed-width-parser-ui.less",
|
||||
"styles/index/xml-parser-ui.less",
|
||||
"styles/index/json-parser-ui.less"
|
||||
"styles/index/json-parser-ui.less",
|
||||
"styles/index/wikitext-parser-ui.less",
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -113,6 +113,10 @@
|
||||
"parse-cell": "Parse cell text into<br/>numbers, dates, ...",
|
||||
"store-blank": "Store blank rows",
|
||||
"store-nulls": "Store blank cells as nulls",
|
||||
"blank-spanning-cells": "Pad cells spanning over multiple rows or columns with nulls",
|
||||
"include-raw-templates": "Include raw templates as wikicode",
|
||||
"wiki-base-url": "Reconcile to wiki with base URL:",
|
||||
"invalid-wikitext": "No table could be parsed. Are you sure this is a valid wiki table?",
|
||||
"store-source": "Store file source <br/>(file names, URLs)<br/>in each row",
|
||||
"preserve-empty": "Preserve empty strings",
|
||||
"trim": "Trim leading & trailing whitespace from strings",
|
||||
|
@ -73,8 +73,8 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
|
||||
this._service = service;
|
||||
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
||||
this._serviceMetadata = serviceMetadata;
|
||||
if ("extend" in serviceMetadata) {
|
||||
extend = serviceMetadata.extend;
|
||||
if (serviceMetadata != null && "extend" in serviceMetadata) {
|
||||
var extend = serviceMetadata.extend;
|
||||
if ("propose_properties" in extend) {
|
||||
var endpoint = extend.propose_properties;
|
||||
this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path;
|
||||
|
@ -83,8 +83,13 @@ Refine.PreviewTable.prototype._render = function() {
|
||||
$('<span>').html(" ").appendTo(divContent);
|
||||
} else if ("e" in cell) {
|
||||
$('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
|
||||
} else if (!("r" in cell) || !cell.r) {
|
||||
if (typeof cell.v !== "string") {
|
||||
} else {
|
||||
if ("r" in cell && cell.ri !== null) {
|
||||
$('<a>')
|
||||
.attr("href", "#") // we don't have access to the reconciliation data here
|
||||
.text(cell.v)
|
||||
.appendTo(divContent);
|
||||
} else if (typeof cell.v !== "string") {
|
||||
if (typeof cell.v == "number") {
|
||||
divContent.addClass("data-table-cell-content-numeric");
|
||||
}
|
||||
|
@ -0,0 +1,29 @@
|
||||
<div class="grid-layout layout-loose"><table>
|
||||
<tr><td colspan="2" id="or-import-colsep"></td></tr>
|
||||
|
||||
<tr><td width="1%"><input type="checkbox" bind="wikiCheckbox" id="$reconcileWiki" /></td><td><label for="$reconcileWiki" id="or-import-wiki-base-url"></label>
|
||||
<input bind="wikiUrlInput" type="text" class="lightweight" size="30" id="$wikiUrl" /></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" id="$limit" /></td>
|
||||
<td><label for="$limit" id="or-import-load"></label>
|
||||
<input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$limit" id="or-import-rows2"></label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="blankSpanningCellsCheckbox" id="$blank-spanning-cells" /></td>
|
||||
<td><label for="$blank-spanning-cells" id="or-import-blankSpanningCells"></label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" id="$store-blank-rows" /></td>
|
||||
<td colspan="2"><label for="$store-blank-rows" id="or-import-blank"></label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeRawTemplatesCheckbox" id="$include-raw-templates" /></td>
|
||||
<td colspan="2"><label for="$include-raw-templates" id="or-import-includeRawTemplates"></label></td></tr>
|
||||
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" id="$store-blank-cells" /></td>
|
||||
<td colspan="2"><label for="$store-blank-cells" id="or-import-null"></label></td></tr>
|
||||
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
|
||||
<td><label for="$include-file-sources" id="or-import-source"></label></td></tr>
|
||||
|
||||
<tr>
|
||||
<td width="1%"></td>
|
||||
<td><button class="button" bind="previewButton"></button></td>
|
||||
</tr>
|
||||
</table></div>
|
@ -0,0 +1,205 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
 * Parser UI for the wikitext importer. Stores the importing context and the
 * three container elements, then builds the option panel and requests a
 * first preview.
 */
Refine.WikitextParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {
  // Containers this UI renders into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Importing context.
  this._config = config;
  this._format = format;
  this._job = job;
  this._jobID = jobID;
  this._controller = controller;

  // Handle of the pending debounced preview refresh, if any.
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};

// Make the UI available to the importing controller under its format UI name.
Refine.DefaultImportingController.parserUIs.WikitextParserUI = Refine.WikitextParserUI;
|
||||
|
||||
/**
 * Cancels the pending preview refresh, if one is scheduled.
 */
Refine.WikitextParserUI.prototype.dispose = function() {
  if (this._timerID === null) {
    return; // nothing scheduled
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||
|
||||
/**
 * Reports whether project creation may proceed; this UI has no blocking
 * state, so the answer is always true.
 */
Refine.WikitextParserUI.prototype.confirmReadyToCreateProject = function() {
  return true;
};
|
||||
|
||||
/**
 * Reads the current state of the option panel and returns it as the options
 * object sent to the importer. Also makes sure the default reconciliation
 * service is registered and records its URL in the options.
 */
Refine.WikitextParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;

  // True when the bound checkbox element is ticked.
  var isChecked = function(checkbox) {
    return checkbox[0].checked;
  };

  // Base-10 integer parse that falls back to `def` on anything non-numeric.
  var parseIntDefault = function(s, def) {
    var n;
    try {
      n = parseInt(s,10);
    } catch (e) {
      return def;
    }
    return isNaN(n) ? def : n;
  };

  var options = {};

  // Reconcile against a wiki only when the box is ticked.
  options.wikiUrl = isChecked(elmts.wikiCheckbox) ? elmts.wikiUrlInput[0].value : null;

  // -1 means "no row limit".
  options.limit = isChecked(elmts.limitCheckbox) ?
      parseIntDefault(elmts.limitInput[0].value, -1) : -1;

  options.storeBlankRows = isChecked(elmts.storeBlankRowsCheckbox);
  options.blankSpanningCells = isChecked(elmts.blankSpanningCellsCheckbox);
  options.includeRawTemplates = isChecked(elmts.includeRawTemplatesCheckbox);
  options.guessCellValueTypes = isChecked(elmts.guessCellValueTypesCheckbox);
  options.storeBlankCellsAsNulls = isChecked(elmts.storeBlankCellsAsNullsCheckbox);
  options.includeFileSources = isChecked(elmts.includeFileSourcesCheckbox);

  options.reconService = ReconciliationManager.ensureDefaultServicePresent();

  return options;
};
|
||||
|
||||
/**
 * Builds the option panel: loads the HTML template, binds its elements,
 * fills in the localized labels, pre-selects the controls from the importer
 * configuration and wires change events to a debounced preview refresh.
 */
Refine.WikitextParserUI.prototype._initialize = function() {
  var self = this;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/wikitext-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  this._optionContainerElmts.previewButton.click(function() { self._updatePreview(); });

  this._optionContainerElmts.previewButton.html($.i18n._('core-buttons')["update-preview"]);

  // Localized labels; entries that contain markup (<br/>) are set with .html().
  $('#or-import-wiki-base-url').text($.i18n._('core-index-parser')["wiki-base-url"]);
  $('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]);
  $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]);
  $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]);
  $('#or-import-blankSpanningCells').text($.i18n._('core-index-parser')["blank-spanning-cells"]);
  $('#or-import-includeRawTemplates').text($.i18n._('core-index-parser')["include-raw-templates"]);
  $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]);
  $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]);
  $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]);

  // Check for a missing value BEFORE calling toString(): the configuration
  // may carry no wiki URL (getOptions itself emits null when the box is
  // unticked), and calling a method on null/undefined would throw.
  var wikiUrl = this._config.wikiUrl;
  if (wikiUrl !== null && wikiUrl !== undefined) {
    this._optionContainerElmts.wikiUrlInput[0].value = wikiUrl.toString();
    this._optionContainerElmts.wikiCheckbox.prop("checked", true);
  }

  // Only a strictly positive limit ticks the "load at most" box.
  if (this._config.limit > 0) {
    this._optionContainerElmts.limitCheckbox.prop("checked", true);
    this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
  }

  if (this._config.blankSpanningCells) {
    this._optionContainerElmts.blankSpanningCellsCheckbox.prop("checked", true);
  }

  if (this._config.includeRawTemplates) {
    this._optionContainerElmts.includeRawTemplatesCheckbox.prop("checked", true);
  }

  if (this._config.storeBlankRows) {
    this._optionContainerElmts.storeBlankRowsCheckbox.prop("checked", true);
  }

  if (this._config.guessCellValueTypes) {
    this._optionContainerElmts.guessCellValueTypesCheckbox.prop("checked", true);
  }

  if (this._config.storeBlankCellsAsNulls) {
    this._optionContainerElmts.storeBlankCellsAsNullsCheckbox.prop("checked", true);
  }
  if (this._config.includeFileSources) {
    this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true);
  }

  // Any change to an input or select schedules a preview refresh.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
|
||||
|
||||
/**
 * Debounces preview refreshes: any refresh already pending is cancelled and
 * a new one is scheduled half a second from now.
 */
Refine.WikitextParserUI.prototype._scheduleUpdatePreview = function() {
  var self = this;
  var pending = this._timerID;

  if (pending !== null) {
    window.clearTimeout(pending);
    this._timerID = null;
  }

  var refresh = function() {
    self._timerID = null;
    self._updatePreview();
  };
  this._timerID = window.setTimeout(refresh, 500); // 0.5 second
};
|
||||
|
||||
/**
 * Sends the current options to the importing controller and, when it answers
 * with status "ok", fetches the preview data and renders it: either the
 * preview table, or a message when no row could be parsed.
 */
Refine.WikitextParserUI.prototype._updatePreview = function() {
  var self = this;

  // Renders the fetched preview into the (emptied) data container.
  var renderPreview = function(projectData) {
    self._progressContainer.hide();

    var container = self._dataContainer.unbind().empty();
    if (projectData.rowModel.rows.length !== 0) {
      new Refine.PreviewTable(projectData, container);
      return;
    }
    $('<div>').addClass("wikitext-parser-ui-message")
      .text($.i18n._('core-index-parser')["invalid-wikitext"]).appendTo(container);
  };

  this._progressContainer.show();

  this._controller.updateFormatAndOptions(this.getOptions(), function(result) {
    if (result.status !== "ok") {
      return;
    }
    self._controller.getPreviewData(renderPreview);
  });
};
|
@ -127,8 +127,27 @@ ReconciliationManager.save = function(f) {
|
||||
});
|
||||
};
|
||||
|
||||
(function() {
|
||||
/**
 * Hands the service registered for `url` to the callback `f`. When no such
 * service is known yet, it is registered and the service list saved first;
 * `f` is then called with the newly registered entry.
 */
ReconciliationManager.getOrRegisterServiceFromUrl = function(url, f) {
  var known = ReconciliationManager.getServiceFromUrl(url);
  if (known != null) {
    f(known);
    return;
  }

  // Unknown URL: register, persist, then report the new entry.
  ReconciliationManager.registerStandardService(url, function(idx) {
    ReconciliationManager.save(function() {
      f(ReconciliationManager.standardServices[idx]);
    });
  });
};
|
||||
|
||||
/**
 * Makes sure the Wikidata reconciliation service for the current UI language
 * is registered, and returns its URL. Registration may complete after this
 * function has returned.
 */
ReconciliationManager.ensureDefaultServicePresent = function() {
  var lang = $.i18n._('core-recon')["wd-recon-lang"];
  var url = "https://tools.wmflabs.org/openrefine-wikidata/" + lang + "/api";
  var ignoreService = function(service) { };

  ReconciliationManager.getOrRegisterServiceFromUrl(url, ignoreService);
  return url;
};
|
||||
|
||||
(function() {
|
||||
|
||||
$.ajax({
|
||||
async: false,
|
||||
@ -140,9 +159,7 @@ ReconciliationManager.save = function(f) {
|
||||
ReconciliationManager.standardServices = JSON.parse(data.value);
|
||||
ReconciliationManager._rebuildMap();
|
||||
} else {
|
||||
ReconciliationManager.registerStandardService(
|
||||
"https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api"
|
||||
);
|
||||
ReconciliationManager.ensureDefaultServicePresent();
|
||||
}
|
||||
},
|
||||
dataType: "json"
|
||||
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
@import-less url("../theme.less");
|
||||
|
||||
.wikitext-parser-ui-message {
|
||||
background: #eee;
|
||||
font-size: 150%;
|
||||
color: #666;
|
||||
padding: 20px;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user