Merge pull request #1237.
Conflicts: .classpath main/webapp/modules/core/langs/translation-en.json main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js Closes #363 and #56.
This commit is contained in:
commit
00f8e4fc6b
@ -7,7 +7,6 @@
|
|||||||
<classpathentry kind="src" path="extensions/pc-axis/src"/>
|
<classpathentry kind="src" path="extensions/pc-axis/src"/>
|
||||||
<classpathentry kind="src" path="extensions/sample/src"/>
|
<classpathentry kind="src" path="extensions/sample/src"/>
|
||||||
<classpathentry kind="src" path="main/tests/server/src"/>
|
<classpathentry kind="src" path="main/tests/server/src"/>
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
|
|
||||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/ant-tools-1.8.0.jar"/>
|
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/ant-tools-1.8.0.jar"/>
|
||||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/arithcode-1.1.jar"/>
|
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/arithcode-1.1.jar"/>
|
||||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/butterfly-1.0.1.jar" sourcepath="main/webapp/WEB-INF/lib-src/butterfly-1.0.1-sources.jar"/>
|
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/butterfly-1.0.1.jar" sourcepath="main/webapp/WEB-INF/lib-src/butterfly-1.0.1-sources.jar"/>
|
||||||
@ -83,7 +82,9 @@
|
|||||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-3.13-20150929.jar"/>
|
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-3.13-20150929.jar"/>
|
||||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-3.13-20150929.jar"/>
|
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-3.13-20150929.jar"/>
|
||||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-schemas-3.13-20150929.jar"/>
|
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/poi-ooxml-schemas-3.13-20150929.jar"/>
|
||||||
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||||
<classpathentry kind="lib" path="extensions/jython/module/MOD-INF/lib/jython-standalone-2.7.1.jar"/>
|
<classpathentry kind="lib" path="extensions/jython/module/MOD-INF/lib/jython-standalone-2.7.1.jar"/>
|
||||||
<classpathentry kind="lib" path="main/tests/data"/>
|
<classpathentry kind="lib" path="main/tests/data"/>
|
||||||
|
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/swc-parser-lazy-3.1.5-jar-with-dependencies.jar" sourcepath="main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar" />
|
||||||
<classpathentry kind="output" path="build"/>
|
<classpathentry kind="output" path="build"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
@ -60,6 +60,7 @@ licenses/apache2.0.LICENSE.txt
|
|||||||
signpost
|
signpost
|
||||||
opencsv
|
opencsv
|
||||||
textng
|
textng
|
||||||
|
swc-parser-lazy
|
||||||
|
|
||||||
Apache License 1.1
|
Apache License 1.1
|
||||||
------------------
|
------------------
|
||||||
|
@ -24,6 +24,8 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
int closeBraces = 0;
|
int closeBraces = 0;
|
||||||
int openAngleBrackets = 0;
|
int openAngleBrackets = 0;
|
||||||
int closeAngleBrackets = 0;
|
int closeAngleBrackets = 0;
|
||||||
|
int wikiTableBegin = 0;
|
||||||
|
int wikiTableRow = 0;
|
||||||
int trailingPeriods = 0;
|
int trailingPeriods = 0;
|
||||||
|
|
||||||
char firstChar = ' ';
|
char firstChar = ' ';
|
||||||
@ -37,6 +39,8 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
closeBraces += countSubstrings(chunk, "}");
|
closeBraces += countSubstrings(chunk, "}");
|
||||||
openAngleBrackets += countSubstrings(chunk, "<");
|
openAngleBrackets += countSubstrings(chunk, "<");
|
||||||
closeAngleBrackets += countSubstrings(chunk, ">");
|
closeAngleBrackets += countSubstrings(chunk, ">");
|
||||||
|
wikiTableBegin += countSubstrings(chunk, "{|");
|
||||||
|
wikiTableRow += countSubstrings(chunk, "|-");
|
||||||
trailingPeriods += countLineSuffix(chunk, ".");
|
trailingPeriods += countLineSuffix(chunk, ".");
|
||||||
|
|
||||||
if (!foundFirstChar) {
|
if (!foundFirstChar) {
|
||||||
@ -50,7 +54,9 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (foundFirstChar) {
|
if (foundFirstChar) {
|
||||||
if ((firstChar == '{' || firstChar == '[') &&
|
if (wikiTableBegin >= 1 && wikiTableRow >= 2) {
|
||||||
|
return "text/wiki";
|
||||||
|
} if ((firstChar == '{' || firstChar == '[') &&
|
||||||
openBraces >= 5 && closeBraces >= 5) {
|
openBraces >= 5 && closeBraces >= 5) {
|
||||||
return "text/json";
|
return "text/json";
|
||||||
} else if (openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
} else if (openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
||||||
|
638
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
638
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
@ -0,0 +1,638 @@
|
|||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import com.google.common.io.CharStreams;
|
||||||
|
import de.fau.cs.osr.ptk.common.AstVisitor;
|
||||||
|
|
||||||
|
import org.sweble.wikitext.parser.ParserConfig;
|
||||||
|
import org.sweble.wikitext.parser.utils.SimpleParserConfig;
|
||||||
|
import org.sweble.wikitext.parser.WikitextParser;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtBold;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtItalics;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtNewline;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtNode;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtSection;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTemplate;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTemplateArgument;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTemplateArguments;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtText;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtInternalLink;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtExternalLink;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtLinkTitle;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtLinkTitle.WtNoLinkTitle;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtUrl;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTable;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableHeader;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableRow;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableCell;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableCaption;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtXmlAttributes;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtXmlAttribute;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtName;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtValue;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtBody;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtXmlEmptyTag;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtXmlEndTag;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtXmlStartTag;
|
||||||
|
|
||||||
|
import org.sweble.wikitext.parser.WikitextEncodingValidator;
|
||||||
|
import org.sweble.wikitext.parser.WikitextPreprocessor;
|
||||||
|
import org.sweble.wikitext.parser.encval.ValidatedWikitext;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtPreproWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.parser.PreprocessorToParserTransformer;
|
||||||
|
import org.sweble.wikitext.parser.preprocessor.PreprocessedWikitext;
|
||||||
|
|
||||||
|
import xtc.parser.ParseException;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.ReconStats;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig.ColumnDetail;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
|
import com.google.refine.model.recon.ReconJob;
|
||||||
|
|
||||||
|
|
||||||
|
public class WikitextImporter extends TabularImportingParserBase {
|
||||||
|
// static final private Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
|
||||||
|
|
||||||
|
/**
 * Creates the importer.
 *
 * The {@code false} flag is handed to the {@link TabularImportingParserBase}
 * constructor unchanged; presumably it selects Reader-based input -- TODO
 * confirm against the base class.
 */
public WikitextImporter() {
    super(false);
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(
|
||||||
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||||
|
JSONUtilities.safePut(options, "blankSpanningCells", true);
|
||||||
|
JSONUtilities.safePut(options, "includeRawTemplates", false);
|
||||||
|
JSONUtilities.safePut(options, "wikiUrl", "https://en.wikipedia.org/wiki/");
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SpanningCell {
|
||||||
|
public String value;
|
||||||
|
public String reconciled;
|
||||||
|
public int colspan;
|
||||||
|
public int rowspan;
|
||||||
|
public int row;
|
||||||
|
public int col;
|
||||||
|
|
||||||
|
SpanningCell(String value, String reconciled, int row, int col, int rowspan, int colspan) {
|
||||||
|
this.value = value;
|
||||||
|
this.reconciled = reconciled;
|
||||||
|
this.row = row;
|
||||||
|
this.col = col;
|
||||||
|
this.rowspan = rowspan;
|
||||||
|
this.colspan = colspan;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class WikilinkedCell {
|
||||||
|
public String internalLink;
|
||||||
|
public int row;
|
||||||
|
public int col;
|
||||||
|
|
||||||
|
WikilinkedCell(String internalLink, int row, int col) {
|
||||||
|
this.internalLink = internalLink;
|
||||||
|
this.row = row;
|
||||||
|
this.col = col;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toURL(String wikiBaseUrl) {
|
||||||
|
return wikiBaseUrl + internalLink;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class WikitextTableVisitor extends AstVisitor<WtNode> {
|
||||||
|
|
||||||
|
public String caption;
|
||||||
|
public List<String> header;
|
||||||
|
public List<List<String>> rows;
|
||||||
|
public List<WikilinkedCell> wikilinkedCells;
|
||||||
|
private List<String> currentRow;
|
||||||
|
|
||||||
|
private boolean blankSpanningCells;
|
||||||
|
private boolean includeRawTemplates;
|
||||||
|
|
||||||
|
private int rowId;
|
||||||
|
private List<SpanningCell> spanningCells;
|
||||||
|
private StringBuilder cellStringBuilder;
|
||||||
|
private StringBuilder xmlAttrStringBuilder;
|
||||||
|
private String currentXmlAttr;
|
||||||
|
private String currentInternalLink;
|
||||||
|
private String currentExternalLink;
|
||||||
|
private int colspan;
|
||||||
|
private int rowspan;
|
||||||
|
private int spanningCellIdx;
|
||||||
|
private List<String> internalLinksInCell;
|
||||||
|
|
||||||
|
/**
 * Creates a visitor with empty result collections and a cleared traversal state.
 *
 * @param blankSpanningCells  if true, positions covered by a spanning cell are
 *                            filled with null instead of a copy of its value
 * @param includeRawTemplates if true, templates are written back into the cell
 *                            text in {{...}} form instead of being dropped
 */
public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) {
    // Import options
    this.blankSpanningCells = blankSpanningCells;
    this.includeRawTemplates = includeRawTemplates;

    // Extraction results
    this.caption = null;
    this.header = new ArrayList<String>();
    this.rows = new ArrayList<List<String>>();
    this.wikilinkedCells = new ArrayList<WikilinkedCell>();

    // Spanning-cell bookkeeping
    this.spanningCells = new ArrayList<SpanningCell>();
    this.spanningCellIdx = 0;
    this.colspan = 0;
    this.rowspan = 0;

    // Per-cell rendering state
    this.cellStringBuilder = null;
    this.xmlAttrStringBuilder = null;
    this.currentInternalLink = null;
    this.currentExternalLink = null;
    this.internalLinksInCell = new ArrayList<String>();

    // -1 until the first body row is reached
    this.rowId = -1;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected WtNode before(WtNode node) {
|
||||||
|
return super.before(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Default handler */
|
||||||
|
|
||||||
|
public void visit(WtNode e) {
|
||||||
|
// Ignore other nodes
|
||||||
|
System.out.println(e.getNodeName());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Table handling */
|
||||||
|
|
||||||
|
public void visit(WtTable e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableHeader e) {
|
||||||
|
String columnName = renderCellAsString(e);
|
||||||
|
header.add(columnName);
|
||||||
|
// For the header, we ignore rowspan and manually add cells for colspan
|
||||||
|
if (colspan > 1) {
|
||||||
|
for (int i = 0; i < colspan-1; i++) {
|
||||||
|
header.add(columnName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableCaption e) {
|
||||||
|
caption = renderCellAsString(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableRow e)
|
||||||
|
{
|
||||||
|
if (currentRow == null) {
|
||||||
|
if (rowId == -1) {
|
||||||
|
// no header was found, start on the first row
|
||||||
|
rowId = 0;
|
||||||
|
}
|
||||||
|
currentRow = new ArrayList<String>();
|
||||||
|
spanningCellIdx = 0;
|
||||||
|
addSpanningCells();
|
||||||
|
iterate(e);
|
||||||
|
if(currentRow.size() > 0) {
|
||||||
|
rows.add(currentRow);
|
||||||
|
rowId++;
|
||||||
|
}
|
||||||
|
currentRow = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableCell e)
|
||||||
|
{
|
||||||
|
if (currentRow != null) {
|
||||||
|
rowspan = 1;
|
||||||
|
colspan = 1;
|
||||||
|
internalLinksInCell.clear();
|
||||||
|
String value = renderCellAsString(e);
|
||||||
|
|
||||||
|
int colId = currentRow.size();
|
||||||
|
|
||||||
|
// Add the cell to the row we are currently building
|
||||||
|
currentRow.add(value);
|
||||||
|
|
||||||
|
// Reconcile it if we found exactly one link in the cell
|
||||||
|
String reconciled = null;
|
||||||
|
if (internalLinksInCell.size() == 1) {
|
||||||
|
reconciled = internalLinksInCell.get(0);
|
||||||
|
wikilinkedCells.add(new WikilinkedCell(reconciled, rowId, colId));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark it as spanning if we found the tags
|
||||||
|
if (colspan > 1 || rowspan > 1) {
|
||||||
|
SpanningCell spanningCell = new SpanningCell(
|
||||||
|
value, reconciled, rowId, colId, rowspan, colspan);
|
||||||
|
spanningCells.add(spanningCellIdx, spanningCell);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add all spanning cells that need to be inserted after this one.
|
||||||
|
addSpanningCells();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String renderCellAsString(WtNode e) {
|
||||||
|
cellStringBuilder = new StringBuilder();
|
||||||
|
iterate(e);
|
||||||
|
String value = cellStringBuilder.toString();
|
||||||
|
if (value == null) {
|
||||||
|
value = "";
|
||||||
|
}
|
||||||
|
value = value.trim();
|
||||||
|
cellStringBuilder = null;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtText text) {
|
||||||
|
writeText(text.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtNewline e) {
|
||||||
|
writeText("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtXmlEmptyTag tag) {
|
||||||
|
if("br".equals(tag.getName())) {
|
||||||
|
writeText("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtXmlStartTag tag) {
|
||||||
|
if("br".equals(tag.getName())) {
|
||||||
|
writeText("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtXmlEndTag tag) {
|
||||||
|
if("br".equals(tag.getName())) {
|
||||||
|
writeText("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeText(String text) {
|
||||||
|
if (xmlAttrStringBuilder != null) {
|
||||||
|
xmlAttrStringBuilder.append(text);
|
||||||
|
} else if (cellStringBuilder != null) {
|
||||||
|
cellStringBuilder.append(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Spanning cell helpers */
|
||||||
|
|
||||||
|
private SpanningCell spanningCell() {
|
||||||
|
return spanningCells.get(spanningCellIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addSpanningCells() {
|
||||||
|
while (spanningCellIdx < spanningCells.size() &&
|
||||||
|
currentRow.size() >= spanningCell().col) {
|
||||||
|
// Add blank cells to represent the current spanning cell
|
||||||
|
SpanningCell cell = spanningCell();
|
||||||
|
if (cell.row + cell.rowspan >= rowId + 1) {
|
||||||
|
while(currentRow.size() < cell.col + cell.colspan) {
|
||||||
|
if (blankSpanningCells) {
|
||||||
|
currentRow.add(null);
|
||||||
|
} else {
|
||||||
|
currentRow.add(cell.value);
|
||||||
|
if (cell.reconciled != null) {
|
||||||
|
wikilinkedCells.add(new WikilinkedCell(cell.reconciled, rowId, currentRow.size()-1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check if this spanning cell has been fully represented
|
||||||
|
if(cell.row + cell.rowspan <= rowId + 1) {
|
||||||
|
spanningCells.remove(spanningCellIdx);
|
||||||
|
} else {
|
||||||
|
spanningCellIdx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* XML attributes : useful for colspan and rowspan */
|
||||||
|
|
||||||
|
public void visit(WtXmlAttributes e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtXmlAttribute e) {
|
||||||
|
if (currentXmlAttr == null) {
|
||||||
|
xmlAttrStringBuilder = new StringBuilder();
|
||||||
|
iterate(e);
|
||||||
|
try {
|
||||||
|
int attrValue = Integer.parseInt(xmlAttrStringBuilder.toString());
|
||||||
|
if ("colspan".equals(currentXmlAttr)) {
|
||||||
|
colspan = attrValue;
|
||||||
|
} else if ("rowspan".equals(currentXmlAttr)) {
|
||||||
|
rowspan = attrValue;
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException _) {
|
||||||
|
}
|
||||||
|
currentXmlAttr = null;
|
||||||
|
xmlAttrStringBuilder = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtName e) {
|
||||||
|
try {
|
||||||
|
currentXmlAttr = e.getAsString();
|
||||||
|
} catch (UnsupportedOperationException _) {
|
||||||
|
currentXmlAttr = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtValue e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Link management */
|
||||||
|
|
||||||
|
public void visit(WtInternalLink e) {
|
||||||
|
currentInternalLink = e.getTarget().getAsString();
|
||||||
|
internalLinksInCell.add(currentInternalLink);
|
||||||
|
iterate(e);
|
||||||
|
currentInternalLink = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtExternalLink e) {
|
||||||
|
WtUrl url = e.getTarget();
|
||||||
|
String externalLink = url.getProtocol() + ":" + url.getPath();
|
||||||
|
if (cellStringBuilder != null) {
|
||||||
|
if(rowId >= 0) {
|
||||||
|
// We are inside the table: all hyperlinks
|
||||||
|
// should be converted to their URLs regardless of
|
||||||
|
// their label.
|
||||||
|
cellStringBuilder.append(externalLink);
|
||||||
|
} else {
|
||||||
|
// We are in the header: keep the labels instead
|
||||||
|
currentExternalLink = externalLink;
|
||||||
|
iterate(e);
|
||||||
|
currentExternalLink = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtNoLinkTitle e) {
|
||||||
|
if (cellStringBuilder != null) {
|
||||||
|
if (currentInternalLink != null) {
|
||||||
|
cellStringBuilder.append(currentInternalLink);
|
||||||
|
} else if (currentExternalLink != null) {
|
||||||
|
cellStringBuilder.append(currentExternalLink);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtLinkTitle e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtUrl e) {
|
||||||
|
// already handled, in WtExternalLink, added here for clarity
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Templates */
|
||||||
|
|
||||||
|
public void visit(WtTemplate e) {
|
||||||
|
if (includeRawTemplates) {
|
||||||
|
writeText("{{"+e.getName().getAsString());
|
||||||
|
WtTemplateArguments args = e.getArgs();
|
||||||
|
for (int i = 0; i != args.size(); i++) {
|
||||||
|
writeText("|");
|
||||||
|
iterate(args.get(i));
|
||||||
|
}
|
||||||
|
writeText("}}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTemplateArgument e) {
|
||||||
|
writeText("|");
|
||||||
|
if(e.hasName()) {
|
||||||
|
writeText(e.getName().getAsString());
|
||||||
|
writeText("=");
|
||||||
|
}
|
||||||
|
iterate(e.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Content blocks */
|
||||||
|
|
||||||
|
public void visit(WtParsedWikitextPage e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtSection e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtBody e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtItalics e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtBold e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Object after(WtNode node, Object result)
|
||||||
|
{
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class WikiTableDataReader implements TableDataReader {
|
||||||
|
private int currentRow = -1;
|
||||||
|
private WikitextTableVisitor visitor = null;
|
||||||
|
private List<List<Recon>> reconList = null;
|
||||||
|
private List<Boolean> columnReconciled = null;
|
||||||
|
|
||||||
|
/**
 * Creates a reader over the table collected by the given visitor. Reading
 * starts at the header (row index -1), and no reconciliation data is
 * attached until {@code reconcileToQids} has been run.
 *
 * @param visitor the visitor holding the extracted header and rows
 */
public WikiTableDataReader(WikitextTableVisitor visitor) {
    this.visitor = visitor;
    this.currentRow = -1;
    this.reconList = null;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
List<Object> row = null;
|
||||||
|
List<String> origRow = null;
|
||||||
|
if (currentRow == -1) {
|
||||||
|
origRow = this.visitor.header;
|
||||||
|
} else if(currentRow < this.visitor.rows.size()) {
|
||||||
|
origRow = this.visitor.rows.get(currentRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (origRow != null) {
|
||||||
|
row = new ArrayList<Object>();
|
||||||
|
for (int i = 0; i < origRow.size(); i++) {
|
||||||
|
Recon recon = null;
|
||||||
|
if (currentRow >= 0 && reconList != null) {
|
||||||
|
recon = reconList.get(currentRow).get(i);
|
||||||
|
}
|
||||||
|
row.add(new Cell(origRow.get(i), recon));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
currentRow++;
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void reconcileToQids(String wikiBaseUrl, StandardReconConfig cfg) {
|
||||||
|
if("null".equals(wikiBaseUrl)) {
|
||||||
|
return; // TODO: more thorough URL validation instead
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the list of recons
|
||||||
|
reconList = new ArrayList<List<Recon>>();
|
||||||
|
columnReconciled = new ArrayList<Boolean>();
|
||||||
|
for (int i = 0; i < this.visitor.rows.size(); i++) {
|
||||||
|
int rowSize = this.visitor.rows.get(i).size();
|
||||||
|
List<Recon> recons = new ArrayList<Recon>(rowSize);
|
||||||
|
for (int j = 0; j < rowSize; j++) {
|
||||||
|
recons.add(null);
|
||||||
|
if (i == 0)
|
||||||
|
columnReconciled.add(false);
|
||||||
|
}
|
||||||
|
reconList.add(recons);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int batchSize = 50;
|
||||||
|
int i = 0;
|
||||||
|
int totalSize = this.visitor.wikilinkedCells.size();
|
||||||
|
while (i < totalSize) {
|
||||||
|
List<ReconJob> jobs = new ArrayList<ReconJob>();
|
||||||
|
int batchStart = i;
|
||||||
|
while (i < batchStart + batchSize && i < totalSize) {
|
||||||
|
WikilinkedCell cell = this.visitor.wikilinkedCells.get(i);
|
||||||
|
jobs.add(cfg.createSimpleJob(cell.toURL(wikiBaseUrl)));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Recon> recons = cfg.batchRecon(jobs, 0);
|
||||||
|
for (int j = batchStart; j < batchStart + batchSize && j < totalSize; j++) {
|
||||||
|
WikilinkedCell cell = this.visitor.wikilinkedCells.get(j);
|
||||||
|
Recon recon = recons.get(j - batchStart);
|
||||||
|
if (recon != null) {
|
||||||
|
reconList.get(cell.row).set(cell.col, recon);
|
||||||
|
columnReconciled.set(cell.col, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
// Set-up a simple wiki configuration
|
||||||
|
ParserConfig parserConfig = new SimpleParserConfig();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Encoding validation
|
||||||
|
|
||||||
|
WikitextEncodingValidator v = new WikitextEncodingValidator();
|
||||||
|
|
||||||
|
String wikitext = CharStreams.toString(reader);
|
||||||
|
String title = "Page title";
|
||||||
|
ValidatedWikitext validated = v.validate(parserConfig, wikitext, title);
|
||||||
|
|
||||||
|
// Pre-processing
|
||||||
|
WikitextPreprocessor prep = new WikitextPreprocessor(parserConfig);
|
||||||
|
|
||||||
|
WtPreproWikitextPage prepArticle =
|
||||||
|
(WtPreproWikitextPage) prep.parseArticle(validated, title, false);
|
||||||
|
|
||||||
|
// Parsing
|
||||||
|
PreprocessedWikitext ppw = PreprocessorToParserTransformer
|
||||||
|
.transform(prepArticle);
|
||||||
|
|
||||||
|
WikitextParser parser = new WikitextParser(parserConfig);
|
||||||
|
|
||||||
|
WtParsedWikitextPage parsedArticle;
|
||||||
|
parsedArticle = (WtParsedWikitextPage) parser.parseArticle(ppw, title);
|
||||||
|
|
||||||
|
// Compile the retrieved page
|
||||||
|
boolean blankSpanningCells = JSONUtilities.getBoolean(options, "blankSpanningCells", true);
|
||||||
|
boolean includeRawTemplates = JSONUtilities.getBoolean(options, "includeRawTemplates", false);
|
||||||
|
final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates);
|
||||||
|
vs.go(parsedArticle);
|
||||||
|
|
||||||
|
WikiTableDataReader dataReader = new WikiTableDataReader(vs);
|
||||||
|
|
||||||
|
// Reconcile if needed
|
||||||
|
String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
|
||||||
|
// Wikidata reconciliation endpoint, hardcoded because the user might not have it in its services
|
||||||
|
String reconUrl = JSONUtilities.getString(options, "reconService",
|
||||||
|
"https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||||
|
StandardReconConfig cfg = getReconConfig(reconUrl);
|
||||||
|
|
||||||
|
if (wikiUrl != null) {
|
||||||
|
dataReader.reconcileToQids(wikiUrl, cfg);
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
|
|
||||||
|
// Set metadata
|
||||||
|
if (vs.caption != null && vs.caption.length() > 0) {
|
||||||
|
metadata.setName(vs.caption);
|
||||||
|
// TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
|
||||||
|
}
|
||||||
|
|
||||||
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
|
||||||
|
// Add reconciliation statistics
|
||||||
|
if (dataReader.columnReconciled != null) {
|
||||||
|
for(int i = 0; i != dataReader.columnReconciled.size(); i++) {
|
||||||
|
if (dataReader.columnReconciled.get(i)) {
|
||||||
|
Column col = project.columnModel.columns.get(i);
|
||||||
|
col.setReconStats(ReconStats.create(project, i));
|
||||||
|
col.setReconConfig(cfg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e1) {
|
||||||
|
e1.printStackTrace();
|
||||||
|
} catch (ParseException e1) {
|
||||||
|
exceptions.add(e1);
|
||||||
|
e1.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private StandardReconConfig getReconConfig(String url) {
|
||||||
|
StandardReconConfig cfg = new StandardReconConfig(
|
||||||
|
url,
|
||||||
|
"http://www.wikidata.org/entity/",
|
||||||
|
"http://www.wikidata.org/prop/direct/",
|
||||||
|
"",
|
||||||
|
"entity",
|
||||||
|
true,
|
||||||
|
new ArrayList<ColumnDetail>(),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
return cfg;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -233,6 +233,30 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
return "Reconcile cells in column " + columnName + " to type " + typeID;
|
return "Reconcile cells in column " + columnName + " to type " + typeID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ReconJob createSimpleJob(String query) {
|
||||||
|
/* Same as createJob, but for simpler queries
|
||||||
|
* without any properties. This is much easier
|
||||||
|
* to generate as there is no need for a Project,
|
||||||
|
* Row and Cell: this means the job can be created
|
||||||
|
* outside the usual context of reconciliation (e.g.
|
||||||
|
* in an importer).
|
||||||
|
*/
|
||||||
|
StandardReconJob job = new StandardReconJob();
|
||||||
|
try {
|
||||||
|
StringWriter stringWriter = new StringWriter();
|
||||||
|
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||||
|
jsonWriter.object();
|
||||||
|
jsonWriter.key("query");
|
||||||
|
jsonWriter.value(query);
|
||||||
|
jsonWriter.endObject();
|
||||||
|
job.text = query;
|
||||||
|
job.code = stringWriter.toString();
|
||||||
|
return job;
|
||||||
|
} catch (JSONException _) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ReconJob createJob(Project project, int rowIndex, Row row,
|
public ReconJob createJob(Project project, int rowIndex, Row row,
|
||||||
String columnName, Cell cell) {
|
String columnName, Cell cell) {
|
||||||
|
@ -0,0 +1,217 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010,2011 Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.importers.WikitextImporter;
|
||||||
|
|
||||||
|
public class WikitextImporterTests extends ImporterTest {
|
||||||
|
|
||||||
|
private WikitextImporter importer = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
super.setUp();
|
||||||
|
importer = new WikitextImporter();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@AfterMethod
|
||||||
|
public void tearDown(){
|
||||||
|
importer = null;
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readSimpleData() {
|
||||||
|
String input = "\n"
|
||||||
|
+ "{|\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| a || b<br/>2 || c \n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| d || e || f<br>\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "|}\n";
|
||||||
|
try {
|
||||||
|
prepareOptions(0, true, true, null);
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.size(), 2);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b\n2");
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readTableWithLinks() {
|
||||||
|
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||||
|
String input = "\n"
|
||||||
|
+"{|\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung|Cedefop]] || Cedefop || http://www.cedefop.europa.eu/\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || EUROFOUND || [http://www.eurofound.europa.eu/]\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/ europa.eu]\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
|
||||||
|
try {
|
||||||
|
prepareOptions(0, true, true, "https://de.wikipedia.org/wiki/");
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
|
|
||||||
|
// Reconciled cells
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Cedefop");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).recon, null);
|
||||||
|
Assert.assertEquals(project.rows.get(2).cells.get(0).value, "Europäische Beobachtungsstelle für Drogen und Drogensucht");
|
||||||
|
Assert.assertEquals(project.rows.get(2).cells.get(0).recon.getBestCandidate().id, "Q1377256");
|
||||||
|
|
||||||
|
// various ways to input external links
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://www.eurofound.europa.eu/");
|
||||||
|
Assert.assertEquals(project.rows.get(2).cells.get(2).value, "http://www.emcdda.europa.eu/");
|
||||||
|
// Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://www.cedefop.europa.eu/");
|
||||||
|
// unfortunately the above does not seem to be supported by the parser (parsed as blank instead)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readStyledTableWithHeader() {
|
||||||
|
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||||
|
String input = "\n"
|
||||||
|
+"==Agenturen==\n"
|
||||||
|
+"{| class=\"wikitable sortable\"\n"
|
||||||
|
+"! style=\"text-align:left; width: 60em\" | Offizieller Name\n"
|
||||||
|
+"! style=\"text-align:left; width: 9em\" | Abkürzung\n"
|
||||||
|
+"! style=\"text-align:left; width: 6em\" | Website\n"
|
||||||
|
+"! style=\"text-align:left; width: 15em\" | Standort\n"
|
||||||
|
+"! style=\"text-align:left; width: 18em\" | Staat\n"
|
||||||
|
+"! style=\"text-align:left; width: 6em\" | Gründung\n"
|
||||||
|
+"! style=\"text-align:left; width: 50em\" | Anmerkungen\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung]] || '''Cedefop''' || [http://www.cedefop.europa.eu/] || [[Thessaloniki]] || {{Griechenland}} || 1975 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || ''EUROFOUND'' || [http://www.eurofound.europa.eu/] || [[Dublin]] || {{Irland}} || 1975 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/] || [[Lissabon]] || {{Portugal}} || 1993 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
|
||||||
|
try {
|
||||||
|
prepareOptions(-1, true, true, null);
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 7);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "Europäisches Zentrum für die Förderung der Berufsbildung");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Cedefop");
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "EUROFOUND");
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Offizieller Name");
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(6).getName(), "Anmerkungen");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.size(), 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readTableWithSpanningCells() {
|
||||||
|
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||||
|
String input = "{| class=\"wikitable\"\n"
|
||||||
|
+"!colspan=\"6\"|Shopping List\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|Bread & Butter\n"
|
||||||
|
+"|Pie\n"
|
||||||
|
+"|Buns\n"
|
||||||
|
+"|rowspan=\"2\"|Danish\n"
|
||||||
|
+"|colspan=\"2\"|Croissant\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|Cheese\n"
|
||||||
|
+"|colspan=\"2\"|Ice cream\n"
|
||||||
|
+"|Butter\n"
|
||||||
|
+"|Yogurt\n"
|
||||||
|
+"|}\n";
|
||||||
|
|
||||||
|
try {
|
||||||
|
prepareOptions(-1, true, true, null);
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 6);
|
||||||
|
Assert.assertNull(project.rows.get(1).cells.get(2).value);
|
||||||
|
Assert.assertNull(project.rows.get(1).cells.get(3).value);
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "Butter");
|
||||||
|
}
|
||||||
|
//--helpers--
|
||||||
|
|
||||||
|
private void parse(String wikitext) {
|
||||||
|
parseOneFile(importer, new StringReader(wikitext));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepareOptions(
|
||||||
|
int limit, boolean blankSpanningCells,
|
||||||
|
boolean guessValueType, String wikiUrl) {
|
||||||
|
|
||||||
|
whenGetIntegerOption("limit", options, limit);
|
||||||
|
whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
|
||||||
|
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
|
||||||
|
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
||||||
|
whenGetStringOption("wikiUrl", options, wikiUrl);
|
||||||
|
whenGetIntegerOption("headerLines", options, 1);
|
||||||
|
whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||||
|
}
|
||||||
|
}
|
BIN
main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar
Normal file
BIN
main/webapp/WEB-INF/lib-src/swc-parser-lazy-3.1.5-sources.jar
Normal file
Binary file not shown.
Binary file not shown.
@ -212,6 +212,7 @@ function registerImporting() {
|
|||||||
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
||||||
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
||||||
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
||||||
|
IM.registerFormat("text/wiki", "Wikitext", "WikitextParserUI", new Packages.com.google.refine.importers.WikitextImporter());
|
||||||
|
|
||||||
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
||||||
|
|
||||||
@ -344,7 +345,10 @@ function init() {
|
|||||||
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/json-parser-ui.js",
|
"scripts/index/parser-interfaces/json-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js"
|
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/wikitext-parser-ui.js",
|
||||||
|
|
||||||
|
"scripts/reconciliation/recon-manager.js" // so that reconciliation functions are available to importers
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -372,7 +376,8 @@ function init() {
|
|||||||
"styles/views/data-table-view.less", // for the preview table's styles
|
"styles/views/data-table-view.less", // for the preview table's styles
|
||||||
"styles/index/fixed-width-parser-ui.less",
|
"styles/index/fixed-width-parser-ui.less",
|
||||||
"styles/index/xml-parser-ui.less",
|
"styles/index/xml-parser-ui.less",
|
||||||
"styles/index/json-parser-ui.less"
|
"styles/index/json-parser-ui.less",
|
||||||
|
"styles/index/wikitext-parser-ui.less",
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -113,6 +113,10 @@
|
|||||||
"parse-cell": "Parse cell text into<br/>numbers, dates, ...",
|
"parse-cell": "Parse cell text into<br/>numbers, dates, ...",
|
||||||
"store-blank": "Store blank rows",
|
"store-blank": "Store blank rows",
|
||||||
"store-nulls": "Store blank cells as nulls",
|
"store-nulls": "Store blank cells as nulls",
|
||||||
|
"blank-spanning-cells": "Pad cells spanning over multiple rows or columns with nulls",
|
||||||
|
"include-raw-templates": "Include raw templates as wikicode",
|
||||||
|
"wiki-base-url": "Reconcile to wiki with base URL:",
|
||||||
|
"invalid-wikitext": "No table could be parsed. Are you sure this is a valid wiki table?",
|
||||||
"store-source": "Store file source <br/>(file names, URLs)<br/>in each row",
|
"store-source": "Store file source <br/>(file names, URLs)<br/>in each row",
|
||||||
"preserve-empty": "Preserve empty strings",
|
"preserve-empty": "Preserve empty strings",
|
||||||
"trim": "Trim leading & trailing whitespace from strings",
|
"trim": "Trim leading & trailing whitespace from strings",
|
||||||
|
@ -73,8 +73,8 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
|
|||||||
this._service = service;
|
this._service = service;
|
||||||
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
||||||
this._serviceMetadata = serviceMetadata;
|
this._serviceMetadata = serviceMetadata;
|
||||||
if ("extend" in serviceMetadata) {
|
if (serviceMetadata != null && "extend" in serviceMetadata) {
|
||||||
extend = serviceMetadata.extend;
|
var extend = serviceMetadata.extend;
|
||||||
if ("propose_properties" in extend) {
|
if ("propose_properties" in extend) {
|
||||||
var endpoint = extend.propose_properties;
|
var endpoint = extend.propose_properties;
|
||||||
this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path;
|
this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path;
|
||||||
|
@ -83,8 +83,13 @@ Refine.PreviewTable.prototype._render = function() {
|
|||||||
$('<span>').html(" ").appendTo(divContent);
|
$('<span>').html(" ").appendTo(divContent);
|
||||||
} else if ("e" in cell) {
|
} else if ("e" in cell) {
|
||||||
$('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
|
$('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
|
||||||
} else if (!("r" in cell) || !cell.r) {
|
} else {
|
||||||
if (typeof cell.v !== "string") {
|
if ("r" in cell && cell.ri !== null) {
|
||||||
|
$('<a>')
|
||||||
|
.attr("href", "#") // we don't have access to the reconciliation data here
|
||||||
|
.text(cell.v)
|
||||||
|
.appendTo(divContent);
|
||||||
|
} else if (typeof cell.v !== "string") {
|
||||||
if (typeof cell.v == "number") {
|
if (typeof cell.v == "number") {
|
||||||
divContent.addClass("data-table-cell-content-numeric");
|
divContent.addClass("data-table-cell-content-numeric");
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
<div class="grid-layout layout-loose"><table>
|
||||||
|
<tr><td colspan="2" id="or-import-colsep"></td></tr>
|
||||||
|
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="wikiCheckbox" id="$reconcileWiki" /></td><td><label for="$reconcileWiki" id="or-import-wiki-base-url"></label>
|
||||||
|
<input bind="wikiUrlInput" type="text" class="lightweight" size="30" id="$wikiUrl" /></td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" id="$limit" /></td>
|
||||||
|
<td><label for="$limit" id="or-import-load"></label>
|
||||||
|
<input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||||
|
<label for="$limit" id="or-import-rows2"></label></td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||||
|
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="blankSpanningCellsCheckbox" id="$blank-spanning-cells" /></td>
|
||||||
|
<td><label for="$blank-spanning-cells" id="or-import-blankSpanningCells"></label></td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" id="$store-blank-rows" /></td>
|
||||||
|
<td colspan="2"><label for="$store-blank-rows" id="or-import-blank"></label></td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeRawTemplatesCheckbox" id="$include-raw-templates" /></td>
|
||||||
|
<td colspan="2"><label for="$include-raw-templates" id="or-import-includeRawTemplates"></label></td></tr>
|
||||||
|
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" id="$store-blank-cells" /></td>
|
||||||
|
<td colspan="2"><label for="$store-blank-cells" id="or-import-null"></label></td></tr>
|
||||||
|
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
|
||||||
|
<td><label for="$include-file-sources" id="or-import-source"></label></td></tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="1%"></td>
|
||||||
|
<td><button class="button" bind="previewButton"></button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
@ -0,0 +1,205 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Option panel and preview for the Wikitext import format.
 *
 * Stores the importing context and the three container elements, then
 * builds the option panel and requests a first preview.
 */
Refine.WikitextParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {
  // Containers handed over by the caller.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Importing job context.
  this._config = config;
  this._format = format;
  this._job = job;
  this._jobID = jobID;
  this._controller = controller;

  // Handle of the pending delayed preview refresh, or null when none is scheduled.
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};
|
||||||
|
Refine.DefaultImportingController.parserUIs.WikitextParserUI = Refine.WikitextParserUI;
|
||||||
|
|
||||||
|
/**
 * Cancels the pending delayed preview refresh, if there is one.
 */
Refine.WikitextParserUI.prototype.dispose = function() {
  if (this._timerID === null) {
    return; // nothing scheduled
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||||
|
|
||||||
|
/**
 * Reports whether a project can be created from the current options.
 * This parser UI has no blocking conditions, so the answer is always yes.
 */
Refine.WikitextParserUI.prototype.confirmReadyToCreateProject = function() {
  var ready = true; // always ready
  return ready;
};
|
||||||
|
|
||||||
|
/**
 * Reads the option panel and returns the importer options object.
 *
 * - wikiUrl is the text field value when its checkbox is ticked, else null.
 * - limit is the parsed integer from the limit field when its checkbox is
 *   ticked and the text is numeric, else -1.
 * - The remaining flags mirror their checkboxes.
 * - reconService is whatever ReconciliationManager.ensureDefaultServicePresent()
 *   returns.
 */
Refine.WikitextParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;

  var isChecked = function(name) {
    return elmts[name][0].checked;
  };

  // Fall back to -1 when the limit is disabled or not a number.
  var limit = -1;
  if (isChecked("limitCheckbox")) {
    var parsed = parseInt(elmts.limitInput[0].value, 10);
    if (!isNaN(parsed)) {
      limit = parsed;
    }
  }

  return {
    wikiUrl: isChecked("wikiCheckbox") ? elmts.wikiUrlInput[0].value : null,
    limit: limit,
    storeBlankRows: isChecked("storeBlankRowsCheckbox"),
    blankSpanningCells: isChecked("blankSpanningCellsCheckbox"),
    includeRawTemplates: isChecked("includeRawTemplatesCheckbox"),
    guessCellValueTypes: isChecked("guessCellValueTypesCheckbox"),
    storeBlankCellsAsNulls: isChecked("storeBlankCellsAsNullsCheckbox"),
    includeFileSources: isChecked("includeFileSourcesCheckbox"),
    reconService: ReconciliationManager.ensureDefaultServicePresent()
  };
};
|
||||||
|
|
||||||
|
/**
 * Builds the option panel: loads the HTML template, binds its elements,
 * fills in the localized labels, restores the control states from
 * this._config and wires change handlers that schedule a preview refresh.
 */
Refine.WikitextParserUI.prototype._initialize = function() {
  var self = this;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/wikitext-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  var elmts = this._optionContainerElmts;

  elmts.previewButton.click(function() { self._updatePreview(); });
  elmts.previewButton.html($.i18n._('core-buttons')["update-preview"]);

  // Localized labels. Entries set with .html() contain markup such as <br/>.
  $('#or-import-wiki-base-url').text($.i18n._('core-index-parser')["wiki-base-url"]);
  $('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]);
  $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]);
  $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]);
  $('#or-import-blankSpanningCells').text($.i18n._('core-index-parser')["blank-spanning-cells"]);
  $('#or-import-includeRawTemplates').text($.i18n._('core-index-parser')["include-raw-templates"]);
  $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]);
  $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]);
  $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]);

  // Check for a missing wikiUrl BEFORE calling toString() on it; otherwise a
  // null/undefined value throws and the panel is never finished.
  var wikiUrl = this._config.wikiUrl;
  if (wikiUrl !== null && wikiUrl !== undefined) {
    elmts.wikiUrlInput[0].value = wikiUrl.toString();
    elmts.wikiCheckbox.prop("checked", true);
  }

  if (this._config.limit > 0) {
    elmts.limitCheckbox.prop("checked", true);
    elmts.limitInput[0].value = this._config.limit.toString();
  }

  // Boolean options: config key "foo" drives the checkbox bound as "fooCheckbox".
  var booleanOptions = [
    "blankSpanningCells",
    "includeRawTemplates",
    "storeBlankRows",
    "guessCellValueTypes",
    "storeBlankCellsAsNulls",
    "includeFileSources"
  ];
  for (var i = 0; i < booleanOptions.length; i++) {
    var key = booleanOptions[i];
    if (this._config[key]) {
      elmts[key + "Checkbox"].prop("checked", true);
    }
  }

  // Any change to an option refreshes the preview after a short delay.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
|
||||||
|
|
||||||
|
/**
 * Schedules a preview refresh half a second from now, replacing any
 * refresh that is already pending.
 */
Refine.WikitextParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  // Drop the previously scheduled refresh so that only the latest one fires.
  if (ui._timerID !== null) {
    window.clearTimeout(ui._timerID);
  }

  var fire = function() {
    ui._timerID = null;
    ui._updatePreview();
  };
  ui._timerID = window.setTimeout(fire, 500); // 0.5 second
};
|
||||||
|
|
||||||
|
/**
 * Sends the current options to the controller and, when it answers with
 * status "ok", fetches the preview data and renders it: either a preview
 * table or, when no rows were parsed, the "invalid wikitext" message.
 */
Refine.WikitextParserUI.prototype._updatePreview = function() {
  var ui = this;

  var renderPreview = function(projectData) {
    ui._progressContainer.hide();

    var target = ui._dataContainer.unbind().empty();
    if (projectData.rowModel.rows.length !== 0) {
      new Refine.PreviewTable(projectData, target);
      return;
    }
    // Nothing was parsed: show an explanatory message instead of an empty table.
    $('<div>').addClass("wikitext-parser-ui-message")
      .text($.i18n._('core-index-parser')["invalid-wikitext"]).appendTo(target);
  };

  var onOptionsUpdated = function(result) {
    if (result.status !== "ok") {
      return;
    }
    ui._controller.getPreviewData(renderPreview);
  };

  this._progressContainer.show();
  this._controller.updateFormatAndOptions(this.getOptions(), onOptionsUpdated);
};
|
@ -127,8 +127,27 @@ ReconciliationManager.save = function(f) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
(function() {
|
/**
 * Passes the reconciliation service for `url` to the callback `f`.
 * A service that is already known is passed straight away; otherwise it is
 * registered as a standard service, the service list is saved, and the
 * newly registered entry is passed once the save completes.
 */
ReconciliationManager.getOrRegisterServiceFromUrl = function(url, f) {
  var known = ReconciliationManager.getServiceFromUrl(url);
  if (known != null) {
    f(known);
    return;
  }

  ReconciliationManager.registerStandardService(url, function(index) {
    ReconciliationManager.save(function() {
      f(ReconciliationManager.standardServices[index]);
    });
  });
};
|
||||||
|
|
||||||
|
ReconciliationManager.ensureDefaultServicePresent = function() {
|
||||||
var lang = $.i18n._('core-recon')["wd-recon-lang"];
|
var lang = $.i18n._('core-recon')["wd-recon-lang"];
|
||||||
|
var url = "https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api";
|
||||||
|
ReconciliationManager.getOrRegisterServiceFromUrl(url, function(service) { });
|
||||||
|
return url;
|
||||||
|
};
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
|
||||||
$.ajax({
|
$.ajax({
|
||||||
async: false,
|
async: false,
|
||||||
@ -140,9 +159,7 @@ ReconciliationManager.save = function(f) {
|
|||||||
ReconciliationManager.standardServices = JSON.parse(data.value);
|
ReconciliationManager.standardServices = JSON.parse(data.value);
|
||||||
ReconciliationManager._rebuildMap();
|
ReconciliationManager._rebuildMap();
|
||||||
} else {
|
} else {
|
||||||
ReconciliationManager.registerStandardService(
|
ReconciliationManager.ensureDefaultServicePresent();
|
||||||
"https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
dataType: "json"
|
dataType: "json"
|
||||||
|
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
@import-less url("../theme.less");
|
||||||
|
|
||||||
|
/* Message box used by the Wikitext parser UI in place of the preview table. */
.wikitext-parser-ui-message {
  padding: 20px;
  font-size: 150%;
  color: #666;
  background: #eee;
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user