Add reconciliation config in wikitext import.

This commit is contained in:
Antonin Delpeuch 2017-08-16 00:05:40 +01:00
parent d01de6ea85
commit 3dcda5a42c
6 changed files with 53 additions and 21 deletions

View File

@ -47,6 +47,7 @@ import xtc.parser.ParseException;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
@ -388,7 +389,7 @@ public class WikitextImporter extends TabularImportingParserBase {
return row;
}
private void reconcileToQids(String wikiBaseUrl) {
private void reconcileToQids(String wikiBaseUrl, StandardReconConfig cfg) {
if("null".equals(wikiBaseUrl)) {
return; // TODO: more thorough URL validation instead
}
@ -406,18 +407,6 @@ public class WikitextImporter extends TabularImportingParserBase {
columnReconciled.add(false);
}
// Wikidata reconciliation endpoint, hardcoded because the user might not have it in its services
StandardReconConfig cfg = new StandardReconConfig(
"https://tools.wmflabs.org/openrefine-wikidata/en/api",
"http://www.wikidata.org/entity/",
"http://www.wikidata.org/prop/direct/",
"",
"entity",
true,
new ArrayList<ColumnDetail>(),
1
);
int batchSize = 50;
int i = 0;
int totalSize = this.visitor.wikilinkedCells.size();
@ -490,8 +479,13 @@ public class WikitextImporter extends TabularImportingParserBase {
// Reconcile if needed
String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
// Wikidata reconciliation endpoint, used as the default when no "reconService" option is given, because the user might not have it among their services
String reconUrl = JSONUtilities.getString(options, "reconService",
"https://tools.wmflabs.org/openrefine-wikidata/en/api");
StandardReconConfig cfg = getReconConfig(reconUrl);
if (wikiUrl != null) {
dataReader.reconcileToQids(wikiUrl);
dataReader.reconcileToQids(wikiUrl, cfg);
}
JSONUtilities.safePut(options, "headerLines", 1);
@ -508,7 +502,9 @@ public class WikitextImporter extends TabularImportingParserBase {
if (dataReader.columnReconciled != null) {
for(int i = 0; i != dataReader.columnReconciled.size(); i++) {
if (dataReader.columnReconciled.get(i)) {
project.columnModel.columns.get(i).setReconStats(ReconStats.create(project, i));
Column col = project.columnModel.columns.get(i);
col.setReconStats(ReconStats.create(project, i));
col.setReconConfig(cfg);
}
}
}
@ -520,5 +516,18 @@ public class WikitextImporter extends TabularImportingParserBase {
}
}
/**
 * Builds the reconciliation configuration used for cells imported from wikitext.
 *
 * Only the service endpoint varies; every other constructor argument is fixed
 * (Wikidata entity and direct-property URI prefixes, an empty string, the
 * literal "entity", {@code true}, an empty list of column details, and 1).
 * NOTE(review): the meaning of each positional argument is defined by
 * StandardReconConfig's constructor, which is not visible here - confirm there.
 *
 * @param url the URL of the reconciliation service endpoint
 * @return a new StandardReconConfig targeting {@code url}
 */
private StandardReconConfig getReconConfig(String url) {
    final String entityPrefix = "http://www.wikidata.org/entity/";
    final String directPropertyPrefix = "http://www.wikidata.org/prop/direct/";
    return new StandardReconConfig(
            url,
            entityPrefix,
            directPropertyPrefix,
            "",
            "entity",
            true,
            new ArrayList<ColumnDetail>(),
            1);
}
}

View File

@ -204,6 +204,7 @@ public class WikitextImporterTests extends ImporterTest {
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
whenGetIntegerOption("headerLines", options, 1);
whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api");
}
private void verifyOptions() {

View File

@ -346,7 +346,9 @@ function init() {
"scripts/index/parser-interfaces/xml-parser-ui.js",
"scripts/index/parser-interfaces/json-parser-ui.js",
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
"scripts/index/parser-interfaces/wikitext-parser-ui.js"
"scripts/index/parser-interfaces/wikitext-parser-ui.js",
"scripts/reconciliation/recon-manager.js" // so that reconciliation functions are available to importers
]
);

View File

@ -73,7 +73,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
this._service = service;
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
this._serviceMetadata = serviceMetadata;
if ("extend" in serviceMetadata) {
if (serviceMetadata != null && "extend" in serviceMetadata) {
var extend = serviceMetadata.extend;
if ("propose_properties" in extend) {
var endpoint = extend.propose_properties;

View File

@ -93,6 +93,8 @@ Refine.WikitextParserUI.prototype.getOptions = function() {
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
options.reconService = ReconciliationManager.ensureDefaultServicePresent();
return options;
};

View File

@ -127,8 +127,28 @@ ReconciliationManager.save = function(f) {
});
};
/**
 * Passes the reconciliation service known under `url` to the callback `f`.
 *
 * If ReconciliationManager.getServiceFromUrl(url) yields a service, `f` is
 * called with it right away. Otherwise the URL is registered through
 * registerStandardService, the service list is saved, and `f` is then called
 * with the entry of ReconciliationManager.standardServices at the index handed
 * to the registration callback.
 *
 * @param {string} url - URL of the reconciliation service endpoint
 * @param {function} f - callback invoked with the service object
 */
ReconciliationManager.getOrRegisterServiceFromUrl = function(url, f) {
  var service = ReconciliationManager.getServiceFromUrl(url);
  if (service != null) {
    f(service);
    return;
  }

  // NOTE(review): `idx` is presumably the position of the newly registered
  // service in standardServices - confirm against registerStandardService.
  ReconciliationManager.registerStandardService(url, function(idx) {
    ReconciliationManager.save(function() {
      // Index with the callback's `idx` parameter; no other index variable
      // is declared in this scope.
      f(ReconciliationManager.standardServices[idx]);
    });
  });
};
/**
 * Ensures the default Wikidata reconciliation service is registered and
 * returns its URL.
 *
 * The URL is built from the "wd-recon-lang" entry of the 'core-recon' i18n
 * bundle. Lookup or registration is delegated to
 * ReconciliationManager.getOrRegisterServiceFromUrl; the service object it
 * passes to the callback is not used here.
 *
 * @returns {string} the URL of the default reconciliation service
 */
ReconciliationManager.ensureDefaultServicePresent = function() {
  var lang = $.i18n._('core-recon')["wd-recon-lang"];
  var url = "https://tools.wmflabs.org/openrefine-wikidata/" + lang + "/api";
  // Only the side effect (registering the service if missing) matters, so the
  // callback is deliberately a no-op.
  ReconciliationManager.getOrRegisterServiceFromUrl(url, function(service) {});
  return url;
};
(function() {
var lang = $.i18n._('core-recon')["wd-recon-lang"];
$.ajax({
async: false,
@ -140,9 +160,7 @@ ReconciliationManager.save = function(f) {
ReconciliationManager.standardServices = JSON.parse(data.value);
ReconciliationManager._rebuildMap();
} else {
ReconciliationManager.registerStandardService(
"https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api"
);
ReconciliationManager.ensureDefaultServicePresent();
}
},
dataType: "json"