Add reconciliation config in wikitext import.
This commit is contained in:
parent
d01de6ea85
commit
3dcda5a42c
@ -47,6 +47,7 @@ import xtc.parser.ParseException;
|
|||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.importing.ImportingJob;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Recon;
|
import com.google.refine.model.Recon;
|
||||||
import com.google.refine.model.ReconCandidate;
|
import com.google.refine.model.ReconCandidate;
|
||||||
@ -388,7 +389,7 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
return row;
|
return row;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void reconcileToQids(String wikiBaseUrl) {
|
private void reconcileToQids(String wikiBaseUrl, StandardReconConfig cfg) {
|
||||||
if("null".equals(wikiBaseUrl)) {
|
if("null".equals(wikiBaseUrl)) {
|
||||||
return; // TODO: more thorough URL validation instead
|
return; // TODO: more thorough URL validation instead
|
||||||
}
|
}
|
||||||
@ -406,18 +407,6 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
columnReconciled.add(false);
|
columnReconciled.add(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wikidata reconciliation endpoint, hardcoded because the user might not have it in its services
|
|
||||||
StandardReconConfig cfg = new StandardReconConfig(
|
|
||||||
"https://tools.wmflabs.org/openrefine-wikidata/en/api",
|
|
||||||
"http://www.wikidata.org/entity/",
|
|
||||||
"http://www.wikidata.org/prop/direct/",
|
|
||||||
"",
|
|
||||||
"entity",
|
|
||||||
true,
|
|
||||||
new ArrayList<ColumnDetail>(),
|
|
||||||
1
|
|
||||||
);
|
|
||||||
|
|
||||||
int batchSize = 50;
|
int batchSize = 50;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int totalSize = this.visitor.wikilinkedCells.size();
|
int totalSize = this.visitor.wikilinkedCells.size();
|
||||||
@ -490,8 +479,13 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
|
|
||||||
// Reconcile if needed
|
// Reconcile if needed
|
||||||
String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
|
String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
|
||||||
|
// Wikidata reconciliation endpoint, hardcoded because the user might not have it in its services
|
||||||
|
String reconUrl = JSONUtilities.getString(options, "reconService",
|
||||||
|
"https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||||
|
StandardReconConfig cfg = getReconConfig(reconUrl);
|
||||||
|
|
||||||
if (wikiUrl != null) {
|
if (wikiUrl != null) {
|
||||||
dataReader.reconcileToQids(wikiUrl);
|
dataReader.reconcileToQids(wikiUrl, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "headerLines", 1);
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
@ -508,7 +502,9 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
if (dataReader.columnReconciled != null) {
|
if (dataReader.columnReconciled != null) {
|
||||||
for(int i = 0; i != dataReader.columnReconciled.size(); i++) {
|
for(int i = 0; i != dataReader.columnReconciled.size(); i++) {
|
||||||
if (dataReader.columnReconciled.get(i)) {
|
if (dataReader.columnReconciled.get(i)) {
|
||||||
project.columnModel.columns.get(i).setReconStats(ReconStats.create(project, i));
|
Column col = project.columnModel.columns.get(i);
|
||||||
|
col.setReconStats(ReconStats.create(project, i));
|
||||||
|
col.setReconConfig(cfg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -520,5 +516,18 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private StandardReconConfig getReconConfig(String url) {
|
||||||
|
StandardReconConfig cfg = new StandardReconConfig(
|
||||||
|
url,
|
||||||
|
"http://www.wikidata.org/entity/",
|
||||||
|
"http://www.wikidata.org/prop/direct/",
|
||||||
|
"",
|
||||||
|
"entity",
|
||||||
|
true,
|
||||||
|
new ArrayList<ColumnDetail>(),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
return cfg;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -204,6 +204,7 @@ public class WikitextImporterTests extends ImporterTest {
|
|||||||
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
|
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
|
||||||
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
||||||
whenGetIntegerOption("headerLines", options, 1);
|
whenGetIntegerOption("headerLines", options, 1);
|
||||||
|
whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyOptions() {
|
private void verifyOptions() {
|
||||||
|
@ -346,7 +346,9 @@ function init() {
|
|||||||
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/json-parser-ui.js",
|
"scripts/index/parser-interfaces/json-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
|
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/wikitext-parser-ui.js"
|
"scripts/index/parser-interfaces/wikitext-parser-ui.js",
|
||||||
|
|
||||||
|
"scripts/reconciliation/recon-manager.js" // so that reconciliation functions are available to importers
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -73,7 +73,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
|
|||||||
this._service = service;
|
this._service = service;
|
||||||
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
||||||
this._serviceMetadata = serviceMetadata;
|
this._serviceMetadata = serviceMetadata;
|
||||||
if ("extend" in serviceMetadata) {
|
if (serviceMetadata != null && "extend" in serviceMetadata) {
|
||||||
var extend = serviceMetadata.extend;
|
var extend = serviceMetadata.extend;
|
||||||
if ("propose_properties" in extend) {
|
if ("propose_properties" in extend) {
|
||||||
var endpoint = extend.propose_properties;
|
var endpoint = extend.propose_properties;
|
||||||
|
@ -93,6 +93,8 @@ Refine.WikitextParserUI.prototype.getOptions = function() {
|
|||||||
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
|
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
|
||||||
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
||||||
|
|
||||||
|
options.reconService = ReconciliationManager.ensureDefaultServicePresent();
|
||||||
|
|
||||||
return options;
|
return options;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -127,8 +127,28 @@ ReconciliationManager.save = function(f) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
(function() {
|
/**
 * Passes the reconciliation service registered under `url` to the callback `f`.
 *
 * If the service is already known, `f` is called with it right away.
 * Otherwise the URL is registered as a standard service, the service list is
 * saved, and `f` is called from the save callback with the new entry.
 *
 * @param {string} url - URL of the reconciliation service endpoint.
 * @param {function} f - Callback receiving the service object.
 */
ReconciliationManager.getOrRegisterServiceFromUrl = function(url, f) {
  var service = ReconciliationManager.getServiceFromUrl(url);
  if (service != null) {
    f(service);
    return;
  }

  ReconciliationManager.registerStandardService(url, function(idx) {
    ReconciliationManager.save(function() {
      // Index with the callback's own argument. The previous code read an
      // undeclared `i` here, so `f` never received the registered service.
      // NOTE(review): assumes `idx` is the position of the new entry in
      // `standardServices` - confirm against registerStandardService.
      f(ReconciliationManager.standardServices[idx]);
    });
  });
};
|
||||||
|
|
||||||
|
/**
 * Makes sure the default Wikidata reconciliation service for the current
 * interface language is registered, and returns its URL.
 *
 * The URL is returned as soon as the lookup/registration has been started;
 * this function does not wait for the registration callback to run.
 *
 * @returns {string} URL of the default reconciliation service.
 */
ReconciliationManager.ensureDefaultServicePresent = function() {
  // Language code for the service comes from the 'core-recon' i18n bundle.
  var lang = $.i18n._('core-recon')["wd-recon-lang"];
  var url = "https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api";

  // Only the side effect (registration when missing) matters here, so the
  // callback intentionally does nothing with the service it receives.
  ReconciliationManager.getOrRegisterServiceFromUrl(url, function(service) { });

  return url;
};
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
|
||||||
$.ajax({
|
$.ajax({
|
||||||
async: false,
|
async: false,
|
||||||
@ -140,9 +160,7 @@ ReconciliationManager.save = function(f) {
|
|||||||
ReconciliationManager.standardServices = JSON.parse(data.value);
|
ReconciliationManager.standardServices = JSON.parse(data.value);
|
||||||
ReconciliationManager._rebuildMap();
|
ReconciliationManager._rebuildMap();
|
||||||
} else {
|
} else {
|
||||||
ReconciliationManager.registerStandardService(
|
ReconciliationManager.ensureDefaultServicePresent();
|
||||||
"https://tools.wmflabs.org/openrefine-wikidata/"+lang+"/api"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
dataType: "json"
|
dataType: "json"
|
||||||
|
Loading…
Reference in New Issue
Block a user