* Harden reconciliation - Fixes #2590 - check for non-JSON / unparseable JSON returns - handle malformed results response with no name for candidates - catch any Exception, not just IOExceptions - call processManager.onFailedProcess() for cleanup on error * Add default constructor for Jackson Jackson complains about needing a default constructor for the NON_DEFAULT annotation, but I'm not sure why this worked before. * Clean up indentation and unused variable - no functional changes Make indentation consistent throughout the module, changing recently added lines to use the standard all spaces convention. Remove unused count variable * Simplify control flow * Update limit parameter comment. No functional change. * Replace ternary expression which is causing NPE * Add reconciliation tests using mock HTTP server
This commit is contained in:
parent
6e66cb5144
commit
1849e62234
@ -75,8 +75,8 @@ import com.google.refine.util.ParsingUtilities;
|
|||||||
public class StandardReconConfig extends ReconConfig {
|
public class StandardReconConfig extends ReconConfig {
|
||||||
final static Logger logger = LoggerFactory.getLogger("refine-standard-recon");
|
final static Logger logger = LoggerFactory.getLogger("refine-standard-recon");
|
||||||
|
|
||||||
private static final String DEFAULT_SCHEMA_SPACE = "http://localhost/schema";
|
private static final String DEFAULT_SCHEMA_SPACE = "http://localhost/schema";
|
||||||
private static final String DEFAULT_IDENTIFIER_SPACE = "http://localhost/identifier";
|
private static final String DEFAULT_IDENTIFIER_SPACE = "http://localhost/identifier";
|
||||||
|
|
||||||
static public class ColumnDetail {
|
static public class ColumnDetail {
|
||||||
@JsonProperty("column")
|
@JsonProperty("column")
|
||||||
@ -107,14 +107,14 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
this.propertyID = property == null ? propertyID : property.id;
|
this.propertyID = property == null ? propertyID : property.id;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
try {
|
try {
|
||||||
return ParsingUtilities.mapper.writeValueAsString(this);
|
return ParsingUtilities.mapper.writeValueAsString(this);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
return super.toString();
|
return super.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static public StandardReconConfig reconstruct(String json) throws IOException {
|
static public StandardReconConfig reconstruct(String json) throws IOException {
|
||||||
@ -132,7 +132,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,9 +171,9 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
@JsonProperty("limit")
|
@JsonProperty("limit")
|
||||||
int limit) {
|
int limit) {
|
||||||
this(service, identifierSpace, schemaSpace,
|
this(service, identifierSpace, schemaSpace,
|
||||||
type != null ? type.id : null,
|
type != null ? type.id : null,
|
||||||
type != null ? type.name : null,
|
type != null ? type.name : null,
|
||||||
autoMatch, columnDetails, limit);
|
autoMatch, columnDetails, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardReconConfig(
|
public StandardReconConfig(
|
||||||
@ -275,14 +275,14 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
this.v = v;
|
this.v = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
try {
|
try {
|
||||||
return ParsingUtilities.mapper.writeValueAsString(this);
|
return ParsingUtilities.mapper.writeValueAsString(this);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
return super.toString();
|
return super.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static class ReconQuery {
|
protected static class ReconQuery {
|
||||||
@ -306,12 +306,21 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
@JsonInclude(Include.NON_EMPTY)
|
@JsonInclude(Include.NON_EMPTY)
|
||||||
protected List<QueryProperty> properties;
|
protected List<QueryProperty> properties;
|
||||||
|
|
||||||
// Only send limit if it's non-default to preserve backward compatibility with
|
// Only send limit if it's non-default (default = 0) to preserve backward
|
||||||
// services which might choke on this
|
// compatibility with services which might choke on this (pre-2013)
|
||||||
@JsonProperty("limit")
|
@JsonProperty("limit")
|
||||||
@JsonInclude(Include.NON_DEFAULT)
|
@JsonInclude(Include.NON_DEFAULT)
|
||||||
protected int limit;
|
protected int limit;
|
||||||
|
|
||||||
|
public ReconQuery() {
|
||||||
|
super();
|
||||||
|
this.query = "";
|
||||||
|
this.typeID = null;
|
||||||
|
this.properties = null;
|
||||||
|
this.limit = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonCreator
|
||||||
public ReconQuery(
|
public ReconQuery(
|
||||||
String query,
|
String query,
|
||||||
String typeID,
|
String typeID,
|
||||||
@ -323,52 +332,47 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
this.limit = limit;
|
this.limit = limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
try {
|
try {
|
||||||
return ParsingUtilities.mapper.writeValueAsString(this);
|
return ParsingUtilities.mapper.writeValueAsString(this);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
return super.toString();
|
return super.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class ReconResult {
|
public static class ReconResult {
|
||||||
@JsonProperty("name")
|
@JsonProperty("name")
|
||||||
public String name;
|
public String name;
|
||||||
@JsonProperty("id")
|
@JsonProperty("id")
|
||||||
public String id;
|
public String id;
|
||||||
@JsonProperty("type")
|
@JsonProperty("type")
|
||||||
public List<ReconType> types = Collections.emptyList();
|
public List<ReconType> types = Collections.emptyList();
|
||||||
@JsonProperty("score")
|
@JsonProperty("score")
|
||||||
public double score;
|
public double score;
|
||||||
@JsonProperty("match")
|
@JsonProperty("match")
|
||||||
public boolean match = false;
|
public boolean match = false;
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public ReconCandidate toCandidate() {
|
public ReconCandidate toCandidate() {
|
||||||
String[] bareTypes = new String[types.size()];
|
String[] bareTypes = new String[types.size()];
|
||||||
for(int i = 0; i != types.size(); i++) {
|
for (int i = 0; i != types.size(); i++) {
|
||||||
bareTypes[i] = types.get(i).id;
|
bareTypes[i] = types.get(i).id;
|
||||||
}
|
}
|
||||||
ReconCandidate result = new ReconCandidate(
|
ReconCandidate result = new ReconCandidate(id, name, bareTypes, score);
|
||||||
id,
|
|
||||||
name,
|
|
||||||
bareTypes,
|
|
||||||
score
|
|
||||||
);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
try {
|
try {
|
||||||
return ParsingUtilities.mapper.writeValueAsString(this);
|
return ParsingUtilities.mapper.writeValueAsString(this);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
return super.toString();
|
return super.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -419,6 +423,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
|
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
|
return null; // TODO: Throw exception instead?
|
||||||
}
|
}
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
@ -446,7 +451,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||||
{
|
{
|
||||||
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
|
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
|
||||||
connection.setConnectTimeout(30000);
|
connection.setConnectTimeout(30000); // TODO parameterize
|
||||||
connection.setDoOutput(true);
|
connection.setDoOutput(true);
|
||||||
|
|
||||||
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
||||||
@ -464,48 +469,53 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
|
|
||||||
if (connection.getResponseCode() >= 400) {
|
if (connection.getResponseCode() >= 400) {
|
||||||
InputStream is = connection.getErrorStream();
|
InputStream is = connection.getErrorStream();
|
||||||
logger.error("Failed - code:"
|
String msg = is == null ? "" : ParsingUtilities.inputStreamToString(is);
|
||||||
|
logger.error("Failed - code: "
|
||||||
+ Integer.toString(connection.getResponseCode())
|
+ Integer.toString(connection.getResponseCode())
|
||||||
+ " message: " + is == null ? ""
|
+ " message: " + msg);
|
||||||
: ParsingUtilities.inputStreamToString(is));
|
|
||||||
} else {
|
} else {
|
||||||
InputStream is = connection.getInputStream();
|
InputStream is = connection.getInputStream();
|
||||||
try {
|
try {
|
||||||
String s = ParsingUtilities.inputStreamToString(is);
|
String s = ParsingUtilities.inputStreamToString(is);
|
||||||
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
|
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
|
||||||
|
if (o == null) { // utility method returns null instead of throwing
|
||||||
|
logger.error("Failed to parse string as JSON: " + s);
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < jobs.size(); i++) {
|
||||||
|
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
||||||
|
Recon recon = null;
|
||||||
|
|
||||||
for (int i = 0; i < jobs.size(); i++) {
|
String text = job.text;
|
||||||
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
String key = "q" + i;
|
||||||
Recon recon = null;
|
if (o.has(key) && o.get(key) instanceof ObjectNode) {
|
||||||
|
ObjectNode o2 = (ObjectNode) o.get(key);
|
||||||
|
if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
|
||||||
|
ArrayNode results = (ArrayNode) o2.get("result");
|
||||||
|
|
||||||
String text = job.text;
|
recon = createReconServiceResults(text, results, historyEntryID);
|
||||||
String key = "q" + i;
|
} else {
|
||||||
if (o.has(key) && o.get(key) instanceof ObjectNode) {
|
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
|
||||||
ObjectNode o2 = (ObjectNode) o.get(key);
|
}
|
||||||
if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
|
|
||||||
ArrayNode results = (ArrayNode) o2.get("result");
|
|
||||||
|
|
||||||
recon = createReconServiceResults(text, results, historyEntryID);
|
|
||||||
} else {
|
} else {
|
||||||
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
|
// TODO: better error reporting
|
||||||
|
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (recon != null) {
|
if (recon != null) {
|
||||||
recon.service = service;
|
recon.service = service;
|
||||||
|
}
|
||||||
|
recons.add(recon);
|
||||||
}
|
}
|
||||||
recons.add(recon);
|
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
is.close();
|
is.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (Exception e) {
|
||||||
logger.error("Failed to batch recon with load:\n" + queriesString, e);
|
logger.error("Failed to batch recon with load:\n" + queriesString, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: This code prevents the retry mechanism in ReconOperation from working
|
||||||
while (recons.size() < jobs.size()) {
|
while (recons.size() < jobs.size()) {
|
||||||
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||||
recon.service = service;
|
recon.service = service;
|
||||||
@ -531,28 +541,26 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
|
|
||||||
// Sort results by decreasing score
|
// Sort results by decreasing score
|
||||||
results.sort(new Comparator<ReconResult>() {
|
results.sort(new Comparator<ReconResult>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(ReconResult a, ReconResult b) {
|
public int compare(ReconResult a, ReconResult b) {
|
||||||
return Double.compare(b.score, a.score);
|
return Double.compare(b.score, a.score);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
int length = results.size();
|
int length = results.size();
|
||||||
int count = 0;
|
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
ReconResult result = results.get(i);
|
ReconResult result = results.get(i);
|
||||||
|
|
||||||
ReconCandidate candidate = result.toCandidate();
|
ReconCandidate candidate = result.toCandidate();
|
||||||
|
|
||||||
if (autoMatch && i == 0 && result.match) {
|
if (autoMatch && i == 0 && result.match) {
|
||||||
recon.match = candidate;
|
recon.match = candidate;
|
||||||
recon.matchRank = 0;
|
recon.matchRank = 0;
|
||||||
recon.judgment = Judgment.Matched;
|
recon.judgment = Judgment.Matched;
|
||||||
recon.judgmentAction = "auto";
|
recon.judgmentAction = "auto";
|
||||||
}
|
}
|
||||||
|
|
||||||
recon.addCandidate(candidate);
|
recon.addCandidate(candidate);
|
||||||
count++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
computeFeatures(recon, text);
|
computeFeatures(recon, text);
|
||||||
@ -563,18 +571,18 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
* Recomputes the features associated with this reconciliation
|
* Recomputes the features associated with this reconciliation
|
||||||
* object (only if we have at least one candidate).
|
* object (only if we have at least one candidate).
|
||||||
*
|
*
|
||||||
* @param text
|
* @param text the cell value to compare the reconciliation data to
|
||||||
* the cell value to compare the reconciliation data to
|
|
||||||
*/
|
*/
|
||||||
public void computeFeatures(Recon recon, String text) {
|
public void computeFeatures(Recon recon, String text) {
|
||||||
if (recon.candidates != null && !recon.candidates.isEmpty() && text != null) {
|
if (recon.candidates != null && !recon.candidates.isEmpty() && text != null) {
|
||||||
ReconCandidate candidate = recon.candidates.get(0);
|
ReconCandidate candidate = recon.candidates.get(0);
|
||||||
|
|
||||||
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
|
if (candidate.name != null) {
|
||||||
recon.setFeature(Recon.Feature_nameLevenshtein,
|
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
|
||||||
StringUtils.getLevenshteinDistance(StringUtils.lowerCase(text), StringUtils.lowerCase(candidate.name)));
|
recon.setFeature(Recon.Feature_nameLevenshtein,
|
||||||
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name));
|
StringUtils.getLevenshteinDistance(StringUtils.lowerCase(text), StringUtils.lowerCase(candidate.name)));
|
||||||
|
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name));
|
||||||
|
}
|
||||||
recon.setFeature(Recon.Feature_typeMatch, false);
|
recon.setFeature(Recon.Feature_typeMatch, false);
|
||||||
if (this.typeID != null) {
|
if (this.typeID != null) {
|
||||||
for (String typeID : candidate.types) {
|
for (String typeID : candidate.types) {
|
||||||
@ -585,7 +593,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
recon.features = new Object[Recon.Feature_max];
|
recon.features = new Object[Recon.Feature_max];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,22 +286,26 @@ public class ReconOperation extends EngineDependentOperation {
|
|||||||
JobGroup group = jobToGroup.get(job);
|
JobGroup group = jobToGroup.get(job);
|
||||||
List<ReconEntry> entries = group.entries;
|
List<ReconEntry> entries = group.entries;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: Not sure what this retry is meant to handle, but it's currently
|
||||||
|
* non-functional due the code at the end of StandardReconConfig#batchRecon()
|
||||||
|
* which tops up any missing entries.
|
||||||
|
*/
|
||||||
if (recon == null) {
|
if (recon == null) {
|
||||||
group.trials++;
|
group.trials++;
|
||||||
if (group.trials < 3) {
|
if (group.trials < 3) {
|
||||||
logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value);
|
logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value);
|
||||||
continue; // try again next time
|
continue; // try again next time
|
||||||
}
|
}
|
||||||
logger.warn("Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value);
|
String msg = "Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value;
|
||||||
|
logger.warn(msg);
|
||||||
|
recon = _reconConfig.createNewRecon(_historyEntryID);
|
||||||
}
|
}
|
||||||
|
|
||||||
jobToGroup.remove(job);
|
jobToGroup.remove(job);
|
||||||
jobs.remove(j);
|
jobs.remove(j);
|
||||||
done++;
|
done++;
|
||||||
|
|
||||||
if (recon == null) {
|
|
||||||
recon = _reconConfig.createNewRecon(_historyEntryID);
|
|
||||||
}
|
|
||||||
recon.judgmentBatchSize = entries.size();
|
recon.judgmentBatchSize = entries.size();
|
||||||
|
|
||||||
for (ReconEntry entry : entries) {
|
for (ReconEntry entry : entries) {
|
||||||
@ -328,6 +332,7 @@ public class ReconOperation extends EngineDependentOperation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Option to keep partial results after cancellation?
|
||||||
if (!_canceled) {
|
if (!_canceled) {
|
||||||
Change reconChange = new ReconChange(
|
Change reconChange = new ReconChange(
|
||||||
cellChanges,
|
cellChanges,
|
||||||
|
@ -32,7 +32,9 @@ import static org.testng.Assert.assertNull;
|
|||||||
import static org.testng.Assert.assertTrue;
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.URLEncoder;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -43,9 +45,13 @@ import org.testng.annotations.Test;
|
|||||||
import com.fasterxml.jackson.core.JsonParseException;
|
import com.fasterxml.jackson.core.JsonParseException;
|
||||||
import com.fasterxml.jackson.databind.JsonMappingException;
|
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
|
||||||
import com.google.refine.RefineTest;
|
import com.google.refine.RefineTest;
|
||||||
|
import com.google.refine.browsing.EngineConfig;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Recon;
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.model.recon.ReconConfig;
|
import com.google.refine.model.recon.ReconConfig;
|
||||||
import com.google.refine.model.recon.ReconJob;
|
import com.google.refine.model.recon.ReconJob;
|
||||||
@ -54,9 +60,16 @@ import com.google.refine.model.recon.StandardReconConfig.ColumnDetail;
|
|||||||
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
|
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
|
||||||
import com.google.refine.operations.OperationRegistry;
|
import com.google.refine.operations.OperationRegistry;
|
||||||
import com.google.refine.operations.recon.ReconOperation;
|
import com.google.refine.operations.recon.ReconOperation;
|
||||||
|
import com.google.refine.process.Process;
|
||||||
|
import com.google.refine.process.ProcessManager;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import com.google.refine.util.TestUtils;
|
import com.google.refine.util.TestUtils;
|
||||||
|
|
||||||
|
import okhttp3.HttpUrl;
|
||||||
|
import okhttp3.mockwebserver.MockResponse;
|
||||||
|
import okhttp3.mockwebserver.MockWebServer;
|
||||||
|
import okhttp3.mockwebserver.RecordedRequest;
|
||||||
|
|
||||||
public class StandardReconConfigTests extends RefineTest {
|
public class StandardReconConfigTests extends RefineTest {
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
@ -170,10 +183,10 @@ public class StandardReconConfigTests extends RefineTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void formulateQueryTest() throws IOException {
|
public void formulateQueryTest() throws IOException {
|
||||||
Project project = createCSVProject("title,director\n"
|
Project project = createCSVProject("title,director\n"
|
||||||
+ "mulholland drive,david lynch");
|
+ "mulholland drive,david lynch");
|
||||||
|
|
||||||
String config = " {\n" +
|
String config = " {\n" +
|
||||||
" \"mode\": \"standard-service\",\n" +
|
" \"mode\": \"standard-service\",\n" +
|
||||||
" \"service\": \"https://tools.wmflabs.org/openrefine-wikidata/en/api\",\n" +
|
" \"service\": \"https://tools.wmflabs.org/openrefine-wikidata/en/api\",\n" +
|
||||||
" \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n" +
|
" \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n" +
|
||||||
@ -190,16 +203,193 @@ public class StandardReconConfigTests extends RefineTest {
|
|||||||
" \"propertyID\": \"P123\"\n" +
|
" \"propertyID\": \"P123\"\n" +
|
||||||
" }\n" +
|
" }\n" +
|
||||||
" ]}";
|
" ]}";
|
||||||
StandardReconConfig r = StandardReconConfig.reconstruct(config);
|
StandardReconConfig r = StandardReconConfig.reconstruct(config);
|
||||||
Row row = project.rows.get(0);
|
Row row = project.rows.get(0);
|
||||||
ReconJob job = r.createJob(project, 0, row, "title", row.getCell(0));
|
ReconJob job = r.createJob(project, 0, row, "title", row.getCell(0));
|
||||||
TestUtils.assertEqualAsJson("{"
|
TestUtils.assertEqualAsJson("{"
|
||||||
+ "\"query\":\"mulholland drive\","
|
+ "\"query\":\"mulholland drive\","
|
||||||
+ "\"type\":\"Q1234\","
|
+ "\"type\":\"Q1234\","
|
||||||
+ "\"properties\":["
|
+ "\"properties\":["
|
||||||
+ " {\"pid\":\"P123\",\"v\":\"david lynch\"}"
|
+ " {\"pid\":\"P123\",\"v\":\"david lynch\"}"
|
||||||
+ "],"
|
+ "],"
|
||||||
+ "\"type_strict\":\"should\"}", job.toString());
|
+ "\"type_strict\":\"should\"}", job.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void reconNonJsonTest() throws Exception {
|
||||||
|
Project project = createCSVProject("title,director\n"
|
||||||
|
+ "mulholland drive,david lynch");
|
||||||
|
|
||||||
|
String nonJsonResponse = "<!DOCTYPE html>\n" +
|
||||||
|
"<html lang=\"en\">\n" +
|
||||||
|
" <head>\n" +
|
||||||
|
" <meta charset=\"utf-8\">\n" +
|
||||||
|
" <title>Error</title>\n" +
|
||||||
|
" </head>\n" +
|
||||||
|
" <body>\n" +
|
||||||
|
" You have reached an error page.\n" +
|
||||||
|
" </body>\n" +
|
||||||
|
"</html>";
|
||||||
|
|
||||||
|
|
||||||
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
|
server.start();
|
||||||
|
HttpUrl url = server.url("/openrefine-wikidata/en/api");
|
||||||
|
server.enqueue(new MockResponse().setBody(nonJsonResponse));
|
||||||
|
server.enqueue(new MockResponse());
|
||||||
|
|
||||||
|
String configJson = " {\n" +
|
||||||
|
" \"mode\": \"standard-service\",\n" +
|
||||||
|
" \"service\": \"" + url + "\",\n" +
|
||||||
|
" \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n" +
|
||||||
|
" \"schemaSpace\": \"http://www.wikidata.org/prop/direct/\",\n" +
|
||||||
|
" \"type\": {\n" +
|
||||||
|
" \"id\": \"Q11424\",\n" +
|
||||||
|
" \"name\": \"film\"\n" +
|
||||||
|
" },\n" +
|
||||||
|
" \"autoMatch\": true,\n" +
|
||||||
|
" \"columnDetails\": [\n" +
|
||||||
|
" {\n" +
|
||||||
|
" \"column\": \"director\",\n" +
|
||||||
|
" \"propertyName\": \"Director\",\n" +
|
||||||
|
" \"propertyID\": \"P57\"\n" +
|
||||||
|
" }\n" +
|
||||||
|
" ]}";
|
||||||
|
StandardReconConfig config = StandardReconConfig.reconstruct(configJson);
|
||||||
|
ReconOperation op = new ReconOperation(EngineConfig.reconstruct(null), "director", config);
|
||||||
|
Process process = op.createProcess(project, new Properties());
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
RecordedRequest request1 = server.takeRequest();
|
||||||
|
|
||||||
|
assertNotNull(request1);
|
||||||
|
|
||||||
|
// We won't have gotten a result, but we want to make sure things didn't die
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Cell cell = row.cells.get(1);
|
||||||
|
assertNotNull(cell.recon);
|
||||||
|
assertEquals(cell.recon.service, url.toString());
|
||||||
|
ReconCandidate candidate = cell.recon.getBestCandidate();
|
||||||
|
assertNull(candidate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void reconTest() throws Exception {
|
||||||
|
Project project = createCSVProject("title,director\n"
|
||||||
|
+ "mulholland drive,david lynch");
|
||||||
|
|
||||||
|
String reconResponse = "{\n" +
|
||||||
|
"q0: {\n" +
|
||||||
|
" result: [\n" +
|
||||||
|
" {\n" +
|
||||||
|
" P57: {\n" +
|
||||||
|
"score: 100,\n" +
|
||||||
|
"weighted: 40\n" +
|
||||||
|
"},\n" +
|
||||||
|
"all_labels: {\n" +
|
||||||
|
"score: 59,\n" +
|
||||||
|
"weighted: 59\n" +
|
||||||
|
"},\n" +
|
||||||
|
"score: 70.71428571428572,\n" +
|
||||||
|
"id: \"Q3989262\",\n" +
|
||||||
|
"name: \"The Short Films of David Lynch\",\n" +
|
||||||
|
"type: [\n" +
|
||||||
|
"{\n" +
|
||||||
|
"id: \"Q24862\",\n" +
|
||||||
|
"name: \"short film\"\n" +
|
||||||
|
"},\n" +
|
||||||
|
"{\n" +
|
||||||
|
"id: \"Q202866\",\n" +
|
||||||
|
"name: \"animated film\"\n" +
|
||||||
|
"}\n" +
|
||||||
|
"],\n" +
|
||||||
|
"match: false\n" +
|
||||||
|
"},\n" +
|
||||||
|
"{\n" +
|
||||||
|
"P57: {\n" +
|
||||||
|
"score: 100,\n" +
|
||||||
|
"weighted: 40\n" +
|
||||||
|
"},\n" +
|
||||||
|
"all_labels: {\n" +
|
||||||
|
"score: 44,\n" +
|
||||||
|
"weighted: 44\n" +
|
||||||
|
"},\n" +
|
||||||
|
"score: 60.00000000000001,\n" +
|
||||||
|
"id: \"Q83365219\",\n" +
|
||||||
|
"name: \"What Did Jack Do?\",\n" +
|
||||||
|
"type: [\n" +
|
||||||
|
"{\n" +
|
||||||
|
"id: \"Q24862\",\n" +
|
||||||
|
"name: \"short film\"\n" +
|
||||||
|
"}\n" +
|
||||||
|
"],\n" +
|
||||||
|
"match: false\n" +
|
||||||
|
" }\n" +
|
||||||
|
" ]\n" +
|
||||||
|
" }\n" +
|
||||||
|
"}\n";
|
||||||
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
|
server.start();
|
||||||
|
HttpUrl url = server.url("/openrefine-wikidata/en/api");
|
||||||
|
// FIXME: Retry doesn't currently work, but should be tested
|
||||||
|
// server.enqueue(new MockResponse().setResponseCode(503)); // service overloaded
|
||||||
|
server.enqueue(new MockResponse().setBody(reconResponse));
|
||||||
|
server.enqueue(new MockResponse());
|
||||||
|
|
||||||
|
String configJson = " {\n" +
|
||||||
|
" \"mode\": \"standard-service\",\n" +
|
||||||
|
" \"service\": \"" + url + "\",\n" +
|
||||||
|
" \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n" +
|
||||||
|
" \"schemaSpace\": \"http://www.wikidata.org/prop/direct/\",\n" +
|
||||||
|
" \"type\": {\n" +
|
||||||
|
" \"id\": \"Q11424\",\n" +
|
||||||
|
" \"name\": \"film\"\n" +
|
||||||
|
" },\n" +
|
||||||
|
" \"autoMatch\": true,\n" +
|
||||||
|
" \"columnDetails\": [\n" +
|
||||||
|
" {\n" +
|
||||||
|
" \"column\": \"director\",\n" +
|
||||||
|
" \"propertyName\": \"Director\",\n" +
|
||||||
|
" \"propertyID\": \"P57\"\n" +
|
||||||
|
" }\n" +
|
||||||
|
" ]}";
|
||||||
|
StandardReconConfig config = StandardReconConfig.reconstruct(configJson);
|
||||||
|
ReconOperation op = new ReconOperation(EngineConfig.reconstruct(null), "director", config);
|
||||||
|
Process process = op.createProcess(project, new Properties());
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000); // TODO: timeout will need to increase for retries
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
// RecordedRequest scratchFirstRquest = server.takeRequest();
|
||||||
|
RecordedRequest request1 = server.takeRequest();
|
||||||
|
|
||||||
|
assertNotNull(request1);
|
||||||
|
String query = request1.getBody().readUtf8Line();
|
||||||
|
assertNotNull(query);
|
||||||
|
String expected = "queries=" + URLEncoder.encode("{\"q0\":{\"query\":\"david lynch\",\"type\":\"Q11424\",\"properties\":[{\"pid\":\"P57\",\"v\":\"david lynch\"}],\"type_strict\":\"should\"}}", "UTF-8");
|
||||||
|
assertEquals(query, expected);
|
||||||
|
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Cell cell = row.cells.get(1);
|
||||||
|
assertNotNull(cell.recon);
|
||||||
|
assertEquals(cell.recon.service, url.toString());
|
||||||
|
assertEquals(cell.recon.getBestCandidate().types[0], "Q24862");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user