![David Huynh](/assets/img/avatar_default.png)
git-svn-id: http://google-refine.googlecode.com/svn/trunk@226 7d457c2a-affb-35e4-300a-418c747d4874
177 lines
5.9 KiB
Java
177 lines
5.9 KiB
Java
package com.metaweb.gridworks.model.recon;
|
|
|
|
import java.io.InputStream;
|
|
import java.io.StringWriter;
|
|
import java.net.URL;
|
|
import java.net.URLConnection;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Properties;
|
|
|
|
import org.json.JSONArray;
|
|
import org.json.JSONException;
|
|
import org.json.JSONObject;
|
|
import org.json.JSONWriter;
|
|
|
|
import com.metaweb.gridworks.model.Cell;
|
|
import com.metaweb.gridworks.model.Project;
|
|
import com.metaweb.gridworks.model.Recon;
|
|
import com.metaweb.gridworks.model.ReconCandidate;
|
|
import com.metaweb.gridworks.model.Row;
|
|
import com.metaweb.gridworks.model.Recon.Judgment;
|
|
import com.metaweb.gridworks.util.ParsingUtilities;
|
|
|
|
public class IdBasedReconConfig extends StrictReconConfig {
|
|
static public ReconConfig reconstruct(JSONObject obj) throws Exception {
|
|
return new IdBasedReconConfig();
|
|
}
|
|
|
|
public IdBasedReconConfig() {
|
|
}
|
|
|
|
static protected class IdBasedReconJob extends ReconJob {
|
|
String id;
|
|
|
|
public int getKey() {
|
|
return id.hashCode();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public ReconJob createJob(Project project, int rowIndex, Row row,
|
|
String columnName, Cell cell) {
|
|
|
|
IdBasedReconJob job = new IdBasedReconJob();
|
|
String s = cell.value.toString();
|
|
|
|
if (!s.startsWith("/")) {
|
|
if (s.startsWith("92")) {
|
|
s = "/guid/" + s;
|
|
} else if (!s.contains("/")){
|
|
s = "/en/" + s;
|
|
} else {
|
|
s = "/" + s;
|
|
}
|
|
}
|
|
|
|
job.id = s;
|
|
|
|
return job;
|
|
}
|
|
|
|
@Override
|
|
public int getBatchSize() {
|
|
return 10;
|
|
}
|
|
|
|
@Override
|
|
public String getBriefDescription(Project project, String columnName) {
|
|
return "Reconcile cells in column " + columnName + " as Freebase IDs";
|
|
}
|
|
|
|
public void write(JSONWriter writer, Properties options)
|
|
throws JSONException {
|
|
|
|
writer.object();
|
|
writer.key("mode"); writer.value("strict");
|
|
writer.key("match"); writer.value("id");
|
|
writer.endObject();
|
|
}
|
|
|
|
@Override
|
|
public List<Recon> batchRecon(List<ReconJob> jobs) {
|
|
List<Recon> recons = new ArrayList<Recon>(jobs.size());
|
|
Map<String, Recon> idToRecon = new HashMap<String, Recon>();
|
|
|
|
try {
|
|
String query = null;
|
|
{
|
|
StringWriter stringWriter = new StringWriter();
|
|
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
|
|
|
jsonWriter.object();
|
|
jsonWriter.key("query");
|
|
jsonWriter.array();
|
|
jsonWriter.object();
|
|
|
|
jsonWriter.key("id"); jsonWriter.value(null);
|
|
jsonWriter.key("name"); jsonWriter.value(null);
|
|
jsonWriter.key("guid"); jsonWriter.value(null);
|
|
jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray();
|
|
|
|
jsonWriter.key("id|=");
|
|
jsonWriter.array();
|
|
for (ReconJob job : jobs) {
|
|
jsonWriter.value(((IdBasedReconJob) job).id);
|
|
}
|
|
jsonWriter.endArray();
|
|
|
|
jsonWriter.endObject();
|
|
jsonWriter.endArray();
|
|
jsonWriter.endObject();
|
|
|
|
query = stringWriter.toString();
|
|
}
|
|
|
|
StringBuffer sb = new StringBuffer();
|
|
sb.append(s_mqlreadService + "?query=");
|
|
sb.append(ParsingUtilities.encode(query));
|
|
|
|
URL url = new URL(sb.toString());
|
|
URLConnection connection = url.openConnection();
|
|
connection.setConnectTimeout(5000);
|
|
connection.connect();
|
|
|
|
InputStream is = connection.getInputStream();
|
|
try {
|
|
String s = ParsingUtilities.inputStreamToString(is);
|
|
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
|
JSONArray results = o.getJSONArray("result");
|
|
int count = results.length();
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
JSONObject result = results.getJSONObject(i);
|
|
|
|
String id = result.getString("id");
|
|
|
|
JSONArray types = result.getJSONArray("type");
|
|
String[] typeIDs = new String[types.length()];
|
|
for (int j = 0; j < typeIDs.length; j++) {
|
|
typeIDs[j] = types.getString(j);
|
|
}
|
|
|
|
ReconCandidate candidate = new ReconCandidate(
|
|
id,
|
|
result.getString("guid"),
|
|
result.getString("name"),
|
|
typeIDs,
|
|
100
|
|
);
|
|
|
|
Recon recon = new Recon();
|
|
recon.addCandidate(candidate);
|
|
recon.match = candidate;
|
|
recon.judgment = Judgment.Matched;
|
|
|
|
idToRecon.put(id, recon);
|
|
}
|
|
} finally {
|
|
is.close();
|
|
}
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
|
|
for (int i = 0; i < jobs.size(); i++) {
|
|
String id = ((IdBasedReconJob) jobs.get(i)).id;
|
|
Recon recon = idToRecon.get(id);
|
|
recons.add(recon);
|
|
}
|
|
|
|
return recons;
|
|
}
|
|
|
|
}
|