Track the Freebase types of columns added with data from Freebase, so that we can later add more data based on those columns. Fix a minor bug in the serialization of data extension records.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@303 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
cf95e5b5f6
commit
084a6114d7
@ -19,14 +19,21 @@ import com.metaweb.gridworks.model.Column;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.ReconCandidate;
|
||||
import com.metaweb.gridworks.model.ReconStats;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||
import com.metaweb.gridworks.model.recon.DataExtensionReconConfig;
|
||||
import com.metaweb.gridworks.protograph.FreebaseType;
|
||||
import com.metaweb.gridworks.util.ParsingUtilities;
|
||||
import com.metaweb.gridworks.util.FreebaseDataExtensionJob.DataExtension;
|
||||
|
||||
public class DataExtensionChange implements Change {
|
||||
final protected String _baseColumnName;
|
||||
final protected int _columnInsertIndex;
|
||||
|
||||
final protected List<String> _columnNames;
|
||||
final protected List<FreebaseType> _columnTypes;
|
||||
|
||||
final protected List<Integer> _rowIndices;
|
||||
final protected List<DataExtension> _dataExtensions;
|
||||
|
||||
@ -38,20 +45,27 @@ public class DataExtensionChange implements Change {
|
||||
String baseColumnName,
|
||||
int columnInsertIndex,
|
||||
List<String> columnNames,
|
||||
List<FreebaseType> columnTypes,
|
||||
List<Integer> rowIndices,
|
||||
List<DataExtension> dataExtensions
|
||||
) {
|
||||
_baseColumnName = baseColumnName;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
|
||||
_columnNames = columnNames;
|
||||
_columnTypes = columnTypes;
|
||||
|
||||
_rowIndices = rowIndices;
|
||||
_dataExtensions = dataExtensions;
|
||||
}
|
||||
|
||||
protected DataExtensionChange(
|
||||
String baseColumnName,
|
||||
int columnInsertIndex,
|
||||
int columnInsertIndex,
|
||||
|
||||
List<String> columnNames,
|
||||
List<FreebaseType> columnTypes,
|
||||
|
||||
List<Integer> rowIndices,
|
||||
List<DataExtension> dataExtensions,
|
||||
int firstNewCellIndex,
|
||||
@ -60,7 +74,10 @@ public class DataExtensionChange implements Change {
|
||||
) {
|
||||
_baseColumnName = baseColumnName;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
|
||||
_columnNames = columnNames;
|
||||
_columnTypes = columnTypes;
|
||||
|
||||
_rowIndices = rowIndices;
|
||||
_dataExtensions = dataExtensions;
|
||||
|
||||
@ -140,9 +157,12 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
String name = _columnNames.get(i);
|
||||
int cellIndex = _firstNewCellIndex + i;
|
||||
|
||||
Column column = new Column(cellIndex, name);
|
||||
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
|
||||
column.setReconStats(ReconStats.create(project, cellIndex));
|
||||
|
||||
Column column = new Column(_firstNewCellIndex + i, name);
|
||||
|
||||
project.columnModel.columns.add(_columnInsertIndex + i, column);
|
||||
}
|
||||
|
||||
@ -194,6 +214,17 @@ public class DataExtensionChange implements Change {
|
||||
for (String name : _columnNames) {
|
||||
writer.write(name); writer.write('\n');
|
||||
}
|
||||
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
|
||||
for (FreebaseType type : _columnTypes) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
|
||||
type.write(jsonWriter, options);
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
}
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
|
||||
for (Integer rowIndex : _rowIndices) {
|
||||
writer.write(rowIndex.toString()); writer.write('\n');
|
||||
@ -201,17 +232,22 @@ public class DataExtensionChange implements Change {
|
||||
writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n');
|
||||
for (DataExtension dataExtension : _dataExtensions) {
|
||||
writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');
|
||||
|
||||
for (Object[] values : dataExtension.data) {
|
||||
for (Object value : values) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
if (value instanceof ReconCandidate) {
|
||||
((ReconCandidate) value).write(jsonWriter, options);
|
||||
} else {
|
||||
jsonWriter.value(value);
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
if (value == null) {
|
||||
writer.write("null");
|
||||
} else if (value instanceof ReconCandidate) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
((ReconCandidate) value).write(jsonWriter, options);
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
}
|
||||
} else if (value instanceof String) {
|
||||
writer.write(JSONObject.quote((String) value));
|
||||
} else {
|
||||
writer.write(value.toString());
|
||||
}
|
||||
writer.write('\n');
|
||||
}
|
||||
@ -236,13 +272,18 @@ public class DataExtensionChange implements Change {
|
||||
static public Change load(LineNumberReader reader) throws Exception {
|
||||
String baseColumnName = null;
|
||||
int columnInsertIndex = -1;
|
||||
int firstNewCellIndex = -1;
|
||||
|
||||
List<String> columnNames = null;
|
||||
List<FreebaseType> columnTypes = null;
|
||||
|
||||
List<Integer> rowIndices = null;
|
||||
List<DataExtension> dataExtensions = null;
|
||||
|
||||
List<Row> oldRows = null;
|
||||
List<Row> newRows = null;
|
||||
|
||||
int firstNewCellIndex = -1;
|
||||
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
|
||||
int equal = line.indexOf('=');
|
||||
@ -271,6 +312,14 @@ public class DataExtensionChange implements Change {
|
||||
line = reader.readLine();
|
||||
columnNames.add(line);
|
||||
}
|
||||
} else if ("columnTypeCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
columnTypes = new ArrayList<FreebaseType>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
|
||||
}
|
||||
} else if ("dataExtensionCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
@ -289,10 +338,12 @@ public class DataExtensionChange implements Change {
|
||||
JSONTokener t = new JSONTokener(line);
|
||||
Object o = t.nextValue();
|
||||
|
||||
if (o instanceof JSONObject) {
|
||||
row[c] = ReconCandidate.load((JSONObject) o);
|
||||
} else {
|
||||
row[c] = o;
|
||||
if (o != JSONObject.NULL) {
|
||||
if (o instanceof JSONObject) {
|
||||
row[c] = ReconCandidate.load((JSONObject) o);
|
||||
} else {
|
||||
row[c] = o;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -325,6 +376,7 @@ public class DataExtensionChange implements Change {
|
||||
baseColumnName,
|
||||
columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
rowIndices,
|
||||
dataExtensions,
|
||||
firstNewCellIndex,
|
||||
|
@ -0,0 +1,67 @@
|
||||
package com.metaweb.gridworks.model.recon;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
|
||||
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.protograph.FreebaseType;
|
||||
|
||||
public class DataExtensionReconConfig extends StrictReconConfig {
|
||||
final public FreebaseType type;
|
||||
|
||||
static public ReconConfig reconstruct(JSONObject obj) throws Exception {
|
||||
JSONObject type = obj.getJSONObject("type");
|
||||
|
||||
return new DataExtensionReconConfig(
|
||||
new FreebaseType(
|
||||
type.getString("id"),
|
||||
type.getString("name")
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
public DataExtensionReconConfig(FreebaseType type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReconJob createJob(Project project, int rowIndex, Row row,
|
||||
String columnName, Cell cell) {
|
||||
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("mode"); writer.value("extend");
|
||||
writer.key("type"); type.write(writer, options);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Recon> batchRecon(List<ReconJob> jobs) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBriefDescription(Project project, String columnName) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
}
|
@ -21,6 +21,8 @@ abstract public class ReconConfig implements Jsonizable {
|
||||
return HeuristicReconConfig.reconstruct(obj);
|
||||
} else if ("strict".equals(mode)) {
|
||||
return StrictReconConfig.reconstruct(obj);
|
||||
} else if ("extend".equals(mode)) {
|
||||
return DataExtensionReconConfig.reconstruct(obj);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ import com.metaweb.gridworks.model.changes.CellAtRow;
|
||||
import com.metaweb.gridworks.model.changes.DataExtensionChange;
|
||||
import com.metaweb.gridworks.process.LongRunningProcess;
|
||||
import com.metaweb.gridworks.process.Process;
|
||||
import com.metaweb.gridworks.protograph.FreebaseType;
|
||||
import com.metaweb.gridworks.util.FreebaseDataExtensionJob;
|
||||
import com.metaweb.gridworks.util.FreebaseDataExtensionJob.ColumnInfo;
|
||||
import com.metaweb.gridworks.util.FreebaseDataExtensionJob.DataExtension;
|
||||
@ -221,6 +222,11 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
columnNames.add(StringUtils.join(info.names, " - "));
|
||||
}
|
||||
|
||||
List<FreebaseType> columnTypes = new ArrayList<FreebaseType>();
|
||||
for (ColumnInfo info : _job.columns) {
|
||||
columnTypes.add(info.expectedType);
|
||||
}
|
||||
|
||||
HistoryEntry historyEntry = new HistoryEntry(
|
||||
_project,
|
||||
_description,
|
||||
@ -229,6 +235,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
_baseColumnName,
|
||||
_columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
rowIndices,
|
||||
dataExtensions)
|
||||
);
|
||||
|
@ -1,8 +1,36 @@
|
||||
package com.metaweb.gridworks.protograph;
|
||||
|
||||
public class FreebaseType extends FreebaseTopic {
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.metaweb.gridworks.Jsonizable;
|
||||
|
||||
public class FreebaseType extends FreebaseTopic implements Jsonizable {
|
||||
public FreebaseType(String id, String name) {
|
||||
super(id, name);
|
||||
}
|
||||
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("name"); writer.value(name);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
static public FreebaseType load(JSONObject obj) throws Exception {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
FreebaseType type = new FreebaseType(
|
||||
obj.getString("id"),
|
||||
obj.getString("name")
|
||||
);
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.metaweb.gridworks.model.ReconCandidate;
|
||||
import com.metaweb.gridworks.protograph.FreebaseType;
|
||||
|
||||
public class FreebaseDataExtensionJob {
|
||||
static public class DataExtension {
|
||||
@ -36,10 +37,12 @@ public class FreebaseDataExtensionJob {
|
||||
static public class ColumnInfo {
|
||||
final public List<String> names;
|
||||
final public List<String> path;
|
||||
final public FreebaseType expectedType;
|
||||
|
||||
protected ColumnInfo(List<String> names, List<String> path) {
|
||||
protected ColumnInfo(List<String> names, List<String> path, FreebaseType expectedType) {
|
||||
this.names = names;
|
||||
this.path = path;
|
||||
this.expectedType = expectedType;
|
||||
}
|
||||
}
|
||||
|
||||
@ -130,7 +133,7 @@ public class FreebaseDataExtensionJob {
|
||||
int startColumnIndex
|
||||
) throws JSONException {
|
||||
String propertyID = extNode.getString("id");
|
||||
String expectedTypeID = extNode.getString("expected");
|
||||
String expectedTypeID = extNode.getJSONObject("expected").getString("id");
|
||||
|
||||
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
|
||||
resultNode.getJSONArray(propertyID) : null;
|
||||
@ -268,7 +271,7 @@ public class FreebaseDataExtensionJob {
|
||||
|
||||
static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException {
|
||||
String propertyID = node.getString("id");
|
||||
String expectedTypeID = node.getString("expected");
|
||||
String expectedTypeID = node.getJSONObject("expected").getString("id");
|
||||
|
||||
writer.key(propertyID);
|
||||
writer.array();
|
||||
@ -321,14 +324,20 @@ public class FreebaseDataExtensionJob {
|
||||
if (obj.has("properties") && !obj.isNull("properties")) {
|
||||
boolean included = (obj.has("included") && obj.getBoolean("included"));
|
||||
if (included && columns != null) {
|
||||
columns.add(new ColumnInfo(names2, path2));
|
||||
JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2,
|
||||
new FreebaseType(expected.getString("id"), expected.getString("name"))));
|
||||
}
|
||||
|
||||
return (included ? 1 : 0) +
|
||||
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
|
||||
} else {
|
||||
if (columns != null) {
|
||||
columns.add(new ColumnInfo(names2, path2));
|
||||
JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2,
|
||||
new FreebaseType(expected.getString("id"), expected.getString("name"))));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -64,6 +64,7 @@ ExtendDataPreviewDialog.getAllProperties = function(typeID, onDone) {
|
||||
"name" : null,
|
||||
"/type/property/expected_type" : {
|
||||
"id" : null,
|
||||
"name" : null,
|
||||
"/freebase/type_hints/mediator" : []
|
||||
},
|
||||
"sort" : "name"
|
||||
@ -74,6 +75,7 @@ ExtendDataPreviewDialog.getAllProperties = function(typeID, onDone) {
|
||||
"name" : null,
|
||||
"/type/property/expected_type" : {
|
||||
"id" : null,
|
||||
"name" : null,
|
||||
"/freebase/type_hints/mediator" : []
|
||||
},
|
||||
"sort" : "name"
|
||||
@ -90,7 +92,10 @@ ExtendDataPreviewDialog.getAllProperties = function(typeID, onDone) {
|
||||
allProperties.push({
|
||||
id : property.id,
|
||||
name : property.name,
|
||||
expected : expectedType.id
|
||||
expected : {
|
||||
id : expectedType.id,
|
||||
name : expectedType.name
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
@ -137,7 +142,8 @@ ExtendDataPreviewDialog.getAllProperties = function(typeID, onDone) {
|
||||
"id" : null,
|
||||
"name" : null,
|
||||
"/type/property/expected_type" : {
|
||||
id : null,
|
||||
"id" : null,
|
||||
"name" : null,
|
||||
"/freebase/type_hints/mediator" : []
|
||||
},
|
||||
"sort" : "name"
|
||||
@ -150,15 +156,24 @@ ExtendDataPreviewDialog.getAllProperties = function(typeID, onDone) {
|
||||
function(o2) {
|
||||
if ("result" in o2) {
|
||||
var processCVTProperty = function(parentProperty, properties) {
|
||||
var parentExpected = parentProperty["/type/property/expected_type"];
|
||||
|
||||
$.each(properties, function() {
|
||||
var expected = this["/type/property/expected_type"];
|
||||
allProperties.push({
|
||||
id : parentProperty.id,
|
||||
name : parentProperty.name,
|
||||
expected : parentProperty["/type/property/expected_type"].id,
|
||||
expected : {
|
||||
id : parentExpected.id,
|
||||
name : parentExpected.name
|
||||
},
|
||||
properties: [{
|
||||
id : this.id,
|
||||
name : this.name,
|
||||
expected : this["/type/property/expected_type"].id
|
||||
expected : {
|
||||
id : expected.id,
|
||||
name : expected.name
|
||||
}
|
||||
}]
|
||||
});
|
||||
});
|
||||
@ -204,10 +219,14 @@ ExtendDataPreviewDialog.prototype._show = function(properties) {
|
||||
};
|
||||
|
||||
this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) {
|
||||
var expected = data["/type/property/expected_type"];
|
||||
self._addProperty({
|
||||
id : data.id,
|
||||
name: data.name,
|
||||
expected: data["/type/property/expected_type"]
|
||||
expected: {
|
||||
id: expected.id,
|
||||
name: expected.name
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
|
@ -127,7 +127,7 @@
|
||||
mql_output: JSON.stringify([{
|
||||
"id" : null,
|
||||
"name" : null,
|
||||
"/type/property/expected_type" : null
|
||||
"/type/property/expected_type" : { "id" : null, "name" : null }
|
||||
}])
|
||||
};
|
||||
if (start) {
|
||||
|
Loading…
Reference in New Issue
Block a user