Retrieve types from the extend service

This commit is contained in:
Antonin Delpeuch 2017-07-06 21:15:37 +02:00
parent ad3a174abd
commit d99128c330
9 changed files with 366 additions and 343 deletions

View File

@ -38,7 +38,7 @@ import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject;
import com.google.refine.commands.EngineDependentCommand;
import com.google.refine.freebase.operations.ExtendDataOperation;
import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities;
@ -50,6 +50,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
String baseColumnName = request.getParameter("baseColumnName");
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
String endpoint = request.getParameter("endpoint");
String jsonString = request.getParameter("extension");
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
@ -57,6 +58,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
return new ExtendDataOperation(
engineConfig,
baseColumnName,
endpoint,
extension,
columnInsertIndex
);

View File

@ -133,19 +133,11 @@ public class PreviewExtendDataCommand extends Command {
writer.array();
for (ColumnInfo info : job.columns) {
writer.object();
writer.key("names");
writer.array();
for (String name : info.names) {
writer.value(name);
}
writer.endArray();
writer.key("path");
writer.array();
for (String id : info.path) {
writer.value(id);
}
writer.endArray();
writer.endObject();
writer.key("name");
writer.value(info.name);
writer.key("id");
writer.value(info.id);
writer.endObject();
}
writer.endArray();

View File

@ -0,0 +1,79 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.Jsonizable;
/**
 * A type exposed by a reconciliation service, made of an
 * identifier and a name. Data extension relies on it to
 * remember the (expected) types of the columns it creates.
 */
public class ReconType implements Jsonizable {
    /** Identifier of the type. */
    public String id;
    /** Name of the type. */
    public String name;

    public ReconType(String id, String name) {
        this.id = id;
        this.name = name;
    }

    /**
     * Serializes this type as a JSON object holding the
     * "id" and "name" keys, in that order.
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object()
              .key("id").value(id)
              .key("name").value(name)
              .endObject();
    }

    /**
     * Builds a type from its JSON form.
     *
     * @param obj object carrying the "id" and "name" strings, may be null
     * @return the corresponding type, or null when obj is null
     */
    static public ReconType load(JSONObject obj) throws Exception {
        return obj == null
                ? null
                : new ReconType(obj.getString("id"), obj.getString("name"));
    }
}

View File

@ -47,9 +47,9 @@ import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType;
import com.google.refine.model.ReconType;
import com.google.refine.model.recon.DataExtensionReconConfig;
import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.Change;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
@ -65,10 +65,11 @@ import com.google.refine.util.Pool;
public class DataExtensionChange implements Change {
final protected String _baseColumnName;
final protected String _service;
final protected int _columnInsertIndex;
final protected List<String> _columnNames;
final protected List<FreebaseType> _columnTypes;
final protected List<ReconType> _columnTypes;
final protected List<Integer> _rowIndices;
final protected List<DataExtension> _dataExtensions;
@ -79,15 +80,17 @@ public class DataExtensionChange implements Change {
protected List<Row> _newRows;
public DataExtensionChange(
String baseColumnName,
String baseColumnName,
String service,
int columnInsertIndex,
List<String> columnNames,
List<FreebaseType> columnTypes,
List<ReconType> columnTypes,
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
long historyEntryID
) {
_baseColumnName = baseColumnName;
_service = service;
_columnInsertIndex = columnInsertIndex;
_columnNames = columnNames;
@ -101,10 +104,11 @@ public class DataExtensionChange implements Change {
protected DataExtensionChange(
String baseColumnName,
String service,
int columnInsertIndex,
List<String> columnNames,
List<FreebaseType> columnTypes,
List<ReconType> columnTypes,
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
@ -113,6 +117,7 @@ public class DataExtensionChange implements Change {
List<Row> newRows
) {
_baseColumnName = baseColumnName;
_service = service;
_columnInsertIndex = columnInsertIndex;
_columnNames = columnNames;
@ -204,7 +209,11 @@ public class DataExtensionChange implements Change {
int cellIndex = _firstNewCellIndex + i;
Column column = new Column(cellIndex, name);
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
column.setReconConfig(new DataExtensionReconConfig(
_service,
"", // TODO retrieve service by URL and fill this
"",
_columnTypes.get(i)));
column.setReconStats(ReconStats.create(project, cellIndex));
try {
@ -275,17 +284,21 @@ public class DataExtensionChange implements Change {
@Override
public void save(Writer writer, Properties options) throws IOException {
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
writer.write("service="); writer.write(_service); writer.write('\n');
writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
for (String name : _columnNames) {
writer.write(name); writer.write('\n');
}
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
for (FreebaseType type : _columnTypes) {
for (ReconType type : _columnTypes) {
try {
JSONWriter jsonWriter = new JSONWriter(writer);
type.write(jsonWriter, options);
if(type == null) {
writer.write("null");
} else {
JSONWriter jsonWriter = new JSONWriter(writer);
type.write(jsonWriter, options);
}
} catch (JSONException e) {
// ???
}
@ -342,10 +355,11 @@ public class DataExtensionChange implements Change {
static public Change load(LineNumberReader reader, Pool pool) throws Exception {
String baseColumnName = null;
String service = null;
int columnInsertIndex = -1;
List<String> columnNames = null;
List<FreebaseType> columnTypes = null;
List<ReconType> columnTypes = null;
List<Integer> rowIndices = null;
List<DataExtension> dataExtensions = null;
@ -363,6 +377,8 @@ public class DataExtensionChange implements Change {
if ("baseColumnName".equals(field)) {
baseColumnName = value;
} else if ("service".equals(field)) {
service = value;
} else if ("columnInsertIndex".equals(field)) {
columnInsertIndex = Integer.parseInt(value);
} else if ("firstNewCellIndex".equals(field)) {
@ -390,10 +406,12 @@ public class DataExtensionChange implements Change {
} else if ("columnTypeCount".equals(field)) {
int count = Integer.parseInt(value);
columnTypes = new ArrayList<FreebaseType>(count);
columnTypes = new ArrayList<ReconType>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
if (line != null) {
columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
}
}
} else if ("dataExtensionCount".equals(field)) {
int count = Integer.parseInt(value);
@ -453,6 +471,7 @@ public class DataExtensionChange implements Change {
DataExtensionChange change = new DataExtensionChange(
baseColumnName,
service,
columnInsertIndex,
columnNames,
columnTypes,

View File

@ -0,0 +1,109 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.recon;
import java.util.List;
import java.util.Properties;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.model.ReconType;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Row;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.model.recon.ReconJob;
/**
 * Recon configuration that records the service and the (expected) type
 * of a column. It only carries metadata: every method that would run or
 * describe a reconciliation throws a RuntimeException.
 */
public class DataExtensionReconConfig extends StandardReconConfig {
    /** Expected type of the column, or null when no type is known. */
    final public ReconType type;

    private final static String WARN = "Not implemented";

    /**
     * Rebuilds a configuration from its JSON form.
     *
     * The type is read from the nested "type" object when it is present
     * and carries an "id". A missing or null "type" yields a
     * configuration without type instead of an exception.
     *
     * @param obj JSON object with a mandatory "service" string and the
     *            optional "identifierSpace", "schemaSpace" and "type" keys
     * @return the reconstructed configuration
     * @throws Exception if "service" is missing or a value has the wrong kind
     */
    static public ReconConfig reconstruct(JSONObject obj) throws Exception {
        // optJSONObject returns null when "type" is absent or is not an object
        JSONObject typeObj = obj.optJSONObject("type");

        // Top-level "id"/"name" are still honoured as a fallback, so that
        // configurations stored in that shape keep loading.
        JSONObject source = (typeObj != null && typeObj.has("id")) ? typeObj : obj;

        ReconType typ = null;
        if (source.has("id")) {
            String id = source.getString("id");
            // a type without a name is displayed under its id
            typ = new ReconType(id, source.has("name") ? source.getString("name") : id);
        }

        return new DataExtensionReconConfig(
                obj.getString("service"),
                obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
                obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
                typ);
    }

    /**
     * @param service         reconciliation service, passed to the superclass
     * @param identifierSpace passed to the superclass
     * @param schemaSpace     passed to the superclass
     * @param type            expected type of the column, may be null
     */
    public DataExtensionReconConfig(
            String service,
            String identifierSpace,
            String schemaSpace,
            ReconType type) {
        super(
                service,
                identifierSpace,
                schemaSpace,
                type != null ? type.id : null,
                type != null ? type.name : null,
                true,
                new ArrayList<ColumnDetail>());
        this.type = type;
    }

    /** Not supported: always throws. */
    @Override
    public ReconJob createJob(Project project, int rowIndex, Row row,
            String columnName, Cell cell) {
        throw new RuntimeException(WARN);
    }

    /** Not supported: always throws. */
    @Override
    public int getBatchSize() {
        throw new RuntimeException(WARN);
    }

    /** Not supported: always throws. */
    @Override
    public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
        throw new RuntimeException(WARN);
    }

    /** Not supported: always throws. */
    @Override
    public String getBriefDescription(Project project, String columnName) {
        throw new RuntimeException(WARN);
    }
}

View File

@ -55,11 +55,12 @@ import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType;
import com.google.refine.model.ReconType;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.expr.functions.ToDate;
public class ReconciledDataExtensionJob {
static public class DataExtension {
@ -71,28 +72,24 @@ public class ReconciledDataExtensionJob {
}
static public class ColumnInfo {
final public List<String> names;
final public List<String> path;
// final public FreebaseType expectedType;
// TODO
final public String name;
final public String id;
final public ReconType expectedType;
protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) {
this.names = names;
this.path = path;
// this.expectedType = expectedType;
protected ColumnInfo(String name, String id, ReconType expectedType) {
this.name = name;
this.id = id;
this.expectedType = expectedType;
}
}
final public JSONObject extension;
final public String endpoint;
final public int columnCount;
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
this.extension = obj;
this.endpoint = endpoint;
this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
}
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
@ -102,35 +99,30 @@ public class ReconciledDataExtensionJob {
StringWriter writer = new StringWriter();
formulateQuery(ids, extension, writer);
// Extract the order of properties
JSONArray origProperties = extension.getJSONArray("properties");
List<String> properties = new ArrayList<String>();
int l = origProperties.length();
for (int i = 0; i < l; i++) {
properties.add(origProperties.getJSONObject(i).getString("id"));
}
String query = writer.toString();
InputStream is = performQuery(this.endpoint, query);
try {
String s = ParsingUtilities.inputStreamToString(is);
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
// Extract the column metadata
gatherColumnInfo(o.getJSONArray("meta"), columns);
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
if (o.has("rows")){
JSONObject records = o.getJSONObject("rows");
// for each identifier
// for each identifier
for (String id : ids) {
if (records.has(id)) {
JSONObject record = records.getJSONObject(id);
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap);
if (ext != null) {
map.put(id, ext);
}
}
if (records.has(id)) {
JSONObject record = records.getJSONObject(id);
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap);
if (ext != null) {
map.put(id, ext);
}
}
}
}
@ -166,65 +158,65 @@ public class ReconciledDataExtensionJob {
protected ReconciledDataExtensionJob.DataExtension collectResult(
JSONObject record,
List<String> properties,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
List<Object[]> rows = new ArrayList<Object[]>();
// for each property
int colindex = 0;
for(String pid : properties) {
JSONArray values = record.getJSONArray(pid);
if (values == null) {
continue;
}
// for each property
int colindex = 0;
for(ColumnInfo ci : columns) {
String pid = ci.id;
JSONArray values = record.getJSONArray(pid);
if (values == null) {
continue;
}
// for each value
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
JSONObject val = values.getJSONObject(rowindex);
// store a reconciled value
if(val.has("id")) {
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
} else if(val.has("str")) {
// store a bare string
String str = val.getString("str");
storeStr(rows, rowindex, colindex, str);
// for each value
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
JSONObject val = values.getJSONObject(rowindex);
// store a reconciled value
if (val.has("id")) {
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
} else if (val.has("str")) {
// store a bare string
String str = val.getString("str");
storeCell(rows, rowindex, colindex, str);
} else if (val.has("float")) {
float v = Float.parseFloat(val.getString("float"));
storeCell(rows, rowindex, colindex, v);
} else if (val.has("int")) {
int v = Integer.parseInt(val.getString("int"));
storeCell(rows, rowindex, colindex, v);
} else if (val.has("date")) {
ToDate td = new ToDate();
String[] args = new String[1];
args[0] = val.getString("date");
Object v = td.call(null, args);
storeCell(rows, rowindex, colindex, v);
} else if(val.has("bool")) {
boolean v = val.getString("bool") == "true";
storeCell(rows, rowindex, colindex, v);
}
// TODO other cases for other types of values (dates, booleans, )
}
colindex++;
}
}
colindex++;
}
// collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
Object[][] data = new Object[rows.size()][columnCount];
Object[][] data = new Object[rows.size()][columns.size()];
rows.toArray(data);
return new DataExtension(data);
}
protected void storeStr(
List<Object[]> rows,
int row,
int col,
String str
) throws JSONException {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
}
rows.get(row)[col] = str;
}
protected void storeCell(
List<Object[]> rows,
int row,
int col,
Object value,
Map<String, ReconCandidate> reconCandidateMap
Object value
) {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
rows.add(new Object[columns.size()]);
}
rows.get(row)[col] = value;
}
@ -251,128 +243,8 @@ public class ReconciledDataExtensionJob {
reconCandidateMap.put(id, rc);
}
storeCell(rows, row, col, rc, reconCandidateMap);
storeCell(rows, row, col, rc);
}
/*
protected int[] collectResult(
List<Object[]> rows,
JSONObject extNode,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String propertyID = extNode.getString("id");
// String expectedTypeID = extNode.getJSONObject("expected").getString("id");
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
resultNode.getJSONArray(propertyID) : null;
if ("/type/key".equals(expectedTypeID)) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof JSONObject) {
storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else if (expectedTypeID.startsWith("/type/")) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof Serializable) {
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else {
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
if (a != null && a.length() > 0) {
int maxColIndex = startColumnIndex;
int l = a.length();
for (int r = 0; r < l; r++) {
Object v = a.isNull(r) ? null : a.get(r);
JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null;
int startColumnIndex2 = startColumnIndex;
int startRowIndex2 = startRowIndex;
if (isOwnColumn) {
if (o != null) {
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
} else {
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
}
}
if (hasSubProperties && o != null) {
int[] rowcol = collectResult(
rows,
extNode.getJSONArray("properties"),
o,
startRowIndex,
startColumnIndex2,
reconCandidateMap
);
startRowIndex2 = rowcol[0];
startColumnIndex2 = rowcol[1];
}
startRowIndex = startRowIndex2;
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
}
return new int[] { startRowIndex, maxColIndex };
} else {
return new int[] {
startRowIndex,
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
};
}
}
}
protected int[] collectResult(
List<Object[]> rows,
JSONArray subProperties,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
int maxStartRowIndex = startRowIndex;
int k = subProperties.length();
for (int c = 0; c < k; c++) {
int[] rowcol = collectResult(
rows,
subProperties.getJSONObject(c),
resultNode,
startRowIndex,
startColumnIndex,
reconCandidateMap
);
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
startColumnIndex = rowcol[1];
}
return new int[] { maxStartRowIndex, startColumnIndex };
}*/
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
@ -380,74 +252,46 @@ public class ReconciledDataExtensionJob {
jsonWriter.object();
jsonWriter.key("ids");
jsonWriter.array();
for (String id : ids) {
if (id != null) {
jsonWriter.value(id);
}
}
jsonWriter.endArray();
jsonWriter.key("ids");
jsonWriter.array();
for (String id : ids) {
if (id != null) {
jsonWriter.value(id);
}
}
jsonWriter.endArray();
jsonWriter.key("properties");
jsonWriter.array();
JSONArray properties = node.getJSONArray("properties");
int l = properties.length();
jsonWriter.array();
JSONArray properties = node.getJSONArray("properties");
int l = properties.length();
for (int i = 0; i < l; i++) {
JSONObject property = properties.getJSONObject(i);
jsonWriter.object();
jsonWriter.key("id");
jsonWriter.value(property.getString("id"));
// TODO translate constraints as below
jsonWriter.endObject();
}
jsonWriter.endArray();
for (int i = 0; i < l; i++) {
JSONObject property = properties.getJSONObject(i);
jsonWriter.object();
jsonWriter.key("id");
jsonWriter.value(property.getString("id"));
// TODO translate constraints as below
jsonWriter.endObject();
}
jsonWriter.endArray();
jsonWriter.endObject();
}
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
String name = obj.getString("name");
List<String> names2 = null;
List<String> path2 = null;
if (columns != null) {
names2 = new ArrayList<String>(names);
names2.add(name);
path2 = new ArrayList<String>(path);
path2.add(obj.getString("id"));
}
if (obj.has("properties") && !obj.isNull("properties")) {
boolean included = (obj.has("included") && obj.getBoolean("included"));
if (included && columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */));
}
return (included ? 1 : 0) +
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
} else {
if (columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */ ));
}
return 1;
}
}
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
int c = 0;
int l = a.length();
for (int i = 0; i < l; i++) {
c += countColumns(a.getJSONObject(i), columns, names, path);
}
return c;
}
/**
 * Appends to {@code columns} one ColumnInfo per entry of {@code meta},
 * in the same order.
 *
 * Every entry must provide "name" and "id". When an entry has a "type"
 * key, it must be an object with "id" and "name", which become the
 * expected type of the column; otherwise the expected type is null.
 */
static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException {
    int index = 0;
    while (index < meta.length()) {
        JSONObject entry = meta.getJSONObject(index);

        JSONObject typeObj = entry.has("type") ? entry.getJSONObject("type") : null;
        ReconType declaredType = (typeObj == null)
                ? null
                : new ReconType(typeObj.getString("id"), typeObj.getString("name"));

        String columnName = entry.getString("name");
        String columnId = entry.getString("id");
        columns.add(new ColumnInfo(columnName, columnId, declaredType));

        index++;
    }
}
}

View File

@ -50,15 +50,16 @@ import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.changes.DataExtensionChange;
import com.google.refine.model.recon.DataExtensionJob;
import com.google.refine.model.recon.DataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.DataExtensionJob.DataExtension;
import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.model.Row;
import com.google.refine.model.changes.CellAtRow;
import com.google.refine.operations.EngineDependentOperation;
@ -68,6 +69,7 @@ import com.google.refine.process.Process;
public class ExtendDataOperation extends EngineDependentOperation {
final protected String _baseColumnName;
final protected String _endpoint;
final protected JSONObject _extension;
final protected int _columnInsertIndex;
@ -77,6 +79,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
return new ExtendDataOperation(
engineConfig,
obj.getString("baseColumnName"),
obj.getString("endpoint"),
obj.getJSONObject("extension"),
obj.getInt("columnInsertIndex")
);
@ -85,12 +88,14 @@ public class ExtendDataOperation extends EngineDependentOperation {
/**
 * Creates the operation; every argument but the engine configuration
 * is stored as-is in the matching field.
 *
 * @param engineConfig      handed to the superclass constructor
 * @param baseColumnName    kept in _baseColumnName
 * @param endpoint          kept in _endpoint
 * @param extension         kept in _extension
 * @param columnInsertIndex kept in _columnInsertIndex
 */
public ExtendDataOperation(
        JSONObject engineConfig,
        String baseColumnName,
        String endpoint,
        JSONObject extension,
        int columnInsertIndex
) {
    super(engineConfig);

    this._columnInsertIndex = columnInsertIndex;
    this._extension = extension;
    this._endpoint = endpoint;
    this._baseColumnName = baseColumnName;
}
@ -105,6 +110,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
writer.key("engineConfig"); writer.value(getEngineConfig());
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
writer.key("baseColumnName"); writer.value(_baseColumnName);
writer.key("endpoint"); writer.value(_endpoint);
writer.key("extension"); writer.value(_extension);
writer.endObject();
}
@ -135,7 +141,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
final protected JSONObject _engineConfig;
final protected long _historyEntryID;
protected int _cellIndex;
protected FreebaseDataExtensionJob _job;
protected ReconciledDataExtensionJob _job;
public ExtendDataProcess(
Project project,
@ -147,7 +153,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
_engineConfig = engineConfig;
_historyEntryID = HistoryEntry.allocateID();
_job = new FreebaseDataExtensionJob(_extension);
_job = new ReconciledDataExtensionJob(_extension, _endpoint);
}
@Override
@ -283,10 +289,10 @@ public class ExtendDataOperation extends EngineDependentOperation {
if (!_canceled) {
List<String> columnNames = new ArrayList<String>();
for (ColumnInfo info : _job.columns) {
columnNames.add(StringUtils.join(info.names, " - "));
columnNames.add(info.name);
}
List<String> columnTypes = new ArrayList<String>();
List<ReconType> columnTypes = new ArrayList<ReconType>();
for (ColumnInfo info : _job.columns) {
columnTypes.add(info.expectedType);
}
@ -298,6 +304,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
ExtendDataOperation.this,
new DataExtensionChange(
_baseColumnName,
_endpoint,
_columnInsertIndex,
columnNames,
columnTypes,

View File

@ -52,7 +52,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
alert("Please add some properties first.");
} else {
DialogSystem.dismissUntil(self._level - 1);
self._onDone(self._extension);
self._onDone(self._extension, self._service);
}
});
this._elmts.cancelButton.click(function() {
@ -67,6 +67,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
this._serviceMetadata = null;
if ("reconConfig" in column) {
var service = column.reconConfig.service;
this._service = service;
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
this._serviceMetadata = serviceMetadata;
if ("extend" in serviceMetadata) {
@ -258,7 +259,7 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
var renderColumnHeader = function(column) {
var th = $('<th>').appendTo(trHead);
$('<span>').html(column.names.join(" &raquo; ")).appendTo(th);
$('<span>').html(column.name).appendTo(th);
$('<br>').appendTo(th);
$('<a href="javascript:{}"></a>')
@ -266,15 +267,15 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
.addClass("action")
.attr("title", "Remove this column")
.click(function() {
self._removeProperty(column.path);
self._removeProperty(column.id);
}).appendTo(th);
$('<a href="javascript:{}"></a>')
.text("constrain")
.text("configure")
.addClass("action")
.attr("title", "Add constraints to this column")
.attr("title", "Configure this column")
.click(function() {
self._constrainProperty(column.path);
self._constrainProperty(column.id);
}).appendTo(th);
};
for (var c = 0; c < data.columns.length; c++) {
@ -301,56 +302,25 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
container.append(table);
};
/**
 * Deletes from this._extension.properties every entry whose id matches
 * the given one, then calls this._update().
 */
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) {
  var properties = this._extension.properties;

  // walk from the end so that splicing never disturbs unvisited indices
  var i = properties.length;
  while (i-- > 0) {
    if (properties[i].id == id) {
      properties.splice(i, 1);
    }
  }

  this._update();
};
/**
 * Looks up an entry of this._extension.properties by id.
 *
 * The list is scanned from the end, so the last matching entry wins
 * when several share the same id.
 *
 * @param id identifier to look for
 * @return the matching property object, or null when there is none
 */
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) {
  var properties = this._extension.properties;
  for (var i = properties.length - 1; i >= 0; i--) {
    // compare against the `id` parameter: no `path` variable exists in
    // this scope, so referencing it would throw a ReferenceError
    if (properties[i].id == id) {
      return properties[i];
    }
  }
  return null;
};
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) {
var self = this;

View File

@ -153,12 +153,13 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
column,
columnIndex,
o.rowIndices,
function(extension) {
function(extension, endpoint) {
Refine.postProcess(
"core",
"extend-data",
{
baseColumnName: column.name,
endpoint: endpoint,
columnInsertIndex: columnIndex + 1
},
{
@ -167,7 +168,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
{ rowsChanged: true, modelsChanged: true }
);
}
); */
);
};
/*