Retrieve types from the extend service

This commit is contained in:
Antonin Delpeuch 2017-07-06 21:15:37 +02:00
parent ad3a174abd
commit d99128c330
9 changed files with 366 additions and 343 deletions

View File

@ -38,7 +38,7 @@ import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject; import org.json.JSONObject;
import com.google.refine.commands.EngineDependentCommand; import com.google.refine.commands.EngineDependentCommand;
import com.google.refine.freebase.operations.ExtendDataOperation; import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.model.AbstractOperation; import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities; import com.google.refine.util.ParsingUtilities;
@ -50,6 +50,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
String baseColumnName = request.getParameter("baseColumnName"); String baseColumnName = request.getParameter("baseColumnName");
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
String endpoint = request.getParameter("endpoint");
String jsonString = request.getParameter("extension"); String jsonString = request.getParameter("extension");
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
@ -57,6 +58,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
return new ExtendDataOperation( return new ExtendDataOperation(
engineConfig, engineConfig,
baseColumnName, baseColumnName,
endpoint,
extension, extension,
columnInsertIndex columnInsertIndex
); );

View File

@ -133,19 +133,11 @@ public class PreviewExtendDataCommand extends Command {
writer.array(); writer.array();
for (ColumnInfo info : job.columns) { for (ColumnInfo info : job.columns) {
writer.object(); writer.object();
writer.key("names"); writer.key("name");
writer.array(); writer.value(info.name);
for (String name : info.names) { writer.key("id");
writer.value(name); writer.value(info.id);
} writer.endObject();
writer.endArray();
writer.key("path");
writer.array();
for (String id : info.path) {
writer.value(id);
}
writer.endArray();
writer.endObject();
} }
writer.endArray(); writer.endArray();

View File

@ -0,0 +1,79 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.Jsonizable;
/**
 * A type as exposed by a reconciliation service, identified by an
 * {@code id} and carrying a human-readable {@code name}.
 * Data extension uses it to remember the (expected) type of each
 * newly created column.
 */
public class ReconType implements Jsonizable {
    /** Identifier of the type. */
    public String id;
    /** Display name of the type. */
    public String name;

    /**
     * Builds a type from its identifier and display name.
     *
     * @param id   identifier of the type
     * @param name display name of the type
     */
    public ReconType(String id, String name) {
        this.id = id;
        this.name = name;
    }

    /**
     * Serializes this type as a JSON object with the keys
     * {@code "id"} and {@code "name"}, in that order.
     *
     * @param writer  destination JSON writer
     * @param options serialization options (not consulted here)
     * @throws JSONException if the writer rejects one of the calls
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object()
              .key("id").value(id)
              .key("name").value(name)
              .endObject();
    }

    /**
     * Rebuilds a type from the JSON object produced by
     * {@link #write(JSONWriter, Properties)}.
     *
     * @param obj JSON object holding {@code "id"} and {@code "name"};
     *            may be {@code null}
     * @return the decoded type, or {@code null} when {@code obj} is {@code null}
     * @throws Exception if one of the two keys cannot be read as a string
     */
    static public ReconType load(JSONObject obj) throws Exception {
        if (obj != null) {
            // Read "id" before "name", matching the serialization order.
            String typeId = obj.getString("id");
            String typeName = obj.getString("name");
            return new ReconType(typeId, typeName);
        }
        return null;
    }
}

View File

@ -47,9 +47,9 @@ import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType; import com.google.refine.model.ReconType;
import com.google.refine.model.recon.DataExtensionReconConfig; import com.google.refine.model.recon.DataExtensionReconConfig;
import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension; import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.Change; import com.google.refine.history.Change;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Column; import com.google.refine.model.Column;
@ -65,10 +65,11 @@ import com.google.refine.util.Pool;
public class DataExtensionChange implements Change { public class DataExtensionChange implements Change {
final protected String _baseColumnName; final protected String _baseColumnName;
final protected String _service;
final protected int _columnInsertIndex; final protected int _columnInsertIndex;
final protected List<String> _columnNames; final protected List<String> _columnNames;
final protected List<FreebaseType> _columnTypes; final protected List<ReconType> _columnTypes;
final protected List<Integer> _rowIndices; final protected List<Integer> _rowIndices;
final protected List<DataExtension> _dataExtensions; final protected List<DataExtension> _dataExtensions;
@ -79,15 +80,17 @@ public class DataExtensionChange implements Change {
protected List<Row> _newRows; protected List<Row> _newRows;
public DataExtensionChange( public DataExtensionChange(
String baseColumnName, String baseColumnName,
String service,
int columnInsertIndex, int columnInsertIndex,
List<String> columnNames, List<String> columnNames,
List<FreebaseType> columnTypes, List<ReconType> columnTypes,
List<Integer> rowIndices, List<Integer> rowIndices,
List<DataExtension> dataExtensions, List<DataExtension> dataExtensions,
long historyEntryID long historyEntryID
) { ) {
_baseColumnName = baseColumnName; _baseColumnName = baseColumnName;
_service = service;
_columnInsertIndex = columnInsertIndex; _columnInsertIndex = columnInsertIndex;
_columnNames = columnNames; _columnNames = columnNames;
@ -101,10 +104,11 @@ public class DataExtensionChange implements Change {
protected DataExtensionChange( protected DataExtensionChange(
String baseColumnName, String baseColumnName,
String service,
int columnInsertIndex, int columnInsertIndex,
List<String> columnNames, List<String> columnNames,
List<FreebaseType> columnTypes, List<ReconType> columnTypes,
List<Integer> rowIndices, List<Integer> rowIndices,
List<DataExtension> dataExtensions, List<DataExtension> dataExtensions,
@ -113,6 +117,7 @@ public class DataExtensionChange implements Change {
List<Row> newRows List<Row> newRows
) { ) {
_baseColumnName = baseColumnName; _baseColumnName = baseColumnName;
_service = service;
_columnInsertIndex = columnInsertIndex; _columnInsertIndex = columnInsertIndex;
_columnNames = columnNames; _columnNames = columnNames;
@ -204,7 +209,11 @@ public class DataExtensionChange implements Change {
int cellIndex = _firstNewCellIndex + i; int cellIndex = _firstNewCellIndex + i;
Column column = new Column(cellIndex, name); Column column = new Column(cellIndex, name);
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i))); column.setReconConfig(new DataExtensionReconConfig(
_service,
"", // TODO retrieve service by URL and fill this
"",
_columnTypes.get(i)));
column.setReconStats(ReconStats.create(project, cellIndex)); column.setReconStats(ReconStats.create(project, cellIndex));
try { try {
@ -275,17 +284,21 @@ public class DataExtensionChange implements Change {
@Override @Override
public void save(Writer writer, Properties options) throws IOException { public void save(Writer writer, Properties options) throws IOException {
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
writer.write("service="); writer.write(_service); writer.write('\n');
writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
for (String name : _columnNames) { for (String name : _columnNames) {
writer.write(name); writer.write('\n'); writer.write(name); writer.write('\n');
} }
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
for (FreebaseType type : _columnTypes) { for (ReconType type : _columnTypes) {
try { try {
JSONWriter jsonWriter = new JSONWriter(writer); if(type == null) {
writer.write("null");
type.write(jsonWriter, options); } else {
JSONWriter jsonWriter = new JSONWriter(writer);
type.write(jsonWriter, options);
}
} catch (JSONException e) { } catch (JSONException e) {
// ??? // ???
} }
@ -342,10 +355,11 @@ public class DataExtensionChange implements Change {
static public Change load(LineNumberReader reader, Pool pool) throws Exception { static public Change load(LineNumberReader reader, Pool pool) throws Exception {
String baseColumnName = null; String baseColumnName = null;
String service = null;
int columnInsertIndex = -1; int columnInsertIndex = -1;
List<String> columnNames = null; List<String> columnNames = null;
List<FreebaseType> columnTypes = null; List<ReconType> columnTypes = null;
List<Integer> rowIndices = null; List<Integer> rowIndices = null;
List<DataExtension> dataExtensions = null; List<DataExtension> dataExtensions = null;
@ -363,6 +377,8 @@ public class DataExtensionChange implements Change {
if ("baseColumnName".equals(field)) { if ("baseColumnName".equals(field)) {
baseColumnName = value; baseColumnName = value;
} else if ("service".equals(field)) {
service = value;
} else if ("columnInsertIndex".equals(field)) { } else if ("columnInsertIndex".equals(field)) {
columnInsertIndex = Integer.parseInt(value); columnInsertIndex = Integer.parseInt(value);
} else if ("firstNewCellIndex".equals(field)) { } else if ("firstNewCellIndex".equals(field)) {
@ -390,10 +406,12 @@ public class DataExtensionChange implements Change {
} else if ("columnTypeCount".equals(field)) { } else if ("columnTypeCount".equals(field)) {
int count = Integer.parseInt(value); int count = Integer.parseInt(value);
columnTypes = new ArrayList<FreebaseType>(count); columnTypes = new ArrayList<ReconType>(count);
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
line = reader.readLine(); line = reader.readLine();
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line))); if (line != null) {
columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
}
} }
} else if ("dataExtensionCount".equals(field)) { } else if ("dataExtensionCount".equals(field)) {
int count = Integer.parseInt(value); int count = Integer.parseInt(value);
@ -453,6 +471,7 @@ public class DataExtensionChange implements Change {
DataExtensionChange change = new DataExtensionChange( DataExtensionChange change = new DataExtensionChange(
baseColumnName, baseColumnName,
service,
columnInsertIndex, columnInsertIndex,
columnNames, columnNames,
columnTypes, columnTypes,

View File

@ -0,0 +1,109 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.recon;
import java.util.List;
import java.util.Properties;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.model.ReconType;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Row;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.model.recon.ReconJob;
/**
 * Recon configuration attached to columns created by a data extension.
 * It remembers the service the values were fetched from and the (expected)
 * type of the column. It is a passive record only: every method that would
 * start or describe a reconciliation job throws
 * {@link UnsupportedOperationException}.
 */
public class DataExtensionReconConfig extends StandardReconConfig {
    /** Expected type of the extended column, or {@code null} when unknown. */
    final public ReconType type;

    private final static String WARN = "Not implemented";

    /**
     * Rebuilds a configuration from its JSON form.
     * <p>
     * The type is read from the nested {@code "type"} object when that key is
     * present and not JSON null. Otherwise {@code "id"} / {@code "name"} found
     * directly on {@code obj} are still accepted, which is where the previous
     * implementation looked. A missing name falls back to the id.
     *
     * @param obj serialized configuration; must contain {@code "service"}
     * @return the reconstructed configuration
     * @throws Exception if {@code "service"} is missing or a value has the wrong JSON type
     */
    static public ReconConfig reconstruct(JSONObject obj) throws Exception {
        ReconType typ = null;
        // A config without a known type is legal (the constructor accepts null),
        // so a missing or null "type" must not make deserialization fail.
        if (obj.has("type") && !obj.isNull("type")) {
            typ = readType(obj.getJSONObject("type"));
        }
        if (typ == null) {
            // Compatibility: keep honouring id/name stored on the outer object.
            typ = readType(obj);
        }

        return new DataExtensionReconConfig(
                obj.getString("service"),
                obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
                obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
                typ);
    }

    /**
     * Extracts a type from {@code holder}.
     *
     * @param holder JSON object expected to carry {@code "id"} and optionally {@code "name"}
     * @return the type, or {@code null} when {@code holder} has no usable {@code "id"}
     * @throws JSONException if a present value cannot be read as a string
     */
    private static ReconType readType(JSONObject holder) throws JSONException {
        if (!holder.has("id") || holder.isNull("id")) {
            return null;
        }
        String id = holder.getString("id");
        String name = (holder.has("name") && !holder.isNull("name"))
                ? holder.getString("name")
                : id;
        return new ReconType(id, name);
    }

    /**
     * Creates a configuration for an extended column.
     *
     * @param service         URL/identifier of the service the data came from
     * @param identifierSpace identifier space of the service, may be {@code null}
     * @param schemaSpace     schema space of the service, may be {@code null}
     * @param type            expected type of the column, may be {@code null}
     */
    public DataExtensionReconConfig(
            String service,
            String identifierSpace,
            String schemaSpace,
            ReconType type) {
        super(
                service,
                identifierSpace,
                schemaSpace,
                type != null ? type.id : null,
                type != null ? type.name : null,
                true, // NOTE(review): presumably the auto-match flag of StandardReconConfig - confirm
                new ArrayList<ColumnDetail>());
        this.type = type;
    }

    /** Not supported: this configuration never creates reconciliation jobs. */
    @Override
    public ReconJob createJob(Project project, int rowIndex, Row row,
            String columnName, Cell cell) {
        throw new UnsupportedOperationException(WARN);
    }

    /** Not supported: this configuration never runs batches. */
    @Override
    public int getBatchSize() {
        throw new UnsupportedOperationException(WARN);
    }

    /** Not supported: this configuration never runs batches. */
    @Override
    public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
        throw new UnsupportedOperationException(WARN);
    }

    /** Not supported: there is no reconciliation operation to describe. */
    @Override
    public String getBriefDescription(Project project, String columnName) {
        throw new UnsupportedOperationException(WARN);
    }
}

View File

@ -55,11 +55,12 @@ import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType; import com.google.refine.model.ReconType;
import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconCandidate;
import com.google.refine.model.recon.StandardReconConfig; import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities; import com.google.refine.util.ParsingUtilities;
import com.google.refine.expr.functions.ToDate;
public class ReconciledDataExtensionJob { public class ReconciledDataExtensionJob {
static public class DataExtension { static public class DataExtension {
@ -71,28 +72,24 @@ public class ReconciledDataExtensionJob {
} }
static public class ColumnInfo { static public class ColumnInfo {
final public List<String> names; final public String name;
final public List<String> path; final public String id;
// final public FreebaseType expectedType; final public ReconType expectedType;
// TODO
protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) { protected ColumnInfo(String name, String id, ReconType expectedType) {
this.names = names; this.name = name;
this.path = path; this.id = id;
// this.expectedType = expectedType; this.expectedType = expectedType;
} }
} }
final public JSONObject extension; final public JSONObject extension;
final public String endpoint; final public String endpoint;
final public int columnCount;
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>(); final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException { public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
this.extension = obj; this.extension = obj;
this.endpoint = endpoint; this.endpoint = endpoint;
this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
} }
public Map<String, ReconciledDataExtensionJob.DataExtension> extend( public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
@ -102,35 +99,30 @@ public class ReconciledDataExtensionJob {
StringWriter writer = new StringWriter(); StringWriter writer = new StringWriter();
formulateQuery(ids, extension, writer); formulateQuery(ids, extension, writer);
// Extract the order of properties
JSONArray origProperties = extension.getJSONArray("properties");
List<String> properties = new ArrayList<String>();
int l = origProperties.length();
for (int i = 0; i < l; i++) {
properties.add(origProperties.getJSONObject(i).getString("id"));
}
String query = writer.toString(); String query = writer.toString();
InputStream is = performQuery(this.endpoint, query); InputStream is = performQuery(this.endpoint, query);
try { try {
String s = ParsingUtilities.inputStreamToString(is); String s = ParsingUtilities.inputStreamToString(is);
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
// Extract the column metadata
gatherColumnInfo(o.getJSONArray("meta"), columns);
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>(); Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
if (o.has("rows")){ if (o.has("rows")){
JSONObject records = o.getJSONObject("rows"); JSONObject records = o.getJSONObject("rows");
// for each identifier // for each identifier
for (String id : ids) { for (String id : ids) {
if (records.has(id)) { if (records.has(id)) {
JSONObject record = records.getJSONObject(id); JSONObject record = records.getJSONObject(id);
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap); ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap);
if (ext != null) { if (ext != null) {
map.put(id, ext); map.put(id, ext);
} }
} }
} }
} }
@ -166,65 +158,65 @@ public class ReconciledDataExtensionJob {
protected ReconciledDataExtensionJob.DataExtension collectResult( protected ReconciledDataExtensionJob.DataExtension collectResult(
JSONObject record, JSONObject record,
List<String> properties,
Map<String, ReconCandidate> reconCandidateMap Map<String, ReconCandidate> reconCandidateMap
) throws JSONException { ) throws JSONException {
List<Object[]> rows = new ArrayList<Object[]>(); List<Object[]> rows = new ArrayList<Object[]>();
// for each property // for each property
int colindex = 0; int colindex = 0;
for(String pid : properties) { for(ColumnInfo ci : columns) {
JSONArray values = record.getJSONArray(pid); String pid = ci.id;
if (values == null) { JSONArray values = record.getJSONArray(pid);
continue; if (values == null) {
} continue;
}
// for each value // for each value
for(int rowindex = 0; rowindex < values.length(); rowindex++) { for(int rowindex = 0; rowindex < values.length(); rowindex++) {
JSONObject val = values.getJSONObject(rowindex); JSONObject val = values.getJSONObject(rowindex);
// store a reconciled value // store a reconciled value
if(val.has("id")) { if (val.has("id")) {
storeCell(rows, rowindex, colindex, val, reconCandidateMap); storeCell(rows, rowindex, colindex, val, reconCandidateMap);
} else if(val.has("str")) { } else if (val.has("str")) {
// store a bare string // store a bare string
String str = val.getString("str"); String str = val.getString("str");
storeStr(rows, rowindex, colindex, str); storeCell(rows, rowindex, colindex, str);
} else if (val.has("float")) {
float v = Float.parseFloat(val.getString("float"));
storeCell(rows, rowindex, colindex, v);
} else if (val.has("int")) {
int v = Integer.parseInt(val.getString("int"));
storeCell(rows, rowindex, colindex, v);
} else if (val.has("date")) {
ToDate td = new ToDate();
String[] args = new String[1];
args[0] = val.getString("date");
Object v = td.call(null, args);
storeCell(rows, rowindex, colindex, v);
} else if(val.has("bool")) {
boolean v = val.getString("bool") == "true";
storeCell(rows, rowindex, colindex, v);
} }
// TODO other cases for other types of values (dates, booleans, ) }
} colindex++;
colindex++; }
}
// collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
Object[][] data = new Object[rows.size()][columnCount]; Object[][] data = new Object[rows.size()][columns.size()];
rows.toArray(data); rows.toArray(data);
return new DataExtension(data); return new DataExtension(data);
} }
protected void storeStr(
List<Object[]> rows,
int row,
int col,
String str
) throws JSONException {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
}
rows.get(row)[col] = str;
}
protected void storeCell( protected void storeCell(
List<Object[]> rows, List<Object[]> rows,
int row, int row,
int col, int col,
Object value, Object value
Map<String, ReconCandidate> reconCandidateMap
) { ) {
while (row >= rows.size()) { while (row >= rows.size()) {
rows.add(new Object[columnCount]); rows.add(new Object[columns.size()]);
} }
rows.get(row)[col] = value; rows.get(row)[col] = value;
} }
@ -251,128 +243,8 @@ public class ReconciledDataExtensionJob {
reconCandidateMap.put(id, rc); reconCandidateMap.put(id, rc);
} }
storeCell(rows, row, col, rc, reconCandidateMap); storeCell(rows, row, col, rc);
} }
/*
protected int[] collectResult(
List<Object[]> rows,
JSONObject extNode,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String propertyID = extNode.getString("id");
// String expectedTypeID = extNode.getJSONObject("expected").getString("id");
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
resultNode.getJSONArray(propertyID) : null;
if ("/type/key".equals(expectedTypeID)) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof JSONObject) {
storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else if (expectedTypeID.startsWith("/type/")) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof Serializable) {
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else {
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
if (a != null && a.length() > 0) {
int maxColIndex = startColumnIndex;
int l = a.length();
for (int r = 0; r < l; r++) {
Object v = a.isNull(r) ? null : a.get(r);
JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null;
int startColumnIndex2 = startColumnIndex;
int startRowIndex2 = startRowIndex;
if (isOwnColumn) {
if (o != null) {
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
} else {
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
}
}
if (hasSubProperties && o != null) {
int[] rowcol = collectResult(
rows,
extNode.getJSONArray("properties"),
o,
startRowIndex,
startColumnIndex2,
reconCandidateMap
);
startRowIndex2 = rowcol[0];
startColumnIndex2 = rowcol[1];
}
startRowIndex = startRowIndex2;
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
}
return new int[] { startRowIndex, maxColIndex };
} else {
return new int[] {
startRowIndex,
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
};
}
}
}
protected int[] collectResult(
List<Object[]> rows,
JSONArray subProperties,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
int maxStartRowIndex = startRowIndex;
int k = subProperties.length();
for (int c = 0; c < k; c++) {
int[] rowcol = collectResult(
rows,
subProperties.getJSONObject(c),
resultNode,
startRowIndex,
startColumnIndex,
reconCandidateMap
);
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
startColumnIndex = rowcol[1];
}
return new int[] { maxStartRowIndex, startColumnIndex };
}*/
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException { static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
@ -380,74 +252,46 @@ public class ReconciledDataExtensionJob {
jsonWriter.object(); jsonWriter.object();
jsonWriter.key("ids"); jsonWriter.key("ids");
jsonWriter.array(); jsonWriter.array();
for (String id : ids) { for (String id : ids) {
if (id != null) { if (id != null) {
jsonWriter.value(id); jsonWriter.value(id);
} }
} }
jsonWriter.endArray(); jsonWriter.endArray();
jsonWriter.key("properties"); jsonWriter.key("properties");
jsonWriter.array(); jsonWriter.array();
JSONArray properties = node.getJSONArray("properties"); JSONArray properties = node.getJSONArray("properties");
int l = properties.length(); int l = properties.length();
for (int i = 0; i < l; i++) { for (int i = 0; i < l; i++) {
JSONObject property = properties.getJSONObject(i); JSONObject property = properties.getJSONObject(i);
jsonWriter.object(); jsonWriter.object();
jsonWriter.key("id"); jsonWriter.key("id");
jsonWriter.value(property.getString("id")); jsonWriter.value(property.getString("id"));
// TODO translate constraints as below // TODO translate constraints as below
jsonWriter.endObject(); jsonWriter.endObject();
} }
jsonWriter.endArray(); jsonWriter.endArray();
jsonWriter.endObject(); jsonWriter.endObject();
} }
static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException {
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException { for(int i = 0; i < meta.length(); i++) {
String name = obj.getString("name"); JSONObject col = meta.getJSONObject(i);
List<String> names2 = null; ReconType expectedType = null;
List<String> path2 = null; if(col.has("type")) {
if (columns != null) { JSONObject expectedObj = col.getJSONObject("type");
names2 = new ArrayList<String>(names); expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name"));
names2.add(name); }
path2 = new ArrayList<String>(path); columns.add(new ColumnInfo(
path2.add(obj.getString("id")); col.getString("name"),
} col.getString("id"),
expectedType));
if (obj.has("properties") && !obj.isNull("properties")) { }
boolean included = (obj.has("included") && obj.getBoolean("included")); }
if (included && columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */));
}
return (included ? 1 : 0) +
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
} else {
if (columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */ ));
}
return 1;
}
}
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
int c = 0;
int l = a.length();
for (int i = 0; i < l; i++) {
c += countColumns(a.getJSONObject(i), columns, names, path);
}
return c;
}
} }

View File

@ -50,15 +50,16 @@ import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows; import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor; import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.changes.DataExtensionChange; import com.google.refine.model.changes.DataExtensionChange;
import com.google.refine.model.recon.DataExtensionJob; import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.DataExtensionJob.ColumnInfo; import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.DataExtensionJob.DataExtension; import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.HistoryEntry; import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation; import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Column; import com.google.refine.model.Column;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import com.google.refine.model.changes.CellAtRow; import com.google.refine.model.changes.CellAtRow;
import com.google.refine.operations.EngineDependentOperation; import com.google.refine.operations.EngineDependentOperation;
@ -68,6 +69,7 @@ import com.google.refine.process.Process;
public class ExtendDataOperation extends EngineDependentOperation { public class ExtendDataOperation extends EngineDependentOperation {
final protected String _baseColumnName; final protected String _baseColumnName;
final protected String _endpoint;
final protected JSONObject _extension; final protected JSONObject _extension;
final protected int _columnInsertIndex; final protected int _columnInsertIndex;
@ -77,6 +79,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
return new ExtendDataOperation( return new ExtendDataOperation(
engineConfig, engineConfig,
obj.getString("baseColumnName"), obj.getString("baseColumnName"),
obj.getString("endpoint"),
obj.getJSONObject("extension"), obj.getJSONObject("extension"),
obj.getInt("columnInsertIndex") obj.getInt("columnInsertIndex")
); );
@ -85,12 +88,14 @@ public class ExtendDataOperation extends EngineDependentOperation {
/**
 * Creates an operation that extends a base column with data from an
 * extension service.
 *
 * @param engineConfig      engine configuration, forwarded to the superclass
 * @param baseColumnName    name of the column the extension is based on
 * @param endpoint          identifier of the service to query (presumably the
 *                          reconciliation service URL of the base column;
 *                          confirm against the caller)
 * @param extension         JSON description of the requested extension
 * @param columnInsertIndex index at which the new columns are inserted
 */
public ExtendDataOperation(
        JSONObject engineConfig,
        String     baseColumnName,
        String     endpoint,
        JSONObject extension,
        int        columnInsertIndex) {
    super(engineConfig);

    this._baseColumnName = baseColumnName;
    this._endpoint = endpoint;
    this._extension = extension;
    this._columnInsertIndex = columnInsertIndex;
}
@ -105,6 +110,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
writer.key("engineConfig"); writer.value(getEngineConfig()); writer.key("engineConfig"); writer.value(getEngineConfig());
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
writer.key("baseColumnName"); writer.value(_baseColumnName); writer.key("baseColumnName"); writer.value(_baseColumnName);
writer.key("endpoint"); writer.value(_endpoint);
writer.key("extension"); writer.value(_extension); writer.key("extension"); writer.value(_extension);
writer.endObject(); writer.endObject();
} }
@ -135,7 +141,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
final protected JSONObject _engineConfig; final protected JSONObject _engineConfig;
final protected long _historyEntryID; final protected long _historyEntryID;
protected int _cellIndex; protected int _cellIndex;
protected FreebaseDataExtensionJob _job; protected ReconciledDataExtensionJob _job;
public ExtendDataProcess( public ExtendDataProcess(
Project project, Project project,
@ -147,7 +153,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
_engineConfig = engineConfig; _engineConfig = engineConfig;
_historyEntryID = HistoryEntry.allocateID(); _historyEntryID = HistoryEntry.allocateID();
_job = new FreebaseDataExtensionJob(_extension); _job = new ReconciledDataExtensionJob(_extension, _endpoint);
} }
@Override @Override
@ -283,10 +289,10 @@ public class ExtendDataOperation extends EngineDependentOperation {
if (!_canceled) { if (!_canceled) {
List<String> columnNames = new ArrayList<String>(); List<String> columnNames = new ArrayList<String>();
for (ColumnInfo info : _job.columns) { for (ColumnInfo info : _job.columns) {
columnNames.add(StringUtils.join(info.names, " - ")); columnNames.add(info.name);
} }
List<String> columnTypes = new ArrayList<String>(); List<ReconType> columnTypes = new ArrayList<ReconType>();
for (ColumnInfo info : _job.columns) { for (ColumnInfo info : _job.columns) {
columnTypes.add(info.expectedType); columnTypes.add(info.expectedType);
} }
@ -298,6 +304,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
ExtendDataOperation.this, ExtendDataOperation.this,
new DataExtensionChange( new DataExtensionChange(
_baseColumnName, _baseColumnName,
_endpoint,
_columnInsertIndex, _columnInsertIndex,
columnNames, columnNames,
columnTypes, columnTypes,

View File

@ -52,7 +52,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
alert("Please add some properties first."); alert("Please add some properties first.");
} else { } else {
DialogSystem.dismissUntil(self._level - 1); DialogSystem.dismissUntil(self._level - 1);
self._onDone(self._extension); self._onDone(self._extension, self._service);
} }
}); });
this._elmts.cancelButton.click(function() { this._elmts.cancelButton.click(function() {
@ -67,6 +67,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
this._serviceMetadata = null; this._serviceMetadata = null;
if ("reconConfig" in column) { if ("reconConfig" in column) {
var service = column.reconConfig.service; var service = column.reconConfig.service;
this._service = service;
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service); var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
this._serviceMetadata = serviceMetadata; this._serviceMetadata = serviceMetadata;
if ("extend" in serviceMetadata) { if ("extend" in serviceMetadata) {
@ -258,7 +259,7 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
var renderColumnHeader = function(column) { var renderColumnHeader = function(column) {
var th = $('<th>').appendTo(trHead); var th = $('<th>').appendTo(trHead);
$('<span>').html(column.names.join(" &raquo; ")).appendTo(th); $('<span>').html(column.name).appendTo(th);
$('<br>').appendTo(th); $('<br>').appendTo(th);
$('<a href="javascript:{}"></a>') $('<a href="javascript:{}"></a>')
@ -266,15 +267,15 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
.addClass("action") .addClass("action")
.attr("title", "Remove this column") .attr("title", "Remove this column")
.click(function() { .click(function() {
self._removeProperty(column.path); self._removeProperty(column.id);
}).appendTo(th); }).appendTo(th);
$('<a href="javascript:{}"></a>') $('<a href="javascript:{}"></a>')
.text("constrain") .text("configure")
.addClass("action") .addClass("action")
.attr("title", "Add constraints to this column") .attr("title", "Configure this column")
.click(function() { .click(function() {
self._constrainProperty(column.path); self._constrainProperty(column.id);
}).appendTo(th); }).appendTo(th);
}; };
for (var c = 0; c < data.columns.length; c++) { for (var c = 0; c < data.columns.length; c++) {
@ -301,56 +302,25 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
container.append(table); container.append(table);
}; };
/**
 * Removes every property whose id matches the given one from the current
 * extension, then refreshes the dialog.
 *
 * @param {*} id  id of the property (column) to drop
 */
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) {
  var props = this._extension.properties;
  var idx = props.length;

  // Walk backwards so that splicing does not shift entries still to be visited.
  while (idx-- > 0) {
    if (props[idx].id == id) {
      props.splice(idx, 1);
    }
  }

  this._update();
};
/**
 * Looks up a property of the current extension by its id.
 *
 * The list is scanned from the end, so if several properties share the same
 * id the last one wins.
 *
 * @param {*} id  id of the property to look for
 * @returns {Object|null} the matching property object, or null if none matches
 */
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) {
  var properties = this._extension.properties;
  for (var i = properties.length - 1; i >= 0; i--) {
    // Compare against the `id` parameter; the former `path` argument no
    // longer exists in this scope and referencing it throws a ReferenceError.
    if (properties[i].id == id) {
      return properties[i];
    }
  }
  return null;
};
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) { ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) {
var self = this; var self = this;

View File

@ -153,12 +153,13 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
column, column,
columnIndex, columnIndex,
o.rowIndices, o.rowIndices,
function(extension) { function(extension, endpoint) {
Refine.postProcess( Refine.postProcess(
"core", "core",
"extend-data", "extend-data",
{ {
baseColumnName: column.name, baseColumnName: column.name,
endpoint: endpoint,
columnInsertIndex: columnIndex + 1 columnInsertIndex: columnIndex + 1
}, },
{ {
@ -167,7 +168,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
{ rowsChanged: true, modelsChanged: true } { rowsChanged: true, modelsChanged: true }
); );
} }
); */ );
}; };
/* /*