Retrieve types from the extend service
This commit is contained in:
parent
ad3a174abd
commit
d99128c330
@ -38,7 +38,7 @@ import javax.servlet.http.HttpServletRequest;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.commands.EngineDependentCommand;
|
||||
import com.google.refine.freebase.operations.ExtendDataOperation;
|
||||
import com.google.refine.operations.recon.ExtendDataOperation;
|
||||
import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
@ -50,6 +50,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
|
||||
|
||||
String baseColumnName = request.getParameter("baseColumnName");
|
||||
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
|
||||
String endpoint = request.getParameter("endpoint");
|
||||
|
||||
String jsonString = request.getParameter("extension");
|
||||
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
|
||||
@ -57,6 +58,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
|
||||
return new ExtendDataOperation(
|
||||
engineConfig,
|
||||
baseColumnName,
|
||||
endpoint,
|
||||
extension,
|
||||
columnInsertIndex
|
||||
);
|
||||
|
@ -133,19 +133,11 @@ public class PreviewExtendDataCommand extends Command {
|
||||
writer.array();
|
||||
for (ColumnInfo info : job.columns) {
|
||||
writer.object();
|
||||
writer.key("names");
|
||||
writer.array();
|
||||
for (String name : info.names) {
|
||||
writer.value(name);
|
||||
}
|
||||
writer.endArray();
|
||||
writer.key("path");
|
||||
writer.array();
|
||||
for (String id : info.path) {
|
||||
writer.value(id);
|
||||
}
|
||||
writer.endArray();
|
||||
writer.endObject();
|
||||
writer.key("name");
|
||||
writer.value(info.name);
|
||||
writer.key("id");
|
||||
writer.value(info.id);
|
||||
writer.endObject();
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
|
79
main/src/com/google/refine/model/ReconType.java
Normal file
79
main/src/com/google/refine/model/ReconType.java
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.model;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
|
||||
/**
|
||||
* This represents a type from the reconciliation
|
||||
* service. It is used when extending data to
|
||||
* store the (expected) types of new columns.
|
||||
*/
|
||||
public class ReconType implements Jsonizable {
|
||||
public String id;
|
||||
public String name;
|
||||
|
||||
public ReconType(String id, String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("name"); writer.value(name);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
static public ReconType load(JSONObject obj) throws Exception {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
ReconType type = new ReconType(
|
||||
obj.getString("id"),
|
||||
obj.getString("name")
|
||||
);
|
||||
return type;
|
||||
}
|
||||
}
|
@ -47,9 +47,9 @@ import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
// import com.google.refine.freebase.FreebaseType;
|
||||
import com.google.refine.model.ReconType;
|
||||
import com.google.refine.model.recon.DataExtensionReconConfig;
|
||||
import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
|
||||
import com.google.refine.history.Change;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
@ -65,10 +65,11 @@ import com.google.refine.util.Pool;
|
||||
|
||||
public class DataExtensionChange implements Change {
|
||||
final protected String _baseColumnName;
|
||||
final protected String _service;
|
||||
final protected int _columnInsertIndex;
|
||||
|
||||
final protected List<String> _columnNames;
|
||||
final protected List<FreebaseType> _columnTypes;
|
||||
final protected List<ReconType> _columnTypes;
|
||||
|
||||
final protected List<Integer> _rowIndices;
|
||||
final protected List<DataExtension> _dataExtensions;
|
||||
@ -80,14 +81,16 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
public DataExtensionChange(
|
||||
String baseColumnName,
|
||||
String service,
|
||||
int columnInsertIndex,
|
||||
List<String> columnNames,
|
||||
List<FreebaseType> columnTypes,
|
||||
List<ReconType> columnTypes,
|
||||
List<Integer> rowIndices,
|
||||
List<DataExtension> dataExtensions,
|
||||
long historyEntryID
|
||||
) {
|
||||
_baseColumnName = baseColumnName;
|
||||
_service = service;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
|
||||
_columnNames = columnNames;
|
||||
@ -101,10 +104,11 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
protected DataExtensionChange(
|
||||
String baseColumnName,
|
||||
String service,
|
||||
int columnInsertIndex,
|
||||
|
||||
List<String> columnNames,
|
||||
List<FreebaseType> columnTypes,
|
||||
List<ReconType> columnTypes,
|
||||
|
||||
List<Integer> rowIndices,
|
||||
List<DataExtension> dataExtensions,
|
||||
@ -113,6 +117,7 @@ public class DataExtensionChange implements Change {
|
||||
List<Row> newRows
|
||||
) {
|
||||
_baseColumnName = baseColumnName;
|
||||
_service = service;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
|
||||
_columnNames = columnNames;
|
||||
@ -204,7 +209,11 @@ public class DataExtensionChange implements Change {
|
||||
int cellIndex = _firstNewCellIndex + i;
|
||||
|
||||
Column column = new Column(cellIndex, name);
|
||||
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
|
||||
column.setReconConfig(new DataExtensionReconConfig(
|
||||
_service,
|
||||
"", // TODO retrieve service by URL and fill this
|
||||
"",
|
||||
_columnTypes.get(i)));
|
||||
column.setReconStats(ReconStats.create(project, cellIndex));
|
||||
|
||||
try {
|
||||
@ -275,17 +284,21 @@ public class DataExtensionChange implements Change {
|
||||
@Override
|
||||
public void save(Writer writer, Properties options) throws IOException {
|
||||
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
|
||||
writer.write("service="); writer.write(_service); writer.write('\n');
|
||||
writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
|
||||
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
|
||||
for (String name : _columnNames) {
|
||||
writer.write(name); writer.write('\n');
|
||||
}
|
||||
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
|
||||
for (FreebaseType type : _columnTypes) {
|
||||
for (ReconType type : _columnTypes) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
|
||||
type.write(jsonWriter, options);
|
||||
if(type == null) {
|
||||
writer.write("null");
|
||||
} else {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
type.write(jsonWriter, options);
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
}
|
||||
@ -342,10 +355,11 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
static public Change load(LineNumberReader reader, Pool pool) throws Exception {
|
||||
String baseColumnName = null;
|
||||
String service = null;
|
||||
int columnInsertIndex = -1;
|
||||
|
||||
List<String> columnNames = null;
|
||||
List<FreebaseType> columnTypes = null;
|
||||
List<ReconType> columnTypes = null;
|
||||
|
||||
List<Integer> rowIndices = null;
|
||||
List<DataExtension> dataExtensions = null;
|
||||
@ -363,6 +377,8 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
if ("baseColumnName".equals(field)) {
|
||||
baseColumnName = value;
|
||||
} else if ("service".equals(field)) {
|
||||
service = value;
|
||||
} else if ("columnInsertIndex".equals(field)) {
|
||||
columnInsertIndex = Integer.parseInt(value);
|
||||
} else if ("firstNewCellIndex".equals(field)) {
|
||||
@ -390,10 +406,12 @@ public class DataExtensionChange implements Change {
|
||||
} else if ("columnTypeCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
columnTypes = new ArrayList<FreebaseType>(count);
|
||||
columnTypes = new ArrayList<ReconType>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
|
||||
if (line != null) {
|
||||
columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
|
||||
}
|
||||
}
|
||||
} else if ("dataExtensionCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
@ -453,6 +471,7 @@ public class DataExtensionChange implements Change {
|
||||
|
||||
DataExtensionChange change = new DataExtensionChange(
|
||||
baseColumnName,
|
||||
service,
|
||||
columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
|
@ -0,0 +1,109 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.model.recon;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.model.ReconType;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.recon.StandardReconConfig;
|
||||
import com.google.refine.model.recon.ReconJob;
|
||||
|
||||
public class DataExtensionReconConfig extends StandardReconConfig {
|
||||
final public ReconType type;
|
||||
|
||||
private final static String WARN = "Not implemented";
|
||||
|
||||
static public ReconConfig reconstruct(JSONObject obj) throws Exception {
|
||||
JSONObject type = obj.getJSONObject("type");
|
||||
|
||||
ReconType typ = null;
|
||||
if(obj.has("id")) {
|
||||
typ = new ReconType(obj.getString("id"),
|
||||
obj.has("name") ? obj.getString("name") : obj.getString("id"));
|
||||
}
|
||||
|
||||
return new DataExtensionReconConfig(
|
||||
obj.getString("service"),
|
||||
obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
|
||||
obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
|
||||
typ);
|
||||
}
|
||||
|
||||
public DataExtensionReconConfig(
|
||||
String service,
|
||||
String identifierSpace,
|
||||
String schemaSpace,
|
||||
ReconType type) {
|
||||
super(
|
||||
service,
|
||||
identifierSpace,
|
||||
schemaSpace,
|
||||
type != null ? type.id : null,
|
||||
type != null ? type.name : null,
|
||||
true,
|
||||
new ArrayList<ColumnDetail>());
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReconJob createJob(Project project, int rowIndex, Row row,
|
||||
String columnName, Cell cell) {
|
||||
throw new RuntimeException(WARN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
throw new RuntimeException(WARN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
|
||||
throw new RuntimeException(WARN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBriefDescription(Project project, String columnName) {
|
||||
throw new RuntimeException(WARN);
|
||||
}
|
||||
}
|
@ -55,11 +55,12 @@ import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
// import com.google.refine.freebase.FreebaseType;
|
||||
import com.google.refine.model.ReconType;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.recon.StandardReconConfig;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.google.refine.expr.functions.ToDate;
|
||||
|
||||
public class ReconciledDataExtensionJob {
|
||||
static public class DataExtension {
|
||||
@ -71,28 +72,24 @@ public class ReconciledDataExtensionJob {
|
||||
}
|
||||
|
||||
static public class ColumnInfo {
|
||||
final public List<String> names;
|
||||
final public List<String> path;
|
||||
// final public FreebaseType expectedType;
|
||||
// TODO
|
||||
final public String name;
|
||||
final public String id;
|
||||
final public ReconType expectedType;
|
||||
|
||||
protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) {
|
||||
this.names = names;
|
||||
this.path = path;
|
||||
// this.expectedType = expectedType;
|
||||
protected ColumnInfo(String name, String id, ReconType expectedType) {
|
||||
this.name = name;
|
||||
this.id = id;
|
||||
this.expectedType = expectedType;
|
||||
}
|
||||
}
|
||||
|
||||
final public JSONObject extension;
|
||||
final public String endpoint;
|
||||
final public int columnCount;
|
||||
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
|
||||
|
||||
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
|
||||
this.extension = obj;
|
||||
this.endpoint = endpoint;
|
||||
this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
|
||||
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
|
||||
}
|
||||
|
||||
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
|
||||
@ -102,35 +99,30 @@ public class ReconciledDataExtensionJob {
|
||||
StringWriter writer = new StringWriter();
|
||||
formulateQuery(ids, extension, writer);
|
||||
|
||||
// Extract the order of properties
|
||||
JSONArray origProperties = extension.getJSONArray("properties");
|
||||
List<String> properties = new ArrayList<String>();
|
||||
int l = origProperties.length();
|
||||
for (int i = 0; i < l; i++) {
|
||||
properties.add(origProperties.getJSONObject(i).getString("id"));
|
||||
}
|
||||
|
||||
String query = writer.toString();
|
||||
InputStream is = performQuery(this.endpoint, query);
|
||||
try {
|
||||
String s = ParsingUtilities.inputStreamToString(is);
|
||||
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
||||
|
||||
// Extract the column metadata
|
||||
gatherColumnInfo(o.getJSONArray("meta"), columns);
|
||||
|
||||
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
|
||||
if (o.has("rows")){
|
||||
JSONObject records = o.getJSONObject("rows");
|
||||
|
||||
// for each identifier
|
||||
// for each identifier
|
||||
for (String id : ids) {
|
||||
if (records.has(id)) {
|
||||
JSONObject record = records.getJSONObject(id);
|
||||
if (records.has(id)) {
|
||||
JSONObject record = records.getJSONObject(id);
|
||||
|
||||
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap);
|
||||
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap);
|
||||
|
||||
if (ext != null) {
|
||||
map.put(id, ext);
|
||||
}
|
||||
}
|
||||
if (ext != null) {
|
||||
map.put(id, ext);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -166,65 +158,65 @@ public class ReconciledDataExtensionJob {
|
||||
|
||||
protected ReconciledDataExtensionJob.DataExtension collectResult(
|
||||
JSONObject record,
|
||||
List<String> properties,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
List<Object[]> rows = new ArrayList<Object[]>();
|
||||
|
||||
// for each property
|
||||
int colindex = 0;
|
||||
for(String pid : properties) {
|
||||
JSONArray values = record.getJSONArray(pid);
|
||||
if (values == null) {
|
||||
continue;
|
||||
}
|
||||
// for each property
|
||||
int colindex = 0;
|
||||
for(ColumnInfo ci : columns) {
|
||||
String pid = ci.id;
|
||||
JSONArray values = record.getJSONArray(pid);
|
||||
if (values == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// for each value
|
||||
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
|
||||
JSONObject val = values.getJSONObject(rowindex);
|
||||
// store a reconciled value
|
||||
if(val.has("id")) {
|
||||
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
|
||||
} else if(val.has("str")) {
|
||||
// store a bare string
|
||||
String str = val.getString("str");
|
||||
storeStr(rows, rowindex, colindex, str);
|
||||
// for each value
|
||||
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
|
||||
JSONObject val = values.getJSONObject(rowindex);
|
||||
// store a reconciled value
|
||||
if (val.has("id")) {
|
||||
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
|
||||
} else if (val.has("str")) {
|
||||
// store a bare string
|
||||
String str = val.getString("str");
|
||||
storeCell(rows, rowindex, colindex, str);
|
||||
} else if (val.has("float")) {
|
||||
float v = Float.parseFloat(val.getString("float"));
|
||||
storeCell(rows, rowindex, colindex, v);
|
||||
} else if (val.has("int")) {
|
||||
int v = Integer.parseInt(val.getString("int"));
|
||||
storeCell(rows, rowindex, colindex, v);
|
||||
} else if (val.has("date")) {
|
||||
ToDate td = new ToDate();
|
||||
String[] args = new String[1];
|
||||
args[0] = val.getString("date");
|
||||
Object v = td.call(null, args);
|
||||
storeCell(rows, rowindex, colindex, v);
|
||||
} else if(val.has("bool")) {
|
||||
boolean v = val.getString("bool") == "true";
|
||||
storeCell(rows, rowindex, colindex, v);
|
||||
}
|
||||
// TODO other cases for other types of values (dates, booleans, …)
|
||||
}
|
||||
colindex++;
|
||||
}
|
||||
}
|
||||
colindex++;
|
||||
}
|
||||
|
||||
|
||||
// collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
|
||||
|
||||
Object[][] data = new Object[rows.size()][columnCount];
|
||||
Object[][] data = new Object[rows.size()][columns.size()];
|
||||
rows.toArray(data);
|
||||
|
||||
return new DataExtension(data);
|
||||
}
|
||||
|
||||
protected void storeStr(
|
||||
List<Object[]> rows,
|
||||
int row,
|
||||
int col,
|
||||
String str
|
||||
) throws JSONException {
|
||||
while (row >= rows.size()) {
|
||||
rows.add(new Object[columnCount]);
|
||||
}
|
||||
rows.get(row)[col] = str;
|
||||
}
|
||||
|
||||
protected void storeCell(
|
||||
List<Object[]> rows,
|
||||
int row,
|
||||
int col,
|
||||
Object value,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
Object value
|
||||
) {
|
||||
while (row >= rows.size()) {
|
||||
rows.add(new Object[columnCount]);
|
||||
rows.add(new Object[columns.size()]);
|
||||
}
|
||||
rows.get(row)[col] = value;
|
||||
}
|
||||
@ -251,128 +243,8 @@ public class ReconciledDataExtensionJob {
|
||||
reconCandidateMap.put(id, rc);
|
||||
}
|
||||
|
||||
storeCell(rows, row, col, rc, reconCandidateMap);
|
||||
storeCell(rows, row, col, rc);
|
||||
}
|
||||
/*
|
||||
protected int[] collectResult(
|
||||
List<Object[]> rows,
|
||||
JSONObject extNode,
|
||||
JSONObject resultNode,
|
||||
int startRowIndex,
|
||||
int startColumnIndex,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
String propertyID = extNode.getString("id");
|
||||
// String expectedTypeID = extNode.getJSONObject("expected").getString("id");
|
||||
|
||||
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
|
||||
resultNode.getJSONArray(propertyID) : null;
|
||||
|
||||
if ("/type/key".equals(expectedTypeID)) {
|
||||
if (a != null) {
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object o = a.isNull(r) ? null : a.get(r);
|
||||
if (o instanceof JSONObject) {
|
||||
storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note that we still take up a column even if we don't have any data
|
||||
return new int[] { startRowIndex, startColumnIndex + 1 };
|
||||
} else if (expectedTypeID.startsWith("/type/")) {
|
||||
if (a != null) {
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object o = a.isNull(r) ? null : a.get(r);
|
||||
if (o instanceof Serializable) {
|
||||
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note that we still take up a column even if we don't have any data
|
||||
return new int[] { startRowIndex, startColumnIndex + 1 };
|
||||
} else {
|
||||
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
|
||||
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
|
||||
|
||||
if (a != null && a.length() > 0) {
|
||||
int maxColIndex = startColumnIndex;
|
||||
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object v = a.isNull(r) ? null : a.get(r);
|
||||
JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null;
|
||||
|
||||
int startColumnIndex2 = startColumnIndex;
|
||||
int startRowIndex2 = startRowIndex;
|
||||
|
||||
if (isOwnColumn) {
|
||||
if (o != null) {
|
||||
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
|
||||
} else {
|
||||
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasSubProperties && o != null) {
|
||||
int[] rowcol = collectResult(
|
||||
rows,
|
||||
extNode.getJSONArray("properties"),
|
||||
o,
|
||||
startRowIndex,
|
||||
startColumnIndex2,
|
||||
reconCandidateMap
|
||||
);
|
||||
|
||||
startRowIndex2 = rowcol[0];
|
||||
startColumnIndex2 = rowcol[1];
|
||||
}
|
||||
|
||||
startRowIndex = startRowIndex2;
|
||||
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
|
||||
}
|
||||
|
||||
return new int[] { startRowIndex, maxColIndex };
|
||||
} else {
|
||||
return new int[] {
|
||||
startRowIndex,
|
||||
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected int[] collectResult(
|
||||
List<Object[]> rows,
|
||||
JSONArray subProperties,
|
||||
JSONObject resultNode,
|
||||
int startRowIndex,
|
||||
int startColumnIndex,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
int maxStartRowIndex = startRowIndex;
|
||||
|
||||
int k = subProperties.length();
|
||||
for (int c = 0; c < k; c++) {
|
||||
int[] rowcol = collectResult(
|
||||
rows,
|
||||
subProperties.getJSONObject(c),
|
||||
resultNode,
|
||||
startRowIndex,
|
||||
startColumnIndex,
|
||||
reconCandidateMap
|
||||
);
|
||||
|
||||
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
|
||||
startColumnIndex = rowcol[1];
|
||||
}
|
||||
|
||||
return new int[] { maxStartRowIndex, startColumnIndex };
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
|
||||
@ -380,74 +252,46 @@ public class ReconciledDataExtensionJob {
|
||||
|
||||
jsonWriter.object();
|
||||
|
||||
jsonWriter.key("ids");
|
||||
jsonWriter.array();
|
||||
for (String id : ids) {
|
||||
if (id != null) {
|
||||
jsonWriter.value(id);
|
||||
}
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
jsonWriter.key("ids");
|
||||
jsonWriter.array();
|
||||
for (String id : ids) {
|
||||
if (id != null) {
|
||||
jsonWriter.value(id);
|
||||
}
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
|
||||
jsonWriter.key("properties");
|
||||
jsonWriter.array();
|
||||
JSONArray properties = node.getJSONArray("properties");
|
||||
int l = properties.length();
|
||||
jsonWriter.array();
|
||||
JSONArray properties = node.getJSONArray("properties");
|
||||
int l = properties.length();
|
||||
|
||||
for (int i = 0; i < l; i++) {
|
||||
JSONObject property = properties.getJSONObject(i);
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("id");
|
||||
jsonWriter.value(property.getString("id"));
|
||||
// TODO translate constraints as below
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
for (int i = 0; i < l; i++) {
|
||||
JSONObject property = properties.getJSONObject(i);
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("id");
|
||||
jsonWriter.value(property.getString("id"));
|
||||
// TODO translate constraints as below
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
|
||||
static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException {
|
||||
for(int i = 0; i < meta.length(); i++) {
|
||||
JSONObject col = meta.getJSONObject(i);
|
||||
|
||||
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
||||
String name = obj.getString("name");
|
||||
ReconType expectedType = null;
|
||||
if(col.has("type")) {
|
||||
JSONObject expectedObj = col.getJSONObject("type");
|
||||
expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name"));
|
||||
}
|
||||
|
||||
List<String> names2 = null;
|
||||
List<String> path2 = null;
|
||||
if (columns != null) {
|
||||
names2 = new ArrayList<String>(names);
|
||||
names2.add(name);
|
||||
|
||||
path2 = new ArrayList<String>(path);
|
||||
path2.add(obj.getString("id"));
|
||||
}
|
||||
|
||||
if (obj.has("properties") && !obj.isNull("properties")) {
|
||||
boolean included = (obj.has("included") && obj.getBoolean("included"));
|
||||
if (included && columns != null) {
|
||||
// JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2
|
||||
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */));
|
||||
}
|
||||
|
||||
return (included ? 1 : 0) +
|
||||
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
|
||||
} else {
|
||||
if (columns != null) {
|
||||
// JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2
|
||||
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */ ));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
||||
int c = 0;
|
||||
int l = a.length();
|
||||
for (int i = 0; i < l; i++) {
|
||||
c += countColumns(a.getJSONObject(i), columns, names, path);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
columns.add(new ColumnInfo(
|
||||
col.getString("name"),
|
||||
col.getString("id"),
|
||||
expectedType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -50,15 +50,16 @@ import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.browsing.FilteredRows;
|
||||
import com.google.refine.browsing.RowVisitor;
|
||||
import com.google.refine.model.changes.DataExtensionChange;
|
||||
import com.google.refine.model.recon.DataExtensionJob;
|
||||
import com.google.refine.model.recon.DataExtensionJob.ColumnInfo;
|
||||
import com.google.refine.model.recon.DataExtensionJob.DataExtension;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
|
||||
import com.google.refine.history.HistoryEntry;
|
||||
import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.ReconType;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.changes.CellAtRow;
|
||||
import com.google.refine.operations.EngineDependentOperation;
|
||||
@ -68,6 +69,7 @@ import com.google.refine.process.Process;
|
||||
|
||||
public class ExtendDataOperation extends EngineDependentOperation {
|
||||
final protected String _baseColumnName;
|
||||
final protected String _endpoint;
|
||||
final protected JSONObject _extension;
|
||||
final protected int _columnInsertIndex;
|
||||
|
||||
@ -77,6 +79,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
return new ExtendDataOperation(
|
||||
engineConfig,
|
||||
obj.getString("baseColumnName"),
|
||||
obj.getString("endpoint"),
|
||||
obj.getJSONObject("extension"),
|
||||
obj.getInt("columnInsertIndex")
|
||||
);
|
||||
@ -85,12 +88,14 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
public ExtendDataOperation(
|
||||
JSONObject engineConfig,
|
||||
String baseColumnName,
|
||||
String endpoint,
|
||||
JSONObject extension,
|
||||
int columnInsertIndex
|
||||
) {
|
||||
super(engineConfig);
|
||||
|
||||
_baseColumnName = baseColumnName;
|
||||
_endpoint = endpoint;
|
||||
_extension = extension;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
}
|
||||
@ -105,6 +110,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
|
||||
writer.key("baseColumnName"); writer.value(_baseColumnName);
|
||||
writer.key("endpoint"); writer.value(_endpoint);
|
||||
writer.key("extension"); writer.value(_extension);
|
||||
writer.endObject();
|
||||
}
|
||||
@ -135,7 +141,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
final protected JSONObject _engineConfig;
|
||||
final protected long _historyEntryID;
|
||||
protected int _cellIndex;
|
||||
protected FreebaseDataExtensionJob _job;
|
||||
protected ReconciledDataExtensionJob _job;
|
||||
|
||||
public ExtendDataProcess(
|
||||
Project project,
|
||||
@ -147,7 +153,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
_engineConfig = engineConfig;
|
||||
_historyEntryID = HistoryEntry.allocateID();
|
||||
|
||||
_job = new FreebaseDataExtensionJob(_extension);
|
||||
_job = new ReconciledDataExtensionJob(_extension, _endpoint);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -283,10 +289,10 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
if (!_canceled) {
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
for (ColumnInfo info : _job.columns) {
|
||||
columnNames.add(StringUtils.join(info.names, " - "));
|
||||
columnNames.add(info.name);
|
||||
}
|
||||
|
||||
List<String> columnTypes = new ArrayList<String>();
|
||||
List<ReconType> columnTypes = new ArrayList<ReconType>();
|
||||
for (ColumnInfo info : _job.columns) {
|
||||
columnTypes.add(info.expectedType);
|
||||
}
|
||||
@ -298,6 +304,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
ExtendDataOperation.this,
|
||||
new DataExtensionChange(
|
||||
_baseColumnName,
|
||||
_endpoint,
|
||||
_columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
|
@ -52,7 +52,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
|
||||
alert("Please add some properties first.");
|
||||
} else {
|
||||
DialogSystem.dismissUntil(self._level - 1);
|
||||
self._onDone(self._extension);
|
||||
self._onDone(self._extension, self._service);
|
||||
}
|
||||
});
|
||||
this._elmts.cancelButton.click(function() {
|
||||
@ -67,6 +67,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo
|
||||
this._serviceMetadata = null;
|
||||
if ("reconConfig" in column) {
|
||||
var service = column.reconConfig.service;
|
||||
this._service = service;
|
||||
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
|
||||
this._serviceMetadata = serviceMetadata;
|
||||
if ("extend" in serviceMetadata) {
|
||||
@ -258,7 +259,7 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
|
||||
var renderColumnHeader = function(column) {
|
||||
var th = $('<th>').appendTo(trHead);
|
||||
|
||||
$('<span>').html(column.names.join(" » ")).appendTo(th);
|
||||
$('<span>').html(column.name).appendTo(th);
|
||||
$('<br>').appendTo(th);
|
||||
|
||||
$('<a href="javascript:{}"></a>')
|
||||
@ -266,15 +267,15 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
|
||||
.addClass("action")
|
||||
.attr("title", "Remove this column")
|
||||
.click(function() {
|
||||
self._removeProperty(column.path);
|
||||
self._removeProperty(column.id);
|
||||
}).appendTo(th);
|
||||
|
||||
$('<a href="javascript:{}"></a>')
|
||||
.text("constrain")
|
||||
.text("configure")
|
||||
.addClass("action")
|
||||
.attr("title", "Add constraints to this column")
|
||||
.attr("title", "Configure this column")
|
||||
.click(function() {
|
||||
self._constrainProperty(column.path);
|
||||
self._constrainProperty(column.id);
|
||||
}).appendTo(th);
|
||||
};
|
||||
for (var c = 0; c < data.columns.length; c++) {
|
||||
@ -301,56 +302,25 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
|
||||
container.append(table);
|
||||
};
|
||||
|
||||
/**
 * Removes every entry of this._extension.properties whose id matches the
 * given id (loose equality), then calls this._update().
 *
 * @param {*} id  identifier compared against each property's `id` field
 */
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) {
  var list = this._extension.properties;
  // Walk from the end so that splicing never shifts an entry still to be visited.
  var idx = list.length;
  while (idx-- > 0) {
    if (list[idx].id == id) {
      list.splice(idx, 1);
    }
  }
  this._update();
};
|
||||
|
||||
/**
 * Looks up a property of this._extension.properties by its id.
 *
 * The list is scanned from the last entry to the first, so when several
 * entries share an id the one closest to the end is returned.
 *
 * @param {*} id  identifier compared (loose equality) against each property's `id` field
 * @returns {Object|null} the matching property object, or null when none matches
 */
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) {
  var properties = this._extension.properties;
  for (var i = properties.length - 1; i >= 0; i--) {
    // Compare against the `id` parameter; the previous code referenced an
    // undeclared `path` identifier here, which throws a ReferenceError.
    if (properties[i].id == id) {
      return properties[i];
    }
  }
  return null;
};
|
||||
|
||||
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) {
|
||||
var self = this;
|
||||
|
@ -153,12 +153,13 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
column,
|
||||
columnIndex,
|
||||
o.rowIndices,
|
||||
function(extension) {
|
||||
function(extension, endpoint) {
|
||||
Refine.postProcess(
|
||||
"core",
|
||||
"extend-data",
|
||||
{
|
||||
baseColumnName: column.name,
|
||||
endpoint: endpoint,
|
||||
columnInsertIndex: columnIndex + 1
|
||||
},
|
||||
{
|
||||
@ -167,7 +168,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
{ rowsChanged: true, modelsChanged: true }
|
||||
);
|
||||
}
|
||||
); */
|
||||
);
|
||||
};
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user