Starting to migrate data extension to standard reconciliation services

This commit is contained in:
Antonin Delpeuch 2017-07-04 23:14:19 +02:00
parent b0f845d252
commit ad3a174abd
11 changed files with 2134 additions and 0 deletions

View File

@ -0,0 +1,65 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.recon;
import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject;
import com.google.refine.commands.EngineDependentCommand;
import com.google.refine.freebase.operations.ExtendDataOperation;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities;
public class ExtendDataCommand extends EngineDependentCommand {
@Override
protected AbstractOperation createOperation(Project project,
HttpServletRequest request, JSONObject engineConfig) throws Exception {
String baseColumnName = request.getParameter("baseColumnName");
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
String jsonString = request.getParameter("extension");
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
return new ExtendDataOperation(
engineConfig,
baseColumnName,
extension,
columnInsertIndex
);
}
}

View File

@ -0,0 +1,208 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.recon;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.commands.Command;
import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Row;
import com.google.refine.model.Column;
import com.google.refine.model.recon.ReconConfig;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.ParsingUtilities;
public class PreviewExtendDataCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
try {
Project project = getProject(request);
String columnName = request.getParameter("columnName");
String rowIndicesString = request.getParameter("rowIndices");
if (rowIndicesString == null) {
respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
return;
}
String jsonString = request.getParameter("extension");
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString);
int length = rowIndices.length();
Column column = project.columnModel.getColumnByName(columnName);
int cellIndex = column.getCellIndex();
// get the endpoint to extract data from
String endpoint = null;
ReconConfig cfg = column.getReconConfig();
if (cfg != null &&
cfg instanceof StandardReconConfig) {
StandardReconConfig scfg = (StandardReconConfig)cfg;
endpoint = scfg.service;
} else {
respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
return;
}
List<String> topicNames = new ArrayList<String>();
List<String> topicIds = new ArrayList<String>();
Set<String> ids = new HashSet<String>();
for (int i = 0; i < length; i++) {
int rowIndex = rowIndices.getInt(i);
if (rowIndex >= 0 && rowIndex < project.rows.size()) {
Row row = project.rows.get(rowIndex);
Cell cell = row.getCell(cellIndex);
if (cell != null && cell.recon != null && cell.recon.match != null) {
topicNames.add(cell.recon.match.name);
topicIds.add(cell.recon.match.id);
ids.add(cell.recon.match.id);
} else {
topicNames.add(null);
topicIds.add(null);
ids.add(null);
}
}
}
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint);
Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
JSONWriter writer = new JSONWriter(response.getWriter());
writer.object();
writer.key("code"); writer.value("ok");
writer.key("columns");
writer.array();
for (ColumnInfo info : job.columns) {
writer.object();
writer.key("names");
writer.array();
for (String name : info.names) {
writer.value(name);
}
writer.endArray();
writer.key("path");
writer.array();
for (String id : info.path) {
writer.value(id);
}
writer.endArray();
writer.endObject();
}
writer.endArray();
writer.key("rows");
writer.array();
for (int r = 0; r < topicNames.size(); r++) {
String id = topicIds.get(r);
String topicName = topicNames.get(r);
if (id != null && map.containsKey(id)) {
DataExtension ext = map.get(id);
boolean first = true;
if (ext.data.length > 0) {
for (Object[] row : ext.data) {
writer.array();
if (first) {
writer.value(topicName);
first = false;
} else {
writer.value(null);
}
for (Object cell : row) {
if (cell != null && cell instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) cell;
writer.object();
writer.key("id"); writer.value(rc.id);
writer.key("name"); writer.value(rc.name);
writer.endObject();
} else {
writer.value(cell);
}
}
writer.endArray();
}
continue;
}
}
writer.array();
if (id != null) {
writer.object();
writer.key("id"); writer.value(id);
writer.key("name"); writer.value(topicName);
writer.endObject();
} else {
writer.value("<not reconciled>");
}
writer.endArray();
}
writer.endArray();
writer.endObject();
} catch (Exception e) {
respondException(response, e);
}
}
}

View File

@ -0,0 +1,469 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.changes;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType;
import com.google.refine.model.recon.DataExtensionReconConfig;
import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension;
import com.google.refine.history.Change;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconStats;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.util.Pool;
public class DataExtensionChange implements Change {
final protected String _baseColumnName;
final protected int _columnInsertIndex;
final protected List<String> _columnNames;
final protected List<FreebaseType> _columnTypes;
final protected List<Integer> _rowIndices;
final protected List<DataExtension> _dataExtensions;
protected long _historyEntryID;
protected int _firstNewCellIndex = -1;
protected List<Row> _oldRows;
protected List<Row> _newRows;
public DataExtensionChange(
String baseColumnName,
int columnInsertIndex,
List<String> columnNames,
List<FreebaseType> columnTypes,
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
long historyEntryID
) {
_baseColumnName = baseColumnName;
_columnInsertIndex = columnInsertIndex;
_columnNames = columnNames;
_columnTypes = columnTypes;
_rowIndices = rowIndices;
_dataExtensions = dataExtensions;
_historyEntryID = historyEntryID;
}
protected DataExtensionChange(
String baseColumnName,
int columnInsertIndex,
List<String> columnNames,
List<FreebaseType> columnTypes,
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
int firstNewCellIndex,
List<Row> oldRows,
List<Row> newRows
) {
_baseColumnName = baseColumnName;
_columnInsertIndex = columnInsertIndex;
_columnNames = columnNames;
_columnTypes = columnTypes;
_rowIndices = rowIndices;
_dataExtensions = dataExtensions;
_firstNewCellIndex = firstNewCellIndex;
_oldRows = oldRows;
_newRows = newRows;
}
@Override
public void apply(Project project) {
synchronized (project) {
if (_firstNewCellIndex < 0) {
_firstNewCellIndex = project.columnModel.allocateNewCellIndex();
for (int i = 1; i < _columnNames.size(); i++) {
project.columnModel.allocateNewCellIndex();
}
_oldRows = new ArrayList<Row>(project.rows);
_newRows = new ArrayList<Row>(project.rows.size());
int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex();
int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
int index = 0;
int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
index++;
Map<String, Recon> reconMap = new HashMap<String, Recon>();
for (int r = 0; r < _oldRows.size(); r++) {
Row oldRow = _oldRows.get(r);
if (r < rowIndex) {
_newRows.add(oldRow.dup());
continue;
}
if (dataExtension == null || dataExtension.data.length == 0) {
_newRows.add(oldRow);
} else {
Row firstNewRow = oldRow.dup();
extendRow(firstNewRow, dataExtension, 0, reconMap);
_newRows.add(firstNewRow);
int r2 = r + 1;
for (int subR = 1; subR < dataExtension.data.length; subR++) {
if (r2 < project.rows.size()) {
Row oldRow2 = project.rows.get(r2);
if (oldRow2.isCellBlank(cellIndex) &&
oldRow2.isCellBlank(keyCellIndex)) {
Row newRow = oldRow2.dup();
extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow);
r2++;
continue;
}
}
Row newRow = new Row(cellIndex + _columnNames.size());
extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow);
}
r = r2 - 1; // r will be incremented by the for loop anyway
}
rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
index++;
}
}
project.rows.clear();
project.rows.addAll(_newRows);
for (int i = 0; i < _columnNames.size(); i++) {
String name = _columnNames.get(i);
int cellIndex = _firstNewCellIndex + i;
Column column = new Column(cellIndex, name);
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
column.setReconStats(ReconStats.create(project, cellIndex));
try {
project.columnModel.addColumn(_columnInsertIndex + i, column, true);
// the column might have been renamed to avoid collision
_columnNames.set(i, column.getName());
} catch (ModelException e) {
// won't get here since we set the avoid collision flag
}
}
project.update();
}
}
protected void extendRow(
Row row,
DataExtension dataExtension,
int extensionRowIndex,
Map<String, Recon> reconMap
) {
Object[] values = dataExtension.data[extensionRowIndex];
for (int c = 0; c < values.length; c++) {
Object value = values[c];
Cell cell = null;
if (value instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) value;
Recon recon;
if (reconMap.containsKey(rc.id)) {
recon = reconMap.get(rc.id);
} else {
recon = Recon.makeFreebaseRecon(_historyEntryID);
recon.addCandidate(rc);
recon.service = "mql";
recon.match = rc;
recon.matchRank = 0;
recon.judgment = Judgment.Matched;
recon.judgmentAction = "auto";
recon.judgmentBatchSize = 1;
reconMap.put(rc.id, recon);
}
cell = new Cell(rc.name, recon);
} else {
cell = new Cell((Serializable) value, null);
}
row.setCell(_firstNewCellIndex + c, cell);
}
}
@Override
public void revert(Project project) {
synchronized (project) {
project.rows.clear();
project.rows.addAll(_oldRows);
for (int i = 0; i < _columnNames.size(); i++) {
project.columnModel.columns.remove(_columnInsertIndex);
}
project.update();
}
}
@Override
public void save(Writer writer, Properties options) throws IOException {
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
for (String name : _columnNames) {
writer.write(name); writer.write('\n');
}
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
for (FreebaseType type : _columnTypes) {
try {
JSONWriter jsonWriter = new JSONWriter(writer);
type.write(jsonWriter, options);
} catch (JSONException e) {
// ???
}
writer.write('\n');
}
writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
for (Integer rowIndex : _rowIndices) {
writer.write(rowIndex.toString()); writer.write('\n');
}
writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n');
for (DataExtension dataExtension : _dataExtensions) {
if (dataExtension == null) {
writer.write('\n');
continue;
}
writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');
for (Object[] values : dataExtension.data) {
for (Object value : values) {
if (value == null) {
writer.write("null");
} else if (value instanceof ReconCandidate) {
try {
JSONWriter jsonWriter = new JSONWriter(writer);
((ReconCandidate) value).write(jsonWriter, options);
} catch (JSONException e) {
// ???
}
} else if (value instanceof String) {
writer.write(JSONObject.quote((String) value));
} else {
writer.write(value.toString());
}
writer.write('\n');
}
}
}
writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
for (Row row : _newRows) {
row.save(writer, options);
writer.write('\n');
}
writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
for (Row row : _oldRows) {
row.save(writer, options);
writer.write('\n');
}
writer.write("/ec/\n"); // end of change marker
}
static public Change load(LineNumberReader reader, Pool pool) throws Exception {
String baseColumnName = null;
int columnInsertIndex = -1;
List<String> columnNames = null;
List<FreebaseType> columnTypes = null;
List<Integer> rowIndices = null;
List<DataExtension> dataExtensions = null;
List<Row> oldRows = null;
List<Row> newRows = null;
int firstNewCellIndex = -1;
String line;
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
int equal = line.indexOf('=');
CharSequence field = line.subSequence(0, equal);
String value = line.substring(equal + 1);
if ("baseColumnName".equals(field)) {
baseColumnName = value;
} else if ("columnInsertIndex".equals(field)) {
columnInsertIndex = Integer.parseInt(value);
} else if ("firstNewCellIndex".equals(field)) {
firstNewCellIndex = Integer.parseInt(value);
} else if ("rowIndexCount".equals(field)) {
int count = Integer.parseInt(value);
rowIndices = new ArrayList<Integer>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
if (line != null) {
rowIndices.add(Integer.parseInt(line));
}
}
} else if ("columnNameCount".equals(field)) {
int count = Integer.parseInt(value);
columnNames = new ArrayList<String>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
if (line != null) {
columnNames.add(line);
}
}
} else if ("columnTypeCount".equals(field)) {
int count = Integer.parseInt(value);
columnTypes = new ArrayList<FreebaseType>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
}
} else if ("dataExtensionCount".equals(field)) {
int count = Integer.parseInt(value);
dataExtensions = new ArrayList<DataExtension>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
if (line == null) {
continue;
}
if (line.length() == 0) {
dataExtensions.add(null);
continue;
}
int rowCount = Integer.parseInt(line);
Object[][] data = new Object[rowCount][];
for (int r = 0; r < rowCount; r++) {
Object[] row = new Object[columnNames.size()];
for (int c = 0; c < columnNames.size(); c++) {
line = reader.readLine();
row[c] = ReconCandidate.loadStreaming(line);
}
data[r] = row;
}
dataExtensions.add(new DataExtension(data));
}
} else if ("oldRowCount".equals(field)) {
int count = Integer.parseInt(value);
oldRows = new ArrayList<Row>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
if (line != null) {
oldRows.add(Row.load(line, pool));
}
}
} else if ("newRowCount".equals(field)) {
int count = Integer.parseInt(value);
newRows = new ArrayList<Row>(count);
for (int i = 0; i < count; i++) {
line = reader.readLine();
if (line != null) {
newRows.add(Row.load(line, pool));
}
}
}
}
DataExtensionChange change = new DataExtensionChange(
baseColumnName,
columnInsertIndex,
columnNames,
columnTypes,
rowIndices,
dataExtensions,
firstNewCellIndex,
oldRows,
newRows
);
return change;
}
}

View File

@ -0,0 +1,453 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
*
*/
package com.google.refine.model.recon;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
// import com.google.refine.freebase.FreebaseType;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
public class ReconciledDataExtensionJob {
static public class DataExtension {
final public Object[][] data;
public DataExtension(Object[][] data) {
this.data = data;
}
}
static public class ColumnInfo {
final public List<String> names;
final public List<String> path;
// final public FreebaseType expectedType;
// TODO
protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) {
this.names = names;
this.path = path;
// this.expectedType = expectedType;
}
}
final public JSONObject extension;
final public String endpoint;
final public int columnCount;
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
this.extension = obj;
this.endpoint = endpoint;
this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
}
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
Set<String> ids,
Map<String, ReconCandidate> reconCandidateMap
) throws Exception {
StringWriter writer = new StringWriter();
formulateQuery(ids, extension, writer);
// Extract the order of properties
JSONArray origProperties = extension.getJSONArray("properties");
List<String> properties = new ArrayList<String>();
int l = origProperties.length();
for (int i = 0; i < l; i++) {
properties.add(origProperties.getJSONObject(i).getString("id"));
}
String query = writer.toString();
InputStream is = performQuery(this.endpoint, query);
try {
String s = ParsingUtilities.inputStreamToString(is);
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
if (o.has("rows")){
JSONObject records = o.getJSONObject("rows");
// for each identifier
for (String id : ids) {
if (records.has(id)) {
JSONObject record = records.getJSONObject(id);
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap);
if (ext != null) {
map.put(id, ext);
}
}
}
}
return map;
} finally {
is.close();
}
}
static protected InputStream performQuery(String endpoint, String query) throws IOException {
URL url = new URL(endpoint);
URLConnection connection = url.openConnection();
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.setConnectTimeout(5000);
connection.setDoOutput(true);
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
try {
String body = "extend=" + ParsingUtilities.encode(query);
dos.writeBytes(body);
} finally {
dos.flush();
dos.close();
}
connection.connect();
return connection.getInputStream();
}
protected ReconciledDataExtensionJob.DataExtension collectResult(
JSONObject record,
List<String> properties,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
List<Object[]> rows = new ArrayList<Object[]>();
// for each property
int colindex = 0;
for(String pid : properties) {
JSONArray values = record.getJSONArray(pid);
if (values == null) {
continue;
}
// for each value
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
JSONObject val = values.getJSONObject(rowindex);
// store a reconciled value
if(val.has("id")) {
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
} else if(val.has("str")) {
// store a bare string
String str = val.getString("str");
storeStr(rows, rowindex, colindex, str);
}
// TODO other cases for other types of values (dates, booleans, )
}
colindex++;
}
// collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
Object[][] data = new Object[rows.size()][columnCount];
rows.toArray(data);
return new DataExtension(data);
}
protected void storeStr(
List<Object[]> rows,
int row,
int col,
String str
) throws JSONException {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
}
rows.get(row)[col] = str;
}
protected void storeCell(
List<Object[]> rows,
int row,
int col,
Object value,
Map<String, ReconCandidate> reconCandidateMap
) {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
}
rows.get(row)[col] = value;
}
protected void storeCell(
List<Object[]> rows,
int row,
int col,
JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String id = obj.getString("id");
ReconCandidate rc;
if (reconCandidateMap.containsKey(id)) {
rc = reconCandidateMap.get(id);
} else {
rc = new ReconCandidate(
obj.getString("id"),
obj.getString("name"),
JSONUtilities.getStringArray(obj, "type"),
100
);
reconCandidateMap.put(id, rc);
}
storeCell(rows, row, col, rc, reconCandidateMap);
}
/*
protected int[] collectResult(
List<Object[]> rows,
JSONObject extNode,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String propertyID = extNode.getString("id");
// String expectedTypeID = extNode.getJSONObject("expected").getString("id");
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
resultNode.getJSONArray(propertyID) : null;
if ("/type/key".equals(expectedTypeID)) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof JSONObject) {
storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else if (expectedTypeID.startsWith("/type/")) {
if (a != null) {
int l = a.length();
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof Serializable) {
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
}
}
}
// note that we still take up a column even if we don't have any data
return new int[] { startRowIndex, startColumnIndex + 1 };
} else {
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
if (a != null && a.length() > 0) {
int maxColIndex = startColumnIndex;
int l = a.length();
for (int r = 0; r < l; r++) {
Object v = a.isNull(r) ? null : a.get(r);
JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null;
int startColumnIndex2 = startColumnIndex;
int startRowIndex2 = startRowIndex;
if (isOwnColumn) {
if (o != null) {
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
} else {
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
}
}
if (hasSubProperties && o != null) {
int[] rowcol = collectResult(
rows,
extNode.getJSONArray("properties"),
o,
startRowIndex,
startColumnIndex2,
reconCandidateMap
);
startRowIndex2 = rowcol[0];
startColumnIndex2 = rowcol[1];
}
startRowIndex = startRowIndex2;
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
}
return new int[] { startRowIndex, maxColIndex };
} else {
return new int[] {
startRowIndex,
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
};
}
}
}
protected int[] collectResult(
List<Object[]> rows,
JSONArray subProperties,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
int maxStartRowIndex = startRowIndex;
int k = subProperties.length();
for (int c = 0; c < k; c++) {
int[] rowcol = collectResult(
rows,
subProperties.getJSONObject(c),
resultNode,
startRowIndex,
startColumnIndex,
reconCandidateMap
);
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
startColumnIndex = rowcol[1];
}
return new int[] { maxStartRowIndex, startColumnIndex };
}*/
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
JSONWriter jsonWriter = new JSONWriter(writer);
jsonWriter.object();
jsonWriter.key("ids");
jsonWriter.array();
for (String id : ids) {
if (id != null) {
jsonWriter.value(id);
}
}
jsonWriter.endArray();
jsonWriter.key("properties");
jsonWriter.array();
JSONArray properties = node.getJSONArray("properties");
int l = properties.length();
for (int i = 0; i < l; i++) {
JSONObject property = properties.getJSONObject(i);
jsonWriter.object();
jsonWriter.key("id");
jsonWriter.value(property.getString("id"));
// TODO translate constraints as below
jsonWriter.endObject();
}
jsonWriter.endArray();
jsonWriter.endObject();
}
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
String name = obj.getString("name");
List<String> names2 = null;
List<String> path2 = null;
if (columns != null) {
names2 = new ArrayList<String>(names);
names2.add(name);
path2 = new ArrayList<String>(path);
path2.add(obj.getString("id"));
}
if (obj.has("properties") && !obj.isNull("properties")) {
boolean included = (obj.has("included") && obj.getBoolean("included"));
if (included && columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */));
}
return (included ? 1 : 0) +
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
} else {
if (columns != null) {
// JSONObject expected = obj.getJSONObject("expected");
columns.add(new ColumnInfo(names2, path2
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */ ));
}
return 1;
}
}
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
int c = 0;
int l = a.length();
for (int i = 0; i < l; i++) {
c += countColumns(a.getJSONObject(i), columns, names, path);
}
return c;
}
}

View File

@ -0,0 +1,314 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.operations.recon;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.changes.DataExtensionChange;
import com.google.refine.model.recon.DataExtensionJob;
import com.google.refine.model.recon.DataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.DataExtensionJob.DataExtension;
import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Row;
import com.google.refine.model.changes.CellAtRow;
import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OperationRegistry;
import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process;
public class ExtendDataOperation extends EngineDependentOperation {
final protected String _baseColumnName;
final protected JSONObject _extension;
final protected int _columnInsertIndex;
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
JSONObject engineConfig = obj.getJSONObject("engineConfig");
return new ExtendDataOperation(
engineConfig,
obj.getString("baseColumnName"),
obj.getJSONObject("extension"),
obj.getInt("columnInsertIndex")
);
}
public ExtendDataOperation(
JSONObject engineConfig,
String baseColumnName,
JSONObject extension,
int columnInsertIndex
) {
super(engineConfig);
_baseColumnName = baseColumnName;
_extension = extension;
_columnInsertIndex = columnInsertIndex;
}
@Override
public void write(JSONWriter writer, Properties options)
throws JSONException {
writer.object();
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
writer.key("description"); writer.value(getBriefDescription(null));
writer.key("engineConfig"); writer.value(getEngineConfig());
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
writer.key("baseColumnName"); writer.value(_baseColumnName);
writer.key("extension"); writer.value(_extension);
writer.endObject();
}
@Override
protected String getBriefDescription(Project project) {
return "Extend data at index " + _columnInsertIndex +
" based on column " + _baseColumnName;
}
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
return "Extend data at index " + _columnInsertIndex +
" based on column " + column.getName() +
" by filling " + cellsAtRows.size();
}
@Override
public Process createProcess(Project project, Properties options) throws Exception {
return new ExtendDataProcess(
project,
getEngineConfig(),
getBriefDescription(null)
);
}
public class ExtendDataProcess extends LongRunningProcess implements Runnable {
final protected Project _project;
final protected JSONObject _engineConfig;
final protected long _historyEntryID;
protected int _cellIndex;
protected FreebaseDataExtensionJob _job;
public ExtendDataProcess(
Project project,
JSONObject engineConfig,
String description
) throws JSONException {
super(description);
_project = project;
_engineConfig = engineConfig;
_historyEntryID = HistoryEntry.allocateID();
_job = new FreebaseDataExtensionJob(_extension);
}
@Override
public void write(JSONWriter writer, Properties options)
throws JSONException {
writer.object();
writer.key("id"); writer.value(hashCode());
writer.key("description"); writer.value(_description);
writer.key("immediate"); writer.value(false);
writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done"));
writer.key("progress"); writer.value(_progress);
writer.endObject();
}
@Override
protected Runnable getRunnable() {
return this;
}
protected void populateRowsWithMatches(List<Integer> rowIndices) throws Exception {
Engine engine = new Engine(_project);
engine.initializeFromJSON(_engineConfig);
Column column = _project.columnModel.getColumnByName(_baseColumnName);
if (column == null) {
throw new Exception("No column named " + _baseColumnName);
}
_cellIndex = column.getCellIndex();
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(_project, new RowVisitor() {
List<Integer> _rowIndices;
public RowVisitor init(List<Integer> rowIndices) {
_rowIndices = rowIndices;
return this;
}
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
Cell cell = row.getCell(_cellIndex);
if (cell != null && cell.recon != null && cell.recon.match != null) {
_rowIndices.add(rowIndex);
}
return false;
}
}.init(rowIndices));
}
protected int extendRows(
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
int from,
int limit,
Map<String, ReconCandidate> reconCandidateMap
) {
Set<String> ids = new HashSet<String>();
int end;
for (end = from; end < limit && ids.size() < 10; end++) {
int index = rowIndices.get(end);
Row row = _project.rows.get(index);
Cell cell = row.getCell(_cellIndex);
ids.add(cell.recon.match.id);
}
Map<String, DataExtension> map = null;
try {
map = _job.extend(ids, reconCandidateMap);
} catch (Exception e) {
map = new HashMap<String, DataExtension>();
}
for (int i = from; i < end; i++) {
int index = rowIndices.get(i);
Row row = _project.rows.get(index);
Cell cell = row.getCell(_cellIndex);
String guid = cell.recon.match.id;
if (map.containsKey(guid)) {
dataExtensions.add(map.get(guid));
} else {
dataExtensions.add(null);
}
}
return end;
}
@Override
public void run() {
List<Integer> rowIndices = new ArrayList<Integer>();
List<DataExtension> dataExtensions = new ArrayList<DataExtension>();
try {
populateRowsWithMatches(rowIndices);
} catch (Exception e2) {
// TODO : Not sure what to do here?
e2.printStackTrace();
}
int start = 0;
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
while (start < rowIndices.size()) {
int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap);
start = end;
_progress = end * 100 / rowIndices.size();
try {
Thread.sleep(200);
} catch (InterruptedException e) {
if (_canceled) {
break;
}
}
}
if (!_canceled) {
List<String> columnNames = new ArrayList<String>();
for (ColumnInfo info : _job.columns) {
columnNames.add(StringUtils.join(info.names, " - "));
}
List<String> columnTypes = new ArrayList<String>();
for (ColumnInfo info : _job.columns) {
columnTypes.add(info.expectedType);
}
HistoryEntry historyEntry = new HistoryEntry(
_historyEntryID,
_project,
_description,
ExtendDataOperation.this,
new DataExtensionChange(
_baseColumnName,
_columnInsertIndex,
columnNames,
columnTypes,
rowIndices,
dataExtensions,
_historyEntryID)
);
_project.history.addEntry(historyEntry);
_project.processManager.onDoneProcess(this);
}
}
}
}

View File

@ -121,6 +121,8 @@ function registerCommands() {
RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand());
@ -367,6 +369,7 @@ function init() {
"styles/index/default-importing-sources.less",
"styles/views/data-table-view.less", // for the preview table's styles
"styles/views/extend-data-preview-dialog.less",
"styles/index/fixed-width-parser-ui.less",
"styles/index/xml-parser-ui.less",
"styles/index/json-parser-ui.less"
@ -431,6 +434,7 @@ function init() {
"scripts/reconciliation/standard-service-panel.js",
"scripts/dialogs/expression-preview-dialog.js",
"scripts/dialogs/extend-data-preview-dialog.js",
"scripts/dialogs/clustering-dialog.js",
"scripts/dialogs/scatterplot-dialog.js",
"scripts/dialogs/templating-exporter-dialog.js",

View File

@ -0,0 +1,424 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) {
this._column = column;
this._columnIndex = columnIndex;
this._rowIndices = rowIndices;
this._onDone = onDone;
this._extension = { properties: [] };
var self = this;
this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html"));
this._elmts = DOM.bind(this._dialog);
this._elmts.dialogHeader.html("Add columns by reconciled column " + column.name);
this._elmts.resetButton.click(function() {
self._extension.properties = [];
self._update();
});
this._elmts.okButton.click(function() {
if (self._extension.properties.length === 0) {
alert("Please add some properties first.");
} else {
DialogSystem.dismissUntil(self._level - 1);
self._onDone(self._extension);
}
});
this._elmts.cancelButton.click(function() {
DialogSystem.dismissUntil(self._level - 1);
});
var dismissBusy = DialogSystem.showBusy();
var type = (column.reconConfig) && (column.reconConfig.type) ? column.reconConfig.type.id : "";
this._proposePropertiesUrl = null;
this._fetchColumnUrl = null;
this._serviceMetadata = null;
if ("reconConfig" in column) {
var service = column.reconConfig.service;
var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
this._serviceMetadata = serviceMetadata;
if ("extend" in serviceMetadata) {
var extend = serviceMetadata.extend;
if ("propose_properties" in extend) {
var endpoint = extend.propose_properties;
this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path;
}
if ("fetch_column" in extend) {
var endpoint = extend.fetch_column;
this._fetchColumnUrl = endpoint.service_url + endpoint.service_path;
}
}
}
ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) {
dismissBusy();
self._show(properties);
});
}
ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) {
if(url == null) {
onDone([]);
} else {
var done = false;
$.getJSON(
url +"?type=" + typeID + "&callback=?",
null,
function(data) {
if (done) return;
done = true;
var allProperties = [];
for (var i = 0; i < data.properties.length; i++) {
var property = data.properties[i];
var property2 = {
id: property.id,
name: property.name
};
/*if ("id2" in property) {
property2.expected = property.schema2;
property2.properties = [{
id: property.id2,
name: property.name2,
expected: property.expects
}];
} else {
property2.expected = property.expects;
} */
allProperties.push(property2);
}
allProperties.sort(function(a, b) { return a.name.localeCompare(b.name); });
onDone(allProperties);
}
);
window.setTimeout(function() {
if (done) return;
done = true;
onDone([]);
}, 7000); // time to give up?
}
};
ExtendReconciledDataPreviewDialog.prototype._show = function(properties) {
this._level = DialogSystem.showDialog(this._dialog);
var n = this._elmts.suggestedPropertyContainer.offset().top +
this._elmts.suggestedPropertyContainer.outerHeight(true) -
this._elmts.addPropertyInput.offset().top;
this._elmts.previewContainer.height(Math.floor(n));
var self = this;
var container = this._elmts.suggestedPropertyContainer;
var renderSuggestedProperty = function(property) {
var label = ("properties" in property) ? (property.name + " &raquo; " + property.properties[0].name) : property.name;
var div = $('<div>').addClass("suggested-property").appendTo(container);
$('<a>')
.attr("href", "javascript:{}")
.html(label)
.appendTo(div)
.click(function() {
self._addProperty(property);
});
};
for (var i = 0; i < properties.length; i++) {
renderSuggestedProperty(properties[i]);
}
var suggestConfig = $.extend({}, this._serviceMetadata.suggest.property);
suggestConfig.key = null;
suggestConfig.query_param_name = "prefix";
/* var suggestConfig = {
filter: '(all type:/type/property)'
};
if ((this._column.reconConfig) && (this._column.reconConfig.type)) {
suggestConfig.filter = '(all type:/type/property (any namespace:/type/object namespace:' + this._column.reconConfig.type.id + '))';
} */
this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) {
var expected = data.expected_type;
self._addProperty({
id : data.id,
name: data.name,
/* expected: {
id: expected.id,
name: expected.name
} */
});
});
};
ExtendReconciledDataPreviewDialog.prototype._update = function() {
this._elmts.previewContainer.empty().text("Querying THE service...");
var self = this;
var params = {
project: theProject.id,
columnName: this._column.name
};
$.post(
"command/core/preview-extend-data?" + $.param(params),
{
rowIndices: JSON.stringify(this._rowIndices),
extension: JSON.stringify(this._extension)
},
function(data) {
self._renderPreview(data);
},
"json"
).fail(function(data) {
console.log(data);
});
};
ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) {
var addSeveralToList = function(properties, oldProperties) {
for (var i = 0; i < properties.length; i++) {
addToList(properties[i], oldProperties);
}
};
var addToList = function(property, oldProperties) {
for (var i = 0; i < oldProperties.length; i++) {
var oldProperty = oldProperties[i];
if (oldProperty.id == property.id) {
if ("included" in property) {
oldProperty.included = "included" in oldProperty ?
(oldProperty.included || property.included) :
property.included;
}
if ("properties" in property) {
if ("properties" in oldProperty) {
addSeveralToList(property.properties, oldProperty.properties);
} else {
oldProperty.properties = property.properties;
}
}
return;
}
}
oldProperties.push(property);
};
addToList(p, this._extension.properties);
this._update();
};
ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
var self = this;
var container = this._elmts.previewContainer.empty();
if (data.code == "error") {
container.text("Error.");
return;
}
var table = $('<table>')[0];
var trHead = table.insertRow(table.rows.length);
$('<th>').appendTo(trHead).text(this._column.name);
var renderColumnHeader = function(column) {
var th = $('<th>').appendTo(trHead);
$('<span>').html(column.names.join(" &raquo; ")).appendTo(th);
$('<br>').appendTo(th);
$('<a href="javascript:{}"></a>')
.text("remove")
.addClass("action")
.attr("title", "Remove this column")
.click(function() {
self._removeProperty(column.path);
}).appendTo(th);
$('<a href="javascript:{}"></a>')
.text("constrain")
.addClass("action")
.attr("title", "Add constraints to this column")
.click(function() {
self._constrainProperty(column.path);
}).appendTo(th);
};
for (var c = 0; c < data.columns.length; c++) {
renderColumnHeader(data.columns[c]);
}
for (var r = 0; r < data.rows.length; r++) {
var tr = table.insertRow(table.rows.length);
var row = data.rows[r];
for (var c = 0; c < row.length; c++) {
var td = tr.insertCell(tr.cells.length);
var cell = row[c];
if (cell !== null) {
if ($.isPlainObject(cell)) {
$('<a>').attr("href", "http://www.freebase.com/view" + cell.id).text(cell.name).appendTo(td);
} else {
$('<span>').text(cell).appendTo(td);
}
}
}
}
container.append(table);
};
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(path) {
var removeFromList = function(path, index, properties) {
var id = path[index];
for (var i = properties.length - 1; i >= 0; i--) {
var property = properties[i];
if (property.id == id) {
if (index === path.length - 1) {
if ("included" in property) {
delete property.included;
}
} else if ("properties" in property && property.properties.length > 0) {
removeFromList(path, index + 1, property.properties);
}
if (!("properties" in property) || property.properties.length === 0) {
properties.splice(i, 1);
}
return;
}
}
};
removeFromList(path, 0, this._extension.properties);
this._update();
};
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(path) {
var find = function(path, index, properties) {
var id = path[index];
for (var i = properties.length - 1; i >= 0; i--) {
var property = properties[i];
if (property.id == id) {
if (index === path.length - 1) {
return property;
} else if ("properties" in property && property.properties.length > 0) {
return find(path, index + 1, property.properties);
}
break;
}
}
return null;
};
return find(path, 0, this._extension.properties);
};
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) {
var self = this;
var property = this._findProperty(path);
var frame = DialogSystem.createDialog();
frame.width("500px");
var header = $('<div></div>').addClass("dialog-header").text("Constrain " + path.join(" > ")).appendTo(frame);
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
body.html(
'<div class="grid-layout layout-normal layout-full"><table>' +
'<tr><td>' +
'Enter MQL query constraints as JSON' +
'</td></tr>' +
'<tr><td>' +
'<textarea style="width: 100%; height: 300px; font-family: monospace;" bind="textarea"></textarea>' +
'</td></tr>' +
'</table></div>'
);
var bodyElmts = DOM.bind(body);
if ("constraints" in property) {
bodyElmts.textarea[0].value = JSON.stringify(property.constraints, null, 2);
} else {
bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2);
}
footer.html(
'<button class="button" bind="okButton">&nbsp;&nbsp;OK&nbsp;&nbsp;</button>' +
'<button class="button" bind="cancelButton">Cancel</button>'
);
var footerElmts = DOM.bind(footer);
var level = DialogSystem.showDialog(frame);
var dismiss = function() {
DialogSystem.dismissUntil(level - 1);
};
footerElmts.cancelButton.click(dismiss);
footerElmts.okButton.click(function() {
try {
var o = JSON.parse(bodyElmts.textarea[0].value);
if (o === undefined) {
alert("Please ensure that the JSON you enter is valid.");
return;
}
if ($.isArray(o) && o.length == 1) {
o = o[0];
}
if (!$.isPlainObject(o)) {
alert("The JSON you enter must be an object, that is, it is of this form { ... }.");
return;
}
property.constraints = o;
dismiss();
self._update();
} catch (e) {
//console.log(e);
}
});
bodyElmts.textarea.focus();
};

View File

@ -0,0 +1,27 @@
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full"><table rows="4">
<tr>
<td width="300" height="1" bind="addPropertyHeader"></td>
<td height="1" bind="previewHeader"></td>
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
</tr>
<tr>
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
</tr>
<tr>
<td height="1" bind="suggestedPropertyHeader"></td>
</tr>
<tr>
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
</tr>
</table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button class="button" bind="okButton"></button>
<button class="button" bind="cancelButton"></button>
</div>
</div>

View File

@ -0,0 +1,26 @@
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full"><table rows="4">
<tr>
<td width="300" height="1">Add Property</td>
<td height="1">Preview</td>
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
</tr>
<tr>
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
</tr>
<tr>
<td height="1">Suggested Properties</td>
</tr>
<tr>
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
</tr>
</table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button class="button" bind="okButton">&nbsp;&nbsp;OK&nbsp;&nbsp;</button>
<button class="button" bind="cancelButton">Cancel</button>
</div>
</div>

View File

@ -146,6 +146,74 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
});
};
var doAddColumnByReconciliation = function() {
var columnIndex = Refine.columnNameToColumnIndex(column.name);
var o = DataTableView.sampleVisibleRows(column);
new ExtendReconciledDataPreviewDialog(
column,
columnIndex,
o.rowIndices,
function(extension) {
Refine.postProcess(
"core",
"extend-data",
{
baseColumnName: column.name,
columnInsertIndex: columnIndex + 1
},
{
extension: JSON.stringify(extension)
},
{ rowsChanged: true, modelsChanged: true }
);
}
); */
};
/*
var doAddColumnByReconciliation = function() {
var frame = $(
DOM.loadHTML("core", "scripts/views/data-table/add-column-by-reconciliation.html"));
var elmts = DOM.bind(frame);
elmts.dialogHeader.text($.i18n._('core-views')["add-by-recon"]);
elmts.suggestedPropertyHeader.html('Suggested properties');
elmts.previewHeader.html('Preview');
elmts.addPropertyHeader.html('Add property');
elmts.okButton.html($.i18n._('core-buttons')["ok"]);
elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]);
var level = DialogSystem.showDialog(frame);
var dismiss = function() { DialogSystem.dismissUntil(level - 1); };
elmts.cancelButton.click(dismiss);
elmts.okButton.click(function() {
var columnName = $.trim(elmts.columnNameInput[0].value);
if (!columnName.length) {
alert($.i18n._('core-views')["warning-col-name"]);
return;
}
Refine.postCoreProcess(
"add-column-by-fetching-urls",
{
baseColumnName: column.name,
urlExpression: previewWidget.getExpression(true),
newColumnName: columnName,
columnInsertIndex: columnIndex + 1,
delay: elmts.throttleDelayInput[0].value,
onError: $('input[name="dialog-onerror-choice"]:checked')[0].value,
cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked,
},
null,
{ modelsChanged: true }
);
dismiss();
});
};
*/
var doRemoveColumn = function() {
Refine.postCoreProcess(
"remove-column",
@ -298,6 +366,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
label: $.i18n._('core-views')["add-by-urls"]+"...",
click: doAddColumnByFetchingURLs
},
{
id: "core/add-column-by-reconciliation",
label: $.i18n._('core-views')["add-by-recon"]+"...",
click: doAddColumnByReconciliation
},
{},
{
id: "core/rename-column",

View File

@ -0,0 +1,71 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@import-less url("../theme.less");
.extend-data-preview-dialog .suggested-property-container {
border: 1px solid #aaa;
padding: 5px;
overflow: auto;
height: 375px;
}
.extend-data-preview-dialog .suggested-property {
padding: 5px;
}
.extend-data-preview-dialog input.property-suggest {
display: block;
padding: 2%;
width: 96%;
}
.extend-data-preview-dialog .preview-container {
border: 1px solid #aaa;
overflow: auto;
}
.extend-data-preview-dialog .preview-container table {
border-collapse: collapse;
}
.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th {
padding: 3px 5px;
border-bottom: 1px solid #ddd;
border-right: 1px solid #ddd;
}
.extend-data-preview-dialog .preview-container th img {
vertical-align: top;
margin-left: 5px;
}