Starting to migrate data extension to standard reconciliation services
This commit is contained in:
parent
b0f845d252
commit
ad3a174abd
@ -0,0 +1,65 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.commands.recon;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.commands.EngineDependentCommand;
|
||||
import com.google.refine.freebase.operations.ExtendDataOperation;
|
||||
import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class ExtendDataCommand extends EngineDependentCommand {
|
||||
@Override
|
||||
protected AbstractOperation createOperation(Project project,
|
||||
HttpServletRequest request, JSONObject engineConfig) throws Exception {
|
||||
|
||||
String baseColumnName = request.getParameter("baseColumnName");
|
||||
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
|
||||
|
||||
String jsonString = request.getParameter("extension");
|
||||
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
|
||||
|
||||
return new ExtendDataOperation(
|
||||
engineConfig,
|
||||
baseColumnName,
|
||||
extension,
|
||||
columnInsertIndex
|
||||
);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,208 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.commands.recon;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
|
||||
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.recon.ReconConfig;
|
||||
import com.google.refine.model.recon.StandardReconConfig;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class PreviewExtendDataCommand extends Command {
|
||||
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
try {
|
||||
Project project = getProject(request);
|
||||
String columnName = request.getParameter("columnName");
|
||||
|
||||
String rowIndicesString = request.getParameter("rowIndices");
|
||||
if (rowIndicesString == null) {
|
||||
respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
|
||||
return;
|
||||
}
|
||||
|
||||
String jsonString = request.getParameter("extension");
|
||||
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
|
||||
|
||||
JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString);
|
||||
int length = rowIndices.length();
|
||||
Column column = project.columnModel.getColumnByName(columnName);
|
||||
int cellIndex = column.getCellIndex();
|
||||
|
||||
// get the endpoint to extract data from
|
||||
String endpoint = null;
|
||||
ReconConfig cfg = column.getReconConfig();
|
||||
if (cfg != null &&
|
||||
cfg instanceof StandardReconConfig) {
|
||||
StandardReconConfig scfg = (StandardReconConfig)cfg;
|
||||
endpoint = scfg.service;
|
||||
} else {
|
||||
respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
List<String> topicNames = new ArrayList<String>();
|
||||
List<String> topicIds = new ArrayList<String>();
|
||||
Set<String> ids = new HashSet<String>();
|
||||
for (int i = 0; i < length; i++) {
|
||||
int rowIndex = rowIndices.getInt(i);
|
||||
if (rowIndex >= 0 && rowIndex < project.rows.size()) {
|
||||
Row row = project.rows.get(rowIndex);
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
if (cell != null && cell.recon != null && cell.recon.match != null) {
|
||||
topicNames.add(cell.recon.match.name);
|
||||
topicIds.add(cell.recon.match.id);
|
||||
ids.add(cell.recon.match.id);
|
||||
} else {
|
||||
topicNames.add(null);
|
||||
topicIds.add(null);
|
||||
ids.add(null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
|
||||
ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint);
|
||||
Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
|
||||
JSONWriter writer = new JSONWriter(response.getWriter());
|
||||
writer.object();
|
||||
writer.key("code"); writer.value("ok");
|
||||
writer.key("columns");
|
||||
writer.array();
|
||||
for (ColumnInfo info : job.columns) {
|
||||
writer.object();
|
||||
writer.key("names");
|
||||
writer.array();
|
||||
for (String name : info.names) {
|
||||
writer.value(name);
|
||||
}
|
||||
writer.endArray();
|
||||
writer.key("path");
|
||||
writer.array();
|
||||
for (String id : info.path) {
|
||||
writer.value(id);
|
||||
}
|
||||
writer.endArray();
|
||||
writer.endObject();
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
writer.key("rows");
|
||||
writer.array();
|
||||
for (int r = 0; r < topicNames.size(); r++) {
|
||||
String id = topicIds.get(r);
|
||||
String topicName = topicNames.get(r);
|
||||
|
||||
if (id != null && map.containsKey(id)) {
|
||||
DataExtension ext = map.get(id);
|
||||
boolean first = true;
|
||||
|
||||
if (ext.data.length > 0) {
|
||||
for (Object[] row : ext.data) {
|
||||
writer.array();
|
||||
if (first) {
|
||||
writer.value(topicName);
|
||||
first = false;
|
||||
} else {
|
||||
writer.value(null);
|
||||
}
|
||||
|
||||
for (Object cell : row) {
|
||||
if (cell != null && cell instanceof ReconCandidate) {
|
||||
ReconCandidate rc = (ReconCandidate) cell;
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(rc.id);
|
||||
writer.key("name"); writer.value(rc.name);
|
||||
writer.endObject();
|
||||
} else {
|
||||
writer.value(cell);
|
||||
}
|
||||
}
|
||||
|
||||
writer.endArray();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
writer.array();
|
||||
if (id != null) {
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("name"); writer.value(topicName);
|
||||
writer.endObject();
|
||||
} else {
|
||||
writer.value("<not reconciled>");
|
||||
}
|
||||
writer.endArray();
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
writer.endObject();
|
||||
} catch (Exception e) {
|
||||
respondException(response, e);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,469 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.model.changes;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Serializable;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
// import com.google.refine.freebase.FreebaseType;
|
||||
import com.google.refine.model.recon.DataExtensionReconConfig;
|
||||
import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension;
|
||||
import com.google.refine.history.Change;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.Recon.Judgment;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.ReconStats;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.google.refine.util.Pool;
|
||||
|
||||
public class DataExtensionChange implements Change {
|
||||
final protected String _baseColumnName;
|
||||
final protected int _columnInsertIndex;
|
||||
|
||||
final protected List<String> _columnNames;
|
||||
final protected List<FreebaseType> _columnTypes;
|
||||
|
||||
final protected List<Integer> _rowIndices;
|
||||
final protected List<DataExtension> _dataExtensions;
|
||||
|
||||
protected long _historyEntryID;
|
||||
protected int _firstNewCellIndex = -1;
|
||||
protected List<Row> _oldRows;
|
||||
protected List<Row> _newRows;
|
||||
|
||||
/**
 * Creates a change from freshly computed extension data.
 *
 * The cell index and the old/new row lists are not supplied here; they keep
 * their field defaults.
 *
 * @param baseColumnName    name of the column the extension is based on
 * @param columnInsertIndex position at which the new columns are inserted
 * @param columnNames       names of the new columns
 * @param columnTypes       types of the new columns, parallel to {@code columnNames}
 * @param rowIndices        indices of the rows that have extension data
 * @param dataExtensions    extension data, parallel to {@code rowIndices}
 * @param historyEntryID    id of the history entry this change belongs to
 */
public DataExtensionChange(
    String baseColumnName,
    int columnInsertIndex,
    List<String> columnNames,
    List<FreebaseType> columnTypes,
    List<Integer> rowIndices,
    List<DataExtension> dataExtensions,
    long historyEntryID
) {
    this._baseColumnName = baseColumnName;
    this._columnInsertIndex = columnInsertIndex;

    this._columnNames = columnNames;
    this._columnTypes = columnTypes;

    this._rowIndices = rowIndices;
    this._dataExtensions = dataExtensions;

    this._historyEntryID = historyEntryID;
}
|
||||
|
||||
/**
 * Creates a change with its cell index and row snapshots already known, as
 * done by {@code load}.
 *
 * The history entry id is not set by this constructor.
 *
 * @param baseColumnName    name of the column the extension is based on
 * @param columnInsertIndex position at which the new columns are inserted
 * @param columnNames       names of the new columns
 * @param columnTypes       types of the new columns, parallel to {@code columnNames}
 * @param rowIndices        indices of the rows that have extension data
 * @param dataExtensions    extension data, parallel to {@code rowIndices}
 * @param firstNewCellIndex cell index of the first new column
 * @param oldRows           rows as they were before the change
 * @param newRows           rows as they are after the change
 */
protected DataExtensionChange(
    String baseColumnName,
    int columnInsertIndex,

    List<String> columnNames,
    List<FreebaseType> columnTypes,

    List<Integer> rowIndices,
    List<DataExtension> dataExtensions,
    int firstNewCellIndex,
    List<Row> oldRows,
    List<Row> newRows
) {
    this._baseColumnName = baseColumnName;
    this._columnInsertIndex = columnInsertIndex;

    this._columnNames = columnNames;
    this._columnTypes = columnTypes;

    this._rowIndices = rowIndices;
    this._dataExtensions = dataExtensions;

    this._firstNewCellIndex = firstNewCellIndex;
    this._oldRows = oldRows;
    this._newRows = newRows;
}
|
||||
|
||||
@Override
|
||||
public void apply(Project project) {
|
||||
synchronized (project) {
|
||||
if (_firstNewCellIndex < 0) {
|
||||
_firstNewCellIndex = project.columnModel.allocateNewCellIndex();
|
||||
for (int i = 1; i < _columnNames.size(); i++) {
|
||||
project.columnModel.allocateNewCellIndex();
|
||||
}
|
||||
|
||||
_oldRows = new ArrayList<Row>(project.rows);
|
||||
|
||||
_newRows = new ArrayList<Row>(project.rows.size());
|
||||
|
||||
int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex();
|
||||
int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
|
||||
int index = 0;
|
||||
|
||||
int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
|
||||
DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
|
||||
|
||||
index++;
|
||||
|
||||
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
||||
|
||||
for (int r = 0; r < _oldRows.size(); r++) {
|
||||
Row oldRow = _oldRows.get(r);
|
||||
if (r < rowIndex) {
|
||||
_newRows.add(oldRow.dup());
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dataExtension == null || dataExtension.data.length == 0) {
|
||||
_newRows.add(oldRow);
|
||||
} else {
|
||||
Row firstNewRow = oldRow.dup();
|
||||
extendRow(firstNewRow, dataExtension, 0, reconMap);
|
||||
_newRows.add(firstNewRow);
|
||||
|
||||
int r2 = r + 1;
|
||||
for (int subR = 1; subR < dataExtension.data.length; subR++) {
|
||||
if (r2 < project.rows.size()) {
|
||||
Row oldRow2 = project.rows.get(r2);
|
||||
if (oldRow2.isCellBlank(cellIndex) &&
|
||||
oldRow2.isCellBlank(keyCellIndex)) {
|
||||
|
||||
Row newRow = oldRow2.dup();
|
||||
extendRow(newRow, dataExtension, subR, reconMap);
|
||||
|
||||
_newRows.add(newRow);
|
||||
r2++;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Row newRow = new Row(cellIndex + _columnNames.size());
|
||||
extendRow(newRow, dataExtension, subR, reconMap);
|
||||
|
||||
_newRows.add(newRow);
|
||||
}
|
||||
|
||||
r = r2 - 1; // r will be incremented by the for loop anyway
|
||||
}
|
||||
|
||||
rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
|
||||
dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
project.rows.clear();
|
||||
project.rows.addAll(_newRows);
|
||||
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
String name = _columnNames.get(i);
|
||||
int cellIndex = _firstNewCellIndex + i;
|
||||
|
||||
Column column = new Column(cellIndex, name);
|
||||
column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
|
||||
column.setReconStats(ReconStats.create(project, cellIndex));
|
||||
|
||||
try {
|
||||
project.columnModel.addColumn(_columnInsertIndex + i, column, true);
|
||||
|
||||
// the column might have been renamed to avoid collision
|
||||
_columnNames.set(i, column.getName());
|
||||
} catch (ModelException e) {
|
||||
// won't get here since we set the avoid collision flag
|
||||
}
|
||||
}
|
||||
|
||||
project.update();
|
||||
}
|
||||
}
|
||||
|
||||
protected void extendRow(
|
||||
Row row,
|
||||
DataExtension dataExtension,
|
||||
int extensionRowIndex,
|
||||
Map<String, Recon> reconMap
|
||||
) {
|
||||
Object[] values = dataExtension.data[extensionRowIndex];
|
||||
for (int c = 0; c < values.length; c++) {
|
||||
Object value = values[c];
|
||||
Cell cell = null;
|
||||
|
||||
if (value instanceof ReconCandidate) {
|
||||
ReconCandidate rc = (ReconCandidate) value;
|
||||
Recon recon;
|
||||
if (reconMap.containsKey(rc.id)) {
|
||||
recon = reconMap.get(rc.id);
|
||||
} else {
|
||||
recon = Recon.makeFreebaseRecon(_historyEntryID);
|
||||
recon.addCandidate(rc);
|
||||
recon.service = "mql";
|
||||
recon.match = rc;
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.judgmentBatchSize = 1;
|
||||
|
||||
reconMap.put(rc.id, recon);
|
||||
}
|
||||
cell = new Cell(rc.name, recon);
|
||||
} else {
|
||||
cell = new Cell((Serializable) value, null);
|
||||
}
|
||||
|
||||
row.setCell(_firstNewCellIndex + c, cell);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void revert(Project project) {
|
||||
synchronized (project) {
|
||||
project.rows.clear();
|
||||
project.rows.addAll(_oldRows);
|
||||
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
project.columnModel.columns.remove(_columnInsertIndex);
|
||||
}
|
||||
|
||||
project.update();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(Writer writer, Properties options) throws IOException {
|
||||
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
|
||||
writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
|
||||
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
|
||||
for (String name : _columnNames) {
|
||||
writer.write(name); writer.write('\n');
|
||||
}
|
||||
writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
|
||||
for (FreebaseType type : _columnTypes) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
|
||||
type.write(jsonWriter, options);
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
}
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
|
||||
for (Integer rowIndex : _rowIndices) {
|
||||
writer.write(rowIndex.toString()); writer.write('\n');
|
||||
}
|
||||
writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n');
|
||||
for (DataExtension dataExtension : _dataExtensions) {
|
||||
if (dataExtension == null) {
|
||||
writer.write('\n');
|
||||
continue;
|
||||
}
|
||||
|
||||
writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');
|
||||
|
||||
for (Object[] values : dataExtension.data) {
|
||||
for (Object value : values) {
|
||||
if (value == null) {
|
||||
writer.write("null");
|
||||
} else if (value instanceof ReconCandidate) {
|
||||
try {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
((ReconCandidate) value).write(jsonWriter, options);
|
||||
} catch (JSONException e) {
|
||||
// ???
|
||||
}
|
||||
} else if (value instanceof String) {
|
||||
writer.write(JSONObject.quote((String) value));
|
||||
} else {
|
||||
writer.write(value.toString());
|
||||
}
|
||||
writer.write('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
|
||||
|
||||
writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
|
||||
for (Row row : _newRows) {
|
||||
row.save(writer, options);
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
|
||||
for (Row row : _oldRows) {
|
||||
row.save(writer, options);
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("/ec/\n"); // end of change marker
|
||||
}
|
||||
|
||||
static public Change load(LineNumberReader reader, Pool pool) throws Exception {
|
||||
String baseColumnName = null;
|
||||
int columnInsertIndex = -1;
|
||||
|
||||
List<String> columnNames = null;
|
||||
List<FreebaseType> columnTypes = null;
|
||||
|
||||
List<Integer> rowIndices = null;
|
||||
List<DataExtension> dataExtensions = null;
|
||||
|
||||
List<Row> oldRows = null;
|
||||
List<Row> newRows = null;
|
||||
|
||||
int firstNewCellIndex = -1;
|
||||
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
|
||||
int equal = line.indexOf('=');
|
||||
CharSequence field = line.subSequence(0, equal);
|
||||
String value = line.substring(equal + 1);
|
||||
|
||||
if ("baseColumnName".equals(field)) {
|
||||
baseColumnName = value;
|
||||
} else if ("columnInsertIndex".equals(field)) {
|
||||
columnInsertIndex = Integer.parseInt(value);
|
||||
} else if ("firstNewCellIndex".equals(field)) {
|
||||
firstNewCellIndex = Integer.parseInt(value);
|
||||
} else if ("rowIndexCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
rowIndices = new ArrayList<Integer>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
rowIndices.add(Integer.parseInt(line));
|
||||
}
|
||||
}
|
||||
} else if ("columnNameCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
columnNames = new ArrayList<String>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
columnNames.add(line);
|
||||
}
|
||||
}
|
||||
} else if ("columnTypeCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
columnTypes = new ArrayList<FreebaseType>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
|
||||
}
|
||||
} else if ("dataExtensionCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
dataExtensions = new ArrayList<DataExtension>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
|
||||
if (line == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.length() == 0) {
|
||||
dataExtensions.add(null);
|
||||
continue;
|
||||
}
|
||||
|
||||
int rowCount = Integer.parseInt(line);
|
||||
Object[][] data = new Object[rowCount][];
|
||||
|
||||
for (int r = 0; r < rowCount; r++) {
|
||||
Object[] row = new Object[columnNames.size()];
|
||||
for (int c = 0; c < columnNames.size(); c++) {
|
||||
line = reader.readLine();
|
||||
|
||||
row[c] = ReconCandidate.loadStreaming(line);
|
||||
}
|
||||
|
||||
data[r] = row;
|
||||
}
|
||||
|
||||
dataExtensions.add(new DataExtension(data));
|
||||
}
|
||||
} else if ("oldRowCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
oldRows = new ArrayList<Row>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
oldRows.add(Row.load(line, pool));
|
||||
}
|
||||
}
|
||||
} else if ("newRowCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
newRows = new ArrayList<Row>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
newRows.add(Row.load(line, pool));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
DataExtensionChange change = new DataExtensionChange(
|
||||
baseColumnName,
|
||||
columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
rowIndices,
|
||||
dataExtensions,
|
||||
firstNewCellIndex,
|
||||
oldRows,
|
||||
newRows
|
||||
);
|
||||
|
||||
|
||||
return change;
|
||||
}
|
||||
}
|
@ -0,0 +1,453 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
package com.google.refine.model.recon;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
// import com.google.refine.freebase.FreebaseType;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.recon.StandardReconConfig;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class ReconciledDataExtensionJob {
|
||||
static public class DataExtension {
|
||||
final public Object[][] data;
|
||||
|
||||
public DataExtension(Object[][] data) {
|
||||
this.data = data;
|
||||
}
|
||||
}
|
||||
|
||||
static public class ColumnInfo {
|
||||
final public List<String> names;
|
||||
final public List<String> path;
|
||||
// final public FreebaseType expectedType;
|
||||
// TODO
|
||||
|
||||
protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) {
|
||||
this.names = names;
|
||||
this.path = path;
|
||||
// this.expectedType = expectedType;
|
||||
}
|
||||
}
|
||||
|
||||
final public JSONObject extension;
|
||||
final public String endpoint;
|
||||
final public int columnCount;
|
||||
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
|
||||
|
||||
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
|
||||
this.extension = obj;
|
||||
this.endpoint = endpoint;
|
||||
this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
|
||||
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
|
||||
}
|
||||
|
||||
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
|
||||
Set<String> ids,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws Exception {
|
||||
StringWriter writer = new StringWriter();
|
||||
formulateQuery(ids, extension, writer);
|
||||
|
||||
// Extract the order of properties
|
||||
JSONArray origProperties = extension.getJSONArray("properties");
|
||||
List<String> properties = new ArrayList<String>();
|
||||
int l = origProperties.length();
|
||||
for (int i = 0; i < l; i++) {
|
||||
properties.add(origProperties.getJSONObject(i).getString("id"));
|
||||
}
|
||||
|
||||
String query = writer.toString();
|
||||
InputStream is = performQuery(this.endpoint, query);
|
||||
try {
|
||||
String s = ParsingUtilities.inputStreamToString(is);
|
||||
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
||||
|
||||
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
|
||||
if (o.has("rows")){
|
||||
JSONObject records = o.getJSONObject("rows");
|
||||
|
||||
// for each identifier
|
||||
for (String id : ids) {
|
||||
if (records.has(id)) {
|
||||
JSONObject record = records.getJSONObject(id);
|
||||
|
||||
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap);
|
||||
|
||||
if (ext != null) {
|
||||
map.put(id, ext);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
static protected InputStream performQuery(String endpoint, String query) throws IOException {
|
||||
URL url = new URL(endpoint);
|
||||
|
||||
URLConnection connection = url.openConnection();
|
||||
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.setDoOutput(true);
|
||||
|
||||
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
||||
try {
|
||||
String body = "extend=" + ParsingUtilities.encode(query);
|
||||
|
||||
dos.writeBytes(body);
|
||||
} finally {
|
||||
dos.flush();
|
||||
dos.close();
|
||||
}
|
||||
|
||||
connection.connect();
|
||||
|
||||
return connection.getInputStream();
|
||||
}
|
||||
|
||||
|
||||
protected ReconciledDataExtensionJob.DataExtension collectResult(
|
||||
JSONObject record,
|
||||
List<String> properties,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
List<Object[]> rows = new ArrayList<Object[]>();
|
||||
|
||||
// for each property
|
||||
int colindex = 0;
|
||||
for(String pid : properties) {
|
||||
JSONArray values = record.getJSONArray(pid);
|
||||
if (values == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// for each value
|
||||
for(int rowindex = 0; rowindex < values.length(); rowindex++) {
|
||||
JSONObject val = values.getJSONObject(rowindex);
|
||||
// store a reconciled value
|
||||
if(val.has("id")) {
|
||||
storeCell(rows, rowindex, colindex, val, reconCandidateMap);
|
||||
} else if(val.has("str")) {
|
||||
// store a bare string
|
||||
String str = val.getString("str");
|
||||
storeStr(rows, rowindex, colindex, str);
|
||||
}
|
||||
// TODO other cases for other types of values (dates, booleans, …)
|
||||
}
|
||||
colindex++;
|
||||
}
|
||||
|
||||
|
||||
// collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
|
||||
|
||||
Object[][] data = new Object[rows.size()][columnCount];
|
||||
rows.toArray(data);
|
||||
|
||||
return new DataExtension(data);
|
||||
}
|
||||
|
||||
protected void storeStr(
|
||||
List<Object[]> rows,
|
||||
int row,
|
||||
int col,
|
||||
String str
|
||||
) throws JSONException {
|
||||
while (row >= rows.size()) {
|
||||
rows.add(new Object[columnCount]);
|
||||
}
|
||||
rows.get(row)[col] = str;
|
||||
}
|
||||
|
||||
protected void storeCell(
|
||||
List<Object[]> rows,
|
||||
int row,
|
||||
int col,
|
||||
Object value,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) {
|
||||
while (row >= rows.size()) {
|
||||
rows.add(new Object[columnCount]);
|
||||
}
|
||||
rows.get(row)[col] = value;
|
||||
}
|
||||
|
||||
protected void storeCell(
|
||||
List<Object[]> rows,
|
||||
int row,
|
||||
int col,
|
||||
JSONObject obj,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
String id = obj.getString("id");
|
||||
ReconCandidate rc;
|
||||
if (reconCandidateMap.containsKey(id)) {
|
||||
rc = reconCandidateMap.get(id);
|
||||
} else {
|
||||
rc = new ReconCandidate(
|
||||
obj.getString("id"),
|
||||
obj.getString("name"),
|
||||
JSONUtilities.getStringArray(obj, "type"),
|
||||
100
|
||||
);
|
||||
|
||||
reconCandidateMap.put(id, rc);
|
||||
}
|
||||
|
||||
storeCell(rows, row, col, rc, reconCandidateMap);
|
||||
}
|
||||
/*
|
||||
protected int[] collectResult(
|
||||
List<Object[]> rows,
|
||||
JSONObject extNode,
|
||||
JSONObject resultNode,
|
||||
int startRowIndex,
|
||||
int startColumnIndex,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
String propertyID = extNode.getString("id");
|
||||
// String expectedTypeID = extNode.getJSONObject("expected").getString("id");
|
||||
|
||||
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
|
||||
resultNode.getJSONArray(propertyID) : null;
|
||||
|
||||
if ("/type/key".equals(expectedTypeID)) {
|
||||
if (a != null) {
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object o = a.isNull(r) ? null : a.get(r);
|
||||
if (o instanceof JSONObject) {
|
||||
storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note that we still take up a column even if we don't have any data
|
||||
return new int[] { startRowIndex, startColumnIndex + 1 };
|
||||
} else if (expectedTypeID.startsWith("/type/")) {
|
||||
if (a != null) {
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object o = a.isNull(r) ? null : a.get(r);
|
||||
if (o instanceof Serializable) {
|
||||
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note that we still take up a column even if we don't have any data
|
||||
return new int[] { startRowIndex, startColumnIndex + 1 };
|
||||
} else {
|
||||
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
|
||||
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
|
||||
|
||||
if (a != null && a.length() > 0) {
|
||||
int maxColIndex = startColumnIndex;
|
||||
|
||||
int l = a.length();
|
||||
for (int r = 0; r < l; r++) {
|
||||
Object v = a.isNull(r) ? null : a.get(r);
|
||||
JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null;
|
||||
|
||||
int startColumnIndex2 = startColumnIndex;
|
||||
int startRowIndex2 = startRowIndex;
|
||||
|
||||
if (isOwnColumn) {
|
||||
if (o != null) {
|
||||
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
|
||||
} else {
|
||||
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasSubProperties && o != null) {
|
||||
int[] rowcol = collectResult(
|
||||
rows,
|
||||
extNode.getJSONArray("properties"),
|
||||
o,
|
||||
startRowIndex,
|
||||
startColumnIndex2,
|
||||
reconCandidateMap
|
||||
);
|
||||
|
||||
startRowIndex2 = rowcol[0];
|
||||
startColumnIndex2 = rowcol[1];
|
||||
}
|
||||
|
||||
startRowIndex = startRowIndex2;
|
||||
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
|
||||
}
|
||||
|
||||
return new int[] { startRowIndex, maxColIndex };
|
||||
} else {
|
||||
return new int[] {
|
||||
startRowIndex,
|
||||
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected int[] collectResult(
|
||||
List<Object[]> rows,
|
||||
JSONArray subProperties,
|
||||
JSONObject resultNode,
|
||||
int startRowIndex,
|
||||
int startColumnIndex,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
int maxStartRowIndex = startRowIndex;
|
||||
|
||||
int k = subProperties.length();
|
||||
for (int c = 0; c < k; c++) {
|
||||
int[] rowcol = collectResult(
|
||||
rows,
|
||||
subProperties.getJSONObject(c),
|
||||
resultNode,
|
||||
startRowIndex,
|
||||
startColumnIndex,
|
||||
reconCandidateMap
|
||||
);
|
||||
|
||||
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
|
||||
startColumnIndex = rowcol[1];
|
||||
}
|
||||
|
||||
return new int[] { maxStartRowIndex, startColumnIndex };
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
|
||||
jsonWriter.object();
|
||||
|
||||
jsonWriter.key("ids");
|
||||
jsonWriter.array();
|
||||
for (String id : ids) {
|
||||
if (id != null) {
|
||||
jsonWriter.value(id);
|
||||
}
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
|
||||
jsonWriter.key("properties");
|
||||
jsonWriter.array();
|
||||
JSONArray properties = node.getJSONArray("properties");
|
||||
int l = properties.length();
|
||||
|
||||
for (int i = 0; i < l; i++) {
|
||||
JSONObject property = properties.getJSONObject(i);
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("id");
|
||||
jsonWriter.value(property.getString("id"));
|
||||
// TODO translate constraints as below
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
|
||||
|
||||
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
||||
String name = obj.getString("name");
|
||||
|
||||
List<String> names2 = null;
|
||||
List<String> path2 = null;
|
||||
if (columns != null) {
|
||||
names2 = new ArrayList<String>(names);
|
||||
names2.add(name);
|
||||
|
||||
path2 = new ArrayList<String>(path);
|
||||
path2.add(obj.getString("id"));
|
||||
}
|
||||
|
||||
if (obj.has("properties") && !obj.isNull("properties")) {
|
||||
boolean included = (obj.has("included") && obj.getBoolean("included"));
|
||||
if (included && columns != null) {
|
||||
// JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2
|
||||
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */));
|
||||
}
|
||||
|
||||
return (included ? 1 : 0) +
|
||||
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
|
||||
} else {
|
||||
if (columns != null) {
|
||||
// JSONObject expected = obj.getJSONObject("expected");
|
||||
|
||||
columns.add(new ColumnInfo(names2, path2
|
||||
/* new FreebaseType(expected.getString("id"), expected.getString("name")) */ ));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
||||
int c = 0;
|
||||
int l = a.length();
|
||||
for (int i = 0; i < l; i++) {
|
||||
c += countColumns(a.getJSONObject(i), columns, names, path);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
}
|
@ -0,0 +1,314 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.operations.recon;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.browsing.FilteredRows;
|
||||
import com.google.refine.browsing.RowVisitor;
|
||||
import com.google.refine.model.changes.DataExtensionChange;
|
||||
import com.google.refine.model.recon.DataExtensionJob;
|
||||
import com.google.refine.model.recon.DataExtensionJob.ColumnInfo;
|
||||
import com.google.refine.model.recon.DataExtensionJob.DataExtension;
|
||||
import com.google.refine.history.HistoryEntry;
|
||||
import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.changes.CellAtRow;
|
||||
import com.google.refine.operations.EngineDependentOperation;
|
||||
import com.google.refine.operations.OperationRegistry;
|
||||
import com.google.refine.process.LongRunningProcess;
|
||||
import com.google.refine.process.Process;
|
||||
|
||||
public class ExtendDataOperation extends EngineDependentOperation {
|
||||
final protected String _baseColumnName;
|
||||
final protected JSONObject _extension;
|
||||
final protected int _columnInsertIndex;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
|
||||
return new ExtendDataOperation(
|
||||
engineConfig,
|
||||
obj.getString("baseColumnName"),
|
||||
obj.getJSONObject("extension"),
|
||||
obj.getInt("columnInsertIndex")
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * @param engineConfig      engine configuration, handed to the superclass
 * @param baseColumnName    name of the column the extension is based on
 * @param extension         description of the properties to fetch
 * @param columnInsertIndex index at which the new columns are inserted
 */
public ExtendDataOperation(
        JSONObject engineConfig,
        String baseColumnName,
        JSONObject extension,
        int columnInsertIndex
) {
    super(engineConfig);

    this._columnInsertIndex = columnInsertIndex;
    this._extension = extension;
    this._baseColumnName = baseColumnName;
}
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||
writer.key("description"); writer.value(getBriefDescription(null));
|
||||
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
|
||||
writer.key("baseColumnName"); writer.value(_baseColumnName);
|
||||
writer.key("extension"); writer.value(_extension);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getBriefDescription(Project project) {
|
||||
return "Extend data at index " + _columnInsertIndex +
|
||||
" based on column " + _baseColumnName;
|
||||
}
|
||||
|
||||
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
|
||||
return "Extend data at index " + _columnInsertIndex +
|
||||
" based on column " + column.getName() +
|
||||
" by filling " + cellsAtRows.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Process createProcess(Project project, Properties options) throws Exception {
|
||||
return new ExtendDataProcess(
|
||||
project,
|
||||
getEngineConfig(),
|
||||
getBriefDescription(null)
|
||||
);
|
||||
}
|
||||
|
||||
public class ExtendDataProcess extends LongRunningProcess implements Runnable {
|
||||
final protected Project _project;
|
||||
final protected JSONObject _engineConfig;
|
||||
final protected long _historyEntryID;
|
||||
protected int _cellIndex;
|
||||
protected FreebaseDataExtensionJob _job;
|
||||
|
||||
/**
 * Prepares the process: allocates the id of the history entry it will
 * create and builds the extension job from the enclosing operation's
 * extension description.
 *
 * @param project      the project to extend
 * @param engineConfig engine configuration used to select rows
 * @param description  text describing the process
 * @throws JSONException if the extension description cannot be read
 */
public ExtendDataProcess(
        Project project,
        JSONObject engineConfig,
        String description
) throws JSONException {
    super(description);

    this._project = project;
    this._engineConfig = engineConfig;
    this._historyEntryID = HistoryEntry.allocateID();
    this._job = new FreebaseDataExtensionJob(_extension);
}
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(hashCode());
|
||||
writer.key("description"); writer.value(_description);
|
||||
writer.key("immediate"); writer.value(false);
|
||||
writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done"));
|
||||
writer.key("progress"); writer.value(_progress);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Runnable getRunnable() {
|
||||
return this;
|
||||
}
|
||||
|
||||
protected void populateRowsWithMatches(List<Integer> rowIndices) throws Exception {
|
||||
Engine engine = new Engine(_project);
|
||||
engine.initializeFromJSON(_engineConfig);
|
||||
|
||||
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
||||
if (column == null) {
|
||||
throw new Exception("No column named " + _baseColumnName);
|
||||
}
|
||||
|
||||
_cellIndex = column.getCellIndex();
|
||||
|
||||
FilteredRows filteredRows = engine.getAllFilteredRows();
|
||||
filteredRows.accept(_project, new RowVisitor() {
|
||||
List<Integer> _rowIndices;
|
||||
|
||||
public RowVisitor init(List<Integer> rowIndices) {
|
||||
_rowIndices = rowIndices;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(Project project) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end(Project project) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean visit(Project project, int rowIndex, Row row) {
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
if (cell != null && cell.recon != null && cell.recon.match != null) {
|
||||
_rowIndices.add(rowIndex);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}.init(rowIndices));
|
||||
}
|
||||
|
||||
protected int extendRows(
|
||||
List<Integer> rowIndices,
|
||||
List<DataExtension> dataExtensions,
|
||||
int from,
|
||||
int limit,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) {
|
||||
Set<String> ids = new HashSet<String>();
|
||||
|
||||
int end;
|
||||
for (end = from; end < limit && ids.size() < 10; end++) {
|
||||
int index = rowIndices.get(end);
|
||||
Row row = _project.rows.get(index);
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
|
||||
ids.add(cell.recon.match.id);
|
||||
}
|
||||
|
||||
Map<String, DataExtension> map = null;
|
||||
try {
|
||||
map = _job.extend(ids, reconCandidateMap);
|
||||
} catch (Exception e) {
|
||||
map = new HashMap<String, DataExtension>();
|
||||
}
|
||||
|
||||
for (int i = from; i < end; i++) {
|
||||
int index = rowIndices.get(i);
|
||||
Row row = _project.rows.get(index);
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
String guid = cell.recon.match.id;
|
||||
|
||||
if (map.containsKey(guid)) {
|
||||
dataExtensions.add(map.get(guid));
|
||||
} else {
|
||||
dataExtensions.add(null);
|
||||
}
|
||||
}
|
||||
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
List<Integer> rowIndices = new ArrayList<Integer>();
|
||||
List<DataExtension> dataExtensions = new ArrayList<DataExtension>();
|
||||
|
||||
try {
|
||||
populateRowsWithMatches(rowIndices);
|
||||
} catch (Exception e2) {
|
||||
// TODO : Not sure what to do here?
|
||||
e2.printStackTrace();
|
||||
}
|
||||
|
||||
int start = 0;
|
||||
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
|
||||
|
||||
while (start < rowIndices.size()) {
|
||||
int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap);
|
||||
start = end;
|
||||
|
||||
_progress = end * 100 / rowIndices.size();
|
||||
try {
|
||||
Thread.sleep(200);
|
||||
} catch (InterruptedException e) {
|
||||
if (_canceled) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!_canceled) {
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
for (ColumnInfo info : _job.columns) {
|
||||
columnNames.add(StringUtils.join(info.names, " - "));
|
||||
}
|
||||
|
||||
List<String> columnTypes = new ArrayList<String>();
|
||||
for (ColumnInfo info : _job.columns) {
|
||||
columnTypes.add(info.expectedType);
|
||||
}
|
||||
|
||||
HistoryEntry historyEntry = new HistoryEntry(
|
||||
_historyEntryID,
|
||||
_project,
|
||||
_description,
|
||||
ExtendDataOperation.this,
|
||||
new DataExtensionChange(
|
||||
_baseColumnName,
|
||||
_columnInsertIndex,
|
||||
columnNames,
|
||||
columnTypes,
|
||||
rowIndices,
|
||||
dataExtensions,
|
||||
_historyEntryID)
|
||||
);
|
||||
|
||||
_project.history.addEntry(historyEntry);
|
||||
_project.processManager.onDoneProcess(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -121,6 +121,8 @@ function registerCommands() {
|
||||
RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
|
||||
RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
|
||||
RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
|
||||
RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
|
||||
RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
|
||||
|
||||
RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand());
|
||||
|
||||
@ -367,6 +369,7 @@ function init() {
|
||||
|
||||
"styles/index/default-importing-sources.less",
|
||||
"styles/views/data-table-view.less", // for the preview table's styles
|
||||
"styles/views/extend-data-preview-dialog.less",
|
||||
"styles/index/fixed-width-parser-ui.less",
|
||||
"styles/index/xml-parser-ui.less",
|
||||
"styles/index/json-parser-ui.less"
|
||||
@ -431,6 +434,7 @@ function init() {
|
||||
"scripts/reconciliation/standard-service-panel.js",
|
||||
|
||||
"scripts/dialogs/expression-preview-dialog.js",
|
||||
"scripts/dialogs/extend-data-preview-dialog.js",
|
||||
"scripts/dialogs/clustering-dialog.js",
|
||||
"scripts/dialogs/scatterplot-dialog.js",
|
||||
"scripts/dialogs/templating-exporter-dialog.js",
|
||||
|
@ -0,0 +1,424 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
 * Dialog for adding columns based on a reconciled column.
 *
 * Loads the dialog template, wires the reset / OK / cancel buttons, looks
 * up the "extend" endpoints advertised by the column's reconciliation
 * service, and then asks the service for suggested properties before
 * showing the dialog.
 *
 * @param column      the reconciled column (its name and reconConfig are read)
 * @param columnIndex index of that column
 * @param rowIndices  row indices sent along with preview requests
 * @param onDone      called with the chosen extension when OK is pressed
 */
function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) {
  this._column = column;
  this._columnIndex = columnIndex;
  this._rowIndices = rowIndices;
  this._onDone = onDone;
  this._extension = { properties: [] };

  var self = this;
  this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html"));
  this._elmts = DOM.bind(this._dialog);
  // text(), not html(): the column name is user data and must not be parsed as markup
  this._elmts.dialogHeader.text("Add columns by reconciled column " + column.name);
  this._elmts.resetButton.click(function() {
    self._extension.properties = [];
    self._update();
  });

  this._elmts.okButton.click(function() {
    if (self._extension.properties.length === 0) {
      alert("Please add some properties first.");
    } else {
      DialogSystem.dismissUntil(self._level - 1);
      self._onDone(self._extension);
    }
  });
  this._elmts.cancelButton.click(function() {
    DialogSystem.dismissUntil(self._level - 1);
  });

  var dismissBusy = DialogSystem.showBusy();
  var type = (column.reconConfig) && (column.reconConfig.type) ? column.reconConfig.type.id : "";

  this._proposePropertiesUrl = null;
  this._fetchColumnUrl = null;
  this._serviceMetadata = null;
  // Truthiness checks instead of "in": a null reconConfig, an unknown service
  // or a null "extend" entry must not throw here.
  if (column.reconConfig) {
    var serviceMetadata = ReconciliationManager.getServiceFromUrl(column.reconConfig.service);
    this._serviceMetadata = serviceMetadata || null;

    var extend = serviceMetadata ? serviceMetadata.extend : null;
    if (extend) {
      if (extend.propose_properties) {
        var proposeEndpoint = extend.propose_properties;
        this._proposePropertiesUrl = proposeEndpoint.service_url + proposeEndpoint.service_path;
      }
      if (extend.fetch_column) {
        var fetchEndpoint = extend.fetch_column;
        this._fetchColumnUrl = fetchEndpoint.service_url + fetchEndpoint.service_path;
      }
    }
  }

  ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) {
    dismissBusy();
    self._show(properties);
  });
}
|
||||
|
||||
/**
 * Asks the service for the properties it proposes for a type and passes
 * them, sorted by name, to onDone as a list of {id, name} objects.
 *
 * onDone is called exactly once: with an empty list when there is no URL,
 * when the response carries no property list, or when no response arrived
 * within 7 seconds.
 *
 * @param url    propose-properties endpoint, or null when the service has none
 * @param typeID id of the type to propose properties for
 * @param onDone callback receiving the list of properties
 */
ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) {
  if (url == null) {
    onDone([]);
    return;
  }

  var done = false;
  $.getJSON(
    // the type id is data, so it has to be escaped before going into the query string
    url + "?type=" + encodeURIComponent(typeID) + "&callback=?",
    null,
    function(data) {
      if (done) return;
      done = true;

      // A malformed response must still reach onDone; throwing here would
      // leave the caller waiting forever since the timeout below is disarmed.
      var proposed = (data && $.isArray(data.properties)) ? data.properties : [];

      var allProperties = [];
      for (var i = 0; i < proposed.length; i++) {
        var property = proposed[i];
        // TODO: nested properties and expected types are not carried over yet
        allProperties.push({
          id: property.id,
          name: property.name
        });
      }
      allProperties.sort(function(a, b) {
        return String(a.name || "").localeCompare(String(b.name || ""));
      });

      onDone(allProperties);
    }
  );

  window.setTimeout(function() {
    if (done) return;

    done = true;
    onDone([]);
  }, 7000); // time to give up?
};
|
||||
|
||||
/**
 * Shows the dialog, lists the suggested properties as links that add the
 * property when clicked, and attaches property auto-completion to the
 * "add property" input.
 *
 * @param properties suggested properties, as produced by getAllProperties
 */
ExtendReconciledDataPreviewDialog.prototype._show = function(properties) {
  this._level = DialogSystem.showDialog(this._dialog);

  // Give the preview pane the height spanned from the input to the bottom
  // of the suggestion list.
  var n = this._elmts.suggestedPropertyContainer.offset().top +
    this._elmts.suggestedPropertyContainer.outerHeight(true) -
    this._elmts.addPropertyInput.offset().top;

  this._elmts.previewContainer.height(Math.floor(n));

  var self = this;
  var container = this._elmts.suggestedPropertyContainer;
  var renderSuggestedProperty = function(property) {
    var label = ("properties" in property) ? (property.name + " \u00BB " + property.properties[0].name) : property.name;
    var div = $('<div>').addClass("suggested-property").appendTo(container);

    $('<a>')
      .attr("href", "javascript:{}")
      // text(), not html(): names come from the remote service
      .text(label)
      .appendTo(div)
      .click(function() {
        self._addProperty(property);
      });
  };
  for (var i = 0; i < properties.length; i++) {
    renderSuggestedProperty(properties[i]);
  }

  // The column may have no known service, or the service no suggest entry;
  // fall back to an empty configuration instead of throwing.
  var metadata = this._serviceMetadata || {};
  var suggest = metadata.suggest || {};
  var suggestConfig = $.extend({}, suggest.property);
  suggestConfig.key = null;
  suggestConfig.query_param_name = "prefix";

  this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) {
    // TODO: the expected type (data.expected_type) is not passed on yet
    self._addProperty({
      id : data.id,
      name: data.name
    });
  });
};
|
||||
|
||||
/**
 * Requests a preview of the current extension from the server and renders
 * it. While waiting, the preview pane shows a progress message; when the
 * request fails, it shows an error instead of staying on that message.
 */
ExtendReconciledDataPreviewDialog.prototype._update = function() {
  this._elmts.previewContainer.empty().text("Querying THE service...");

  var self = this;
  var params = {
    project: theProject.id,
    columnName: this._column.name
  };

  $.post(
    "command/core/preview-extend-data?" + $.param(params),
    {
      rowIndices: JSON.stringify(this._rowIndices),
      extension: JSON.stringify(this._extension)
    },
    function(data) {
      self._renderPreview(data);
    },
    "json"
  ).fail(function(data) {
    if (window.console && window.console.log) {
      window.console.log(data);
    }
    // same message as shown for an error reported in the response body
    self._elmts.previewContainer.empty().text("Error.");
  });
};
|
||||
|
||||
/**
 * Adds a property to the extension and refreshes the preview.
 *
 * When a property with the same id is already present at that level, the
 * two are merged: "included" flags are OR-ed, and sub-properties are
 * merged recursively (or adopted as-is when the existing property has
 * none). Otherwise the property is appended.
 *
 * @param p the property to add
 */
ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) {
  function mergeInto(existing, incoming) {
    for (var k = 0; k < existing.length; k++) {
      var current = existing[k];
      if (current.id != incoming.id) {
        continue;
      }

      if ("included" in incoming) {
        if ("included" in current) {
          current.included = (current.included || incoming.included);
        } else {
          current.included = incoming.included;
        }
      }

      if ("properties" in incoming) {
        if ("properties" in current) {
          for (var j = 0; j < incoming.properties.length; j++) {
            mergeInto(current.properties, incoming.properties[j]);
          }
        } else {
          current.properties = incoming.properties;
        }
      }
      return;
    }

    existing.push(incoming);
  }

  mergeInto(this._extension.properties, p);

  this._update();
};
|
||||
|
||||
/**
 * Renders the preview table: a header row with the base column followed by
 * one header per fetched column (each with "remove" and "constrain"
 * actions), then one table row per data row. Object cells are rendered as
 * links, other non-null cells as text.
 *
 * @param data server response; {code: "error"} shows an error message,
 *             otherwise its "columns" and "rows" are rendered
 */
ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
  var self = this;
  var container = this._elmts.previewContainer.empty();
  if (data.code == "error") {
    container.text("Error.");
    return;
  }

  var table = $('<table>')[0];
  var trHead = table.insertRow(table.rows.length);
  $('<th>').appendTo(trHead).text(this._column.name);

  var renderColumnHeader = function(column) {
    var th = $('<th>').appendTo(trHead);

    // text(), not html(): property names come from the remote service
    $('<span>').text(column.names.join(" \u00BB ")).appendTo(th);
    $('<br>').appendTo(th);

    $('<a href="javascript:{}"></a>')
      .text("remove")
      .addClass("action")
      .attr("title", "Remove this column")
      .click(function() {
        self._removeProperty(column.path);
      }).appendTo(th);

    $('<a href="javascript:{}"></a>')
      .text("constrain")
      .addClass("action")
      .attr("title", "Add constraints to this column")
      .click(function() {
        self._constrainProperty(column.path);
      }).appendTo(th);
  };
  for (var h = 0; h < data.columns.length; h++) {
    renderColumnHeader(data.columns[h]);
  }

  for (var r = 0; r < data.rows.length; r++) {
    var tr = table.insertRow(table.rows.length);
    var row = data.rows[r];

    for (var c = 0; c < row.length; c++) {
      var td = tr.insertCell(tr.cells.length);
      var cell = row[c];
      if (cell !== null) {
        if ($.isPlainObject(cell)) {
          // NOTE(review): the link target is still the Freebase view URL;
          // presumably it should come from the reconciliation service — confirm.
          $('<a>').attr("href", "http://www.freebase.com/view" + cell.id).text(cell.name).appendTo(td);
        } else {
          $('<span>').text(cell).appendTo(td);
        }
      }
    }
  }

  container.append(table);
};
|
||||
|
||||
/**
 * Removes the column addressed by `path` from this dialog's extension and
 * then calls `this._update()`.
 *
 * The property at the end of the path loses its `included` flag. Any property
 * along the path that afterwards has no sub-properties and is not itself
 * flagged `included` is dropped from its parent list. A parent that is still
 * `included` is kept even when its last child has just been removed, so that
 * removing a nested column does not also remove the parent's own column.
 *
 * @param {Array} path property ids from the top-level property down to the
 *     one being removed.
 */
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(path) {
  var removeFromList = function(path, index, properties) {
    var id = path[index];

    for (var i = properties.length - 1; i >= 0; i--) {
      var property = properties[i];
      if (property.id == id) {
        if (index === path.length - 1) {
          // End of the path: this property no longer contributes a column.
          delete property.included;
        } else if ("properties" in property && property.properties.length > 0) {
          removeFromList(path, index + 1, property.properties);
        }

        var hasChildren = "properties" in property && property.properties.length > 0;
        // Only discard the entry when nothing depends on it any more: it has
        // no remaining children and it is not an included column itself.
        if (!hasChildren && !property.included) {
          properties.splice(i, 1);
        }

        return;
      }
    }
  };

  removeFromList(path, 0, this._extension.properties);

  this._update();
};
|
||||
|
||||
/**
 * Looks up the property addressed by `path` within
 * `this._extension.properties`.
 *
 * @param {Array} path property ids, outermost first.
 * @returns {Object|null} the property at the end of the path, or `null` when
 *     some id along the path has no match or the matched property has no
 *     sub-properties to descend into.
 */
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(path) {
  var level = this._extension.properties;
  var lastDepth = path.length - 1;

  for (var depth = 0; depth <= lastDepth; depth++) {
    var wanted = path[depth];

    // Scan from the end of the list and stop at the first id match.
    var match = null;
    for (var i = level.length - 1; i >= 0; i--) {
      if (level[i].id == wanted) {
        match = level[i];
        break;
      }
    }

    if (match === null) {
      return null;
    }
    if (depth === lastDepth) {
      return match;
    }
    if (!("properties" in match) || match.properties.length === 0) {
      // The path goes deeper than this branch of the tree does.
      return null;
    }

    level = match.properties;
  }

  return null;
};
|
||||
|
||||
/**
 * Opens a modal dialog in which the user edits, as JSON, the `constraints`
 * object of the property addressed by `path`.
 *
 * The textarea starts with the property's current constraints, or with
 * `{ "limit": 10 }` when it has none. On OK the text is parsed; a
 * single-element array is unwrapped to its element; the result must be a
 * plain object. Invalid input produces an alert and leaves the dialog open.
 * Valid input is stored in `property.constraints`, the dialog is dismissed
 * and `this._update()` is called.
 *
 * @param {Array} path property ids, outermost first, as accepted by
 *     `_findProperty`. Nothing happens when the path matches no property.
 */
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) {
  var self = this;
  var property = this._findProperty(path);
  if (!property) {
    // _findProperty returns null for an unknown path; without a target
    // there is nothing to constrain.
    return;
  }

  var frame = DialogSystem.createDialog();
  frame.width("500px");

  $('<div></div>').addClass("dialog-header").text("Constrain " + path.join(" > ")).appendTo(frame);
  var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
  var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);

  body.html(
    '<div class="grid-layout layout-normal layout-full"><table>' +
    '<tr><td>' +
    'Enter MQL query constraints as JSON' +
    '</td></tr>' +
    '<tr><td>' +
    '<textarea style="width: 100%; height: 300px; font-family: monospace;" bind="textarea"></textarea>' +
    '</td></tr>' +
    '</table></div>'
  );
  var bodyElmts = DOM.bind(body);

  if ("constraints" in property) {
    bodyElmts.textarea[0].value = JSON.stringify(property.constraints, null, 2);
  } else {
    bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2);
  }

  footer.html(
    '<button class="button" bind="okButton"> OK </button>' +
    '<button class="button" bind="cancelButton">Cancel</button>'
  );
  var footerElmts = DOM.bind(footer);

  var level = DialogSystem.showDialog(frame);
  var dismiss = function() {
    DialogSystem.dismissUntil(level - 1);
  };

  footerElmts.cancelButton.click(dismiss);
  footerElmts.okButton.click(function() {
    var o;
    try {
      o = JSON.parse(bodyElmts.textarea[0].value);
    } catch (e) {
      // JSON.parse signals malformed input by throwing, so this is where
      // the user has to be told; swallowing it would make OK a silent no-op.
      alert("Please ensure that the JSON you enter is valid.");
      return;
    }

    if ($.isArray(o) && o.length == 1) {
      o = o[0];
    }
    if (!$.isPlainObject(o)) {
      alert("The JSON you enter must be an object, that is, it is of this form { ... }.");
      return;
    }

    property.constraints = o;

    dismiss();

    self._update();
  });

  bodyElmts.textarea.focus();
};
|
||||
|
@ -0,0 +1,27 @@
|
||||
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
|
||||
<div class="dialog-header" bind="dialogHeader"></div>
|
||||
<div class="dialog-body" bind="dialogBody">
|
||||
<div class="grid-layout layout-normal layout-full"><table rows="4">
|
||||
<tr>
|
||||
<td width="300" height="1" bind="addPropertyHeader"></td>
|
||||
<td height="1" bind="previewHeader"></td>
|
||||
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
|
||||
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="1" bind="suggestedPropertyHeader"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
||||
<div class="dialog-footer" bind="dialogFooter">
|
||||
<button class="button" bind="okButton"></button>
|
||||
<button class="button" bind="cancelButton"></button>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -0,0 +1,26 @@
|
||||
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
|
||||
<div class="dialog-header" bind="dialogHeader"></div>
|
||||
<div class="dialog-body" bind="dialogBody">
|
||||
<div class="grid-layout layout-normal layout-full"><table rows="4">
|
||||
<tr>
|
||||
<td width="300" height="1">Add Property</td>
|
||||
<td height="1">Preview</td>
|
||||
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
|
||||
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="1">Suggested Properties</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
||||
<div class="dialog-footer" bind="dialogFooter">
|
||||
<button class="button" bind="okButton"> OK </button>
|
||||
<button class="button" bind="cancelButton">Cancel</button>
|
||||
</div>
|
||||
</div>
|
@ -146,6 +146,74 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
});
|
||||
};
|
||||
|
||||
// Menu handler: opens an ExtendReconciledDataPreviewDialog for `column`,
// seeded with the row indices returned by DataTableView.sampleVisibleRows.
// When the dialog calls back with the chosen extension, the "extend-data"
// process of the "core" module is posted, inserting the new columns right
// after the base column.
var doAddColumnByReconciliation = function() {
  var columnIndex = Refine.columnNameToColumnIndex(column.name);
  var o = DataTableView.sampleVisibleRows(column);
  new ExtendReconciledDataPreviewDialog(
    column,
    columnIndex,
    o.rowIndices,
    function(extension) {
      Refine.postProcess(
        "core",
        "extend-data",
        {
          baseColumnName: column.name,
          columnInsertIndex: columnIndex + 1
        },
        {
          extension: JSON.stringify(extension)
        },
        { rowsChanged: true, modelsChanged: true }
      );
    }
  );
};
|
||||
|
||||
/*
|
||||
var doAddColumnByReconciliation = function() {
|
||||
var frame = $(
|
||||
DOM.loadHTML("core", "scripts/views/data-table/add-column-by-reconciliation.html"));
|
||||
|
||||
var elmts = DOM.bind(frame);
|
||||
elmts.dialogHeader.text($.i18n._('core-views')["add-by-recon"]);
|
||||
|
||||
elmts.suggestedPropertyHeader.html('Suggested properties');
|
||||
elmts.previewHeader.html('Preview');
|
||||
elmts.addPropertyHeader.html('Add property');
|
||||
elmts.okButton.html($.i18n._('core-buttons')["ok"]);
|
||||
elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]);
|
||||
|
||||
var level = DialogSystem.showDialog(frame);
|
||||
var dismiss = function() { DialogSystem.dismissUntil(level - 1); };
|
||||
|
||||
elmts.cancelButton.click(dismiss);
|
||||
elmts.okButton.click(function() {
|
||||
var columnName = $.trim(elmts.columnNameInput[0].value);
|
||||
if (!columnName.length) {
|
||||
alert($.i18n._('core-views')["warning-col-name"]);
|
||||
return;
|
||||
}
|
||||
|
||||
Refine.postCoreProcess(
|
||||
"add-column-by-fetching-urls",
|
||||
{
|
||||
baseColumnName: column.name,
|
||||
urlExpression: previewWidget.getExpression(true),
|
||||
newColumnName: columnName,
|
||||
columnInsertIndex: columnIndex + 1,
|
||||
delay: elmts.throttleDelayInput[0].value,
|
||||
onError: $('input[name="dialog-onerror-choice"]:checked')[0].value,
|
||||
cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked,
|
||||
},
|
||||
null,
|
||||
{ modelsChanged: true }
|
||||
);
|
||||
dismiss();
|
||||
});
|
||||
};
|
||||
*/
|
||||
|
||||
var doRemoveColumn = function() {
|
||||
Refine.postCoreProcess(
|
||||
"remove-column",
|
||||
@ -298,6 +366,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
label: $.i18n._('core-views')["add-by-urls"]+"...",
|
||||
click: doAddColumnByFetchingURLs
|
||||
},
|
||||
{
|
||||
id: "core/add-column-by-reconciliation",
|
||||
label: $.i18n._('core-views')["add-by-recon"]+"...",
|
||||
click: doAddColumnByReconciliation
|
||||
},
|
||||
{},
|
||||
{
|
||||
id: "core/rename-column",
|
||||
|
@ -0,0 +1,71 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
@import-less url("../theme.less");
|
||||
|
||||
.extend-data-preview-dialog .suggested-property-container {
|
||||
border: 1px solid #aaa;
|
||||
padding: 5px;
|
||||
overflow: auto;
|
||||
height: 375px;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog .suggested-property {
|
||||
padding: 5px;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog input.property-suggest {
|
||||
display: block;
|
||||
padding: 2%;
|
||||
width: 96%;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog .preview-container {
|
||||
border: 1px solid #aaa;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog .preview-container table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th {
|
||||
padding: 3px 5px;
|
||||
border-bottom: 1px solid #ddd;
|
||||
border-right: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.extend-data-preview-dialog .preview-container th img {
|
||||
vertical-align: top;
|
||||
margin-left: 5px;
|
||||
}
|
Loading…
Reference in New Issue
Block a user