2010-10-20 22:45:52 +02:00
|
|
|
/*

Copyright 2010, Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
    * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
package com.google.refine;
|
2010-04-06 07:35:48 +02:00
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.HashMap;
|
2013-08-17 19:45:22 +02:00
|
|
|
import java.util.Iterator;
|
2010-04-06 07:35:48 +02:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Map.Entry;
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.expr.ExpressionUtils;
|
|
|
|
import com.google.refine.expr.HasFieldsListImpl;
|
|
|
|
import com.google.refine.expr.WrappedRow;
|
|
|
|
import com.google.refine.model.Column;
|
|
|
|
import com.google.refine.model.Project;
|
|
|
|
import com.google.refine.model.Row;
|
2019-03-23 15:15:00 +01:00
|
|
|
import com.google.refine.util.JoinException;
|
2010-04-06 07:35:48 +02:00
|
|
|
|
|
|
|
public class InterProjectModel {
|
|
|
|
static public class ProjectJoin {
|
|
|
|
final public long fromProjectID;
|
|
|
|
final public String fromProjectColumnName;
|
|
|
|
final public long toProjectID;
|
|
|
|
final public String toProjectColumnName;
|
|
|
|
|
|
|
|
final public Map<Object, List<Integer>> valueToRowIndices =
|
|
|
|
new HashMap<Object, List<Integer>>();
|
|
|
|
|
|
|
|
ProjectJoin(
|
|
|
|
long fromProjectID,
|
|
|
|
String fromProjectColumnName,
|
|
|
|
long toProjectID,
|
|
|
|
String toProjectColumnName
|
|
|
|
) {
|
|
|
|
this.fromProjectID = fromProjectID;
|
|
|
|
this.fromProjectColumnName = fromProjectColumnName;
|
|
|
|
this.toProjectID = toProjectID;
|
|
|
|
this.toProjectColumnName = toProjectColumnName;
|
|
|
|
}
|
2017-10-27 00:37:10 +02:00
|
|
|
|
|
|
|
public HasFieldsListImpl getRows(Object value) {
|
|
|
|
if (ExpressionUtils.isNonBlankData(value) && valueToRowIndices.containsKey(value)) {
|
|
|
|
Project toProject = ProjectManager.singleton.getProject(toProjectID);
|
|
|
|
if (toProject != null) {
|
|
|
|
HasFieldsListImpl rows = new HasFieldsListImpl();
|
|
|
|
for (Integer r : valueToRowIndices.get(value)) {
|
|
|
|
Row row = toProject.rows.get(r);
|
|
|
|
rows.add(new WrappedRow(toProject, r, row));
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
2017-10-27 00:37:10 +02:00
|
|
|
|
|
|
|
return rows;
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
2017-10-27 00:37:10 +02:00
|
|
|
return null;
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
protected Map<String, ProjectJoin> _joins = new HashMap<String, ProjectJoin>();
|
|
|
|
|
2017-06-26 03:04:00 +02:00
|
|
|
/**
|
|
|
|
* Compute the ProjectJoin based on combination key, return the cached one from the HashMap if already computed
|
|
|
|
*
|
|
|
|
* @param fromProject
|
|
|
|
* @param fromColumn
|
|
|
|
* @param toProject
|
|
|
|
* @param toColumn
|
|
|
|
* @return
|
|
|
|
*/
|
2019-03-23 15:15:00 +01:00
|
|
|
public ProjectJoin getJoin(Long fromProject, String fromColumn, Long toProject, String toColumn) throws JoinException {
|
2010-04-06 07:35:48 +02:00
|
|
|
String key = fromProject + ";" + fromColumn + ";" + toProject + ";" + toColumn;
|
|
|
|
if (!_joins.containsKey(key)) {
|
|
|
|
ProjectJoin join = new ProjectJoin(
|
2019-03-17 14:14:58 +01:00
|
|
|
fromProject,
|
2010-04-06 07:35:48 +02:00
|
|
|
fromColumn,
|
2019-03-17 14:14:58 +01:00
|
|
|
toProject,
|
2010-04-06 07:35:48 +02:00
|
|
|
toColumn
|
|
|
|
);
|
2017-10-27 00:37:10 +02:00
|
|
|
|
|
|
|
computeJoin(join);
|
2010-04-06 07:35:48 +02:00
|
|
|
|
2013-03-03 15:36:43 +01:00
|
|
|
synchronized (_joins) {
|
|
|
|
_joins.put(key, join);
|
|
|
|
}
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return _joins.get(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
public void flushJoinsInvolvingProject(long projectID) {
|
2013-03-03 15:36:43 +01:00
|
|
|
synchronized (_joins) {
|
2013-08-17 19:45:22 +02:00
|
|
|
for (Iterator<Entry<String, ProjectJoin>> it = _joins.entrySet().iterator(); it.hasNext();) {
|
|
|
|
Entry<String, ProjectJoin> entry = it.next();
|
2013-03-03 15:36:43 +01:00
|
|
|
ProjectJoin join = entry.getValue();
|
|
|
|
if (join.fromProjectID == projectID || join.toProjectID == projectID) {
|
2013-08-17 19:45:22 +02:00
|
|
|
it.remove();
|
2013-03-03 15:36:43 +01:00
|
|
|
}
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void flushJoinsInvolvingProjectColumn(long projectID, String columnName) {
|
2013-03-03 15:36:43 +01:00
|
|
|
synchronized (_joins) {
|
2013-08-17 19:45:22 +02:00
|
|
|
for (Iterator<Entry<String, ProjectJoin>> it = _joins.entrySet().iterator(); it.hasNext();) {
|
|
|
|
Entry<String, ProjectJoin> entry = it.next();
|
2013-03-03 15:36:43 +01:00
|
|
|
ProjectJoin join = entry.getValue();
|
|
|
|
if (join.fromProjectID == projectID && join.fromProjectColumnName.equals(columnName) ||
|
|
|
|
join.toProjectID == projectID && join.toProjectColumnName.equals(columnName)) {
|
2013-08-17 19:45:22 +02:00
|
|
|
it.remove();
|
2013-03-03 15:36:43 +01:00
|
|
|
}
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-23 15:15:00 +01:00
|
|
|
protected void computeJoin(ProjectJoin join) throws JoinException {
|
2010-04-06 07:35:48 +02:00
|
|
|
if (join.fromProjectID < 0 || join.toProjectID < 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
Project fromProject = ProjectManager.singleton.getProject(join.fromProjectID);
|
2019-03-23 15:15:00 +01:00
|
|
|
ProjectMetadata fromProjectMD = ProjectManager.singleton.getProjectMetadata(join.fromProjectID);
|
2010-04-06 07:35:48 +02:00
|
|
|
Project toProject = ProjectManager.singleton.getProject(join.toProjectID);
|
2019-03-23 15:15:00 +01:00
|
|
|
ProjectMetadata toProjectMD = ProjectManager.singleton.getProjectMetadata(join.toProjectID);
|
|
|
|
|
|
|
|
// split this test to check each one and throw an appropriate error
|
2010-04-06 07:35:48 +02:00
|
|
|
if (fromProject == null || toProject == null) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
Column fromColumn = fromProject.columnModel.getColumnByName(join.fromProjectColumnName);
|
|
|
|
Column toColumn = toProject.columnModel.getColumnByName(join.toProjectColumnName);
|
2019-03-23 15:15:00 +01:00
|
|
|
if (fromColumn == null) {
|
|
|
|
throw new JoinException("Unable to find column " + join.fromProjectColumnName + " in project " + fromProjectMD.getName());
|
|
|
|
}
|
|
|
|
if (toColumn == null) {
|
|
|
|
throw new JoinException("Unable to find column " + join.toProjectColumnName + " in project " + toProjectMD.getName());
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
2017-10-27 00:37:10 +02:00
|
|
|
|
2010-04-06 07:35:48 +02:00
|
|
|
for (Row fromRow : fromProject.rows) {
|
2017-10-27 00:37:10 +02:00
|
|
|
Object value = fromRow.getCellValue(fromColumn.getCellIndex());
|
|
|
|
if (ExpressionUtils.isNonBlankData(value) && !join.valueToRowIndices.containsKey(value)) {
|
|
|
|
join.valueToRowIndices.put(value, new ArrayList<Integer>());
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int count = toProject.rows.size();
|
|
|
|
for (int r = 0; r < count; r++) {
|
|
|
|
Row toRow = toProject.rows.get(r);
|
|
|
|
|
|
|
|
Object value = toRow.getCellValue(toColumn.getCellIndex());
|
2010-04-08 22:46:02 +02:00
|
|
|
if (ExpressionUtils.isNonBlankData(value) && join.valueToRowIndices.containsKey(value)) {
|
|
|
|
join.valueToRowIndices.get(value).add(r);
|
2010-04-06 07:35:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|