Merge pull request #1290 from claussni/cross-func-split

Extend the cross() function to support multi-valued cell input
This commit is contained in:
Thad Guidry 2017-10-26 14:23:23 -05:00 committed by GitHub
commit 3d0e96a0ce
3 changed files with 94 additions and 42 deletions

View File

@ -68,22 +68,38 @@ public class InterProjectModel {
this.toProjectID = toProjectID; this.toProjectID = toProjectID;
this.toProjectColumnName = toProjectColumnName; this.toProjectColumnName = toProjectColumnName;
} }
public HasFieldsListImpl getRows(Object value) { public HasFieldsListImpl getRows(final Object rowKey, String separatorRegexp) {
if (ExpressionUtils.isNonBlankData(value) && valueToRowIndices.containsKey(value)) { Project toProject = ProjectManager.singleton.getProject(toProjectID);
Project toProject = ProjectManager.singleton.getProject(toProjectID); if (toProject == null) {
if (toProject != null) { return null;
HasFieldsListImpl rows = new HasFieldsListImpl(); }
for (Integer r : valueToRowIndices.get(value)) {
Row row = toProject.rows.get(r); HasFieldsListImpl resultFieldList = null;
rows.add(new WrappedRow(toProject, r, row));
if (ExpressionUtils.isNonBlankData(rowKey)) {
Object[] rowKeys;
if (separatorRegexp != null && !separatorRegexp.isEmpty() && rowKey instanceof String) {
rowKeys = ((String) rowKey).split(separatorRegexp);
} else {
rowKeys = new Object[]{rowKey};
}
resultFieldList = new HasFieldsListImpl();
for (Object k : rowKeys) {
if (valueToRowIndices.containsKey(k)) {
for (Integer rowIndex : valueToRowIndices.get(k)) {
Row row = toProject.rows.get(rowIndex);
resultFieldList.add(new WrappedRow(toProject, rowIndex, row));
}
} }
return rows;
} }
} }
return null;
// Returning null instead of an empty list is expected
return resultFieldList.isEmpty() ? null : resultFieldList;
} }
} }
protected Map<String, ProjectJoin> _joins = new HashMap<String, ProjectJoin>(); protected Map<String, ProjectJoin> _joins = new HashMap<String, ProjectJoin>();
@ -95,9 +111,10 @@ public class InterProjectModel {
* @param fromColumn * @param fromColumn
* @param toProject * @param toProject
* @param toColumn * @param toColumn
* @param separatorRegexp
* @return * @return
*/ */
public ProjectJoin getJoin(String fromProject, String fromColumn, String toProject, String toColumn) { public ProjectJoin getJoin(String fromProject, String fromColumn, String toProject, String toColumn, String separatorRegexp) {
String key = fromProject + ";" + fromColumn + ";" + toProject + ";" + toColumn; String key = fromProject + ";" + fromColumn + ";" + toProject + ";" + toColumn;
if (!_joins.containsKey(key)) { if (!_joins.containsKey(key)) {
ProjectJoin join = new ProjectJoin( ProjectJoin join = new ProjectJoin(
@ -106,8 +123,8 @@ public class InterProjectModel {
ProjectManager.singleton.getProjectID(toProject), ProjectManager.singleton.getProjectID(toProject),
toColumn toColumn
); );
computeJoin(join); computeJoin(join, separatorRegexp);
synchronized (_joins) { synchronized (_joins) {
_joins.put(key, join); _joins.put(key, join);
@ -142,7 +159,7 @@ public class InterProjectModel {
} }
} }
protected void computeJoin(ProjectJoin join) { protected void computeJoin(ProjectJoin join, String separatorRegexp) {
if (join.fromProjectID < 0 || join.toProjectID < 0) { if (join.fromProjectID < 0 || join.toProjectID < 0) {
return; return;
} }
@ -158,11 +175,21 @@ public class InterProjectModel {
if (fromColumn == null || toColumn == null) { if (fromColumn == null || toColumn == null) {
return; return;
} }
for (Row fromRow : fromProject.rows) { for (Row fromRow : fromProject.rows) {
Object value = fromRow.getCellValue(fromColumn.getCellIndex()); Object fromRowKey = fromRow.getCellValue(fromColumn.getCellIndex());
if (ExpressionUtils.isNonBlankData(value) && !join.valueToRowIndices.containsKey(value)) { if (ExpressionUtils.isNonBlankData(fromRowKey)) {
join.valueToRowIndices.put(value, new ArrayList<Integer>()); Object[] fromRowKeys;
if (separatorRegexp != null && !separatorRegexp.isEmpty() && fromRowKey instanceof String) {
fromRowKeys = ((String) fromRowKey).split(separatorRegexp);
} else {
fromRowKeys = new Object[]{fromRowKey};
}
for (Object k : fromRowKeys) {
if (!join.valueToRowIndices.containsKey(k)) {
join.valueToRowIndices.put(k, new ArrayList<Integer>());
}
}
} }
} }
@ -176,4 +203,5 @@ public class InterProjectModel {
} }
} }
} }
} }

View File

@ -47,33 +47,39 @@ import com.google.refine.grel.Function;
import com.google.refine.model.Project; import com.google.refine.model.Project;
public class Cross implements Function { public class Cross implements Function {
public static final String EVAL_ERROR_MESSAGE =
" expects a string or cell, a project name to join with, and a column name in that project. " +
"Optional accepts a regular expression separator for source values.";
@Override @Override
public Object call(Properties bindings, Object[] args) { public Object call(Properties bindings, Object[] args) {
if (args.length == 3) { if (args.length >= 3) {
// 1st argument can take either value or cell(for backward compatibility) // 1st argument can take either value or cell(for backward compatibility)
Object v = args[0]; Object v = args[0];
Object toProjectName = args[1]; Object toProjectName = args[1];
Object toColumnName = args[2]; Object toColumnName = args[2];
String separatorRegexp = (args.length > 3) ? String.valueOf(args[3]) : null;
if (v != null &&
if (v != null &&
( v instanceof String || v instanceof WrappedCell ) && ( v instanceof String || v instanceof WrappedCell ) &&
toProjectName != null && toProjectName instanceof String && toProjectName != null && toProjectName instanceof String &&
toColumnName != null && toColumnName instanceof String) { toColumnName != null && toColumnName instanceof String) {
ProjectJoin join = ProjectManager.singleton.getInterProjectModel().getJoin( ProjectJoin join = ProjectManager.singleton.getInterProjectModel().getJoin(
ProjectManager.singleton.getProjectMetadata(((Project) bindings.get("project")).id).getName(), ProjectManager.singleton.getProjectMetadata(((Project) bindings.get("project")).id).getName(),
(String) bindings.get("columnName"), (String) bindings.get("columnName"),
(String) toProjectName, (String) toProjectName,
(String) toColumnName (String) toColumnName,
); separatorRegexp
);
String srcValue = v instanceof String ? (String)v : (String)((WrappedCell) v).cell.value; String srcValue = v instanceof String ? (String)v : (String)((WrappedCell) v).cell.value;
return join.getRows(srcValue); return join.getRows(srcValue, separatorRegexp);
} }
} }
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or cell, a project name to join with, and a column name in that project"); return new EvalError(ControlFunctionRegistry.getFunctionName(this) + EVAL_ERROR_MESSAGE);
} }
@Override @Override
@ -82,7 +88,7 @@ public class Cross implements Function {
writer.object(); writer.object();
writer.key("description"); writer.value("join with another project by column"); writer.key("description"); writer.value("join with another project by column");
writer.key("params"); writer.value("cell c or string value, string projectName, string columnName"); writer.key("params"); writer.value("cell c or string value, string projectName, string columnName(, string separatorRegexp)");
writer.key("returns"); writer.value("array"); writer.key("returns"); writer.value("array");
writer.endObject(); writer.endObject();
} }

View File

@ -9,6 +9,7 @@ import java.util.Calendar;
import java.util.List; import java.util.List;
import java.util.Properties; import java.util.Properties;
import com.google.refine.expr.functions.Cross;
import org.json.JSONObject; import org.json.JSONObject;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
@ -163,17 +164,34 @@ public class CrossFunctionTests extends RefineTest {
* rest of cells shows "Error: cross expects a string or cell, a project name to join with, and a column name in that project" * rest of cells shows "Error: cross expects a string or cell, a project name to join with, and a column name in that project"
*/ */
@Test @Test
public void crossFunctionNonLiteralValue() throws Exception { public void crossFunctionIntegerValue() throws Exception {
Assert.assertEquals(((EvalError) invoke("cross", 1, "My Address Book", "friend")).message, String message = ((EvalError) invoke("cross", 1, "My Address Book", "friend")).message;
"cross expects a string or cell, a project name to join with, and a column name in that project"); Assert.assertTrue(message.contains(Cross.EVAL_ERROR_MESSAGE),
String.format("Message should contain `%s` but is `%s`", Cross.EVAL_ERROR_MESSAGE, message));
Assert.assertEquals(((EvalError) invoke("cross", null, "My Address Book", "friend")).message,
"cross expects a string or cell, a project name to join with, and a column name in that project");
Assert.assertEquals(((EvalError) invoke("cross", Calendar.getInstance(), "My Address Book", "friend")).message,
"cross expects a string or cell, a project name to join with, and a column name in that project");
} }
/**
* rest of cells shows "Error: cross expects a string or cell, a project name to join with, and a column name in
* that project"
*/
@Test
public void crossFunctionNull() throws Exception {
String message = ((EvalError) invoke("cross", null, "My Address Book", "friend")).message;
Assert.assertTrue(message.contains(Cross.EVAL_ERROR_MESSAGE),
String.format("Message should contain `%s` but is `%s`", Cross.EVAL_ERROR_MESSAGE, message));
}
/**
* rest of cells shows "Error: cross expects a string or cell, a project name to join with, and a column name in
* that project"
*/
@Test
public void crossFunctionCalendarInstance() throws Exception {
String message = ((EvalError) invoke("cross", Calendar.getInstance(), "My Address Book", "friend")).message;
Assert.assertTrue(message.contains(Cross.EVAL_ERROR_MESSAGE),
String.format("Message should contain `%s` but is `%s`", Cross.EVAL_ERROR_MESSAGE, message));
}
/** /**
* Lookup a control function by name and invoke it with a variable number of args * Lookup a control function by name and invoke it with a variable number of args
*/ */