Made facets deal with java.util.Collection rather than just Object[].

Documented the browsing.* packages.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@330 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-03-21 07:14:39 +00:00
parent d90e75dff1
commit 7648126a5e
17 changed files with 284 additions and 99 deletions

View File

@ -7,6 +7,10 @@ import com.metaweb.gridworks.browsing.filters.RowFilter;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Encapsulate logic for visiting rows that match all given row filters. Also visit
* context rows and dependent rows if configured so.
*/
public class ConjunctiveFilteredRows implements FilteredRows {
final protected List<RowFilter> _rowFilters = new LinkedList<RowFilter>();
final protected boolean _includeContextual;
@ -22,61 +26,67 @@ public class ConjunctiveFilteredRows implements FilteredRows {
}
public void accept(Project project, RowVisitor visitor) {
int lastVisitedRow = -1;
int lastRecordRowAccepted = -1;
int lastVisitedRowRowIndex = -1;
int lastRecordRowAcceptedRowIndex = -1;
int c = project.rows.size();
for (int i = 0; i < c; i++) {
Row row = project.rows.get(i);
for (int rowIndex = 0; rowIndex < c; rowIndex++) {
Row row = project.rows.get(rowIndex);
if (checkRow(project, i, row)) {
if (matchRow(project, rowIndex, row)) {
if (row.recordIndex >= 0) {
// this is a record row itself
lastRecordRowAccepted = i;
lastRecordRowAcceptedRowIndex = rowIndex; // this is a record row itself
}
visitRow(project, visitor, i, row, lastVisitedRow);
visitRow(project, visitor, rowIndex, row, lastVisitedRowRowIndex);
lastVisitedRow = i;
lastVisitedRowRowIndex = rowIndex;
} else if (
// this row doesn't match by itself but ...
// we want to include dependent rows
_includeDependent &&
// and this row is a dependent row since it's not a record row
row.recordIndex < 0 &&
row.contextRows != null &&
row.contextRows.size() > 0) {
row.contextRows.size() > 0
) {
if (row.contextRows.get(0) == lastRecordRowAccepted) {
visitor.visit(project, i, row, false, true);
lastVisitedRow = i;
if (row.contextRows.get(0) == lastRecordRowAcceptedRowIndex) {
// this row depends on the last previously matched record row,
// so we visit it as well as a dependent row
visitor.visit(project, rowIndex, row, false, true);
lastVisitedRowRowIndex = rowIndex;
}
}
}
}
protected void visitRow(Project project, RowVisitor visitor, int rowIndex, Row row, int lastVisitedRow) {
if (_includeContextual) {
if (row.contextRows != null && lastVisitedRow < rowIndex - 1) {
for (int contextRowIndex : row.contextRows) {
if (contextRowIndex > lastVisitedRow) {
visitor.visit(
project,
contextRowIndex,
project.rows.get(contextRowIndex),
true,
false
);
lastVisitedRow = contextRowIndex;
}
if (_includeContextual && // we need to include any context row and
row.contextRows != null && // this row itself isn't a context row and
lastVisitedRow < rowIndex - 1 // there is definitely some rows before this row
// that we haven't visited yet
) {
for (int contextRowIndex : row.contextRows) {
if (contextRowIndex > lastVisitedRow) {
visitor.visit(
project,
contextRowIndex,
project.rows.get(contextRowIndex),
true, // is visited as a context row
false // is not visited as a dependent row
);
lastVisitedRow = contextRowIndex;
}
}
visitor.visit(project, rowIndex, row, false, false);
} else {
visitor.visit(project, rowIndex, row, false, false);
}
visitor.visit(project, rowIndex, row, false, false);
}
protected boolean checkRow(Project project, int rowIndex, Row row) {
protected boolean matchRow(Project project, int rowIndex, Row row) {
for (RowFilter rowFilter : _rowFilters) {
if (!rowFilter.filterRow(project, rowIndex, row)) {
return false;

View File

@ -7,9 +7,16 @@ import org.json.JSONWriter;
import com.metaweb.gridworks.Jsonizable;
/**
* Store a value and its text label, in case the value is not a string itself.
* For instance, if a value is a date, then its label can be one particular
* rendering of that date.
*
* Facet choices that are presented to the user as text are stored as decorated values.
*/
public class DecoratedValue implements Jsonizable {
final public Object value;
final public String label;
final public String label;
public DecoratedValue(Object value, String label) {
this.value = value;

View File

@ -17,6 +17,9 @@ import com.metaweb.gridworks.browsing.facets.TextSearchFacet;
import com.metaweb.gridworks.browsing.filters.RowFilter;
import com.metaweb.gridworks.model.Project;
/**
* Faceted browsing engine.
*/
public class Engine implements Jsonizable {
protected Project _project;
protected List<Facet> _facets = new LinkedList<Facet>();

View File

@ -2,6 +2,18 @@ package com.metaweb.gridworks.browsing;
import com.metaweb.gridworks.model.Project;
/**
 * Contract for a row selection: an implementation decides, by whatever
 * criteria it embodies, which rows of a project match and which do not.
 */
public interface FilteredRows {
    /**
     * Walks the rows of the given project, works out which of them match, and
     * invokes visitor.visit() for each matching row and, possibly, for the
     * context and dependent rows of those matches.
     *
     * @param project the project whose rows are examined
     * @param visitor the visitor whose visit() method is called for the selected rows
     */
    void accept(Project project, RowVisitor visitor);
}

View File

@ -3,12 +3,20 @@ package com.metaweb.gridworks.browsing;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Interface for visiting rows one by one. The rows visited are only those that match some
* particular criteria, such as facets' constraints, or those that are related to the matched
* rows. The related rows can be those that the matched rows depend on, or those that depend
* on the matched rows.
*/
public interface RowVisitor {
public boolean visit(
Project project,
int rowIndex, // zero-based row index
Row row,
boolean contextual, // true if this row is included because it's the context row of an included row
boolean dependent // true if this row is included because it depends on an included row
boolean contextual, // true if this row is included because it's the context row
// of a matched row, that is, a matched row depends on it
boolean dependent // true if this row is included because it depends on a matched row,
// that is, one of its context rows is a matched row
);
}

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.browsing.facets;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
@ -12,10 +13,20 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Visit matched rows and group them into facet choices based on the values computed
* from a given expression.
*/
public class ExpressionNominalRowGrouper implements RowVisitor {
/*
* Configuration
*/
final protected Evaluable _evaluable;
final protected int _cellIndex;
/*
* Computed results
*/
final public Map<Object, NominalFacetChoice> choices = new HashMap<Object, NominalFacetChoice>();
public int blankCount = 0;
public int errorCount = 0;
@ -32,14 +43,22 @@ public class ExpressionNominalRowGrouper implements RowVisitor {
ExpressionUtils.bind(bindings, row, rowIndex, cell);
Object value = _evaluable.evaluate(bindings);
if (value != null && value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
processValue(v);
}
} else {
processValue(value);
if (value != null) {
if (value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
processValue(v);
}
return false;
} else if (value instanceof Collection<?>) {
for (Object v : ExpressionUtils.toObjectCollection(value)) {
processValue(v);
}
return false;
} // else, fall through
}
processValue(value);
return false;
}

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.browsing.facets;
import java.util.Collection;
import java.util.Properties;
import com.metaweb.gridworks.browsing.RowVisitor;
@ -9,13 +10,22 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Visit matched rows and slot them into bins based on the numbers computed
* from a given expression.
*/
public class ExpressionNumericRowBinner implements RowVisitor {
/*
* Configuration
*/
final protected Evaluable _evaluable;
final protected int _cellIndex;
final protected NumericBinIndex _index;
final protected NumericBinIndex _index; // base bins
/*
* Computed results
*/
final public int[] bins;
public int numericCount;
public int nonNumericCount;
public int blankCount;
@ -35,14 +45,22 @@ public class ExpressionNumericRowBinner implements RowVisitor {
ExpressionUtils.bind(bindings, row, rowIndex, cell);
Object value = _evaluable.evaluate(bindings);
if (value != null && value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
processValue(v);
}
} else {
processValue(value);
if (value != null) {
if (value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
processValue(v);
}
return false;
} else if (value instanceof Collection<?>) {
for (Object v : ExpressionUtils.toObjectCollection(value)) {
processValue(v);
}
return false;
} // else, fall through
}
processValue(value);
return false;
}

View File

@ -7,6 +7,9 @@ import com.metaweb.gridworks.browsing.FilteredRows;
import com.metaweb.gridworks.browsing.filters.RowFilter;
import com.metaweb.gridworks.model.Project;
/**
* Interface of facets.
*/
public interface Facet extends Jsonizable {
public RowFilter getRowFilter();

View File

@ -21,24 +21,31 @@ import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.util.JSONUtilities;
public class ListFacet implements Facet {
protected List<NominalFacetChoice> _selection = new LinkedList<NominalFacetChoice>();
/*
* Configuration
*/
protected String _name;
protected String _expression;
protected String _columnName;
// If true, then facet won't show the blank and error choices
protected boolean _omitBlank;
protected boolean _omitError;
protected List<NominalFacetChoice> _selection = new LinkedList<NominalFacetChoice>();
protected boolean _selectBlank;
protected boolean _selectError;
protected String _name;
protected String _expression;
protected String _columnName;
/*
* Derived configuration
*/
protected int _cellIndex;
protected Evaluable _eval;
protected String _errorMessage;
// computed
/*
* Computed results
*/
protected List<NominalFacetChoice> _choices = new LinkedList<NominalFacetChoice>();
protected int _blankCount;
protected int _errorCount;
@ -157,9 +164,20 @@ public class ListFacet implements Facet {
for (NominalFacetChoice choice : _selection) {
String valueString = choice.decoratedValue.value.toString();
if (grouper.choices.containsKey(valueString)) {
grouper.choices.get(valueString).selected = true;
} else {
/*
* A selected choice can have zero count if it is selected together
* with other choices, and some other facets' constraints eliminate
* all rows projected to this choice altogether. For example, if you
* select both "car" and "bicycle" in the "type of vehicle" facet, and
* then constrain the "wheels" facet to more than 2, then the "bicycle"
* choice now has zero count even if it's still selected. The grouper
* won't be able to detect the "bicycle" choice, so we need to inject
* that choice into the choice list ourselves.
*/
choice.count = 0;
_choices.add(choice);
}

View File

@ -8,6 +8,10 @@ import org.json.JSONWriter;
import com.metaweb.gridworks.Jsonizable;
import com.metaweb.gridworks.browsing.DecoratedValue;
/**
* Store a facet choice that has a decorated value, a count of matched rows,
* and a flag of whether it has been selected.
*/
public class NominalFacetChoice implements Jsonizable {
final public DecoratedValue decoratedValue;
public int count;

View File

@ -1,6 +1,7 @@
package com.metaweb.gridworks.browsing.facets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
@ -10,6 +11,16 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* A utility class for computing the base bins that form the base histograms of
* numeric range facets. It evaluates an expression on all the rows of a project to
* get numeric values, determines how many bins to distribute those values in, and
* bins the rows accordingly.
*
* This class processes all rows rather than just the filtered rows because it
* needs to compute the base bins of a numeric range facet, which remain unchanged
* as the user interacts with the facet.
*/
public class NumericBinIndex {
private double _min;
private double _max;
@ -38,6 +49,12 @@ public class NumericBinIndex {
processValue(((Number) v).doubleValue(), allValues);
}
}
} else if (value instanceof Collection<?>) {
for (Object v : ExpressionUtils.toObjectCollection(value)) {
if (v instanceof Number) {
processValue(((Number) v).doubleValue(), allValues);
}
}
} else if (value instanceof Number) {
processValue(((Number) value).doubleValue(), allValues);
}

View File

@ -17,33 +17,45 @@ import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.util.JSONUtilities;
public class RangeFacet implements Facet {
protected String _name;
protected String _expression;
protected String _columnName;
/*
* Configuration, from the client side
*/
protected String _name; // name of facet
protected String _expression; // expression to compute numeric value(s) per row
protected String _columnName; // column to base expression on, if any
protected String _mode; // "range", "min", "max"
protected double _from; // the numeric selection
protected double _to;
protected boolean _selectNumeric; // whether the numeric selection applies, default true
protected boolean _selectNonNumeric;
protected boolean _selectBlank;
protected boolean _selectError;
/*
* Derived configuration data
*/
protected int _cellIndex;
protected Evaluable _eval;
protected String _errorMessage;
protected boolean _selected; // false if we're certain that all rows will match
// and there isn't any filtering to do
protected String _mode;
/*
* Computed data, to return to the client side
*/
protected double _min;
protected double _max;
protected double _step;
protected int[] _baseBins;
protected int[] _bins;
protected int _numericCount;
protected int _nonNumericCount;
protected int _blankCount;
protected int _errorCount;
protected double _from;
protected double _to;
protected boolean _selected;
protected boolean _selectNumeric;
protected boolean _selectNonNumeric;
protected boolean _selectBlank;
protected boolean _selectError;
public RangeFacet() {
}

View File

@ -15,15 +15,21 @@ import com.metaweb.gridworks.gel.ast.VariableExpr;
import com.metaweb.gridworks.model.Project;
public class TextSearchFacet implements Facet {
/*
* Configuration
*/
protected String _name;
protected String _columnName;
protected int _cellIndex;
protected String _query;
protected Pattern _pattern;
protected String _mode;
protected boolean _caseSensitive;
/*
* Derived configuration
*/
protected int _cellIndex;
protected Pattern _pattern;
public TextSearchFacet() {
}
@ -42,6 +48,7 @@ public class TextSearchFacet implements Facet {
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
_name = o.getString("name");
_columnName = o.getString("columnName");
_cellIndex = project.columnModel.getColumnByName(_columnName).getCellIndex();
if (!o.isNull("query")) {

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.browsing.filters;
import java.util.Collection;
import java.util.Properties;
import com.metaweb.gridworks.expr.Evaluable;
@ -8,14 +9,28 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Judge if a row matches by evaluating a given expression on the row, based on a particular
* column, and checking the result. It's a match if the result is any one of a given list of
* values, or if the result is blank or error and we want blank or error values.
*/
public class ExpressionEqualRowFilter implements RowFilter {
final protected Evaluable _evaluable;
final protected int _cellIndex;
final protected Evaluable _evaluable; // the expression to evaluate
final protected int _cellIndex; // the expression is based on this column;
// -1 if based on no column in particular,
// for expressions such as "row.starred".
final protected Object[] _matches;
final protected boolean _selectBlank;
final protected boolean _selectError;
public ExpressionEqualRowFilter(Evaluable evaluable, int cellIndex, Object[] matches, boolean selectBlank, boolean selectError) {
public ExpressionEqualRowFilter(
Evaluable evaluable,
int cellIndex,
Object[] matches,
boolean selectBlank,
boolean selectError
) {
_evaluable = evaluable;
_cellIndex = cellIndex;
_matches = matches;
@ -30,17 +45,26 @@ public class ExpressionEqualRowFilter implements RowFilter {
ExpressionUtils.bind(bindings, row, rowIndex, cell);
Object value = _evaluable.evaluate(bindings);
if (value != null && value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
if (testValue(v)) {
return true;
if (value != null) {
if (value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
if (testValue(v)) {
return true;
}
}
}
} else {
return testValue(value);
return false;
} else if (value instanceof Collection<?>) {
for (Object v : ExpressionUtils.toObjectCollection(value)) {
if (testValue(v)) {
return true;
}
}
return false;
} // else, fall through
}
return false;
return testValue(value);
}
protected boolean testValue(Object v) {

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.browsing.filters;
import java.util.Collection;
import java.util.Properties;
import com.metaweb.gridworks.expr.Evaluable;
@ -8,6 +9,12 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Judge if a row matches by evaluating a given expression on the row, based on a particular
* column, and checking the result. It's a match if the result satisfies some numeric comparisons,
* or if the result is non-numeric or blank or error and we want non-numeric or blank or error
* values.
*/
abstract public class ExpressionNumberComparisonRowFilter implements RowFilter {
final protected Evaluable _evaluable;
final protected int _cellIndex;
@ -33,25 +40,32 @@ abstract public class ExpressionNumberComparisonRowFilter implements RowFilter {
}
public boolean filterRow(Project project, int rowIndex, Row row) {
Cell cell = row.getCell(_cellIndex);
Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex);
Properties bindings = ExpressionUtils.createBindings(project);
ExpressionUtils.bind(bindings, row, rowIndex, cell);
Object value = _evaluable.evaluate(bindings);
if (value != null && value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
if (checkValue(v)) {
return true;
if (value != null) {
if (value.getClass().isArray()) {
Object[] a = (Object[]) value;
for (Object v : a) {
if (checkValue(v)) {
return true;
}
}
}
} else {
if (checkValue(value)) {
return true;
}
return false;
} else if (value instanceof Collection<?>) {
for (Object v : ExpressionUtils.toObjectCollection(value)) {
if (checkValue(v)) {
return true;
}
}
return false;
} // else, fall through
}
return false;
return checkValue(value);
}
protected boolean checkValue(Object v) {

View File

@ -8,9 +8,13 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
* Judge if a row matches by evaluating a given expression on the row, based on a particular
* column, and checking the result. It's a match if the result satisfies some string comparisons.
*/
abstract public class ExpressionStringComparisonRowFilter implements RowFilter {
final protected Evaluable _evaluable;
final protected int _cellIndex;
final protected Evaluable _evaluable;
final protected int _cellIndex;
public ExpressionStringComparisonRowFilter(Evaluable evaluable, int cellIndex) {
_evaluable = evaluable;
@ -18,7 +22,8 @@ abstract public class ExpressionStringComparisonRowFilter implements RowFilter {
}
public boolean filterRow(Project project, int rowIndex, Row row) {
Cell cell = row.getCell(_cellIndex);
Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex);
Properties bindings = ExpressionUtils.createBindings(project);
ExpressionUtils.bind(bindings, row, rowIndex, cell);

View File

@ -3,6 +3,10 @@ package com.metaweb.gridworks.browsing.filters;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
/**
 * Contract for deciding whether a single row matches or fails some particular
 * criterion, for example the constraint imposed by a facet.
 */
public interface RowFilter {
    /**
     * Tests one row against this filter's criterion.
     *
     * @param project  the project the row belongs to
     * @param rowIndex the index of the row within the project
     * @param row      the row to test
     * @return true if the row matches, false otherwise
     */
    boolean filterRow(Project project, int rowIndex, Row row);
}