Refactor TimeRangeFacet for JSON serialization

This commit is contained in:
Antonin Delpeuch 2018-09-04 18:19:18 +01:00
parent 8e8b3c18c5
commit 1186776235
2 changed files with 209 additions and 43 deletions

View File

@ -41,37 +41,128 @@ import org.json.JSONWriter;
import com.google.refine.browsing.FilteredRecords;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RecordFilter;
import com.google.refine.browsing.RowFilter;
import com.google.refine.browsing.filters.AnyRowRecordFilter;
import com.google.refine.browsing.filters.ExpressionTimeComparisonRowFilter;
import com.google.refine.browsing.util.ExpressionBasedRowEvaluable;
import com.google.refine.browsing.util.ExpressionTimeValueBinner;
import com.google.refine.browsing.util.RowEvaluable;
import com.google.refine.browsing.util.TimeBinIndex;
import com.google.refine.browsing.util.TimeBinRecordIndex;
import com.google.refine.browsing.util.TimeBinRowIndex;
import com.google.refine.expr.Evaluable;
import com.google.refine.expr.MetaParser;
import com.google.refine.expr.ParsingException;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
public class TimeRangeFacet extends RangeFacet {
public class TimeRangeFacet implements Facet {
/*
* Configuration, from the client side
*/
public static class TimeRangeFacetConfig implements FacetConfig {
protected String _name; // name of facet
protected String _expression; // expression to compute numeric value(s) per row
protected String _columnName; // column to base expression on, if any
protected double _from; // the numeric selection
protected double _to;
protected boolean _selectTime; // whether the time selection applies, default true
protected boolean _selectNonTime;
protected boolean _selectBlank;
protected boolean _selectError;
protected boolean _selected; // false if we're certain that all rows will match
// and there isn't any filtering to do
@Override
public void write(JSONWriter writer, Properties options)
throws JSONException {
writer.object();
writer.key("type"); writer.value("timerange");
writer.key("name"); writer.value(_name);
writer.key("expression"); writer.value(_expression);
writer.key("columnName"); writer.value(_columnName);
writer.key("selectTime"); writer.value(_selectTime);
writer.key("selectNonTime"); writer.value(_selectNonTime);
writer.key("selectBlank"); writer.value(_selectBlank);
writer.key("selectError"); writer.value(_selectError);
writer.key(FROM); writer.value((long)_from);
writer.key(TO); writer.value((long)_to);
writer.endObject();
}
public void initializeFromJSON(JSONObject o) throws JSONException {
_name = o.getString("name");
_expression = o.getString("expression");
_columnName = o.getString("columnName");
if (o.has(FROM) || o.has(TO)) {
_from = o.has(FROM) ? o.getDouble(FROM) : 0;
_to = o.has(TO) ? o.getDouble(TO) : 0;
_selected = true;
}
_selectTime = JSONUtilities.getBoolean(o, "selectTime", true);
_selectNonTime = JSONUtilities.getBoolean(o, "selectNonTime", true);
_selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true);
_selectError = JSONUtilities.getBoolean(o, "selectError", true);
if (!_selectTime || !_selectNonTime || !_selectBlank || !_selectError) {
_selected = true;
}
}
@Override
public TimeRangeFacet apply(Project project) {
TimeRangeFacet facet = new TimeRangeFacet();
facet.initializeFromConfig(this, project);
return facet;
}
}
protected TimeRangeFacetConfig _config;
/*
* Derived configuration data
*/
protected int _cellIndex;
protected Evaluable _eval;
protected String _errorMessage;
protected double _min;
protected double _max;
protected double _step;
protected int[] _baseBins;
protected int[] _bins;
/*
* Computed data
*/
protected int _baseTimeCount;
protected int _baseNonTimeCount;
protected int _baseBlankCount;
protected int _baseErrorCount;
protected int _timeCount;
protected int _nonTimeCount;
protected int _blankCount;
protected int _errorCount;
protected static final String MIN = "min";
protected static final String MAX = "max";
protected static final String TO = "to";
protected static final String FROM = "from";
@Override
public void write(JSONWriter writer, Properties options) throws JSONException {
writer.object();
writer.key("name"); writer.value(_name);
writer.key("expression"); writer.value(_expression);
writer.key("columnName"); writer.value(_columnName);
writer.key("name"); writer.value(_config._name);
writer.key("expression"); writer.value(_config._expression);
writer.key("columnName"); writer.value(_config._columnName);
if (_errorMessage != null) {
writer.key("error"); writer.value(_errorMessage);
@ -93,8 +184,8 @@ public class TimeRangeFacet extends RangeFacet {
}
writer.endArray();
writer.key(FROM); writer.value(_from);
writer.key(TO); writer.value(_to);
writer.key(FROM); writer.value(_config._from);
writer.key(TO); writer.value(_config._to);
}
writer.key("baseTimeCount"); writer.value(_baseTimeCount);
@ -112,52 +203,40 @@ public class TimeRangeFacet extends RangeFacet {
@Override
public void initializeFromJSON(Project project, JSONObject o) throws JSONException {
_name = o.getString("name");
_expression = o.getString("expression");
_columnName = o.getString("columnName");
TimeRangeFacetConfig config = new TimeRangeFacetConfig();
config.initializeFromJSON(o);
initializeFromConfig(config, project);
}
if (_columnName.length() > 0) {
Column column = project.columnModel.getColumnByName(_columnName);
public void initializeFromConfig(TimeRangeFacetConfig config, Project project) {
_config = config;
if (_config._columnName.length() > 0) {
Column column = project.columnModel.getColumnByName(_config._columnName);
if (column != null) {
_cellIndex = column.getCellIndex();
} else {
_errorMessage = "No column named " + _columnName;
_errorMessage = "No column named " + _config._columnName;
}
} else {
_cellIndex = -1;
}
try {
_eval = MetaParser.parse(_expression);
_eval = MetaParser.parse(_config._expression);
} catch (ParsingException e) {
_errorMessage = e.getMessage();
}
if (o.has(FROM) || o.has(TO)) {
_from = o.has(FROM) ? o.getDouble(FROM) : _min;
_to = o.has(TO) ? o.getDouble(TO) : _max;
_selected = true;
}
_selectTime = JSONUtilities.getBoolean(o, "selectTime", true);
_selectNonTime = JSONUtilities.getBoolean(o, "selectNonTime", true);
_selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true);
_selectError = JSONUtilities.getBoolean(o, "selectError", true);
if (!_selectTime || !_selectNonTime || !_selectBlank || !_selectError) {
_selected = true;
}
}
@Override
public RowFilter getRowFilter(Project project) {
if (_eval != null && _errorMessage == null && _selected) {
if (_eval != null && _errorMessage == null && _config._selected) {
return new ExpressionTimeComparisonRowFilter(
getRowEvaluable(project), _selectTime, _selectNonTime, _selectBlank, _selectError) {
getRowEvaluable(project), _config._selectTime, _config._selectNonTime, _config._selectBlank, _config._selectError) {
@Override
protected boolean checkValue(long t) {
return t >= _from && t <= _to;
return t >= _config._from && t <= _config._to;
};
};
} else {
@ -171,7 +250,7 @@ public class TimeRangeFacet extends RangeFacet {
RowEvaluable rowEvaluable = getRowEvaluable(project);
Column column = project.columnModel.getColumnByCellIndex(_cellIndex);
String key = "time-bin:row-based:" + _expression;
String key = "time-bin:row-based:" + _config._expression;
TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key);
if (index == null) {
index = new TimeBinRowIndex(project, rowEvaluable);
@ -193,7 +272,7 @@ public class TimeRangeFacet extends RangeFacet {
RowEvaluable rowEvaluable = getRowEvaluable(project);
Column column = project.columnModel.getColumnByCellIndex(_cellIndex);
String key = "time-bin:record-based:" + _expression;
String key = "time-bin:record-based:" + _config._expression;
TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key);
if (index == null) {
index = new TimeBinRecordIndex(project, rowEvaluable);
@ -221,12 +300,12 @@ public class TimeRangeFacet extends RangeFacet {
_baseBlankCount = index.getBlankRowCount();
_baseErrorCount = index.getErrorRowCount();
if (_selected) {
_from = Math.max(_from, _min);
_to = Math.min(_to, _max);
if (_config._selected) {
_config._from = Math.max(_config._from, _min);
_config._to = Math.min(_config._to, _max);
} else {
_from = _min;
_to = _max;
_config._from = _min;
_config._to = _max;
}
}
@ -237,4 +316,14 @@ public class TimeRangeFacet extends RangeFacet {
_blankCount = binner.blankCount;
_errorCount = binner.errorCount;
}
@Override
public RecordFilter getRecordFilter(Project project) {
RowFilter rowFilter = getRowFilter(project);
return rowFilter == null ? null : new AnyRowRecordFilter(rowFilter);
}
protected RowEvaluable getRowEvaluable(Project project) {
return new ExpressionBasedRowEvaluable(_config._columnName, _cellIndex, _eval);
}
}

View File

@ -0,0 +1,77 @@
package com.google.refine.tests.browsing.facets;
import java.time.OffsetDateTime;
import org.json.JSONObject;
import org.testng.annotations.Test;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.facets.TimeRangeFacet;
import com.google.refine.browsing.facets.TimeRangeFacet.TimeRangeFacetConfig;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
public class TimeRangeFacetTests extends RefineTest {
public static String facetJson = "{"
+ "\"name\":\"my column\","
+ "\"expression\":\"value\","
+ "\"columnName\":\"my column\","
+ "\"min\":1.199329445E12,"
+ "\"max\":1.51496695E12,"
+ "\"step\":3.1556952E10,"
+ "\"bins\":[1,0,0,0,1,0,0,0,0,0,1],"
+ "\"baseBins\":[1,0,0,0,1,0,0,0,0,0,1],"
+ "\"from\":1.262443349E12,"
+ "\"to\":1.51496695E12,"
+ "\"baseTimeCount\":3,"
+ "\"baseNonTimeCount\":1,"
+ "\"baseBlankCount\":0,"
+ "\"baseErrorCount\":0,"
+ "\"timeCount\":3,"
+ "\"nonTimeCount\":1,"
+ "\"blankCount\":0,"
+ "\"errorCount\":0}";
public static String configJson = "{\n" +
" \"selectNonTime\": true,\n" +
" \"expression\": \"value\",\n" +
" \"selectBlank\": true,\n" +
" \"selectError\": true,\n" +
" \"selectTime\": true,\n" +
" \"name\": \"my column\",\n" +
" \"from\": 1262443349000,\n" +
" \"to\": 1514966950000,\n" +
" \"type\": \"timerange\",\n" +
" \"columnName\": \"my column\"\n" +
" }";
@Test
public void serializeTimeRangeFacetConfig() {
TimeRangeFacetConfig config = new TimeRangeFacetConfig();
config.initializeFromJSON(new JSONObject(configJson));
TestUtils.isSerializedTo(config, configJson);
}
@Test
public void serializeTimeRangeFacet() {
Project project = createCSVProject("my column\n"
+ "placeholder\n"
+ "nontime\n"
+ "placeholder\n"
+ "placeholder\n");
project.rows.get(0).cells.set(0, new Cell(OffsetDateTime.parse("2018-01-03T08:09:10Z"), null));
project.rows.get(2).cells.set(0, new Cell(OffsetDateTime.parse("2008-01-03T03:04:05Z"), null));
project.rows.get(3).cells.set(0, new Cell(OffsetDateTime.parse("2012-04-05T02:00:01Z"), null));
Engine engine = new Engine(project);
TimeRangeFacetConfig config = new TimeRangeFacetConfig();
config.initializeFromJSON(new JSONObject(configJson));
TimeRangeFacet facet = config.apply(project);
facet.computeChoices(project, engine.getAllFilteredRows());
TestUtils.isSerializedTo(facet, facetJson);
}
}