Fix imprecise facet statistics in records mode (#2607)

* Fix bug in choice counts for records mode

* Add test for value grouper on records

* Refactor and comment code

* Count distinct instances of null/blank data

* Update test to check for blank data count in records

* Remove unnecessary import statement
This commit is contained in:
Joanne Ong 2020-06-16 01:38:50 +08:00 committed by GitHub
parent 947356ddad
commit d57d76f7df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 10 deletions

View File

@ -123,21 +123,21 @@ public class ExpressionNominalValueGrouper implements RowVisitor, RecordVisitor
@Override
public boolean visit(Project project, Record record) {
hasError = false;
hasBlank = false;
Properties bindings = ExpressionUtils.createBindings(project);
for (int r = record.fromRowIndex; r < record.toRowIndex; r++) {
hasError = false;
hasBlank = false;
Row row = project.rows.get(r);
visitRow(project, r, row, bindings, record.recordIndex);
}
if (hasError) {
errorCount++;
}
if (hasBlank) {
blankCount++;
if (hasError) {
errorCount++;
}
if (hasBlank) {
blankCount++;
}
}
return false;

View File

@ -51,7 +51,6 @@ import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
public class ExpressionNominalValueGrouperTests extends RefineTest {
// dependencies
//Variables
@ -174,4 +173,35 @@ public class ExpressionNominalValueGrouperTests extends RefineTest {
Assert.assertEquals(grouper.choices.get(dateTimeStringValue).decoratedValue.label,dateTimeStringValue);
Assert.assertEquals(grouper.choices.get(dateTimeStringValue).decoratedValue.value.toString(),dateTimeStringValue);
}
/**
 * Feeds every record of a small CSV project through the grouper and checks
 * the resulting blank count and facet choices for column "col2".
 *
 * In the fixture, col2 holds "1" on two rows and is empty on three rows.
 */
@Test
public void expressionNominalValueGrouperRecords() throws Exception {
    // Rows with an empty col1 presumably continue the preceding record
    // (record1 / record2) -- confirm against the record model if in doubt.
    String csvFixture = "col1,col2,col3\n"
            + "record1,1,a\n"
            + ",,a\n"
            + ",,a\n"
            + "record2,,a\n"
            + ",1,a\n";
    project = createCSVProject(csvFixture);

    bindings = new Properties();
    bindings.put("project", project);

    // Group on the raw cell value of col2 (cell index 1).
    eval = MetaParser.parse("value");
    grouper = new ExpressionNominalValueGrouper(eval, "col2", 1);

    try {
        grouper.start(project);
        final int totalRecords = project.recordModel.getRecordCount();
        int recordIdx = 0;
        while (recordIdx < totalRecords) {
            grouper.visit(project, project.recordModel.getRecord(recordIdx));
            recordIdx++;
        }
    } finally {
        // Always close the grouper, even when a visit fails.
        grouper.end(project);
    }

    // Three col2 cells in the fixture are empty; each is expected to be counted.
    Assert.assertEquals(grouper.blankCount, 3);
    // Only one distinct non-blank value exists in col2.
    Assert.assertEquals(grouper.choices.size(), 1);
    Assert.assertTrue(grouper.choices.containsKey(integerStringValue));
    Assert.assertEquals(grouper.choices.get(integerStringValue).count, 2);
}
}