Fix identifier space for cells reconciled to New

This commit is contained in:
Antonin Delpeuch 2018-04-15 01:20:25 +02:00
parent 6235786a5b
commit 2b237433f9
2 changed files with 78 additions and 2 deletions

View File

@ -56,6 +56,7 @@ import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Row;
import com.google.refine.model.changes.CellChange;
import com.google.refine.model.changes.ReconChange;
import com.google.refine.model.recon.ReconConfig;
import com.google.refine.operations.EngineDependentMassCellOperation;
import com.google.refine.operations.OperationRegistry;
@ -185,7 +186,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
@Override
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID) throws Exception {
Column column = project.columnModel.getColumnByName(_columnName);
ReconConfig reconConfig = column.getReconConfig();
return new RowVisitor() {
int _cellIndex;
List<CellChange> _cellChanges;
@ -221,7 +223,15 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
Recon recon = null;
if (_judgment == Judgment.New && _shareNewTopics) {
if (_sharedNewRecon == null) {
_sharedNewRecon = new Recon(_historyEntryID, null, null);
if (reconConfig != null) {
_sharedNewRecon = reconConfig.createNewRecon(_historyEntryID);
} else {
// This should only happen if we are creating new cells
// in a column that has not been reconciled before.
// In that case, we do not know which reconciliation service
// to use, so we fall back on the default one.
_sharedNewRecon = new Recon(_historyEntryID, null, null);
}
_sharedNewRecon.judgment = Judgment.New;
_sharedNewRecon.judgmentBatchSize = 0;
_sharedNewRecon.judgmentAction = "similar";

View File

@ -0,0 +1,66 @@
package com.google.refine.tests.operations.cell;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.util.Collections;
import java.util.Properties;
import org.json.JSONObject;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.recon.ReconConfig;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation;
import com.google.refine.process.Process;
import com.google.refine.tests.RefineTest;
/**
 * Tests for {@link ReconJudgeSimilarCellsOperation}.
 *
 * <p>The single test here checks that cells judged as {@code New} through this
 * operation receive a {@link Recon} whose identifier space is the one carried
 * by the {@link ReconConfig} attached to the column.
 */
public class ReconJudgeSimilarCellsTests extends RefineTest {

    /** Engine configuration shared by the tests: row-based mode. */
    static final JSONObject ENGINE_CONFIG = new JSONObject("{\"mode\":\"row-based\"}");

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    /**
     * Judges the cells of column "A" that are similar to "foo" as {@code New}
     * and checks that the resulting recon carries the identifier space set on
     * the column's recon config, while the other row is left without a recon.
     *
     * @throws Exception if creating or running the operation's process fails
     */
    @Test
    public void testMarkNewTopics() throws Exception {
        // Kept in one place so the fixture and the assertion below cannot drift apart.
        String identifierSpace = "http://my.database/entity/";

        Project project = createCSVProject(
                "A,B\n"
                + "foo,bar\n"
                + "alpha,beta\n");

        // Attach a recon config to the first column ("A") before running the operation.
        Column column = project.columnModel.columns.get(0);
        ReconConfig config = new StandardReconConfig(
                "http://my.database/recon_service",
                identifierSpace,
                "http://my.database/schema/",
                null,
                null,
                true,
                Collections.emptyList());
        column.setReconConfig(config);

        AbstractOperation op = new ReconJudgeSimilarCellsOperation(
                ENGINE_CONFIG,
                "A",
                "foo",
                Recon.Judgment.New,
                null,
                true);
        Process process = op.createProcess(project, new Properties());
        process.performImmediate();

        // The assertions are the JUnit ones statically imported by this file,
        // so the argument order is (expected, actual).
        Cell judgedCell = project.rows.get(0).cells.get(0);
        assertEquals(Recon.Judgment.New, judgedCell.recon.judgment);
        assertEquals(identifierSpace, judgedCell.recon.identifierSpace);

        // The "alpha" cell in the second row must not have been given a recon.
        assertNull(project.rows.get(1).cells.get(0).recon);
    }
}