Fix for issue 358 from Tomaz Solc. Don't return a NaN when comparing two 0-length word lists.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2088 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2011-06-06 21:30:46 +00:00
parent f674a96973
commit 73acd497e9
2 changed files with 50 additions and 0 deletions

View File

@ -429,6 +429,10 @@ public class StandardReconConfig extends ReconConfig {
} }
static protected double wordDistance(Set<String> longWords, Set<String> shortWords) { static protected double wordDistance(Set<String> longWords, Set<String> shortWords) {
if (longWords.size() == 0) {
return 0.0;
}
double common = 0; double common = 0;
for (String word : shortWords) { for (String word : shortWords) {
if (longWords.contains(word)) { if (longWords.contains(word)) {

View File

@ -0,0 +1,46 @@
package com.google.refine.tests.model;
import java.util.ArrayList;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.tests.RefineTest;
/**
 * Tests for the word-distance computation exposed by {@link StandardReconConfig}.
 *
 * <p>The distance is reached through a small test-only subclass, since the tests
 * call {@code wordDistance(String, String)} via inheritance rather than directly.
 */
public class ReconTests extends RefineTest {

    /** Tolerance used when comparing floating-point distances. */
    private static final double DELTA = 1e-9;

    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    /**
     * Test-only subclass that re-exports {@code wordDistance(String, String)}
     * as a public method.
     *
     * <p>Declared {@code static} because it never touches the enclosing test
     * instance; a non-static inner class would carry a hidden reference to it.
     * NOTE(review): presumably the inherited method is not publicly accessible,
     * which is why this wrapper exists; confirm against StandardReconConfig.
     */
    private static class StandardReconConfigTest extends StandardReconConfig {

        /** Builds a config with empty string settings and no column details. */
        public StandardReconConfigTest() {
            super("", "", "", "", "", false, new ArrayList<ColumnDetail>());
        }

        /**
         * Delegates to the inherited {@code wordDistance(String, String)}.
         *
         * @param s1 first string to compare
         * @param s2 second string to compare
         * @return the value returned by {@code wordDistance(s1, s2)}
         */
        public double wordDistanceTest(String s1, String s2) {
            return wordDistance(s1, s2);
        }
    }

    /** Comparing "Foo" with "Foo bar" is expected to yield a distance of 0.5. */
    @Test
    public void wordDistance() {
        StandardReconConfigTest config = new StandardReconConfigTest();
        double distance = config.wordDistanceTest("Foo", "Foo bar");
        // Compare with a tolerance rather than exact floating-point equality.
        Assert.assertEquals(distance, 0.5, DELTA);
    }

    /**
     * Regression test for issue 358: the distance must be a finite number for
     * these inputs.
     *
     * <p>NOTE(review): going by the test name, both inputs are expected to
     * consist solely of stop words and so reduce to empty word lists; confirm
     * against the word-breaking logic in StandardReconConfig.
     */
    @Test
    public void wordDistanceOnlyStopwords() {
        StandardReconConfigTest config = new StandardReconConfigTest();
        double distance = config.wordDistanceTest("On and On", "On and On and On");
        Assert.assertFalse(Double.isInfinite(distance), "distance must not be infinite");
        Assert.assertFalse(Double.isNaN(distance), "distance must not be NaN");
    }
}