Skip to content

Commit bf2f234

Browse files
committed
Merge branch 'improvement-TEXT-157'
Closes #111
2 parents 19df20d + eacfa36 commit bf2f234

5 files changed

Lines changed: 35 additions & 32 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ The <action> type attribute can be add,update,fix,remove.
5454
<action issue="TEXT-152" type="add" dev="" due-to="@CAPS50">Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length</action>
5555
<action issue="TEXT-153" type="update" dev="" due-to="amirhadadi">Make prefixSet in LookupTranslator a BitSet</action>
5656
<action issue="TEXT-156" type="update" dev="aherbert">Fix the RegexTokenizer to use a static Pattern</action>
57+
<action issue="TEXT-157" type="update" dev="aherbert">Remove rounding from JaccardDistance and JaccardSimilarity</action>
5758
</release>
5859

5960
<release version="1.6" date="2018-10-12" description="Release 1.6">

src/main/java/org/apache/commons/text/similarity/JaccardDistance.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,6 @@ public Double apply(final CharSequence left, final CharSequence right) {
5050
if (left == null || right == null) {
5151
throw new IllegalArgumentException("Input cannot be null");
5252
}
53-
return Math.round((1 - jaccardSimilarity.apply(left, right)) * 100d) / 100d;
53+
return 1.0 - jaccardSimilarity.apply(left, right).doubleValue();
5454
}
5555
}

src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ public Double apply(final CharSequence left, final CharSequence right) {
4848
if (left == null || right == null) {
4949
throw new IllegalArgumentException("Input cannot be null");
5050
}
51-
return Math.round(calculateJaccardSimilarity(left, right) * 100d) / 100d;
51+
return calculateJaccardSimilarity(left, right);
5252
}
5353

5454
/**

src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -36,21 +36,22 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardDistance() {
39-
assertEquals(1.00d, classBeingTested.apply("", ""), 0.00000000000000000001d);
40-
assertEquals(1.00d, classBeingTested.apply("left", ""), 0.00000000000000000001d);
41-
assertEquals(1.00d, classBeingTested.apply("", "right"), 0.00000000000000000001d);
42-
assertEquals(0.25d, classBeingTested.apply("frog", "fog"), 0.00000000000000000001d);
43-
assertEquals(1.00d, classBeingTested.apply("fly", "ant"), 0.00000000000000000001d);
44-
assertEquals(0.78d, classBeingTested.apply("elephant", "hippo"), 0.00000000000000000001d);
45-
assertEquals(0.36d, classBeingTested.apply("ABC Corporation", "ABC Corp"), 0.00000000000000000001d);
46-
assertEquals(0.24d, classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."),
47-
0.00000000000000000001d);
48-
assertEquals(0.11d, classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"),
49-
0.00000000000000000001d);
50-
assertEquals(0.10d, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00000000000000000001d);
51-
assertEquals(0.87d, classBeingTested.apply("left", "right"), 0.00000000000000000001d);
52-
assertEquals(0.87d, classBeingTested.apply("leettteft", "ritttght"), 0.00000000000000000001d);
53-
assertEquals(0.0d, classBeingTested.apply("the same string", "the same string"), 0.00000000000000000001d);
39+
// Expected Jaccard distance = 1.0 - (intersect / union)
40+
assertEquals(1.0, classBeingTested.apply("", ""));
41+
assertEquals(1.0, classBeingTested.apply("left", ""));
42+
assertEquals(1.0, classBeingTested.apply("", "right"));
43+
assertEquals(1.0 - (3.0 / 4), classBeingTested.apply("frog", "fog"));
44+
assertEquals(1.0, classBeingTested.apply("fly", "ant"));
45+
assertEquals(1.0 - (2.0 / 9), classBeingTested.apply("elephant", "hippo"));
46+
assertEquals(1.0 - (7.0 / 11), classBeingTested.apply("ABC Corporation", "ABC Corp"));
47+
assertEquals(1.0 - (13.0 / 17),
48+
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
49+
assertEquals(1.0 - (16.0 / 18),
50+
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
51+
assertEquals(1.0 - (9.0 / 10), classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
52+
assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("left", "right"));
53+
assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("leettteft", "ritttght"));
54+
assertEquals(0.0, classBeingTested.apply("the same string", "the same string"));
5455
}
5556

5657
@Test

src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -36,21 +36,22 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardSimilarity() {
39-
assertEquals(0.00d, classBeingTested.apply("", ""), 0.00000000000000000001d);
40-
assertEquals(0.00d, classBeingTested.apply("left", ""), 0.00000000000000000001d);
41-
assertEquals(0.00d, classBeingTested.apply("", "right"), 0.00000000000000000001d);
42-
assertEquals(0.75d, classBeingTested.apply("frog", "fog"), 0.00000000000000000001d);
43-
assertEquals(0.00d, classBeingTested.apply("fly", "ant"), 0.00000000000000000001d);
44-
assertEquals(0.22d, classBeingTested.apply("elephant", "hippo"), 0.00000000000000000001d);
45-
assertEquals(0.64d, classBeingTested.apply("ABC Corporation", "ABC Corp"), 0.00000000000000000001d);
46-
assertEquals(0.76d, classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."),
47-
0.00000000000000000001d);
48-
assertEquals(0.89d, classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"),
49-
0.00000000000000000001d);
50-
assertEquals(0.9d, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00000000000000000001d);
51-
assertEquals(0.13d, classBeingTested.apply("left", "right"), 0.00000000000000000001d);
52-
assertEquals(0.13d, classBeingTested.apply("leettteft", "ritttght"), 0.00000000000000000001d);
53-
assertEquals(1.0d, classBeingTested.apply("the same string", "the same string"), 0.00000000000000000001d);
39+
// Expected Jaccard similarity = (intersect / union)
40+
assertEquals(0.0, classBeingTested.apply("", ""));
41+
assertEquals(0.0, classBeingTested.apply("left", ""));
42+
assertEquals(0.0, classBeingTested.apply("", "right"));
43+
assertEquals(3.0 / 4, classBeingTested.apply("frog", "fog"));
44+
assertEquals(0.0, classBeingTested.apply("fly", "ant"));
45+
assertEquals(2.0 / 9, classBeingTested.apply("elephant", "hippo"));
46+
assertEquals(7.0 / 11, classBeingTested.apply("ABC Corporation", "ABC Corp"));
47+
assertEquals(13.0 / 17,
48+
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
49+
assertEquals(16.0 / 18,
50+
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
51+
assertEquals(9.0 / 10, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
52+
assertEquals(1.0 / 8, classBeingTested.apply("left", "right"));
53+
assertEquals(1.0 / 8, classBeingTested.apply("leettteft", "ritttght"));
54+
assertEquals(1.0, classBeingTested.apply("the same string", "the same string"));
5455
}
5556

5657
@Test

0 commit comments

Comments (0)