Skip to content

Commit d768027

Browse files
committed
TEXT-157: Remove rounding from JaccardSimilarity and JaccardDistance
1 parent 19df20d commit d768027

5 files changed

Lines changed: 37 additions & 32 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ The <action> type attribute can be add,update,fix,remove.
5454
<action issue="TEXT-152" type="add" dev="" due-to="@CAPS50">Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length</action>
5555
<action issue="TEXT-153" type="update" dev="" due-to="amirhadadi">Make prefixSet in LookupTranslator a BitSet</action>
5656
<action issue="TEXT-156" type="update" dev="aherbert">Fix the RegexTokenizer to use a static Pattern</action>
57+
<action issue="TEXT-157" type="update" dev="aherbert">Remove rounding from JaccardDistance and JaccardSimilarity</action>
5758
</release>
5859

5960
<release version="1.6" date="2018-10-12" description="Release 1.6">

src/main/java/org/apache/commons/text/similarity/JaccardDistance.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,6 @@ public Double apply(final CharSequence left, final CharSequence right) {
5050
if (left == null || right == null) {
5151
throw new IllegalArgumentException("Input cannot be null");
5252
}
53-
return Math.round((1 - jaccardSimilarity.apply(left, right)) * 100d) / 100d;
53+
return 1.0 - jaccardSimilarity.apply(left, right).doubleValue();
5454
}
5555
}

src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public Double apply(final CharSequence left, final CharSequence right) {
4848
if (left == null || right == null) {
4949
throw new IllegalArgumentException("Input cannot be null");
5050
}
51-
return Math.round(calculateJaccardSimilarity(left, right) * 100d) / 100d;
51+
return calculateJaccardSimilarity(left, right);
5252
}
5353

5454
/**

src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,23 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardDistance() {
39-
assertEquals(1.00d, classBeingTested.apply("", ""), 0.00000000000000000001d);
40-
assertEquals(1.00d, classBeingTested.apply("left", ""), 0.00000000000000000001d);
41-
assertEquals(1.00d, classBeingTested.apply("", "right"), 0.00000000000000000001d);
42-
assertEquals(0.25d, classBeingTested.apply("frog", "fog"), 0.00000000000000000001d);
43-
assertEquals(1.00d, classBeingTested.apply("fly", "ant"), 0.00000000000000000001d);
44-
assertEquals(0.78d, classBeingTested.apply("elephant", "hippo"), 0.00000000000000000001d);
45-
assertEquals(0.36d, classBeingTested.apply("ABC Corporation", "ABC Corp"), 0.00000000000000000001d);
46-
assertEquals(0.24d, classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."),
47-
0.00000000000000000001d);
48-
assertEquals(0.11d, classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"),
49-
0.00000000000000000001d);
50-
assertEquals(0.10d, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00000000000000000001d);
51-
assertEquals(0.87d, classBeingTested.apply("left", "right"), 0.00000000000000000001d);
52-
assertEquals(0.87d, classBeingTested.apply("leettteft", "ritttght"), 0.00000000000000000001d);
53-
assertEquals(0.0d, classBeingTested.apply("the same string", "the same string"), 0.00000000000000000001d);
39+
// Expected values were generated with the Python distance library by calling:
40+
// distance.jaccard(seq1, seq2)
41+
assertEquals(1.0, classBeingTested.apply("", ""));
42+
assertEquals(1.0, classBeingTested.apply("left", ""));
43+
assertEquals(1.0, classBeingTested.apply("", "right"));
44+
assertEquals(0.25, classBeingTested.apply("frog", "fog"));
45+
assertEquals(1.0, classBeingTested.apply("fly", "ant"));
46+
assertEquals(0.7777777777777778, classBeingTested.apply("elephant", "hippo"));
47+
assertEquals(0.36363636363636365, classBeingTested.apply("ABC Corporation", "ABC Corp"));
48+
assertEquals(0.23529411764705888,
49+
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
50+
assertEquals(0.11111111111111116,
51+
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
52+
assertEquals(0.09999999999999998, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
53+
assertEquals(0.875, classBeingTested.apply("left", "right"));
54+
assertEquals(0.875, classBeingTested.apply("leettteft", "ritttght"));
55+
assertEquals(0.0, classBeingTested.apply("the same string", "the same string"));
5456
}
5557

5658
@Test

src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,23 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardSimilarity() {
39-
assertEquals(0.00d, classBeingTested.apply("", ""), 0.00000000000000000001d);
40-
assertEquals(0.00d, classBeingTested.apply("left", ""), 0.00000000000000000001d);
41-
assertEquals(0.00d, classBeingTested.apply("", "right"), 0.00000000000000000001d);
42-
assertEquals(0.75d, classBeingTested.apply("frog", "fog"), 0.00000000000000000001d);
43-
assertEquals(0.00d, classBeingTested.apply("fly", "ant"), 0.00000000000000000001d);
44-
assertEquals(0.22d, classBeingTested.apply("elephant", "hippo"), 0.00000000000000000001d);
45-
assertEquals(0.64d, classBeingTested.apply("ABC Corporation", "ABC Corp"), 0.00000000000000000001d);
46-
assertEquals(0.76d, classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."),
47-
0.00000000000000000001d);
48-
assertEquals(0.89d, classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"),
49-
0.00000000000000000001d);
50-
assertEquals(0.9d, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00000000000000000001d);
51-
assertEquals(0.13d, classBeingTested.apply("left", "right"), 0.00000000000000000001d);
52-
assertEquals(0.13d, classBeingTested.apply("leettteft", "ritttght"), 0.00000000000000000001d);
53-
assertEquals(1.0d, classBeingTested.apply("the same string", "the same string"), 0.00000000000000000001d);
39+
// Expected values were generated with the Python distance library by calling:
40+
// 1 - distance.jaccard(seq1, seq2)
41+
assertEquals(0.0, classBeingTested.apply("", ""));
42+
assertEquals(0.0, classBeingTested.apply("left", ""));
43+
assertEquals(0.0, classBeingTested.apply("", "right"));
44+
assertEquals(0.75, classBeingTested.apply("frog", "fog"));
45+
assertEquals(0.0, classBeingTested.apply("fly", "ant"));
46+
assertEquals(0.2222222222222222, classBeingTested.apply("elephant", "hippo"));
47+
assertEquals(0.6363636363636364, classBeingTested.apply("ABC Corporation", "ABC Corp"));
48+
assertEquals(0.7647058823529411,
49+
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
50+
assertEquals(0.8888888888888888,
51+
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
52+
assertEquals(0.9, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
53+
assertEquals(0.125, classBeingTested.apply("left", "right"));
54+
assertEquals(0.125, classBeingTested.apply("leettteft", "ritttght"));
55+
assertEquals(1.0, classBeingTested.apply("the same string", "the same string"));
5456
}
5557

5658
@Test

0 commit comments

Comments
 (0)