Skip to content

Commit 74afb2d

Browse files
authored
Merge pull request #142 from kinow/TEXT-158
[TEXT-158]: two empty strings must have a similarity of 1 and a distance of 0 (i.e. they are identical)
2 parents b378a48 + 38727e3 commit 74afb2d

4 files changed

Lines changed: 6 additions & 2 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
4545
</properties>
4646
<body>
4747
<release version="1.9.1" date="202Y-MM-DD" description="Release 1.9.1. Requires Java 8.">
48+
<action issue="TEXT-158" type="fix" dev="kinow">Incorrect values for Jaccard similarity with empty strings</action>
4849
<action issue="TEXT-185" type="add" dev="ggregory" due-to="Larry West, Gary Gregory">Release Notes page hasn't been updated for 1.9 release yet.</action>
4950
<action type="add" dev="ggregory" due-to="Gary Gregory">Update spotbugs.plugin.version 4.0.0 to 4.0.4.</action>
5051
<action type="add" dev="ggregory" due-to="Dependabot">Update mockito-inline from 3.4.4 to 3.4.6 #143.</action>

src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ public Double apply(final CharSequence left, final CharSequence right) {
6464
private Double calculateJaccardSimilarity(final CharSequence left, final CharSequence right) {
6565
final int leftLength = left.length();
6666
final int rightLength = right.length();
67+
if (leftLength == 0 && rightLength == 0) {
68+
return 1d;
69+
}
6770
if (leftLength == 0 || rightLength == 0) {
6871
return 0d;
6972
}

src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public static void setUp() {
3737
@Test
3838
public void testGettingJaccardDistance() {
3939
// Expected Jaccard distance = 1.0 - (intersect / union)
40-
assertEquals(1.0, classBeingTested.apply("", ""));
40+
assertEquals(0.0, classBeingTested.apply("", ""));
4141
assertEquals(1.0, classBeingTested.apply("left", ""));
4242
assertEquals(1.0, classBeingTested.apply("", "right"));
4343
assertEquals(1.0 - (3.0 / 4), classBeingTested.apply("frog", "fog"));

src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public static void setUp() {
3737
@Test
3838
public void testGettingJaccardSimilarity() {
3939
// Expected Jaccard similarity = (intersect / union)
40-
assertEquals(0.0, classBeingTested.apply("", ""));
40+
assertEquals(1.0, classBeingTested.apply("", ""));
4141
assertEquals(0.0, classBeingTested.apply("left", ""));
4242
assertEquals(0.0, classBeingTested.apply("", "right"));
4343
assertEquals(3.0 / 4, classBeingTested.apply("frog", "fog"));

0 commit comments

Comments
 (0)