Skip to content

Commit eacfa36

Browse files
committed
TEXT-157: Use expected=(intersect/union) in Jaccard tests
1 parent d768027 commit eacfa36

2 files changed

Lines changed: 18 additions & 20 deletions

File tree

src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,21 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardDistance() {
39-
// Results generated using the python distance library using:
40-
// distance.jaccard(seq1, seq2)
39+
// Expected Jaccard distance = 1.0 - (|intersection| / |union|) of the two inputs' character sets (empty input yields 1.0)
4140
assertEquals(1.0, classBeingTested.apply("", ""));
4241
assertEquals(1.0, classBeingTested.apply("left", ""));
4342
assertEquals(1.0, classBeingTested.apply("", "right"));
44-
assertEquals(0.25, classBeingTested.apply("frog", "fog"));
43+
assertEquals(1.0 - (3.0 / 4), classBeingTested.apply("frog", "fog"));
4544
assertEquals(1.0, classBeingTested.apply("fly", "ant"));
46-
assertEquals(0.7777777777777778, classBeingTested.apply("elephant", "hippo"));
47-
assertEquals(0.36363636363636365, classBeingTested.apply("ABC Corporation", "ABC Corp"));
48-
assertEquals(0.23529411764705888,
45+
assertEquals(1.0 - (2.0 / 9), classBeingTested.apply("elephant", "hippo"));
46+
assertEquals(1.0 - (7.0 / 11), classBeingTested.apply("ABC Corporation", "ABC Corp"));
47+
assertEquals(1.0 - (13.0 / 17),
4948
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
50-
assertEquals(0.11111111111111116,
49+
assertEquals(1.0 - (16.0 / 18),
5150
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
52-
assertEquals(0.09999999999999998, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
53-
assertEquals(0.875, classBeingTested.apply("left", "right"));
54-
assertEquals(0.875, classBeingTested.apply("leettteft", "ritttght"));
51+
assertEquals(1.0 - (9.0 / 10), classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
52+
assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("left", "right"));
53+
assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("leettteft", "ritttght"));
5554
assertEquals(0.0, classBeingTested.apply("the same string", "the same string"));
5655
}
5756

src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,21 @@ public static void setUp() {
3636

3737
@Test
3838
public void testGettingJaccardSimilarity() {
39-
// Results generated using the python distance library using:
40-
// 1 - distance.jaccard(seq1, seq2)
39+
// Expected Jaccard similarity = |intersection| / |union| of the two inputs' character sets (empty input yields 0.0)
4140
assertEquals(0.0, classBeingTested.apply("", ""));
4241
assertEquals(0.0, classBeingTested.apply("left", ""));
4342
assertEquals(0.0, classBeingTested.apply("", "right"));
44-
assertEquals(0.75, classBeingTested.apply("frog", "fog"));
43+
assertEquals(3.0 / 4, classBeingTested.apply("frog", "fog"));
4544
assertEquals(0.0, classBeingTested.apply("fly", "ant"));
46-
assertEquals(0.2222222222222222, classBeingTested.apply("elephant", "hippo"));
47-
assertEquals(0.6363636363636364, classBeingTested.apply("ABC Corporation", "ABC Corp"));
48-
assertEquals(0.7647058823529411,
45+
assertEquals(2.0 / 9, classBeingTested.apply("elephant", "hippo"));
46+
assertEquals(7.0 / 11, classBeingTested.apply("ABC Corporation", "ABC Corp"));
47+
assertEquals(13.0 / 17,
4948
classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
50-
assertEquals(0.8888888888888888,
49+
assertEquals(16.0 / 18,
5150
classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
52-
assertEquals(0.9, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
53-
assertEquals(0.125, classBeingTested.apply("left", "right"));
54-
assertEquals(0.125, classBeingTested.apply("leettteft", "ritttght"));
51+
assertEquals(9.0 / 10, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
52+
assertEquals(1.0 / 8, classBeingTested.apply("left", "right"));
53+
assertEquals(1.0 / 8, classBeingTested.apply("leettteft", "ritttght"));
5554
assertEquals(1.0, classBeingTested.apply("the same string", "the same string"));
5655
}
5756

0 commit comments

Comments
 (0)