Skip to content

Commit 9fdfd93

Browse files
committed
added Jaccard distance
1 parent c57a437 commit 9fdfd93

2 files changed

Lines changed: 108 additions & 0 deletions

File tree

src/metrics/distance/jaccard.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
//! # Jaccard Distance
2+
//!
3+
//! Jaccard Distance measures dissimilarity between two integer-valued vectors of the same length.
4+
//! Given two vectors \\( x \in ℤ^n \\), \\( y \in ℤ^n \\), the Jaccard distance between \\( x \\) and \\( y \\) is defined as
5+
//!
6+
//! \\[ d(x, y) = 1 - \frac{|x \cap y|}{|x \cup y|} \\]
7+
//!
8+
//! where \\(|x \cap y|\\) is the number of positions where both vectors are non-zero,
9+
//! and \\(|x \cup y|\\) is the number of positions where at least one of the vectors is non-zero.
10+
//!
11+
//! Example:
12+
//!
13+
//! ```
14+
//! use smartcore::metrics::distance::Distance;
15+
//! use smartcore::metrics::distance::jaccard::Jaccard;
16+
//!
17+
//! let a = vec![1, 0, 1, 1];
18+
//! let b = vec![1, 1, 0, 1];
19+
//!
20+
//! let j: f64 = Jaccard::new().distance(&a, &b);
//! assert!((j - 0.5).abs() < 1e-8);
21+
//!
22+
//! ```
23+
//!
24+
//! <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
25+
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
26+
27+
#[cfg(feature = "serde")]
28+
use serde::{Deserialize, Serialize};
29+
use std::marker::PhantomData;
30+
31+
use super::Distance;
32+
use crate::linalg::basic::arrays::ArrayView1;
33+
use crate::numbers::basenum::Number;
34+
35+
/// Jaccard distance between two integer-valued vectors.
///
/// The struct holds no data of its own: the type parameter `T` is carried
/// only through a zero-sized `PhantomData` marker and names the element type
/// of the vectors handled by this type's `Distance` implementation.
///
/// `Serialize` / `Deserialize` are derived only when the `serde` feature is
/// enabled.
36+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
37+
#[derive(Debug, Clone)]
38+
pub struct Jaccard<T: Number> {
39+
// Zero-sized marker that ties the struct to `T` without storing a value.
_t: PhantomData<T>,
40+
}
41+
42+
impl<T: Number> Jaccard<T> {
43+
/// instatiate the initial structure
44+
pub fn new() -> Jaccard<T> {
45+
Jaccard { _t: PhantomData }
46+
}
47+
}
48+
49+
impl<T: Number> Default for Jaccard<T> {
50+
fn default() -> Self {
51+
Self::new()
52+
}
53+
}
54+
55+
impl<T: Number, A: ArrayView1<T>> Distance<A> for Jaccard<T> {
    /// Computes the Jaccard distance between `x` and `y`.
    ///
    /// Each vector is treated as the set of positions that hold a non-zero
    /// value. The result is `1 - |intersection| / |union|`, where the
    /// intersection counts positions that are non-zero in both vectors and the
    /// union counts positions that are non-zero in at least one of them.
    ///
    /// Returns `0.0` when no position is non-zero in either vector.
    ///
    /// # Panics
    ///
    /// Panics if `x.shape()` and `y.shape()` differ.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }

        let zero = T::zero();
        let mut shared: usize = 0;
        let mut total: usize = 0;

        for (xi, yi) in x.iterator(0).zip(y.iterator(0)) {
            let x_set = *xi != zero;
            let y_set = *yi != zero;

            if x_set || y_set {
                total += 1;
                if x_set && y_set {
                    shared += 1;
                }
            }
        }

        // With an empty union the ratio is undefined; such vectors are
        // reported as being at distance zero.
        if total == 0 {
            return 0.0;
        }

        1.0 - shared as f64 / total as f64
    }
}
83+
84+
#[cfg(test)]
mod tests {
    use super::*;

    /// Partially overlapping vectors: two shared non-zero positions out of
    /// four positions that are non-zero in at least one vector.
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
    )]
    #[test]
    fn jaccard_distance() {
        let a = vec![1, 0, 1, 1];
        let b = vec![1, 1, 0, 1];

        let j: f64 = Jaccard::new().distance(&a, &b);

        assert!((j - 0.5).abs() < 1e-8);
    }

    /// A vector compared with itself has identical intersection and union.
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
    )]
    #[test]
    fn jaccard_distance_identical_vectors() {
        let a = vec![1, 0, 1, 1];

        let j: f64 = Jaccard::new().distance(&a, &a);

        assert!(j.abs() < 1e-8);
    }

    /// Vectors with no shared non-zero position are at the maximum distance.
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
    )]
    #[test]
    fn jaccard_distance_disjoint_vectors() {
        let a = vec![1, 0, 1, 0];
        let b = vec![0, 1, 0, 1];

        let j: f64 = Jaccard::new().distance(&a, &b);

        assert!((j - 1.0).abs() < 1e-8);
    }

    /// All-zero vectors exercise the empty-union branch, which returns 0.0.
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
    )]
    #[test]
    fn jaccard_distance_all_zero_vectors() {
        let a = vec![0, 0, 0];
        let b = vec![0, 0, 0];

        let j: f64 = Jaccard::new().distance(&a, &b);

        assert!(j.abs() < 1e-8);
    }
}

src/metrics/distance/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ pub mod cosine;
1919
pub mod euclidian;
2020
/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different.
2121
pub mod hamming;
22+
/// Jaccard distance between two integer-valued vectors.
23+
pub mod jaccard;
2224
/// The Mahalanobis distance is the distance between two points in multivariate space.
2325
pub mod mahalanobis;
2426
/// Also known as rectilinear distance, city block distance, taxicab metric.
@@ -67,6 +69,11 @@ impl Distances {
6769
hamming::Hamming::new()
6870
}
6971

72+
/// Jaccard distance, see [`Jaccard`](jaccard/index.html)
73+
pub fn jaccard<T: Number>() -> jaccard::Jaccard<T> {
74+
jaccard::Jaccard::new()
75+
}
76+
7077
/// Mahalanobis distance, see [`Mahalanobis`](mahalanobis/index.html)
7178
pub fn mahalanobis<T: Number, M: Array2<T>, C: Array2<f64> + LUDecomposable<f64>>(
7279
data: &M,

0 commit comments

Comments
 (0)