|
| 1 | +//! # Jaccard Distance |
| 2 | +//! |
| 3 | +//! Jaccard Distance measures dissimilarity between two integer-valued vectors of the same length. |
| 4 | +//! Given two vectors \\( x \in ℤ^n \\), \\( y \in ℤ^n \\), the Jaccard distance between \\( x \\) and \\( y \\) is defined as |
| 5 | +//! |
| 6 | +//! \\[ d(x, y) = 1 - \frac{|x \cap y|}{|x \cup y|} \\] |
| 7 | +//! |
| 8 | +//! where \\(|x \cap y|\\) is the number of positions where both vectors are non-zero, |
| 9 | +//! and \\(|x \cup y|\\) is the number of positions where at least one of the vectors is non-zero. |
| 10 | +//! |
| 11 | +//! Example: |
| 12 | +//! |
| 13 | +//! ``` |
| 14 | +//! use smartcore::metrics::distance::Distance; |
| 15 | +//! use smartcore::metrics::distance::jaccard::Jaccard; |
| 16 | +//! |
| 17 | +//! let a = vec![1, 0, 1, 1]; |
| 18 | +//! let b = vec![1, 1, 0, 1]; |
| 19 | +//! |
| 20 | +//! let j: f64 = Jaccard::new().distance(&a, &b); |
| 21 | +//! |
| 22 | +//! ``` |
| 23 | +//! |
| 24 | +//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script> |
| 25 | +//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script> |
| 26 | +
|
| 27 | +#[cfg(feature = "serde")] |
| 28 | +use serde::{Deserialize, Serialize}; |
| 29 | +use std::marker::PhantomData; |
| 30 | + |
| 31 | +use super::Distance; |
| 32 | +use crate::linalg::basic::arrays::ArrayView1; |
| 33 | +use crate::numbers::basenum::Number; |
| 34 | + |
| 35 | +/// Jaccard distance between two integer-valued vectors |
| 36 | +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| 37 | +#[derive(Debug, Clone)] |
| 38 | +pub struct Jaccard<T: Number> { |
| 39 | + _t: PhantomData<T>, |
| 40 | +} |
| 41 | + |
| 42 | +impl<T: Number> Jaccard<T> { |
| 43 | + /// instatiate the initial structure |
| 44 | + pub fn new() -> Jaccard<T> { |
| 45 | + Jaccard { _t: PhantomData } |
| 46 | + } |
| 47 | +} |
| 48 | + |
| 49 | +impl<T: Number> Default for Jaccard<T> { |
| 50 | + fn default() -> Self { |
| 51 | + Self::new() |
| 52 | + } |
| 53 | +} |
| 54 | + |
| 55 | +impl<T: Number, A: ArrayView1<T>> Distance<A> for Jaccard<T> { |
| 56 | + fn distance(&self, x: &A, y: &A) -> f64 { |
| 57 | + if x.shape() != y.shape() { |
| 58 | + panic!("Input vector sizes are different"); |
| 59 | + } |
| 60 | + |
| 61 | + let (intersection, union): (usize, usize) = x |
| 62 | + .iterator(0) |
| 63 | + .zip(y.iterator(0)) |
| 64 | + .map(|(a, b)| { |
| 65 | + let a_nz = *a != T::zero(); |
| 66 | + let b_nz = *b != T::zero(); |
| 67 | + |
| 68 | + match (a_nz, b_nz) { |
| 69 | + (true, true) => (1, 1), |
| 70 | + (true, false) | (false, true) => (0, 1), |
| 71 | + (false, false) => (0, 0), |
| 72 | + } |
| 73 | + }) |
| 74 | + .fold((0, 0), |acc, v| (acc.0 + v.0, acc.1 + v.1)); |
| 75 | + |
| 76 | + if union == 0 { |
| 77 | + 0.0 |
| 78 | + } else { |
| 79 | + 1.0 - intersection as f64 / union as f64 |
| 80 | + } |
| 81 | + } |
| 82 | +} |
| 83 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Two 4-element vectors that share 2 non-zero positions out of 4
    /// positions that are non-zero in at least one vector: distance 0.5.
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
    )]
    #[test]
    fn jaccard_distance() {
        let left = vec![1, 0, 1, 1];
        let right = vec![1, 1, 0, 1];
        let expected = 0.5_f64;
        let tolerance = 1e-8;

        let metric = Jaccard::<i32>::new();
        let actual: f64 = metric.distance(&left, &right);

        assert!((actual - expected).abs() < tolerance);
    }
}
0 commit comments