Skip to content
This repository was archived by the owner on Jul 16, 2021. It is now read-only.

Commit 34a5417

Browse files
sinhrks authored and AtheMathmo committed
ENH: Add datasets (#161)
* ENH: Add datasets * add feature gates * Fix travis tests * Fix lib structure / added comment * do not add defaults and tests features
1 parent 033f8e1 commit 34a5417

7 files changed

Lines changed: 245 additions & 1 deletion

File tree

.travis.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ script:
1111
- cargo test --verbose
1212
- cargo build --features stats
1313
- cargo test --features stats
14+
- cargo build --features datasets
15+
- cargo test --features datasets

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ license = "MIT"
1212

1313
[features]
1414
stats = []
15+
datasets = []
1516

1617
[dependencies]
1718
num = { version = "0.1.35", default-features = false }

src/datasets/iris.rs

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
use rulinalg::matrix::Matrix;
2+
use rulinalg::vector::Vector;
3+
4+
use super::Dataset;
5+
6+
/// Load iris dataset.
7+
///
8+
/// The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant.
9+
///
10+
/// ## Attribute Information
11+
///
12+
/// ### Data
13+
///
14+
/// ``Matrix<f64>`` contains following columns.
15+
///
16+
/// - sepal length in cm
17+
/// - sepal width in cm
18+
/// - petal length in cm
19+
/// - petal width in cm
20+
///
21+
/// ### Target
22+
///
23+
/// ``Vector<usize>`` contains numbers corresponding to iris species:
24+
///
25+
/// - ``0``: Iris Setosa
26+
/// - ``1``: Iris Versicolour
27+
/// - ``2``: Iris Virginica
28+
///
29+
/// Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
30+
/// Irvine, CA: University of California, School of Information and Computer Science.
31+
pub fn load() -> Dataset<Matrix<f64>, Vector<usize>> {
32+
let data: Matrix<f64> = matrix![5.1, 3.5, 1.4, 0.2;
33+
4.9, 3.0, 1.4, 0.2;
34+
4.7, 3.2, 1.3, 0.2;
35+
4.6, 3.1, 1.5, 0.2;
36+
5.0, 3.6, 1.4, 0.2;
37+
5.4, 3.9, 1.7, 0.4;
38+
4.6, 3.4, 1.4, 0.3;
39+
5.0, 3.4, 1.5, 0.2;
40+
4.4, 2.9, 1.4, 0.2;
41+
4.9, 3.1, 1.5, 0.1;
42+
5.4, 3.7, 1.5, 0.2;
43+
4.8, 3.4, 1.6, 0.2;
44+
4.8, 3.0, 1.4, 0.1;
45+
4.3, 3.0, 1.1, 0.1;
46+
5.8, 4.0, 1.2, 0.2;
47+
5.7, 4.4, 1.5, 0.4;
48+
5.4, 3.9, 1.3, 0.4;
49+
5.1, 3.5, 1.4, 0.3;
50+
5.7, 3.8, 1.7, 0.3;
51+
5.1, 3.8, 1.5, 0.3;
52+
5.4, 3.4, 1.7, 0.2;
53+
5.1, 3.7, 1.5, 0.4;
54+
4.6, 3.6, 1.0, 0.2;
55+
5.1, 3.3, 1.7, 0.5;
56+
4.8, 3.4, 1.9, 0.2;
57+
5.0, 3.0, 1.6, 0.2;
58+
5.0, 3.4, 1.6, 0.4;
59+
5.2, 3.5, 1.5, 0.2;
60+
5.2, 3.4, 1.4, 0.2;
61+
4.7, 3.2, 1.6, 0.2;
62+
4.8, 3.1, 1.6, 0.2;
63+
5.4, 3.4, 1.5, 0.4;
64+
5.2, 4.1, 1.5, 0.1;
65+
5.5, 4.2, 1.4, 0.2;
66+
4.9, 3.1, 1.5, 0.1;
67+
5.0, 3.2, 1.2, 0.2;
68+
5.5, 3.5, 1.3, 0.2;
69+
4.9, 3.1, 1.5, 0.1;
70+
4.4, 3.0, 1.3, 0.2;
71+
5.1, 3.4, 1.5, 0.2;
72+
5.0, 3.5, 1.3, 0.3;
73+
4.5, 2.3, 1.3, 0.3;
74+
4.4, 3.2, 1.3, 0.2;
75+
5.0, 3.5, 1.6, 0.6;
76+
5.1, 3.8, 1.9, 0.4;
77+
4.8, 3.0, 1.4, 0.3;
78+
5.1, 3.8, 1.6, 0.2;
79+
4.6, 3.2, 1.4, 0.2;
80+
5.3, 3.7, 1.5, 0.2;
81+
5.0, 3.3, 1.4, 0.2;
82+
7.0, 3.2, 4.7, 1.4;
83+
6.4, 3.2, 4.5, 1.5;
84+
6.9, 3.1, 4.9, 1.5;
85+
5.5, 2.3, 4.0, 1.3;
86+
6.5, 2.8, 4.6, 1.5;
87+
5.7, 2.8, 4.5, 1.3;
88+
6.3, 3.3, 4.7, 1.6;
89+
4.9, 2.4, 3.3, 1.0;
90+
6.6, 2.9, 4.6, 1.3;
91+
5.2, 2.7, 3.9, 1.4;
92+
5.0, 2.0, 3.5, 1.0;
93+
5.9, 3.0, 4.2, 1.5;
94+
6.0, 2.2, 4.0, 1.0;
95+
6.1, 2.9, 4.7, 1.4;
96+
5.6, 2.9, 3.6, 1.3;
97+
6.7, 3.1, 4.4, 1.4;
98+
5.6, 3.0, 4.5, 1.5;
99+
5.8, 2.7, 4.1, 1.0;
100+
6.2, 2.2, 4.5, 1.5;
101+
5.6, 2.5, 3.9, 1.1;
102+
5.9, 3.2, 4.8, 1.8;
103+
6.1, 2.8, 4.0, 1.3;
104+
6.3, 2.5, 4.9, 1.5;
105+
6.1, 2.8, 4.7, 1.2;
106+
6.4, 2.9, 4.3, 1.3;
107+
6.6, 3.0, 4.4, 1.4;
108+
6.8, 2.8, 4.8, 1.4;
109+
6.7, 3.0, 5.0, 1.7;
110+
6.0, 2.9, 4.5, 1.5;
111+
5.7, 2.6, 3.5, 1.0;
112+
5.5, 2.4, 3.8, 1.1;
113+
5.5, 2.4, 3.7, 1.0;
114+
5.8, 2.7, 3.9, 1.2;
115+
6.0, 2.7, 5.1, 1.6;
116+
5.4, 3.0, 4.5, 1.5;
117+
6.0, 3.4, 4.5, 1.6;
118+
6.7, 3.1, 4.7, 1.5;
119+
6.3, 2.3, 4.4, 1.3;
120+
5.6, 3.0, 4.1, 1.3;
121+
5.5, 2.5, 4.0, 1.3;
122+
5.5, 2.6, 4.4, 1.2;
123+
6.1, 3.0, 4.6, 1.4;
124+
5.8, 2.6, 4.0, 1.2;
125+
5.0, 2.3, 3.3, 1.0;
126+
5.6, 2.7, 4.2, 1.3;
127+
5.7, 3.0, 4.2, 1.2;
128+
5.7, 2.9, 4.2, 1.3;
129+
6.2, 2.9, 4.3, 1.3;
130+
5.1, 2.5, 3.0, 1.1;
131+
5.7, 2.8, 4.1, 1.3;
132+
6.3, 3.3, 6.0, 2.5;
133+
5.8, 2.7, 5.1, 1.9;
134+
7.1, 3.0, 5.9, 2.1;
135+
6.3, 2.9, 5.6, 1.8;
136+
6.5, 3.0, 5.8, 2.2;
137+
7.6, 3.0, 6.6, 2.1;
138+
4.9, 2.5, 4.5, 1.7;
139+
7.3, 2.9, 6.3, 1.8;
140+
6.7, 2.5, 5.8, 1.8;
141+
7.2, 3.6, 6.1, 2.5;
142+
6.5, 3.2, 5.1, 2.0;
143+
6.4, 2.7, 5.3, 1.9;
144+
6.8, 3.0, 5.5, 2.1;
145+
5.7, 2.5, 5.0, 2.0;
146+
5.8, 2.8, 5.1, 2.4;
147+
6.4, 3.2, 5.3, 2.3;
148+
6.5, 3.0, 5.5, 1.8;
149+
7.7, 3.8, 6.7, 2.2;
150+
7.7, 2.6, 6.9, 2.3;
151+
6.0, 2.2, 5.0, 1.5;
152+
6.9, 3.2, 5.7, 2.3;
153+
5.6, 2.8, 4.9, 2.0;
154+
7.7, 2.8, 6.7, 2.0;
155+
6.3, 2.7, 4.9, 1.8;
156+
6.7, 3.3, 5.7, 2.1;
157+
7.2, 3.2, 6.0, 1.8;
158+
6.2, 2.8, 4.8, 1.8;
159+
6.1, 3.0, 4.9, 1.8;
160+
6.4, 2.8, 5.6, 2.1;
161+
7.2, 3.0, 5.8, 1.6;
162+
7.4, 2.8, 6.1, 1.9;
163+
7.9, 3.8, 6.4, 2.0;
164+
6.4, 2.8, 5.6, 2.2;
165+
6.3, 2.8, 5.1, 1.5;
166+
6.1, 2.6, 5.6, 1.4;
167+
7.7, 3.0, 6.1, 2.3;
168+
6.3, 3.4, 5.6, 2.4;
169+
6.4, 3.1, 5.5, 1.8;
170+
6.0, 3.0, 4.8, 1.8;
171+
6.9, 3.1, 5.4, 2.1;
172+
6.7, 3.1, 5.6, 2.4;
173+
6.9, 3.1, 5.1, 2.3;
174+
5.8, 2.7, 5.1, 1.9;
175+
6.8, 3.2, 5.9, 2.3;
176+
6.7, 3.3, 5.7, 2.5;
177+
6.7, 3.0, 5.2, 2.3;
178+
6.3, 2.5, 5.0, 1.9;
179+
6.5, 3.0, 5.2, 2.0;
180+
6.2, 3.4, 5.4, 2.3;
181+
5.9, 3.0, 5.1, 1.8];
182+
let target: Vec<usize> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
183+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
185+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
186+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
187+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2];
188+
189+
Dataset{ data: data,
190+
target: Vector::new(target) }
191+
}

src/datasets/mod.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use std::fmt::Debug;
2+
3+
/// Module for iris dataset.
4+
pub mod iris;
5+
6+
/// Container pairing a dataset's feature data with its target values.
///
/// `D` is the type holding the explanatory variables and `T` the type holding
/// the objective variable. Both must be `Clone + Debug`. The fields are
/// private; read them through the accessor methods.
#[derive(Clone, Debug)]
pub struct Dataset<D: Clone + Debug, T: Clone + Debug> {
    // Explanatory variables (features).
    data: D,
    // Objective variable (target).
    target: T,
}
13+
14+
impl<D, T> Dataset<D, T> where D: Clone + Debug, T: Clone + Debug {
15+
16+
/// Returns explanatory variable (features)
17+
pub fn data(&self) -> &D {
18+
&self.data
19+
}
20+
21+
/// Returns objective variable (target)
22+
pub fn target(&self) -> &T {
23+
&self.target
24+
}
25+
}

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,7 @@ pub mod analysis {
221221
pub mod cross_validation;
222222
pub mod score;
223223
}
224+
225+
/// Bundled datasets; compiled only when the `datasets` feature is enabled.
#[cfg(feature = "datasets")]
pub mod datasets;

tests/datasets.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
extern crate rusty_machine as rm;
2+
3+
4+
// Gate on the Cargo feature. A bare `cfg(datasets)` is only set by a raw
// `--cfg datasets` compiler flag, so under `cargo test --features datasets`
// this module was silently compiled out and its test never ran.
#[cfg(feature = "datasets")]
mod test {

    use rm::datasets::iris;
    // Brings `rows()` / `cols()` into scope for the data matrix.
    use rm::linalg::BaseMatrix;

    /// The iris dataset must expose 150 samples with 4 features each and
    /// exactly one target label per sample.
    #[test]
    fn test_iris() {
        // `iris::load` is the public loader; the previous `load_` does not exist.
        let dt = iris::load();
        assert_eq!(dt.data().rows(), 150);
        assert_eq!(dt.data().cols(), 4);

        assert_eq!(dt.target().size(), 150);
    }
}

tests/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,7 @@ pub mod learning {
1010
pub mod optim {
1111
mod grad_desc;
1212
}
13-
}
13+
}
14+
15+
// Cargo exposes an enabled feature to rustc as `feature = "datasets"`. A bare
// `cfg(datasets)` is never set by `cargo test --features datasets`, so the
// module would always be excluded.
#[cfg(feature = "datasets")]
pub mod datasets;

0 commit comments

Comments
 (0)