Skip to content

Commit 3888849

Browse files
fix: fix sphinx warning and add test
1 parent 7afaffa commit 3888849

3 files changed

Lines changed: 56 additions & 11 deletions

File tree

docs/conf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
]
4343

4444
nitpick_ignore_regex = [
45-
("py:class", r".*\._[\w_]*"), # Ignore private classes from nitpick errors
46-
("py:obj", r".*\._[\w_]*"), # Ignore private objects from nitpick errors
45+
(r"py:(class|obj)", r"(.*\.)?_[\w_]*"), # Ignore private objects
4746
("py:class", r"abc\..*"),
4847
("py:class", r"com\..*"),
4948
("py:class", r"java\..*"),

src/capymoa/datasets/_downloader.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Literal, Optional, Union
44

55
from capymoa.stream._stream import Schema
6-
from moa.streams import InstanceStream
6+
from moa.streams import InstanceStream as _InstanceStream
77

88
from capymoa.stream import Stream, stream_from_file
99
from capymoa.datasets._utils import (
@@ -69,7 +69,7 @@ def __str__(self) -> str:
6969
class _DownloadableARFF(_DownloadableDataset, Stream):
7070
schema: Schema
7171
stream: Stream
72-
moa_stream: Optional[InstanceStream]
72+
moa_stream: Optional[_InstanceStream]
7373
_target_type: Literal["numeric", "categorical"] | None = None
7474

7575
def __init__(
@@ -107,7 +107,7 @@ def next_instance(self):
107107
def get_schema(self) -> Schema:
108108
return self.schema
109109

110-
def get_moa_stream(self) -> Optional[InstanceStream]:
110+
def get_moa_stream(self) -> Optional[_InstanceStream]:
111111
return self.moa_stream
112112

113113
def restart(self):

tests/test_datasets.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Sized, Type
22
import capymoa.datasets as capymoa_datasets
3+
from capymoa.stream import Stream
34
from capymoa.datasets import ElectricityTiny
45
from tempfile import TemporaryDirectory
56
import pytest
@@ -59,13 +60,58 @@ def test_electricity_tiny_schema():
5960
@pytest.mark.parametrize("dataset_type", _ALL_DOWNLOADABLE_DATASET)
6061
def test_all_datasets(dataset_type: Type[_DownloadableDataset]):
6162
with TemporaryDirectory() as tmp_dir:
62-
dataset = dataset_type(directory=tmp_dir)
63+
dataset_arff = dataset_type(directory=tmp_dir)
64+
assert isinstance(dataset_arff, Stream)
6365

6466
i = 0
65-
while dataset.has_more_instances():
66-
dataset.next_instance()
67+
while dataset_arff.has_more_instances():
68+
dataset_arff.next_instance()
6769
i += 1
6870

69-
assert str(dataset)
70-
assert isinstance(dataset, Sized), "Dataset must be an instance of Sized"
71-
assert len(dataset) == i, "Dataset length must be correct"
71+
assert str(dataset_arff)
72+
assert isinstance(dataset_arff, Sized), "Dataset must be an instance of Sized"
73+
assert len(dataset_arff) == i, "Dataset length must be correct"
74+
dataset_arff.restart()
75+
76+
try:
77+
dataset_csv = dataset_type(directory=tmp_dir, file_type="csv")
78+
assert isinstance(dataset_csv, Stream)
79+
except ValueError:
80+
return # If the dataset does not support CSV, skip the rest of the test
81+
82+
# Both should return a schema object
83+
assert dataset_arff.get_schema() is not None
84+
assert dataset_csv.get_schema() is not None
85+
86+
i = 0
87+
while dataset_arff.has_more_instances() and dataset_csv.has_more_instances():
88+
instance_arff = dataset_arff.next_instance()
89+
instance_csv = dataset_csv.next_instance()
90+
91+
assert instance_arff.x == pytest.approx(instance_csv.x)
92+
if dataset_csv.get_schema().is_classification():
93+
assert instance_arff.y_index == pytest.approx(instance_csv.y_index)
94+
elif dataset_csv.get_schema().is_regression():
95+
assert instance_arff.y_value == pytest.approx(instance_csv.y_value)
96+
97+
i += 1
98+
99+
# Both datasets should be exhausted by now.
100+
assert not dataset_arff.has_more_instances()
101+
assert not dataset_csv.has_more_instances()
102+
103+
# The datasets should be restartable.
104+
dataset_arff.restart()
105+
dataset_csv.restart()
106+
107+
# After restarting, the datasets should have more instances.
108+
assert dataset_arff.has_more_instances()
109+
assert dataset_csv.has_more_instances()
110+
111+
# The string representation of the datasets should not throw an error
112+
assert str(dataset_arff)
113+
assert str(dataset_csv)
114+
# The datasets should be the same length, and should have a size.
115+
assert isinstance(dataset_arff, Sized)
116+
assert isinstance(dataset_csv, Sized)
117+
assert len(dataset_arff) == len(dataset_csv) == i

0 commit comments

Comments (0)