-
Notifications
You must be signed in to change notification settings - Fork 36
Expand file tree
/
Copy pathdata_reader_interface.py
More file actions
37 lines (31 loc) · 1.29 KB
/
Copy pathdata_reader_interface.py
File metadata and controls
37 lines (31 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from cdisc_rules_engine.models.dataset import PandasDataset
from cdisc_rules_engine.constants import DEFAULT_ENCODING
class DataReaderInterface:
    """
    Base interface for readers that load binary data from different file
    types into pandas dataframes.

    This class only stores the reader configuration. Each reading method
    raises NotImplementedError here.
    """

    def __init__(
        self,
        dataset_implementation=PandasDataset,
        encoding: str = DEFAULT_ENCODING,
        variables_csv_path: str = None,
    ):
        """
        Keep the reader configuration on the instance.

        :param DatasetInterface dataset_implementation: The dataset type to
            return.
        :param str encoding: The encoding to use when reading files.
            Defaults to DEFAULT_ENCODING (e.g. utf-8).
        :param str variables_csv_path: Optional path to a `_variables.csv`
            declaring variable metadata.
        """
        self.dataset_implementation = dataset_implementation
        self.encoding = encoding
        self.variables_csv_path = variables_csv_path

    def read(self, data):
        """
        Read data of a specific file type and return it as a pandas
        dataframe.

        :param data: The data to read.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def from_file(self, file_path):
        """
        Not implemented in this base class.

        NOTE(review): presumably loads a dataset from the file located at
        `file_path` -- confirm against the concrete readers.

        :param file_path: Path handed to the reader.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def to_parquet(self, file_path) -> tuple[int, str]:
        """
        Return the number of rows and the path to the parquet file.

        :param file_path: Path handed to the reader.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()