Skip to content

Commit 73c5605

Browse files
committed
EDA Phase
1 parent db68706 commit 73c5605

8 files changed

Lines changed: 5 additions & 5 deletions

File tree

streamline/p1_eda_processing/dataprocess.py renamed to streamline/p1_data_process/data_process.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
from streamline.utils.job import Job
1919
from streamline.utils.dataset import Dataset
20-
from streamline.dataprep.kfold_partitioning import KFoldPartitioner
20+
from streamline.p1_eda_processing.utils.kfold_partitioning import KFoldPartitioner
2121
from scipy.stats import chi2_contingency, mannwhitneyu, skew, kurtosis, f_oneway, spearmanr
2222
import seaborn as sns
2323
import warnings
@@ -37,7 +37,7 @@ def __init__(self, dataset, experiment_path, ignore_features=None,
3737
categorical_features=None, quantitative_features=None, exclude_eda_output=None,
3838
categorical_cutoff=10, sig_cutoff=0.05, featureeng_missingness=0.5,
3939
cleaning_missingness=0.5, correlation_removal_threshold=1.0,
40-
partition_method="Stratified", n_splits=10,
40+
partition_method="Stratified", n_splits=10, one_hot_encoding=True,
4141
random_state=None, show_plots=False):
4242
"""
4343
Initialization function for Exploratory Data Analysis Class. Parameters are defined below.
File renamed without changes.
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from streamline.utils.cluster import get_cluster
1414

1515

16-
class DataProcessRunner:
16+
class P1Runner:
1717
"""
1818
Description: Phase 1 of STREAMLINE - This 'Main' script manages Phase 1 run parameters, \
1919
updates the metadata file (with user specified run parameters across pipeline run) \

streamline/p1_eda_processing/utils/kfold_partitioning.py renamed to streamline/p1_data_process/utils/kfold_partitioning.py

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

streamline/p2_imputation_scaling/p2_runner.py renamed to streamline/p2_impute_scale/p2_runner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@
55
import dask
66
from pathlib import Path
77
from joblib import Parallel, delayed
8-
from streamline.dataprep.scale_and_impute import ScaleAndImpute
8+
from streamline.p2_imputation_scaling.imputaion import ImputeAndScale
99
from streamline.utils.runners import runner_fn, num_cores
1010
from streamline.utils.cluster import get_cluster
1111

1212

13-
class ImputationRunner:
13+
class P2Runner:
1414
"""
1515
Runner class for Data Processing Jobs of CV Splits
1616
"""

0 commit comments

Comments
 (0)