-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathconfig.py
More file actions
112 lines (102 loc) · 2.79 KB
/
config.py
File metadata and controls
112 lines (102 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
GoPredict Configuration File
This file contains all configuration settings for the GoPredict project.
Modify these settings to customize the pipeline behavior.
"""
# Data paths
DATA_PATHS = {
'raw_train': 'data/raw/train.csv',
'raw_test': 'data/raw/test.csv',
'processed_train': 'data/processed/feature_engineered_train.csv',
'processed_test': 'data/processed/feature_engineered_test.csv',
'precipitation': 'data/external/precipitation.csv',
'gmaps_train': 'data/processed/gmapsdata/gmaps_train_data.csv',
'gmaps_test': 'data/processed/gmapsdata/gmaps_test_data.csv',
'historical_weather': 'data/processed/historical_weather.csv'
}
# Output paths
OUTPUT_PATHS = {
'models': 'saved_models',
'logs': 'logs',
'submissions': 'output',
'figures': 'notebooks/figures'
}
# Model configurations
MODEL_CONFIGS = {
'linear_regression': {
'fit_intercept': True
},
'ridge_regression': {
'alpha': 0.5
},
'lasso_regression': {
'alpha': 0.1,
'max_iter': 5000
},
'svr': {
'kernel': 'rbf',
'C': 1.0,
'gamma': 'scale'
},
'xgboost': {
'n_estimators': 500,
'learning_rate': 0.045,
'max_depth': 9,
'reg_lambda': 0.5
},
'random_forest': {
'n_estimators': 500,
'random_state': 42
},
'neural_network': {
'epochs': 150,
'batch_size': 50,
'validation_split': 0.2
}
}
# Hyperparameter tuning configurations
HYPERPARAMETER_TUNING = {
'xgboost': {
'max_depths': [7, 8, 9, 10, 11],
'learning_rates': [0.04, 0.042, 0.044, 0.046, 0.048, 0.05],
'n_estimators': 500,
'reg_lambda': 0.5
}
}
# Data preprocessing settings
PREPROCESSING = {
'test_size': 0.2,
'random_state': 1,
'normalization_ranges': {
'coordinates': (-1, 1),
'distances': (0, 10),
'precipitation': (0, 1),
'time_features': (0, 5)
}
}
# Logging configuration
LOGGING = {
'level': 'INFO',
'format': '%(asctime)s [%(levelname)s] %(message)s',
'date_format': '%Y-%m-%d %H:%M:%S'
}
# Available models
AVAILABLE_MODELS = [
'LINREG', # Linear Regression
'RIDGE', # Ridge Regression
'LASSO', # Lasso Regression
'SVR', # Support Vector Regression
'XGB', # XGBoost
'RF', # Random Forest
'NN' # Neural Network
]
# Feature columns
FEATURE_COLUMNS = {
'coordinates': ['start_lng', 'start_lat', 'end_lng', 'end_lat'],
'distances': ['manhattan', 'euclidean', 'gmaps_distance', 'gmaps_duration'],
'precipitation': ['precipitation'],
'time_features': ['weekday', 'hour'],
'flags': ['holiday', 'airport', 'citycenter', 'standalone', 'routing_error', 'short_trip']
}
# Target column
TARGET_COLUMN = 'duration'