Skip to content

Commit dd77a09

Browse files
committed
Cloud s3 data storage
1 parent 1756297 commit dd77a09

6 files changed

Lines changed: 55 additions & 12 deletions

File tree

app.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import certifi
88
import pandas as pd
9-
import pymongo
109
from dotenv import load_dotenv
1110
from fastapi import FastAPI, File, Request, UploadFile
1211
from fastapi.middleware.cors import CORSMiddleware
@@ -32,17 +31,12 @@
3231

3332

3433
load_dotenv()

# Fail fast with a clear message when either credential is missing;
# quote_plus(None) would otherwise raise an opaque TypeError at import time.
_raw_username = os.getenv("MONGO_DB_USERNAME")
_raw_password = os.getenv("MONGO_DB_PASSWORD")
if _raw_username is None or _raw_password is None:
    raise RuntimeError(
        "MONGO_DB_USERNAME and MONGO_DB_PASSWORD must be set in the environment"
    )

# URL-encode so special characters in the credentials don't break the URI.
username = quote_plus(_raw_username)
password = quote_plus(_raw_password)

mongo_db_url: str = f"mongodb+srv://{username}:{password}@cluster0.l5ee6dv.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"

# Single client for the app: server_api pins Stable API v1, tlsCAFile
# supplies the CA bundle (certifi) for the TLS handshake.
client = MongoClient(mongo_db_url, server_api=ServerApi("1"), tlsCAFile=ca)

database = client[DATA_INGESTION_DATABASE_NAME]

final_model/model.pkl

-11.3 MB
Binary file not shown.

final_model/preprocessor.pkl

0 Bytes
Binary file not shown.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
import os
import subprocess
2+
3+
4+
class S3Sync:
    """Thin wrapper around the AWS CLI ``aws s3 sync`` command."""

    def sync_folder_to_s3(self, folder: str, aws_bucket_url: str) -> None:
        """Mirror a local folder up to the given ``s3://`` bucket URL.

        Requires the ``aws`` CLI to be installed and configured.
        """
        # Argument list + shell=False (subprocess.run default) avoids shell
        # injection via folder/bucket values, which the previous
        # os.system(f"aws s3 sync {folder} {aws_bucket_url}") was open to.
        # check=False preserves the old best-effort behavior (exit code ignored).
        subprocess.run(["aws", "s3", "sync", folder, aws_bucket_url], check=False)

    def sync_folder_from_s3(self, folder: str, aws_bucket_url: str) -> None:
        """Mirror the given ``s3://`` bucket URL down into a local folder."""
        subprocess.run(["aws", "s3", "sync", aws_bucket_url, folder], check=False)

network_security/constant/training_pipeline/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
# Name of the serialized trained model file.
MODEL_TRAINER_TRAINED_MODEL_NAME: str = "model.pkl"
# Minimum acceptable model score; training is considered failed below this.
MODEL_TRAINER_EXPECTED_SCORE: float = 0.6
# Max allowed train/test score gap before flagging over-/under-fitting.
# NOTE(review): "FIITING" is a typo for "FITTING"; kept as-is because callers
# elsewhere reference this exact name.
MODEL_TRAINER_OVER_FIITING_UNDER_FITTING_THRESHOLD: float = 0.05


## S3 bucket names are globally unique and cannot be renamed after creation;
## create your own bucket and put its name below.
TRAINING_BUCKET_NAME: str = "demo-networksecurity"

network_security/pipeline/training_pipeline.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import sys
22

3+
from network_security.cloud.s3_syncer import S3Sync
34
from network_security.components.data_ingestion import DataIngestion
45
from network_security.components.data_transformation import DataTransformation
56
from network_security.components.data_validation import DataValidation
67
from network_security.components.model_trainer import ModelTrainer
8+
from network_security.constant.training_pipeline import TRAINING_BUCKET_NAME
79
from network_security.entity.artifact_entity import (
810
DataIngestionArtifact,
911
DataTransformationArtifact,
@@ -24,6 +26,7 @@
2426
class TrainingPipeline:
2527
    def __init__(self) -> None:
        """Create the run-wide pipeline config and the S3 sync helper."""
        self.training_pipeline_config = TrainingPipelineConfig()
        # Used by sync_artifact_dir_to_s3 / sync_saved_model_dir_to_s3
        # to push this run's outputs to the cloud.
        self.s3_sync = S3Sync()
2730

2831
def start_data_ingestion(self) -> DataIngestionArtifact:
2932
try:
@@ -43,7 +46,9 @@ def start_data_ingestion(self) -> DataIngestionArtifact:
4346
except Exception as e:
4447
raise NetworkSecurityException(e, sys)
4548

46-
def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
49+
def start_data_validation(
50+
self, data_ingestion_artifact: DataIngestionArtifact,
51+
) -> DataValidationArtifact:
4752
try:
4853
self.data_validation_config = DataValidationConfig(
4954
training_pipeline_config=self.training_pipeline_config,
@@ -61,7 +66,9 @@ def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact)
6166
except Exception as e:
6267
raise NetworkSecurityException(e, sys)
6368

64-
def start_data_transformation(self, data_validation_artifact: DataValidationArtifact) -> DataTransformationArtifact:
69+
def start_data_transformation(
70+
self, data_validation_artifact: DataValidationArtifact,
71+
) -> DataTransformationArtifact:
6572
try:
6673
self.data_transformation_config = DataTransformationConfig(
6774
training_pipeline_config=self.training_pipeline_config,
@@ -82,7 +89,9 @@ def start_data_transformation(self, data_validation_artifact: DataValidationArti
8289
except Exception as e:
8390
raise NetworkSecurityException(e, sys)
8491

85-
def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifact) -> ModelTrainerArtifact:
92+
def start_model_trainer(
93+
self, data_transformation_artifact: DataTransformationArtifact,
94+
) -> ModelTrainerArtifact:
8695
try:
8796
self.model_trainer_config: ModelTrainerConfig = ModelTrainerConfig(
8897
training_pipeline_config=self.training_pipeline_config,
@@ -102,6 +111,28 @@ def start_model_trainer(self, data_transformation_artifact: DataTransformationAr
102111
except Exception as e:
103112
raise NetworkSecurityException(e, sys)
104113

114+
## Local artifact -----> s3 bucket
115+
def sync_artifact_dir_to_s3(self) -> None:
116+
try:
117+
aws_bucket_url = f"s3://{TRAINING_BUCKET_NAME}/artifact/{self.training_pipeline_config.timestamp}"
118+
self.s3_sync.sync_folder_to_s3(
119+
folder=self.training_pipeline_config.artifact_dir,
120+
aws_bucket_url=aws_bucket_url,
121+
)
122+
except Exception as e:
123+
raise NetworkSecurityException(e, sys)
124+
125+
## Local final model -----> s3 bucket
126+
def sync_saved_model_dir_to_s3(self) -> None:
127+
try:
128+
aws_bucket_url = f"s3://{TRAINING_BUCKET_NAME}/final_model/{self.training_pipeline_config.timestamp}"
129+
self.s3_sync.sync_folder_to_s3(
130+
folder=self.training_pipeline_config.model_dir,
131+
aws_bucket_url=aws_bucket_url,
132+
)
133+
except Exception as e:
134+
raise NetworkSecurityException(e, sys)
135+
105136
def run_pipeline(self) -> ModelTrainerArtifact:
106137
try:
107138
data_ingestion_artifact = self.start_data_ingestion()
@@ -114,6 +145,8 @@ def run_pipeline(self) -> ModelTrainerArtifact:
114145
model_trainer_artifact = self.start_model_trainer(
115146
data_transformation_artifact=data_transformation_artifact,
116147
)
148+
self.sync_artifact_dir_to_s3()
149+
self.sync_saved_model_dir_to_s3()
117150

118151
return model_trainer_artifact
119152
except Exception as e:

0 commit comments

Comments
 (0)