Skip to content

Commit 72268a1

Browse files
committed
ETL pipeline completed with MongoDB
1 parent 5633527 commit 72268a1

6 files changed

Lines changed: 108 additions & 39 deletions

File tree

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,7 @@ wheels/
1313
.env
1414

1515
# System-generated files
16-
.DS_Store
16+
.DS_Store
17+
18+
# Log files
19+
logs/

push_data.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import json
2+
import os
3+
import sys
4+
from urllib.parse import quote_plus
5+
6+
import certifi
7+
import pandas as pd
8+
import pymongo
9+
from dotenv import load_dotenv
10+
11+
from network_security.exception.exception import NetworkSecurityException
12+
from network_security.logging.logger import logging
13+
14+
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
username = os.getenv("MONGO_DB_USERNAME")
password = os.getenv("MONGO_DB_PASSWORD")

# Fail with an explicit message instead of the unrelated TypeError that
# quote_plus(None) raises when a variable is not set.
if username is None or password is None:
    raise RuntimeError(
        "MONGO_DB_USERNAME and MONGO_DB_PASSWORD must be set in the environment or .env file"
    )

# Percent-encode the credentials so reserved characters (e.g. '@', ':', '/')
# cannot break the connection string.
username = quote_plus(username)
password = quote_plus(password)

# NOTE: the URL embeds the credentials, so it is deliberately not printed.
MONGO_DB_URL: str = f"mongodb+srv://{username}:{password}@cluster0.l5ee6dv.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"

# Path to certifi's CA bundle.
ca = certifi.where()
25+
26+
27+
class NetworkSecurityExtract:
    """Extract rows from a CSV file and load them into a MongoDB collection."""

    def __init__(self) -> None:
        """Create an extractor; no state is set until a method is called."""

    def csv_to_json_convertor(self, file_path: str) -> list:
        """Read a CSV file and return its rows as a list of dicts.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            One ``{column: value}`` dict per CSV row, in file order. An empty
            list is returned when the file has a header but no rows.

        Raises:
            NetworkSecurityException: If reading or converting the file fails.
        """
        try:
            data = pd.read_csv(file_path)
            # orient="records" serialises row by row, so every column keeps its
            # own dtype. Transposing the frame first would coerce integer
            # columns to float whenever the frame mixes int and float columns.
            return json.loads(data.to_json(orient="records"))
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def insert_data_mongodb(self, records: list, database: str, collection: str) -> int:
        """Insert ``records`` into ``database.collection`` and return the count.

        Args:
            records: Documents (dicts) to insert.
            database: Name of the target MongoDB database.
            collection: Name of the target collection inside ``database``.

        Returns:
            Number of documents inserted; 0 when ``records`` is empty.

        Raises:
            NetworkSecurityException: If connecting or inserting fails.
        """
        try:
            self.records = records
            if not records:
                # insert_many refuses an empty batch; there is nothing to write.
                return 0

            self.mongo_client = pymongo.MongoClient(MONGO_DB_URL)
            # Use the name parameters directly rather than rebinding the same
            # attribute from a name (str) to a driver handle.
            self.database = self.mongo_client[database]
            self.collection = self.database[collection]

            result = self.collection.insert_many(records)
            # Count what the driver reports as inserted, not the input length.
            return len(result.inserted_ids)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e
57+
58+
59+
if __name__ == "__main__":
    # Manual ETL run: read the phishing CSV, echo the parsed rows, load them
    # into MongoDB, then echo how many documents were written.
    extractor = NetworkSecurityExtract()
    rows = extractor.csv_to_json_convertor(file_path="Network_Data/phisingData.csv")
    print(rows)
    inserted_count = extractor.insert_data_mongodb(rows, "TEST_DB", "NetworkData")
    print(inserted_count)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies = [
88
"certifi>=2025.6.15",
99
"numpy>=2.3.0",
1010
"pandas>=2.3.0",
11-
"pymongo>=4.13.2",
11+
"pymongo[srv]>=3.12.0",
1212
"python-dotenv>=1.1.0",
1313
"setuptools>=80.9.0",
1414
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ pandas
33
numpy
44
pymongo
55
certifi
6+
pymongo[srv]==3.12
67

78
# -e .

test_mongodb.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
from urllib.parse import quote_plus
3+
4+
from dotenv import load_dotenv
5+
from pymongo.mongo_client import MongoClient
6+
from pymongo.server_api import ServerApi
7+
8+
# Read the Atlas credentials from the environment (a local .env is honoured)
# and percent-encode them for use inside the connection string.
load_dotenv()
username = quote_plus(os.getenv("MONGO_DB_USERNAME"))
password = quote_plus(os.getenv("MONGO_DB_PASSWORD"))

uri: str = f"mongodb+srv://{username}:{password}@cluster0.l5ee6dv.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"

# Build the client with the server API pinned to ServerApi("1").
client = MongoClient(uri, server_api=ServerApi("1"))

# Ping the deployment to confirm the connection works; any failure is only
# printed, not re-raised.
try:
    client.admin.command("ping")
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as exc:
    print(exc)

uv.lock

Lines changed: 10 additions & 37 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)