Skip to content

Commit 309c367

Browse files
committed
Feat : Refactoring 2
1 parent 1063203 commit 309c367

9 files changed

Lines changed: 144 additions & 117 deletions

File tree

.gitignore

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,4 @@ LogFile/*
191191
Docker/*
192192

193193
# test python
194-
backup.py
195-
196-
백업/*upstock-sentiment.py
194+
backup.py

main.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
1-
# INFO compatibility issue import
1+
"""
2+
Main entry point
3+
"""
4+
# compatibility issue import
25
# from keras.preprocessing.text import Tokenizer
36
# from keras.callbacks import EarlyStopping
47
# from keras.preprocessing.sequence import pad_sequences
58
# from keras.callbacks import TensorBoard
69

710
# pip github connect | pip freeze > piplist.txt
8-
from upstock.storage.model_downloader import download_model_file
11+
from upstock.storage.downloader import download_model_file
912
from upstock.builders.pipeline import run_pipeline
1013

1114
if __name__ == '__main__':

upstock/builders/pipeline.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,19 @@
1+
"""
2+
training or predict
3+
"""
4+
15
import os
2-
from upstock.config import model_path, tokenizer_path
3-
from upstock.nodes.predict_node import run_predict
4-
from upstock.nodes.train_node import run_trian
6+
from upstock.config import paths
7+
from upstock.nodes.predict import run_predict
8+
from upstock.nodes.train import run_train
59

6-
# save model exists => predict
7-
# save model not exists => DeepLearning
810
def run_pipeline():
9-
if os.path.exists(model_path) and os.path.exists(tokenizer_path):
11+
"""
12+
save model exists => predict
13+
save model not exists => DeepLearning
14+
"""
15+
if os.path.exists(paths.model) and os.path.exists(paths.tokenizer):
1016
run_predict()
1117
else:
1218
print('Sentiment Model and Tokenizer is not exists, Start DeepLearning')
13-
run_trian()
19+
run_train()

upstock/config.py

Lines changed: 30 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -6,67 +6,39 @@
66
"""
77

88
import os
9+
import logging
10+
import sys
11+
912
from dotenv import load_dotenv
1013
from supabase import create_client, Client
1114
from dataclasses import dataclass # create data class
1215

13-
@dataclass(frozen=True) # immutable
14-
class Config:
15-
# online
16-
supabase_url: str
17-
supabase_key: str
18-
bucket: str = 'sentiemnt_file' # supabase bucket name
19-
tbl_sentiment: str = 'news_sentiment' # table name
20-
source: str = 'finviz'
21-
tz_target: str = 'Asia/Seoul'
22-
23-
# local path
24-
sentiment_path: str = 'DataSets/upstock-sentiment-data.csv' # sentiment data
25-
tokenizer_path: str = 'SaveModel/upstock_sentiment_tokenizer.pickle'
26-
model_path: str = 'SaveModel/upstock_sentiment_model.keras'
27-
model_path_h5: str = 'SaveModel/upstock_sentiment_model.h5' # compatibility issue .h5
28-
29-
# 논문 근거
30-
model_pkl_path: str = 'SaveModel/upstock_sentiment_pkl.pkl' # import matplotlib.pyplot as plt
31-
32-
def load_config() -> Config:
33-
load_dotenv() # env load
34-
url = os.getenv('SUPABASE_URL')
35-
key = os.getenv('SUPABASE_KEY')
36-
if not url or not key:
37-
raise RuntimeError('Supabase URL, KEY Error')
38-
return Config(supabase_url=url, supabase_key=key)
16+
# env load
17+
load_dotenv()
3918

40-
# TEST
41-
# model.summary()
19+
logging.basicConfig(
20+
level=logging.INFO,
21+
format='%(asctime)s [%(levelname)s] %(name)s - %(message)s',
22+
stream=sys.stdout
23+
)
4224

43-
# 데이터셋 null 개수 출력
44-
# print(news_data.isnull().sum())
45-
# RESULT
46-
# Text 0
47-
# Sentiment 0
48-
# dtype: int64
49-
50-
# 길이 열 추가해서 카운트하고 싶으면 Added lenght column
51-
# sentiment_data['lenght'] = sentiment_data['Text'].str.len()
52-
# print(sentiment_data['Text'].str.len().max())
53-
# RESULT 154
54-
55-
# 데이터셋 길이 통계 요약 출력
56-
# print(sentiment_data['Text'].str.len().describe())
57-
# RESULT
58-
# count 5791.000000
59-
# mean 78.507857
60-
# std 37.409135
61-
# min 6.000000
62-
# 25% 48.000000
63-
# 50% 79.000000
64-
# 75% 106.000000
65-
# max 154.000000
66-
# Name: Text, dtype: float64
67-
68-
# RESULT 데이터셋 maxlen 95
69-
# lengths = sentiment_data['Text'].str.len()
70-
# print(lengths.quantile(0.90)) # 133.0
71-
# print(lengths.quantile(0.95)) # 141.0
72-
# exit()
25+
@dataclass(frozen=True) # 불변
26+
class SupabaseConfig:
27+
url: str = os.getenv('SUPABASE_URL')
28+
key: str = os.getenv('SUPABASE_KEY')
29+
30+
@property
31+
def client(self) -> Client:
32+
return create_client(self.url, self.key)
33+
34+
@dataclass(frozen=True)
35+
class PathConfig:
36+
sentiment_data: str = 'DataSets/upstock-sentiment-data.csv' # sentiment data
37+
tokenizer: str = 'SaveModel/upstock_sentiment_tokenizer.pickle'
38+
model: str = 'SaveModel/upstock_sentiment_model.keras'
39+
model_h5: str = 'SaveModel/upstock_sentiment_model.h5'
40+
history: str = 'SaveModel/upstock_sentiment_pkl.pkl' # import matplotlib.pyplot as plt
41+
42+
# export
43+
supabase = SupabaseConfig().client
44+
paths = PathConfig()

upstock/integrations/supabase_client.py

Whitespace-only changes.

upstock/models/artifacts.py

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,57 @@
1+
"""
2+
loading models, tokenizer, dataset
3+
4+
*using logging
5+
"""
6+
17
import pickle
28
import os
39
import pandas as pd
10+
import logging
411

512
from tensorflow.keras.models import load_model
613

7-
# load file < predict task에는 필요없음
8-
def load_file(path, description):
14+
logger = logging.getLogger(__name__)
15+
16+
def load_csv(path: str, description: str = 'CSV'):
917
if os.path.exists(path):
1018
try:
11-
print(f'{description} load complete')
19+
logger.info(f'{description} loaded complete')
1220
return pd.read_csv(path)
1321
except Exception as e:
14-
print(f'{description} exists but, load fail {e}')
22+
logger.error(f'{description} exists but failed to load {e}')
1523
return None
1624
else:
17-
print(f'{description} not exists')
25+
logger.warning(f'{description} not exists {path}')
1826
return None
1927

2028
# load save model : https://www.tensorflow.org/guide/keras/save_and_serialize?hl=ko#savedmodel_%ED%98%95%EC%8B%9D
2129
# model load 지침
22-
def check_all_model(path, description):
30+
def load_model_safe(path: str, description: str = 'Model'):
2331
if os.path.exists(path):
2432
try:
2533
model = load_model(path)
26-
print(f'{description} load complete')
34+
logger.info(f'{description} loaded complete')
2735
return model
2836
except Exception as e:
29-
print(f'{description} load fail : {e}')
37+
logger.error(f'{description} exists but failed to load {e}')
3038
return None
3139
else:
32-
print(f'{description} not exists')
40+
logger.warning(f'{description} not exists {path}')
3341
return None
3442

3543
# load tokenizer => TextVectorization로 변경 가능성 유의
36-
def load_pickle(path, description):
44+
def load_pickle(path: str, description: str = 'Pickle'):
3745
if os.path.exists(path):
3846
try:
3947
with open(path, 'rb') as f:
4048
tokenizer = pickle.load(f)
41-
print(f'{description} load complete')
49+
logger.info(f'{description} loaded complete')
4250
return tokenizer
4351
except Exception as e:
44-
print(f'{description} load fail : {e}')
52+
logger.error(f'{description} exsists but failed to load {e}')
4553
return None
4654
else:
47-
print(f'{description} not exists')
55+
logger.warning(f'{description} not exists {path}')
4856
return None
57+

upstock/nodes/predict.py

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,31 @@
1+
"""
2+
Predict node
3+
4+
fetches news, runs sentiment model, uploads results to supabase
5+
"""
6+
17
import hashlib # hash
28
import datetime
39
import pandas as pd
10+
import logging
11+
412
from tensorflow.keras.preprocessing.sequence import pad_sequences
513
from finvizfinance.news import News
14+
from zoneinfo import ZoneInfo
615

7-
from upstock.config import model_path, tokenizer_path, supabase
8-
from upstock.models.artifacts import check_all_model, load_pickle
16+
from upstock.config import paths, supabase
17+
from upstock.models.artifacts import load_model_safe, load_pickle
18+
19+
logger = logging.getLogger(__name__)
920

1021
def run_predict():
1122

12-
model = check_all_model(model_path, 'Sentiment Model .keras version')
13-
tokenizer = load_pickle(tokenizer_path, 'Tokenizer')
23+
model = load_model_safe(paths.model, 'Sentiment Model (.keras) version')
24+
tokenizer = load_pickle(paths.tokenizer, 'Tokenizer')
25+
26+
if model is None or tokenizer is None:
27+
logger.error('model or tokenizer not available') # flow task check
28+
return
1429

1530
# def load_news():
1631
# fnews = News()
@@ -26,14 +41,18 @@ def run_predict():
2641
all_news = fnews.get_news()
2742
news_df = all_news['news']
2843
except Exception as e:
29-
print(f'finviz news parse fail : {e}')
30-
31-
# string to date time
32-
news_df['parsed_date'] = pd.to_datetime(news_df['Date'], errors='coerce') # BUG format 지정
44+
logger.error(f'finviz news parse failed : {e}')
45+
return
3346

47+
# print(news_df['Date'].head(10))
3448
today = datetime.date.today() # today
35-
today_news = news_df[news_df['parsed_date'].dt.date == today] # today == parse data date
3649

50+
# string to date time
51+
news_df['parsed_date'] = pd.to_datetime(
52+
news_df['Date'],
53+
errors='coerce'
54+
)
55+
today_news = news_df[news_df['parsed_date'].dt.date == today] # today == parse data date
3756
predict_texts = today_news['Title'].tolist() # insert pare data
3857

3958
# past predict data
@@ -47,7 +66,6 @@ def run_predict():
4766

4867
predict_data = tokenizer.texts_to_sequences(predict_texts)
4968
predict_data = pad_sequences(predict_data, maxlen=141) # str.len result 95% 141
50-
5169
prediction = model.predict(predict_data)
5270
# print(prediction)
5371

@@ -65,19 +83,19 @@ def run_predict():
6583

6684
for text, percent in zip(predict_texts, prediction):
6785
# 강한 긍정과 강한 부정만 끌어다가 쓰기
68-
score = float(percent[0])
86+
score = float(percent[0]) # float 필수
6987
if score >= 0.8:
7088
label = "positive"
7189
elif score <= 0.3:
7290
label = "negative"
7391
else:
7492
continue
7593

76-
print(f"[{label}] {text}\n : {score:.2f}\n") # :.2f
94+
logger.info(f"[{label}] {text} : {score:.2f}\n") # :.2f
7795

7896
sb_result.append({
7997
'text': text,
80-
'percent': score, # BUG type error float32
98+
'percent': score,
8199
'label': label,
82100
'source': 'finviz',
83101
'run_at': datetime.datetime.now(datetime.timezone.utc).isoformat(), # utc time
@@ -91,11 +109,11 @@ def run_predict():
91109
.upsert(sb_result, on_conflict='hash')
92110
.execute()
93111
)
94-
print(f'supbase upload complete : {len(response.data)}')
112+
logger.info(f'supbase upload complete : {len(response.data)} rows')
95113

96114
except Exception as e:
97-
print(f'supabase upload fail : {e}')
115+
logger.error(f'supabase upload failed : {e}')
98116

99117
else:
100-
print('upload data not exist')
118+
logger.warning('upload data not exist')
101119

0 commit comments

Comments
 (0)