Skip to content

Commit 09f6199

Browse files
committed
Fixed workflow time
1 parent 0892643 commit 09f6199

4 files changed

Lines changed: 216 additions & 3 deletions

File tree

.github/workflows/sentiment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Sentiment Pipeline
22

33
on:
44
schedule:
5-
- cron: "0 15-23,0-14 * * *" # korean time
5+
- cron: "0 */3 * * *" # every 3 hours
66
workflow_dispatch: # manual run button
77

88
jobs:

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,8 @@ LogFile/*
191191
last python file
192192

193193
# docker
194-
Docker/
194+
Docker/
195+
196+
# test python
197+
test.py
198+
test2.py

upstock-sentiment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def load_pickle(path, description):
180180
print(f'finviz news parse fail : {e}')
181181

182182
# string to date time
183-
news_df['parsed_date'] = pd.to_datetime(news_df['Date'], errors='coerce') # specify format
183+
news_df['parsed_date'] = pd.to_datetime(news_df['Date'], errors='coerce') # BUG specify format
184184

185185
today = datetime.date.today() # today
186186
today_news = news_df[news_df['parsed_date'].dt.date == today] # today == parse data date

upstock-stock.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# INFO compatibility issue import
2+
# from keras.preprocessing.text import Tokenizer
3+
# from keras.callbacks import EarlyStopping
4+
# from keras.preprocessing.sequence import pad_sequences
5+
# from keras.callbacks import TensorBoard
6+
7+
import tensorflow as tf
8+
import numpy as np
9+
import pandas as pd
10+
import pickle
11+
import os
12+
import datetime
13+
import supabase
14+
import yfinance as yf
15+
16+
# import used by the old log-file naming rule
17+
# import time
18+
19+
# validation x > split Test Data
20+
from sklearn.model_selection import train_test_split
21+
22+
# supabase from
23+
from dotenv import load_dotenv
24+
from supabase import create_client, Client
25+
26+
# recent keras import
27+
from tensorflow.keras.models import load_model
28+
from tensorflow.keras.preprocessing.text import Tokenizer
29+
from tensorflow.keras.preprocessing.sequence import pad_sequences
30+
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
31+
32+
# Load variables from a .env file (if present) before reading the Supabase settings.
load_dotenv()

supabase_url = os.environ.get('SupaBase_Url')
supabase_key = os.environ.get('SupaBase_Key')
# NOTE(review): this rebinds the name `supabase`, hiding the `supabase` module
# imported at the top of the file for the rest of the script.
supabase: Client = create_client(supabase_url, supabase_key)

# CSV file the NASDAQ feature table is written to.
stock_inputdata_path = 'DataSets/upstock_nasdaq.csv'

# Locations of the saved tokenizer and model artifacts.
tokenizer_path = 'SaveModel/upstock_stock_tokenizer.pickle'
model_path = 'SaveModel/upstock_stock_model.keras'
model_path_h5 = 'SaveModel/upstock_stock_model.h5'  # compatibility issue .h5
43+
44+
# Get Yahoo Finance Data
45+
def load_stock(stockName, startDate):
46+
try:
47+
StockData = yf.download(
48+
stockName, # stock number
49+
start= startDate,
50+
auto_adjust=True, # ๊ณผ๊ฑฐ ์ฃผ๊ฐ€์™€ ํ˜„์žฌ ์ฃผ๊ฐ€์˜ ์ฐจ์ด์ ์„ ์™„ํ™” ๋ณ‘ํ•ฉ ํ˜น์€ ๋ถ„ํ•  ๊ทธ๋ฆฌ๊ณ  ์ƒ์Šน์œผ๋กœ ์ธํ•œ ์ฐจ์ด
51+
progress=True
52+
)
53+
54+
StockData.reset_index(inplace=True)
55+
56+
# ๋งŒ์•ฝ MultiIndex ์ปฌ๋Ÿผ์ผ ๊ฒฝ์šฐ๋งŒ droplevel ์ˆ˜ํ–‰
57+
if isinstance(StockData.columns, pd.MultiIndex):
58+
StockData.columns = StockData.columns.droplevel(1)
59+
60+
return StockData
61+
62+
except Exception as e:
63+
print(f'Download Fail : {e}')
64+
return None
65+
66+
# download finance data
nasdaq_df = load_stock('^NDX', '2000-01-01')
vix_df = load_stock('^VIX', '2000-01-01')

# load_stock() returns None on failure; stop here with a clear message rather
# than failing later with an opaque TypeError when the frames are indexed.
if nasdaq_df is None or vix_df is None:
    raise SystemExit('Download Fail : ^NDX / ^VIX price history is unavailable')
69+
70+
# PART rsi task
71+
def task_RSI(data: pd.DataFrame, window: int = 14) -> pd.Series: # 14์ผ ๊ธฐ์ค€
72+
delta = data['Close'].diff()
73+
gain = np.where(delta > 0, delta, 0)
74+
loss = np.where(delta < 0, -delta, 0)
75+
76+
roll_up = pd.Series(gain).rolling(window=window).mean()
77+
roll_down = pd.Series(loss).rolling(window=window).mean()
78+
79+
RS = roll_up / roll_down
80+
RSI = 100 - (100 / (1 + RS))
81+
return RSI
82+
83+
# PART macd task
84+
def task_MACD(data: pd.DataFrame, short_window=12, long_window=26, signal_window=9):
85+
short_ema = data['Close'].ewm(span=short_window, adjust=False).mean()
86+
long_ema = data['Close'].ewm(span=long_window, adjust=False).mean()
87+
88+
data['MACD'] = short_ema - long_ema
89+
data['Signal'] = data['MACD'].ewm(span=signal_window, adjust=False).mean()
90+
data['Histogram'] = data['MACD'] - data['Signal']
91+
return data
92+
93+
# indicator task
nasdaq_df['RSI'] = task_RSI(nasdaq_df, 14)
nasdaq_df = task_MACD(nasdaq_df)

# create label: Return is the next row's percentage change in Close;
# Label is 1 when that change is positive, otherwise 0.
nasdaq_df['Return'] = nasdaq_df['Close'].pct_change().shift(-1)
nasdaq_df['Label'] = (nasdaq_df['Return'] > 0).astype(int)

# merge nasdaq, rsi and vix
# load_stock() has already moved the former index into a regular column via
# reset_index(), so the frames are joined on 'Date' as-is. Re-assigning
# 'Date' from the (now 0..n-1) index would replace the dates with row numbers
# and make the merge pair rows by position instead of by trading day.
# NOTE(review): assumes the downloaded index is named 'Date' — confirm.
merged = pd.merge(
    nasdaq_df,
    vix_df[['Date', 'Close']],
    on='Date',
    how='inner',
    suffixes=('', '_VIX'),
)

# nan data delete task
merged = merged.dropna()

# to_csv() returns None when given a path, so write first and print the frame.
# NOTE(review): the un-merged nasdaq_df is what gets saved (as in the
# original); confirm whether `merged` was intended instead.
output_dir = os.path.dirname(stock_inputdata_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
nasdaq_df.to_csv(stock_inputdata_path, index=False)

print(nasdaq_df)
112+
113+
# code used by the earlier, initial model
114+
# rename to 'date' for consistency with analyst_ratings_processed
115+
# StockData.rename(columns={'Date' : 'date'}, inplace=True)
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+
127+
128+
# Split the text, chart and label data, holding out 20% for validation.
# NOTE(review): `titles`, `chart` and `labels` are not defined in the visible
# part of this file — confirm where they are supposed to come from.
_split_parts = train_test_split(titles, chart, labels, test_size=0.2, random_state=42)
X_train_text, X_val_text, X_train_chart, X_val_chart, y_train, y_val = _split_parts
132+
133+
# Normalization: each layer uses one mean and one variance over the whole column.
def _fit_scalar_normalizer(column):
    """Return a Normalization(axis=None) layer adapted to merged[column]."""
    layer = tf.keras.layers.Normalization(axis=None)
    layer.adapt(np.array(merged[column]))
    return layer


low_preprocessing = _fit_scalar_normalizer('Low')
high_preprocessing = _fit_scalar_normalizer('High')
open_preprocessing = _fit_scalar_normalizer('Open')
close_preprocessing = _fit_scalar_normalizer('Close')
volume_preprocessing = _fit_scalar_normalizer('Volume')

# Print the normalization result (debug aid, disabled):
# normalized_close = close_preprocessing(np.array(merged['Close']))
# print(normalized_close.numpy())
148+
149+
# create input: one single-value Input per chart column, named after the column
def _scalar_input(name):
    """Return a Keras Input of shape (1,) with the given name."""
    return tf.keras.Input(shape=(1, ), name=name)


low_input = _scalar_input('Low')
high_input = _scalar_input('High')
open_input = _scalar_input('Open')
close_input = _scalar_input('Close')
volume_input = _scalar_input('Volume')

# Route every raw input through its own adapted normalizer.
x_low = low_preprocessing(low_input)
x_high = high_preprocessing(high_input)
x_open = open_preprocessing(open_input)
x_close = close_preprocessing(close_input)
x_volume = volume_preprocessing(volume_input)
161+
162+
# using functional api
keras_layers = tf.keras.layers

# Text branch: token ids -> embedding -> bidirectional LSTM -> global max pool.
# NOTE(review): `tokenizer` is not defined in the visible part of this file — confirm.
model_input = tf.keras.Input(shape=(110,), name='model_input')
vocabulary_size = len(tokenizer.word_index) + 1
embedding = keras_layers.Embedding(input_dim=vocabulary_size, output_dim=128)(model_input)
sequence_encoder = keras_layers.LSTM(64, return_sequences=True)
bidirectional = keras_layers.Bidirectional(sequence_encoder)(embedding)
maxpool1d = keras_layers.GlobalMaxPool1D()(bidirectional)

# Join the five normalized chart values with the pooled text features.
concat_layer = keras_layers.Concatenate()(
    [x_low, x_high, x_open, x_close, x_volume, maxpool1d]
)

# Dense head ending in a single sigmoid unit.
dense1 = keras_layers.Dense(64, activation='relu')(concat_layer)
dropout1 = keras_layers.Dropout(0.3)(dense1)
dense2 = keras_layers.Dense(64, activation='relu')(dropout1)
dropout2 = keras_layers.Dropout(0.3)(dense2)
dense3 = keras_layers.Dense(32, activation='relu')(dropout2)
model_output = keras_layers.Dense(1, activation='sigmoid')(dense3)

model = tf.keras.Model(
    inputs=[model_input, low_input, high_input, open_input, close_input, volume_input],
    outputs=model_output,
)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
178+
179+
# Chart columns fed to the model, in the order the original dicts listed them.
_CHART_COLUMNS = ('Low', 'High', 'Open', 'Close', 'Volume')


def _model_inputs(text, chart):
    """Build the feed dict keyed by Input name for one data split."""
    feeds = {'model_input': text}
    for column in _CHART_COLUMNS:
        # Each chart column becomes an (n, 1) array.
        feeds[column] = np.array(chart[column]).reshape(-1, 1)
    return feeds


train_inputs = _model_inputs(X_train_text, X_train_chart)

val_inputs = _model_inputs(X_val_text, X_val_chart)
196+
197+
# Part callback | tensorboard --logdir=LogFile/
# The previous naming used int(time.time()), where a larger number is newer:
# tensorboard = TensorBoard(log_dir='LogFile/Log{}'.format('_Model_' + str(int(time.time()))) )
# TODO: apply this later when training the model : datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
run_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
tensorboard = TensorBoard(log_dir='LogFile/Log{}'.format('Stock_Model_' + run_stamp))
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# train
model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=32,
    epochs=50,
    callbacks=[early_stop, tensorboard],
)
model.summary()
model.save(model_path)
# emergency backup copy
model.save(model_path_h5)

0 commit comments

Comments
(0)