# INFO compatibility issue import
# from keras.preprocessing.text import Tokenizer
# from keras.callbacks import EarlyStopping
# from keras.preprocessing.sequence import pad_sequences
# from keras.callbacks import TensorBoard

import datetime
import os
import pickle

import numpy as np
import pandas as pd
import supabase
import tensorflow as tf
import yfinance as yf

# import formerly used by the old log-file naming rule
# import time

# validation x > split Test Data
from sklearn.model_selection import train_test_split

# supabase from
from dotenv import load_dotenv
from supabase import create_client, Client

# recent keras import
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

# Load variables from a local .env file into the process environment.
load_dotenv()

# Supabase credentials are read from the environment; os.getenv returns None when a key is unset.
supabase_url = os.getenv('SupaBase_Url')
supabase_key = os.getenv('SupaBase_Key')
# NOTE(review): this rebinds the name `supabase`, shadowing the `supabase` module imported above.
supabase: Client = create_client(supabase_url, supabase_key)

# CSV file the prepared NASDAQ data is written to.
stock_inputdata_path = 'DataSets/upstock_nasdaq.csv'

# Locations of the saved tokenizer and trained model.
tokenizer_path = 'SaveModel/upstock_stock_tokenizer.pickle'
model_path = 'SaveModel/upstock_stock_model.keras'
model_path_h5 = 'SaveModel/upstock_stock_model.h5'  # compatibility issue .h5

# Get Yahoo Finance Data
def load_stock(stockName, startDate):
    """Download price history for one ticker and return it as a flat DataFrame.

    Args:
        stockName: Ticker symbol passed to ``yf.download`` (e.g. ``'^NDX'``).
        startDate: First date to download, passed as ``start``.

    Returns:
        The downloaded DataFrame with its index moved into a regular column and
        any two-level column index flattened to its first level, or ``None``
        when anything in the download/clean-up raised (the error is printed).
    """
    try:
        StockData = yf.download(
            stockName,  # ticker symbol
            start=startDate,
            # adjust historical prices so past and current quotes are comparable
            # (differences caused by mergers, splits and similar events)
            auto_adjust=True,
            progress=True,
        )

        # Move the index into an ordinary column so it survives merges and CSV export.
        StockData.reset_index(inplace=True)

        # Only drop the second column level when the columns really are a MultiIndex.
        if isinstance(StockData.columns, pd.MultiIndex):
            StockData.columns = StockData.columns.droplevel(1)

        return StockData

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller check for None.
        print(f'Download Fail : {e} ')
        return None

# download finance data
nasdaq_df = load_stock('^NDX', '2000-01-01')
vix_df = load_stock('^VIX', '2000-01-01')

# load_stock() returns None on failure; stop here with a clear message instead of
# failing later with an opaque error when the frames are indexed.
for symbol, frame in (('^NDX', nasdaq_df), ('^VIX', vix_df)):
    if frame is None or frame.empty:
        raise RuntimeError(f'No price data downloaded for {symbol}')

# PART rsi task
def task_RSI(data: pd.DataFrame, window: int = 14) -> pd.Series:  # 14-day basis by default
    """Compute the Relative Strength Index of ``data['Close']``.

    Uses simple rolling means of the gains and losses over ``window`` rows.

    Args:
        data: Frame with a ``'Close'`` column.
        window: Number of rows in each rolling window.

    Returns:
        A Series on the same index as ``data``. The first ``window - 1`` values
        are NaN. A window without any loss yields 100 (the division by zero
        gives inf); a window with no price movement at all yields NaN.
    """
    delta = data['Close'].diff()

    # The first diff is NaN; both comparisons are False for NaN, so it counts as
    # zero gain and zero loss. Keeping data.index lets the result align when it
    # is assigned back to a frame whose index is not 0..n-1.
    gain = pd.Series(np.where(delta > 0, delta, 0), index=data.index)
    loss = pd.Series(np.where(delta < 0, -delta, 0), index=data.index)

    roll_up = gain.rolling(window=window).mean()
    roll_down = loss.rolling(window=window).mean()

    RS = roll_up / roll_down
    RSI = 100 - (100 / (1 + RS))
    return RSI

# PART macd task
def task_MACD(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """Add MACD, Signal and Histogram columns computed from ``data['Close']``.

    Args:
        data: Frame with a ``'Close'`` column. It is modified in place.
        short_window: Span of the fast exponential moving average.
        long_window: Span of the slow exponential moving average.
        signal_window: Span of the moving average applied to the MACD line.

    Returns:
        The same ``data`` object, now carrying ``'MACD'`` (fast EMA minus slow
        EMA), ``'Signal'`` (EMA of MACD) and ``'Histogram'`` (MACD minus Signal).
    """
    close = data['Close']
    short_ema = close.ewm(span=short_window, adjust=False).mean()
    long_ema = close.ewm(span=long_window, adjust=False).mean()

    data['MACD'] = short_ema - long_ema
    data['Signal'] = data['MACD'].ewm(span=signal_window, adjust=False).mean()
    data['Histogram'] = data['MACD'] - data['Signal']
    return data

# indicator task
nasdaq_df['RSI'] = task_RSI(nasdaq_df, 14)
nasdaq_df = task_MACD(nasdaq_df)

# create label: 1 when the next row's close is higher than this row's close.
# The last row has no next-row return (NaN), so its Label is 0 rather than unknown;
# that row is removed from `merged` by the dropna() below.
nasdaq_df['Return'] = nasdaq_df['Close'].pct_change().shift(-1)
nasdaq_df['Label'] = (nasdaq_df['Return'] > 0).astype(int)

# merge nasdaq, rsi and vix on the trading date.
# load_stock() already calls reset_index(), so the frames have a positional index and
# (presumably) a 'Date' column holding the real dates. Overwriting 'Date' with the
# index would replace the dates with row numbers and merge by row position, so only
# fall back to the index when the column is missing.
for frame in (nasdaq_df, vix_df):
    if 'Date' not in frame.columns:
        frame['Date'] = frame.index
merged = pd.merge(
    nasdaq_df,
    vix_df[['Date', 'Close']],
    on='Date',
    how='inner',
    suffixes=('', '_VIX'),
)

# nan data delete task
merged = merged.dropna()

# Make sure the output directory exists before writing the CSV.
output_dir = os.path.dirname(stock_inputdata_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# NOTE(review): this writes nasdaq_df (no VIX column, NaN rows kept), not `merged` —
# confirm which frame is meant to be the saved input data.
nasdaq_df.to_csv(stock_inputdata_path, index=False)

# to_csv() returns None when given a path, so print the frame that was written.
print(nasdaq_df)

# Code used by the earlier, initial model
# Renamed to `date` for consistency with the analyst_ratings_processed dataset
# StockData.rename(columns={'Date' : 'date'}, inplace=True)

127+
128+ # text, chart, label ๋ฐ์ดํฐ ์ชผ๊ฐ๊ธฐ 0.2
129+ X_train_text , X_val_text , X_train_chart , X_val_chart , y_train , y_val = train_test_split (
130+ titles , chart , labels , test_size = 0.2 , random_state = 42
131+ )
132+
133+ # nomalization, ์ ์ฒด ๋ฐ์ดํฐ์์ ํ๋์ ํ๊ท ๊ณผ ๋ถ์ฐ์ ์ฌ์ฉ
134+ low_preprocessing = tf .keras .layers .Normalization (axis = None )
135+ low_preprocessing .adapt (np .array (merged ['Low' ]))
136+ high_preprocessing = tf .keras .layers .Normalization (axis = None )
137+ high_preprocessing .adapt (np .array (merged ['High' ]))
138+ open_preprocessing = tf .keras .layers .Normalization (axis = None )
139+ open_preprocessing .adapt (np .array (merged ['Open' ]))
140+ close_preprocessing = tf .keras .layers .Normalization (axis = None )
141+ close_preprocessing .adapt (np .array (merged ['Close' ]))
142+ volume_preprocessing = tf .keras .layers .Normalization (axis = None )
143+ volume_preprocessing .adapt (np .array (merged ['Volume' ]))
144+
145+ # nomalization result print
146+ # normalized_close = close_preprocessing(np.array(merged['Close']))
147+ # print(normalized_close.numpy())
148+
149+ # create input
150+ low_input = tf .keras .Input (shape = (1 , ), name = 'Low' )
151+ high_input = tf .keras .Input (shape = (1 , ), name = 'High' )
152+ open_input = tf .keras .Input (shape = (1 , ), name = 'Open' )
153+ close_input = tf .keras .Input (shape = (1 , ), name = 'Close' )
154+ volume_input = tf .keras .Input (shape = (1 , ), name = 'Volume' )
155+
156+ x_low = low_preprocessing (low_input )
157+ x_high = high_preprocessing (high_input )
158+ x_open = open_preprocessing (open_input )
159+ x_close = close_preprocessing (close_input )
160+ x_volume = volume_preprocessing (volume_input )
161+
162+ # using functional api
163+ model_input = tf .keras .Input (shape = (110 ,), name = 'model_input' )
164+ embedding = tf .keras .layers .Embedding (input_dim = len (tokenizer .word_index ) + 1 , output_dim = 128 )(model_input )
165+ bidirectional = tf .keras .layers .Bidirectional (tf .keras .layers .LSTM (64 , return_sequences = True ))(embedding )
166+ maxpool1d = tf .keras .layers .GlobalMaxPool1D ()(bidirectional )
167+ concat_layer = tf .keras .layers .Concatenate ()([x_low , x_high , x_open , x_close , x_volume , maxpool1d ])
168+
169+ dense1 = tf .keras .layers .Dense (64 , activation = 'relu' )(concat_layer )
170+ dropout1 = tf .keras .layers .Dropout (0.3 )(dense1 )
171+ dense2 = tf .keras .layers .Dense (64 , activation = 'relu' )(dropout1 )
172+ dropout2 = tf .keras .layers .Dropout (0.3 )(dense2 )
173+ dense3 = tf .keras .layers .Dense (32 , activation = 'relu' )(dropout2 )
174+ model_output = tf .keras .layers .Dense (1 , activation = 'sigmoid' )(dense3 )
175+
176+ model = tf .keras .Model (inputs = [model_input , low_input , high_input , open_input , close_input , volume_input ], outputs = model_output )
177+ model .compile (loss = 'binary_crossentropy' , optimizer = 'adam' , metrics = ['accuracy' ])
178+
179+ train_inputs = {
180+ 'model_input' : X_train_text ,
181+ 'Low' : np .array (X_train_chart ['Low' ]).reshape (- 1 , 1 ),
182+ 'High' : np .array (X_train_chart ['High' ]).reshape (- 1 , 1 ),
183+ 'Open' : np .array (X_train_chart ['Open' ]).reshape (- 1 , 1 ),
184+ 'Close' : np .array (X_train_chart ['Close' ]).reshape (- 1 , 1 ),
185+ 'Volume' : np .array (X_train_chart ['Volume' ]).reshape (- 1 , 1 ),
186+ }
187+
188+ val_inputs = {
189+ 'model_input' : X_val_text ,
190+ 'Low' : np .array (X_val_chart ['Low' ]).reshape (- 1 , 1 ),
191+ 'High' : np .array (X_val_chart ['High' ]).reshape (- 1 , 1 ),
192+ 'Open' : np .array (X_val_chart ['Open' ]).reshape (- 1 , 1 ),
193+ 'Close' : np .array (X_val_chart ['Close' ]).reshape (- 1 , 1 ),
194+ 'Volume' : np .array (X_val_chart ['Volume' ]).reshape (- 1 , 1 ),
195+ }
196+
197+ # Part callback | tensorboard --logdir=LogFile/
198+ # time.time() ํฐ ์ซ์๊ฐ ์ต์
199+ #TODO ๋์ค์ ๋ชจ๋ธ ๋ฅ๋ฌ๋ํ ๋ ์ ์ฉ์ํฌ ๊ฒ : datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
200+ # tensorboard = TensorBoard(log_dir='LogFile/Log{}'.format('_Model_' + str(int(time.time()))) )
201+ tensorboard = TensorBoard (log_dir = 'LogFile/Log{}' .format ('Stock_Model_' + datetime .datetime .now ().strftime ("%Y-%m-%d_%H-%M" )) )
202+ early_stop = EarlyStopping (monitor = 'val_loss' , patience = 3 , restore_best_weights = True , verbose = 1 )
203+
204+ # train
205+ model .fit (train_inputs , y_train , validation_data = (val_inputs , y_val ), batch_size = 32 , epochs = 50 , callbacks = [early_stop , tensorboard ])
206+ model .summary ()
207+ model .save (model_path )
208+ # ๋น์์ฉ
209+ model .save (model_path_h5 )
0 commit comments