1+ from itertools import count , pairwise
2+
13import numpy as np
24import pandas as pd
35from filecache import filecache
46
5- from utils import HTTP , http_get_with_retry , mark_days_since , mark_highs_lows
7+ from utils import HTTP , mark_days_since , mark_highs_lows
8+
# Number of blocks between two consecutive block-reward halvings.
HALVING_INTERVAL = 210_000
# Block reward in BTC at height 0, i.e. before any halving has happened.
GENESIS_BLOCK_REWARD = 50.0
# Block count assumed for one full day (24 * 6, one block every ten minutes);
# used when extrapolating block heights and when rescaling a partial day.
BLOCKS_PER_DAY = 144
12+
13+
def fetch_block_halving() -> list[tuple[pd.Timestamp, int, float]]:
    """
    Fetch Bitcoin halving data by probing raw blocks at halving heights.

    Requests the block at every multiple of ``HALVING_INTERVAL``, starting with
    height 0, and stops at the first height for which the API answers 404
    (presumably a height that has not been mined yet — confirm against the API).

    Returns:
        One ``(date, block_height, block_reward)`` tuple per block found, in
        ascending height order. ``date`` is the block's ``time`` field floored
        to the day (timezone-naive); ``block_reward`` is
        ``GENESIS_BLOCK_REWARD`` halved once per preceding entry.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a non-404 error
        response, and ``KeyError`` if the payload has no ``time`` field.
    """
    halving_data: list[tuple[pd.Timestamp, int, float]] = []

    # Open-ended on purpose: the loop ends when the API reports a missing block.
    for halving_index in count():
        block_height = halving_index * HALVING_INTERVAL
        block_reward = GENESIS_BLOCK_REWARD / (2 ** halving_index)

        response = HTTP.get(f'https://blockchain.info/rawblock/{block_height}')
        if response.status_code == 404:
            break

        # Any other error status is a real failure, not "end of schedule".
        response.raise_for_status()
        block_time = response.json()['time']
        block_date = pd.to_datetime(block_time, unit='s').tz_localize(None).floor('d')
        halving_data.append((block_date, block_height, block_reward))

    return halving_data
1634
1735
def fetch_blockchain_data() -> pd.DataFrame:
    """
    Fetches historical Bitcoin blockchain data from Blockchain.com API.
    Uses miners-revenue chart for USD mining revenue.
    Block heights and BTC generation are estimated from the halving schedule
    returned by ``fetch_block_halving``; they are not read from the chain
    day by day.

    Returns:
        DataFrame sorted by Date with the columns Date, TotalBlocks,
        MinBlockID, MaxBlockID, TotalGeneration, TotalGenerationUSD and
        Halving. TotalGeneration is expressed in satoshis; Halving is 1 on
        the row whose block range crosses a halving height and 0 elsewhere.

    Raises:
        RuntimeError: if no halving reference point could be fetched.
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    halving_data = fetch_block_halving()
    if not halving_data:
        # Without at least one reference point no block height can be estimated.
        raise RuntimeError('No block halving data available to estimate block heights')

    first_date, first_height, _ = halving_data[0]
    last_date, last_height, _ = halving_data[-1]

    # Fetch mining revenue from Blockchain.com
    response = HTTP.get(
        'https://api.blockchain.info/charts/miners-revenue',
        params={
            'timespan': 'all',
            'format': 'json',
            'sampled': 'false',
        },
    )

    # Create DataFrame from mining revenue data
    response.raise_for_status()
    df = pd.DataFrame(response.json()['values'])
    df.columns = ['DateTimestamp', 'TotalGenerationUSD']
    df['Date'] = pd.to_datetime(df['DateTimestamp'], unit='s').dt.floor('d')

    # Sort before deriving per-day block ranges: MinBlockID below is the
    # previous row's MaxBlockID, which is only meaningful in date order.
    df = df.sort_values('Date', kind='stable').reset_index(drop=True)

    # Calculate approximate block height for each day using linear interpolation
    # between known halving points
    def estimate_block_height(date: pd.Timestamp) -> int:
        # Dates before the first reference point cannot be interpolated;
        # pin them to the first known height instead of extrapolating backwards.
        if date < first_date:
            return first_height

        # Find the halving period this date falls into
        for (start_date, start_height, _), (end_date, end_height, _) in pairwise(halving_data):
            if start_date <= date < end_date:
                # Linear interpolation within this halving period
                total_days = (end_date - start_date).days
                days_elapsed = (date - start_date).days
                height = start_height + (end_height - start_height) * days_elapsed / total_days
                return int(height)

        # After the last known halving, extrapolate
        days_since = (date - last_date).days
        return int(last_height + days_since * BLOCKS_PER_DAY)

    def get_block_reward(block_height) -> float:
        """Get block reward for a given block height."""
        halvings = block_height // HALVING_INTERVAL
        return GENESIS_BLOCK_REWARD / (2 ** halvings)

    # Calculate block data for each day
    df['MaxBlockID'] = df['Date'].apply(estimate_block_height)
    df['MinBlockID'] = df['MaxBlockID'].shift(1, fill_value=0)
    df['TotalBlocks'] = df['MaxBlockID'] - df['MinBlockID']

    # Calculate BTC generation based on block reward
    df['BlockReward'] = df['MaxBlockID'].apply(get_block_reward)
    df['TotalGeneration'] = df['TotalBlocks'] * df['BlockReward'] * 1e8  # Convert to satoshis

    # Select and order the output columns; copy so that adding a column below
    # operates on an independent frame.
    df = df[['Date', 'TotalBlocks', 'MinBlockID', 'MaxBlockID', 'TotalGeneration', 'TotalGenerationUSD']].copy()

    # Add halving markers (the first entry is height 0, which is not a halving)
    df['Halving'] = 0
    for _, block_height, _ in halving_data[1:]:
        df.loc[(df['MinBlockID'] < block_height) & (df['MaxBlockID'] >= block_height), 'Halving'] = 1

    return df
101105
102106
@@ -111,7 +115,6 @@ def fetch_bitcoin_data() -> pd.DataFrame:
111115 """
112116 print ('📈 Requesting historical Bitcoin data…' )
113117
114- # Use Blockchain.com API instead of Blockchair (which is blocked)
115118 df = fetch_blockchain_data ()
116119
117120 df ['Date' ] = pd .to_datetime (df ['Date' ])
@@ -132,7 +135,6 @@ def fetch_bitcoin_data() -> pd.DataFrame:
132135 df .reset_index (drop = True , inplace = True )
133136
134137 df = fix_current_day_data (df )
135- df = add_block_halving_data (df )
136138 df = mark_highs_lows (df , 'Price' , False , round (365 * 2 ), 180 )
137139
138140 # move 2021' peak to the first price peak
@@ -144,24 +146,23 @@ def fetch_bitcoin_data() -> pd.DataFrame:
144146
145147
146148def fetch_price_data () -> pd .DataFrame :
147- response = http_get_with_retry (
149+ response = HTTP . get (
148150 'https://api.coinmarketcap.com/data-api/v3/cryptocurrency/detail/chart' ,
149151 params = {
150152 'id' : 1 ,
151153 'range' : 'ALL' ,
152154 },
153155 )
154156
157+ response .raise_for_status ()
155158 response_json = response .json ()
156159 response_x = [float (k ) for k in response_json ['data' ]['points' ]]
157160 response_y = [value ['v' ][0 ] for value in response_json ['data' ]['points' ].values ()]
158161
159- df = pd .DataFrame (
160- {
161- 'Date' : response_x ,
162- 'Price' : response_y ,
163- }
164- )
162+ df = pd .DataFrame ({
163+ 'Date' : response_x ,
164+ 'Price' : response_y ,
165+ })
165166 df ['Date' ] = pd .to_datetime (df ['Date' ], unit = 's' ).dt .tz_localize (None ).dt .floor ('d' )
166167 df .sort_values (by = 'Date' , inplace = True )
167168 df .drop_duplicates ('Date' , keep = 'last' , inplace = True )
@@ -172,45 +173,10 @@ def fetch_price_data() -> pd.DataFrame:
def fix_current_day_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Rescale the last row of ``df`` to a full day's worth of blocks.

    The last row's TotalBlocks, TotalGeneration and TotalGenerationUSD are
    multiplied by ``BLOCKS_PER_DAY / TotalBlocks`` so that TotalBlocks becomes
    ``BLOCKS_PER_DAY`` (presumably because the most recent day is still in
    progress — confirm with the caller).

    Args:
        df: Frame containing at least the three columns named above.

    Returns:
        The same ``df`` object, modified in place. It is returned untouched
        when it has no rows or when the last row reports zero blocks, since
        no finite scale factor exists in that case.
    """
    if df.empty:
        return df

    row = df.iloc[-1].copy()

    total_blocks = row['TotalBlocks']
    if total_blocks == 0:
        # Scaling would divide by zero and write inf/NaN back into the frame.
        return df

    target_scale = BLOCKS_PER_DAY / total_blocks

    for col_name in ['TotalBlocks', 'TotalGeneration', 'TotalGenerationUSD']:
        row[col_name] *= target_scale

    df.iloc[-1] = row
    return df
183-
184-
185- def add_block_halving_data (df : pd .DataFrame ) -> pd .DataFrame :
186- reward_halving_every = 210000
187- current_block_halving_id = reward_halving_every
188- current_block_production = 50
189- df ['Halving' ] = 0
190- df ['NextHalvingBlock' ] = current_block_halving_id
191-
192- while True :
193- df .loc [
194- (current_block_halving_id - reward_halving_every ) <= df ['MaxBlockID' ],
195- 'BlockGeneration' ,
196- ] = current_block_production
197-
198- block_halving_rows = df [
199- (df ['MinBlockID' ] <= current_block_halving_id ) & (df ['MaxBlockID' ] >= current_block_halving_id )
200- ]
201-
202- if len (block_halving_rows ) == 0 :
203- break
204-
205- # Take the first matching row if multiple match
206- block_halving_row = block_halving_rows .iloc [0 ]
207- row_index = block_halving_rows .index [0 ]
208-
209- current_block_halving_id += reward_halving_every
210- current_block_production /= 2
211- df .loc [row_index , 'Halving' ] = 1
212- df .loc [df .index > row_index , 'NextHalvingBlock' ] = current_block_halving_id
213-
214- df ['DaysToHalving' ] = pd .to_timedelta ((df ['NextHalvingBlock' ] - df ['MaxBlockID' ]) / (24 * 6 ), unit = 'D' )
215- df ['NextHalvingDate' ] = df ['Date' ] + df ['DaysToHalving' ]
216- return df
0 commit comments