Commit 4c4d8ae

Merge commit: parents ad49361 + c024586

8 files changed: 207 additions & 20 deletions

docusaurus/docs/Data/external-data.md

Lines changed: 43 additions & 1 deletion

````diff
@@ -13,7 +13,7 @@ The framework provides two ways to load external data:
 1. **Data Sources** — Declare `DataSource.from_csv()`, `DataSource.from_json()`, or `DataSource.from_parquet()` in your strategy's `data_sources` list. Data is fetched automatically and available in your `data` dict.
 2. **Context methods** — Call `context.fetch_csv()`, `context.fetch_json()`, or `context.fetch_parquet()` on demand inside your strategy's `run_strategy` method.
 
-Both approaches support caching, refresh intervals, date parsing, and pre/post-processing callbacks.
+Both approaches support caching, refresh intervals, date parsing, request headers, and pre/post-processing callbacks.
 
 ## Supported Formats
 
@@ -112,6 +112,7 @@ class MyStrategy(TradingStrategy):
         earnings = context.fetch_json(
             url="https://api.example.com/earnings",
             date_column="report_date",
+            headers={"Authorization": "Bearer <token>"},
         )
 
         # Fetch Parquet on demand
@@ -132,9 +133,50 @@ All three factory methods and context methods accept the same core parameters:
 | `date_format` | `str` | `None` | strftime format for parsing dates (e.g., `"%Y-%m-%d"`). Auto-detected if omitted. |
 | `cache` | `bool` | `True` | Cache fetched data locally to avoid repeated downloads. |
 | `refresh_interval` | `str` | `None` | How often to re-fetch: `"1m"`, `"5m"`, `"15m"`, `"30m"`, `"1h"`, `"4h"`, `"1d"`, `"1W"`. |
+| `headers` | `dict` | `None` | Optional HTTP headers to send with the request, such as API keys or bearer tokens. |
 | `pre_process` | `callable` | `None` | Transform raw text before parsing. Receives `str`, returns `str`. Not available for Parquet. |
 | `post_process` | `callable` | `None` | Transform the parsed DataFrame. Receives `DataFrame`, returns `DataFrame`. |
 
+## Authenticated APIs
+
+Use `headers` when an external data API requires authentication. For example, Adanos Market Sentiment can be loaded as an optional alternative-data signal without writing a custom provider:
+
+```python
+import json
+import os
+
+import polars as pl
+
+from investing_algorithm_framework import TimeUnit, TradingStrategy
+
+
+def extract_adanos_stocks(raw_text):
+    payload = json.loads(raw_text)
+    return json.dumps(payload.get("stocks", []))
+
+
+class SentimentStrategy(TradingStrategy):
+    time_unit = TimeUnit.DAY
+    interval = 1
+    symbols = ["AAPL", "MSFT"]
+
+    def run_strategy(self, context, data):
+        sentiment = context.fetch_json(
+            url=(
+                "https://api.adanos.org/news/stocks/v1/compare"
+                "?tickers=AAPL,MSFT&days=7"
+            ),
+            headers={"X-API-Key": os.environ["ADANOS_API_KEY"]},
+            pre_process=extract_adanos_stocks,
+            cache=True,
+            refresh_interval="1d",
+        )
+
+        aapl = sentiment.filter(pl.col("ticker") == "AAPL")
+        if len(aapl) and aapl["sentiment_score"][0] > 0.2:
+            context.create_limit_order(...)
+```
+
 ## Pre/Post Processing
 
 ### Pre-Processing
````
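
The docs example above uses the on-demand `context.fetch_json()` route. The same `headers` parameter also works on the declarative route through the `DataSource` factories changed in `data_source.py` below. A minimal sketch: the endpoint URL is illustrative, and the `identifier` keyword plus the matching `data` lookup key are assumptions about how declared sources are named, not something this diff shows.

```python
import os

from investing_algorithm_framework import (
    DataSource,
    TimeUnit,
    TradingStrategy,
)


class DeclarativeSentimentStrategy(TradingStrategy):
    time_unit = TimeUnit.DAY
    interval = 1
    # Declared once; the framework fetches the source automatically and
    # sends the headers dict with every HTTP request (see base_url.py below).
    data_sources = [
        DataSource.from_json(
            identifier="sentiment",  # hypothetical lookup key
            url="https://api.example.com/sentiment",  # illustrative endpoint
            headers={"X-API-Key": os.environ["ADANOS_API_KEY"]},
            refresh_interval="1d",
        ),
    ]

    def run_strategy(self, context, data):
        sentiment = data["sentiment"]  # parsed DataFrame, per the docs above
```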

investing_algorithm_framework/app/context.py

Lines changed: 36 additions & 9 deletions

```diff
@@ -2382,13 +2382,23 @@ def get_stop_losses(
 
         return self.trade_stop_loss_service.get_all(query_params)
 
+    def _get_url_provider_cache_key(self, url, headers):
+        if not headers:
+            return url
+
+        return (
+            url,
+            tuple(sorted(headers.items()))
+        )
+
     def fetch_csv(
         self,
         url,
         date_column=None,
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
     ):
@@ -2408,6 +2418,8 @@ def fetch_csv(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            pre_process (callable, optional): Transform raw CSV text
                before parsing.
            post_process (callable, optional): Transform the parsed
@@ -2431,20 +2443,23 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_csv_url_providers'):
             self._csv_url_providers = {}
 
-        if url not in self._csv_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._csv_url_providers:
             provider = CSVURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 pre_process=pre_process,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._csv_url_providers[url] = provider
+            self._csv_url_providers[provider_key] = provider
 
-        return self._csv_url_providers[url].get_data()
+        return self._csv_url_providers[provider_key].get_data()
 
     def fetch_json(
         self,
```
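
The new `_get_url_provider_cache_key` helper exists because two fetches of the same URL with different credentials must not share a provider or its cached response. A standalone sketch of the key derivation (the free function mirrors the helper above and is only for illustration):

```python
def url_provider_cache_key(url, headers=None):
    # No headers: the bare URL, matching the pre-change behavior.
    if not headers:
        return url
    # Otherwise a hashable (url, sorted header items) tuple, so dict
    # ordering does not affect the key.
    return (url, tuple(sorted(headers.items())))


URL = "https://api.example.com/earnings"  # illustrative

# Anonymous and authenticated fetches of one URL get distinct providers.
assert url_provider_cache_key(URL) != url_provider_cache_key(
    URL, {"X-API-Key": "aaa"}
)
# Distinct credentials also get distinct providers.
assert url_provider_cache_key(URL, {"X-API-Key": "aaa"}) != \
    url_provider_cache_key(URL, {"X-API-Key": "bbb"})
# Logically equal header dicts share a key regardless of insertion order.
assert url_provider_cache_key(URL, {"A": "1", "B": "2"}) == \
    url_provider_cache_key(URL, {"B": "2", "A": "1"})
```
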
```diff
@@ -2453,6 +2468,7 @@ def fetch_json(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
     ):
@@ -2475,6 +2491,8 @@ def fetch_json(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            pre_process (callable, optional): Transform raw JSON text
                before parsing.
            post_process (callable, optional): Transform the parsed
@@ -2497,20 +2515,23 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_json_url_providers'):
             self._json_url_providers = {}
 
-        if url not in self._json_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._json_url_providers:
             provider = JSONURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 pre_process=pre_process,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._json_url_providers[url] = provider
+            self._json_url_providers[provider_key] = provider
 
-        return self._json_url_providers[url].get_data()
+        return self._json_url_providers[provider_key].get_data()
 
     def fetch_parquet(
         self,
@@ -2519,6 +2540,7 @@ def fetch_parquet(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         post_process=None,
     ):
         """
@@ -2537,6 +2559,8 @@ def fetch_parquet(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            post_process (callable, optional): Transform the parsed
                DataFrame.
 
@@ -2556,19 +2580,22 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_parquet_url_providers'):
             self._parquet_url_providers = {}
 
-        if url not in self._parquet_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._parquet_url_providers:
             provider = ParquetURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._parquet_url_providers[url] = provider
+            self._parquet_url_providers[provider_key] = provider
 
-        return self._parquet_url_providers[url].get_data()
+        return self._parquet_url_providers[provider_key].get_data()
 
     def batch_order(self, orders, market=None):
         """
```

investing_algorithm_framework/domain/models/data/data_source.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -47,6 +47,7 @@ class DataSource:
     date_format: Optional[str] = None
     cache: bool = True
     refresh_interval: Optional[str] = None
+    headers: Optional[dict] = None
     pre_process: Optional[Callable] = field(
         default=None, repr=False, compare=False
     )
@@ -133,6 +134,7 @@ def from_csv(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         pre_process: Callable = None,
         post_process: Callable = None,
     ) -> "DataSource":
@@ -149,6 +151,7 @@ def from_csv(
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h"). If None, data is fetched once and
                cached indefinitely.
+            headers: Optional HTTP headers to send with the request.
            pre_process: Optional callback to transform the raw CSV
                text before parsing. Receives a string, must return
                a string.
@@ -178,6 +181,7 @@ def from_csv(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            pre_process=pre_process,
            post_process=post_process,
        )
@@ -191,6 +195,7 @@ def from_json(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         pre_process: Callable = None,
         post_process: Callable = None,
     ) -> "DataSource":
@@ -209,6 +214,7 @@ def from_json(
                (default: True).
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h").
+            headers: Optional HTTP headers to send with the request.
            pre_process: Optional callback to transform the raw JSON
                text before parsing. Receives a string, must return
                a string.
@@ -234,6 +240,7 @@ def from_json(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            pre_process=pre_process,
            post_process=post_process,
        )
@@ -247,6 +254,7 @@ def from_parquet(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         post_process: Callable = None,
     ) -> "DataSource":
         """
@@ -262,6 +270,7 @@ def from_parquet(
                (default: True).
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h").
+            headers: Optional HTTP headers to send with the request.
            post_process: Optional callback to transform the parsed
                DataFrame.
 
@@ -284,6 +293,7 @@ def from_parquet(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            post_process=post_process,
        )
 
@@ -330,6 +340,10 @@ def to_dict(self):
            non_null_attributes['data_type'] = self.data_type.value
        if self.time_frame is not None:
            non_null_attributes['time_frame'] = self.time_frame.value
+        if self.headers is not None:
+            non_null_attributes['headers'] = {
+                key: "***" for key in self.headers
+            }
 
        return non_null_attributes
```
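The `to_dict()` change keeps credentials out of logs and serialized configuration: header names are kept, header values are replaced with `"***"`. A rough usage sketch, assuming `url` is the only required argument of `from_json` (the leading parameters are not shown in this diff):

```python
import os

from investing_algorithm_framework import DataSource

source = DataSource.from_json(
    url="https://api.example.com/earnings",  # illustrative endpoint
    headers={"X-API-Key": os.environ.get("ADANOS_API_KEY", "dummy")},
)

# Keys survive, values are masked, so dumps stay debuggable but safe.
print(source.to_dict()["headers"])  # -> {'X-API-Key': '***'}
```
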
investing_algorithm_framework/infrastructure/data_providers/base_url.py

Lines changed: 13 additions & 2 deletions

```diff
@@ -45,6 +45,7 @@ def __init__(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
         priority=5,
@@ -62,6 +63,7 @@ def __init__(
         self._date_format = date_format
         self._cache = cache
         self._refresh_interval = refresh_interval
+        self._headers = headers or {}
         self._pre_process = pre_process
         self._post_process = post_process
         self._cached_data = None
@@ -194,6 +196,7 @@ def copy(self, data_source=None):
         date_format = self._date_format
         cache = self._cache
         refresh_interval = self._refresh_interval
+        headers = self._headers
         pre_process = self._pre_process
         post_process = self._post_process
         identifier = self.data_provider_identifier
@@ -206,6 +209,7 @@ def copy(self, data_source=None):
                 else cache
             refresh_interval = data_source.refresh_interval \
                 or refresh_interval
+            headers = data_source.headers or headers
             pre_process = data_source.pre_process or pre_process
             post_process = data_source.post_process or post_process
 
@@ -215,6 +219,7 @@ def copy(self, data_source=None):
             date_format=date_format,
             cache=cache,
             refresh_interval=refresh_interval,
+            headers=headers,
             pre_process=pre_process,
             post_process=post_process,
             priority=self.priority,
@@ -277,9 +282,11 @@ def _fetch_and_parse(self):
 
         # Fetch from URL
         ctx = ssl.create_default_context()
+        headers = {"User-Agent": "investing-algorithm-framework"}
+        headers.update(self._headers)
         req = urllib.request.Request(
             url,
-            headers={"User-Agent": "investing-algorithm-framework"}
+            headers=headers
         )
         with urllib.request.urlopen(req, context=ctx) as response:
             raw_bytes = response.read()
```
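
Note the merge order in `_fetch_and_parse`: the default `User-Agent` is written first and `self._headers` is layered on top, so a caller-supplied `User-Agent` wins. A plain-dict sketch of that behavior:

```python
# Defaults first, user headers second: dict.update() lets the caller override.
default_headers = {"User-Agent": "investing-algorithm-framework"}
user_headers = {"User-Agent": "my-bot/1.0", "X-API-Key": "secret"}

merged = dict(default_headers)
merged.update(user_headers)

assert merged == {"User-Agent": "my-bot/1.0", "X-API-Key": "secret"}
```
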
```diff
@@ -350,8 +357,12 @@ def _get_cache_path(self):
         if storage_dir is None:
             storage_dir = os.path.join(os.getcwd(), ".data_cache")
 
+        cache_key = self._url
+        if self._headers:
+            cache_key = f"{cache_key}|headers:{sorted(self._headers.items())}"
+
         url_hash = hashlib.md5(
-            self._url.encode()
+            cache_key.encode()
         ).hexdigest()[:12]
         suffix = self._cache_file_suffix()
         return os.path.join(storage_dir, f"url_{url_hash}{suffix}")
```

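The matching change in `_get_cache_path` folds the header set into the string that gets hashed, so authenticated and anonymous fetches of the same URL land in separate cache files. A standalone sketch using the same stdlib calls:

```python
import hashlib


def cache_file_hash(url, headers=None):
    # Mirrors _get_cache_path: fold sorted header items into the hashed key.
    cache_key = url
    if headers:
        cache_key = f"{cache_key}|headers:{sorted(headers.items())}"
    return hashlib.md5(cache_key.encode()).hexdigest()[:12]


URL = "https://api.example.com/earnings"  # illustrative

assert cache_file_hash(URL) != cache_file_hash(URL, {"X-API-Key": "aaa"})
assert cache_file_hash(URL, {"X-API-Key": "aaa"}) != \
    cache_file_hash(URL, {"X-API-Key": "bbb"})
```

Because only a truncated MD5 of the combined key ends up in the file name, header values never appear in the cache path itself.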