Commit 4c4d8ae

Merge commit: parents ad49361 + c024586

8 files changed: 207 additions & 20 deletions

docusaurus/docs/Data/external-data.md

Lines changed: 43 additions & 1 deletion

````diff
@@ -13,7 +13,7 @@ The framework provides two ways to load external data:
 1. **Data Sources** — Declare `DataSource.from_csv()`, `DataSource.from_json()`, or `DataSource.from_parquet()` in your strategy's `data_sources` list. Data is fetched automatically and available in your `data` dict.
 2. **Context methods** — Call `context.fetch_csv()`, `context.fetch_json()`, or `context.fetch_parquet()` on demand inside your strategy's `run_strategy` method.
 
-Both approaches support caching, refresh intervals, date parsing, and pre/post-processing callbacks.
+Both approaches support caching, refresh intervals, date parsing, request headers, and pre/post-processing callbacks.
 
 ## Supported Formats
 
@@ -112,6 +112,7 @@ class MyStrategy(TradingStrategy):
         earnings = context.fetch_json(
             url="https://api.example.com/earnings",
             date_column="report_date",
+            headers={"Authorization": "Bearer <token>"},
         )
 
         # Fetch Parquet on demand
@@ -132,9 +133,50 @@ All three factory methods and context methods accept the same core parameters:
 | `date_format` | `str` | `None` | strftime format for parsing dates (e.g., `"%Y-%m-%d"`). Auto-detected if omitted. |
 | `cache` | `bool` | `True` | Cache fetched data locally to avoid repeated downloads. |
 | `refresh_interval` | `str` | `None` | How often to re-fetch: `"1m"`, `"5m"`, `"15m"`, `"30m"`, `"1h"`, `"4h"`, `"1d"`, `"1W"`. |
+| `headers` | `dict` | `None` | Optional HTTP headers to send with the request, such as API keys or bearer tokens. |
 | `pre_process` | `callable` | `None` | Transform raw text before parsing. Receives `str`, returns `str`. Not available for Parquet. |
 | `post_process` | `callable` | `None` | Transform the parsed DataFrame. Receives `DataFrame`, returns `DataFrame`. |
 
+## Authenticated APIs
+
+Use `headers` when an external data API requires authentication. For example, Adanos Market Sentiment can be loaded as an optional alternative-data signal without writing a custom provider:
+
+```python
+import json
+import os
+
+import polars as pl
+
+from investing_algorithm_framework import TimeUnit, TradingStrategy
+
+
+def extract_adanos_stocks(raw_text):
+    payload = json.loads(raw_text)
+    return json.dumps(payload.get("stocks", []))
+
+
+class SentimentStrategy(TradingStrategy):
+    time_unit = TimeUnit.DAY
+    interval = 1
+    symbols = ["AAPL", "MSFT"]
+
+    def run_strategy(self, context, data):
+        sentiment = context.fetch_json(
+            url=(
+                "https://api.adanos.org/news/stocks/v1/compare"
+                "?tickers=AAPL,MSFT&days=7"
+            ),
+            headers={"X-API-Key": os.environ["ADANOS_API_KEY"]},
+            pre_process=extract_adanos_stocks,
+            cache=True,
+            refresh_interval="1d",
+        )
+
+        aapl = sentiment.filter(pl.col("ticker") == "AAPL")
+        if len(aapl) and aapl["sentiment_score"][0] > 0.2:
+            context.create_limit_order(...)
+```
+
 ## Pre/Post Processing
 
 ### Pre-Processing
````
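
The docs example above uses the on-demand `context.fetch_json()` route. The same `headers` parameter also works on the declarative route through the `DataSource` factories changed in `data_source.py` below. A minimal sketch: the endpoint URL is illustrative, and the `identifier` keyword plus the matching `data` lookup key are assumptions about how declared sources are named, not something this diff shows.

```python
import os

from investing_algorithm_framework import (
    DataSource,
    TimeUnit,
    TradingStrategy,
)


class DeclarativeSentimentStrategy(TradingStrategy):
    time_unit = TimeUnit.DAY
    interval = 1
    # Declared once; the framework fetches the source automatically and
    # sends the headers dict with every HTTP request (see base_url.py below).
    data_sources = [
        DataSource.from_json(
            identifier="sentiment",  # hypothetical lookup key
            url="https://api.example.com/sentiment",  # illustrative endpoint
            headers={"X-API-Key": os.environ["ADANOS_API_KEY"]},
            refresh_interval="1d",
        ),
    ]

    def run_strategy(self, context, data):
        sentiment = data["sentiment"]  # parsed DataFrame, per the docs above
```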

investing_algorithm_framework/app/context.py

Lines changed: 36 additions & 9 deletions

```diff
@@ -2382,13 +2382,23 @@ def get_stop_losses(
 
         return self.trade_stop_loss_service.get_all(query_params)
 
+    def _get_url_provider_cache_key(self, url, headers):
+        if not headers:
+            return url
+
+        return (
+            url,
+            tuple(sorted(headers.items()))
+        )
+
     def fetch_csv(
         self,
         url,
         date_column=None,
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
     ):
@@ -2408,6 +2418,8 @@ def fetch_csv(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            pre_process (callable, optional): Transform raw CSV text
                before parsing.
            post_process (callable, optional): Transform the parsed
@@ -2431,20 +2443,23 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_csv_url_providers'):
             self._csv_url_providers = {}
 
-        if url not in self._csv_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._csv_url_providers:
             provider = CSVURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 pre_process=pre_process,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._csv_url_providers[url] = provider
+            self._csv_url_providers[provider_key] = provider
 
-        return self._csv_url_providers[url].get_data()
+        return self._csv_url_providers[provider_key].get_data()
 
     def fetch_json(
         self,
```
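
The new `_get_url_provider_cache_key` helper exists because two fetches of the same URL with different credentials must not share a provider or its cached response. A standalone sketch of the key derivation (the free function mirrors the helper above and is only for illustration):

```python
def url_provider_cache_key(url, headers=None):
    # No headers: the bare URL, matching the pre-change behavior.
    if not headers:
        return url
    # Otherwise a hashable (url, sorted header items) tuple, so dict
    # ordering does not affect the key.
    return (url, tuple(sorted(headers.items())))


URL = "https://api.example.com/earnings"  # illustrative

# Anonymous and authenticated fetches of one URL get distinct providers.
assert url_provider_cache_key(URL) != url_provider_cache_key(
    URL, {"X-API-Key": "aaa"}
)
# Distinct credentials also get distinct providers.
assert url_provider_cache_key(URL, {"X-API-Key": "aaa"}) != \
    url_provider_cache_key(URL, {"X-API-Key": "bbb"})
# Logically equal header dicts share a key regardless of insertion order.
assert url_provider_cache_key(URL, {"A": "1", "B": "2"}) == \
    url_provider_cache_key(URL, {"B": "2", "A": "1"})
```
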
```diff
@@ -2453,6 +2468,7 @@ def fetch_json(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
     ):
@@ -2475,6 +2491,8 @@ def fetch_json(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            pre_process (callable, optional): Transform raw JSON text
                before parsing.
            post_process (callable, optional): Transform the parsed
@@ -2497,20 +2515,23 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_json_url_providers'):
             self._json_url_providers = {}
 
-        if url not in self._json_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._json_url_providers:
             provider = JSONURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 pre_process=pre_process,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._json_url_providers[url] = provider
+            self._json_url_providers[provider_key] = provider
 
-        return self._json_url_providers[url].get_data()
+        return self._json_url_providers[provider_key].get_data()
 
     def fetch_parquet(
         self,
@@ -2519,6 +2540,7 @@ def fetch_parquet(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         post_process=None,
     ):
         """
@@ -2537,6 +2559,8 @@ def fetch_parquet(
            cache (bool): Cache fetched data locally (default: True).
            refresh_interval (str, optional): Re-fetch interval
                (e.g., "1d", "1h").
+            headers (dict, optional): HTTP headers to send with the
+                request.
            post_process (callable, optional): Transform the parsed
                DataFrame.
 
@@ -2556,19 +2580,22 @@ def run_strategy(self, context, data):
         if not hasattr(self, '_parquet_url_providers'):
             self._parquet_url_providers = {}
 
-        if url not in self._parquet_url_providers:
+        provider_key = self._get_url_provider_cache_key(url, headers)
+
+        if provider_key not in self._parquet_url_providers:
             provider = ParquetURLDataProvider(
                 url=url,
                 date_column=date_column,
                 date_format=date_format,
                 cache=cache,
                 refresh_interval=refresh_interval,
+                headers=headers,
                 post_process=post_process,
             )
             provider.config = self.configuration_service.get_config()
-            self._parquet_url_providers[url] = provider
+            self._parquet_url_providers[provider_key] = provider
 
-        return self._parquet_url_providers[url].get_data()
+        return self._parquet_url_providers[provider_key].get_data()
 
     def batch_order(self, orders, market=None):
         """
```

investing_algorithm_framework/domain/models/data/data_source.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -47,6 +47,7 @@ class DataSource:
     date_format: Optional[str] = None
     cache: bool = True
     refresh_interval: Optional[str] = None
+    headers: Optional[dict] = None
     pre_process: Optional[Callable] = field(
         default=None, repr=False, compare=False
     )
@@ -133,6 +134,7 @@ def from_csv(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         pre_process: Callable = None,
         post_process: Callable = None,
     ) -> "DataSource":
@@ -149,6 +151,7 @@ def from_csv(
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h"). If None, data is fetched once and
                cached indefinitely.
+            headers: Optional HTTP headers to send with the request.
            pre_process: Optional callback to transform the raw CSV
                text before parsing. Receives a string, must return
                a string.
@@ -178,6 +181,7 @@ def from_csv(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            pre_process=pre_process,
            post_process=post_process,
        )
@@ -191,6 +195,7 @@ def from_json(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         pre_process: Callable = None,
         post_process: Callable = None,
     ) -> "DataSource":
@@ -209,6 +214,7 @@ def from_json(
                (default: True).
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h").
+            headers: Optional HTTP headers to send with the request.
            pre_process: Optional callback to transform the raw JSON
                text before parsing. Receives a string, must return
                a string.
@@ -234,6 +240,7 @@ def from_json(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            pre_process=pre_process,
            post_process=post_process,
        )
@@ -247,6 +254,7 @@ def from_parquet(
         date_format: str = None,
         cache: bool = True,
         refresh_interval: str = None,
+        headers: dict = None,
         post_process: Callable = None,
     ) -> "DataSource":
         """
@@ -262,6 +270,7 @@ def from_parquet(
                (default: True).
            refresh_interval: How often to re-fetch the data
                (e.g., "1d", "1h").
+            headers: Optional HTTP headers to send with the request.
            post_process: Optional callback to transform the parsed
                DataFrame.
 
@@ -284,6 +293,7 @@ def from_parquet(
            date_format=date_format,
            cache=cache,
            refresh_interval=refresh_interval,
+            headers=headers,
            post_process=post_process,
        )
 
@@ -330,6 +340,10 @@ def to_dict(self):
            non_null_attributes['data_type'] = self.data_type.value
        if self.time_frame is not None:
            non_null_attributes['time_frame'] = self.time_frame.value
+        if self.headers is not None:
+            non_null_attributes['headers'] = {
+                key: "***" for key in self.headers
+            }
 
        return non_null_attributes
```
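The `to_dict()` change keeps credentials out of logs and serialized configuration: header names are kept, header values are replaced with `"***"`. A rough usage sketch, assuming `url` is the only required argument of `from_json` (the leading parameters are not shown in this diff):

```python
import os

from investing_algorithm_framework import DataSource

source = DataSource.from_json(
    url="https://api.example.com/earnings",  # illustrative endpoint
    headers={"X-API-Key": os.environ.get("ADANOS_API_KEY", "dummy")},
)

# Keys survive, values are masked, so dumps stay debuggable but safe.
print(source.to_dict()["headers"])  # -> {'X-API-Key': '***'}
```
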
investing_algorithm_framework/infrastructure/data_providers/base_url.py

Lines changed: 13 additions & 2 deletions

```diff
@@ -45,6 +45,7 @@ def __init__(
         date_format=None,
         cache=True,
         refresh_interval=None,
+        headers=None,
         pre_process=None,
         post_process=None,
         priority=5,
@@ -62,6 +63,7 @@ def __init__(
         self._date_format = date_format
         self._cache = cache
         self._refresh_interval = refresh_interval
+        self._headers = headers or {}
         self._pre_process = pre_process
         self._post_process = post_process
         self._cached_data = None
@@ -194,6 +196,7 @@ def copy(self, data_source=None):
         date_format = self._date_format
         cache = self._cache
         refresh_interval = self._refresh_interval
+        headers = self._headers
         pre_process = self._pre_process
         post_process = self._post_process
         identifier = self.data_provider_identifier
@@ -206,6 +209,7 @@ def copy(self, data_source=None):
                 else cache
             refresh_interval = data_source.refresh_interval \
                 or refresh_interval
+            headers = data_source.headers or headers
             pre_process = data_source.pre_process or pre_process
             post_process = data_source.post_process or post_process
 
@@ -215,6 +219,7 @@ def copy(self, data_source=None):
             date_format=date_format,
             cache=cache,
             refresh_interval=refresh_interval,
+            headers=headers,
             pre_process=pre_process,
             post_process=post_process,
             priority=self.priority,
@@ -277,9 +282,11 @@ def _fetch_and_parse(self):
 
         # Fetch from URL
         ctx = ssl.create_default_context()
+        headers = {"User-Agent": "investing-algorithm-framework"}
+        headers.update(self._headers)
         req = urllib.request.Request(
             url,
-            headers={"User-Agent": "investing-algorithm-framework"}
+            headers=headers
         )
         with urllib.request.urlopen(req, context=ctx) as response:
             raw_bytes = response.read()
```
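
Note the merge order in `_fetch_and_parse`: the default `User-Agent` is written first and `self._headers` is layered on top, so a caller-supplied `User-Agent` wins. A plain-dict sketch of that behavior:

```python
# Defaults first, user headers second: dict.update() lets the caller override.
default_headers = {"User-Agent": "investing-algorithm-framework"}
user_headers = {"User-Agent": "my-bot/1.0", "X-API-Key": "secret"}

merged = dict(default_headers)
merged.update(user_headers)

assert merged == {"User-Agent": "my-bot/1.0", "X-API-Key": "secret"}
```
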
```diff
@@ -350,8 +357,12 @@ def _get_cache_path(self):
         if storage_dir is None:
             storage_dir = os.path.join(os.getcwd(), ".data_cache")
 
+        cache_key = self._url
+        if self._headers:
+            cache_key = f"{cache_key}|headers:{sorted(self._headers.items())}"
+
         url_hash = hashlib.md5(
-            self._url.encode()
+            cache_key.encode()
         ).hexdigest()[:12]
         suffix = self._cache_file_suffix()
         return os.path.join(storage_dir, f"url_{url_hash}{suffix}")
```

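The matching change in `_get_cache_path` folds the header set into the string that gets hashed, so authenticated and anonymous fetches of the same URL land in separate cache files. A standalone sketch using the same stdlib calls:

```python
import hashlib


def cache_file_hash(url, headers=None):
    # Mirrors _get_cache_path: fold sorted header items into the hashed key.
    cache_key = url
    if headers:
        cache_key = f"{cache_key}|headers:{sorted(headers.items())}"
    return hashlib.md5(cache_key.encode()).hexdigest()[:12]


URL = "https://api.example.com/earnings"  # illustrative

assert cache_file_hash(URL) != cache_file_hash(URL, {"X-API-Key": "aaa"})
assert cache_file_hash(URL, {"X-API-Key": "aaa"}) != \
    cache_file_hash(URL, {"X-API-Key": "bbb"})
```

Because only a truncated MD5 of the combined key ends up in the file name, header values never appear in the cache path itself.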