From a55d4992d25346293ae608b2073488becd1fb32d Mon Sep 17 00:00:00 2001 From: genisis0x Date: Tue, 12 May 2026 15:43:21 +0530 Subject: [PATCH] fix(unpickler): allow Alpha158/Alpha360 handlers and the standard dataset chain The RestrictedUnpickler safelist introduced by the recent security hardening (#2099 / #2076 / #2153) only covered the abstract ``DataHandler`` / ``DataHandlerLP`` classes plus ``StaticDataLoader``. Any rolling workflow that pickles a real Dataset (the default for ``Rolling._train_rolling_tasks``) walks into one of the contrib stock handlers and now crashes on reload (issue #2130): UnpicklingError: Forbidden class: qlib.contrib.data.handler.Alpha158. Only whitelisted classes are allowed for security reasons. ... Non-rolling workflows happened to use a path that did not go through the restricted loader, which is why downgrading to 0.9.7 hid the issue. Extend ``SAFE_PICKLE_CLASSES`` with the qlib-internal classes that sit on the standard recorder pickle graph: * The four shipped contrib handlers: ``Alpha158``, ``Alpha158vwap``, ``Alpha360``, ``Alpha360vwap``. * The ``DataHandlerABC`` base class, the dataset wrappers (``Dataset``, ``DatasetH``, ``TSDatasetH``) and the additional concrete loaders (``DataLoader``, ``DLWParser``, ``QlibDataLoader``, ``NestedDataLoader``, ``DataLoaderDH``). * Every concrete ``Processor`` defined in ``qlib.data.dataset.processor`` -- they show up in every realistic ``learn_processors`` / ``infer_processors`` chain. These are all classes already shipped inside qlib itself, so adding them does not weaken the threat model the safelist was designed against (arbitrary code execution through external pickle payloads). Add regression tests pinning each added entry plus an end-to-end check that ``RestrictedUnpickler.find_class`` actually resolves ``Alpha158`` and that other unknown classes are still rejected. 
Fixes #2130
---
 qlib/utils/pickle_utils.py         |  36 +++++++++
 tests/misc/test_pickle_safelist.py | 127 +++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 tests/misc/test_pickle_safelist.py

diff --git a/qlib/utils/pickle_utils.py b/qlib/utils/pickle_utils.py
index 920692f3c89..daa265aec7d 100644
--- a/qlib/utils/pickle_utils.py
+++ b/qlib/utils/pickle_utils.py
@@ -46,9 +46,45 @@
     ("pathlib", "Path"),
     ("pathlib", "PosixPath"),
     ("pathlib", "WindowsPath"),
+    ("qlib.data.dataset.handler", "DataHandlerABC"),
     ("qlib.data.dataset.handler", "DataHandler"),
     ("qlib.data.dataset.handler", "DataHandlerLP"),
+    ("qlib.data.dataset.loader", "DataLoader"),
+    ("qlib.data.dataset.loader", "DLWParser"),
+    ("qlib.data.dataset.loader", "QlibDataLoader"),
     ("qlib.data.dataset.loader", "StaticDataLoader"),
+    ("qlib.data.dataset.loader", "NestedDataLoader"),
+    ("qlib.data.dataset.loader", "DataLoaderDH"),
+    # Dataset hierarchy - needed when a recorder/rolling workflow pickles a
+    # full dataset and the unpickler walks the wrapped handler/loader graph.
+    ("qlib.data.dataset", "Dataset"),
+    ("qlib.data.dataset", "DatasetH"),
+    ("qlib.data.dataset", "TSDatasetH"),
+    # Stock-data handlers shipped in qlib.contrib. Without these the
+    # ``Rolling._train_rolling_tasks`` -> recorder load path fails with
+    # ``Forbidden class: qlib.contrib.data.handler.Alpha158`` (issue #2130).
+    ("qlib.contrib.data.handler", "Alpha158"),
+    ("qlib.contrib.data.handler", "Alpha158vwap"),
+    ("qlib.contrib.data.handler", "Alpha360"),
+    ("qlib.contrib.data.handler", "Alpha360vwap"),
+    # Processors are part of every Dataset's processor chain and must be
+    # restorable when the dataset is reloaded from disk.
+    ("qlib.data.dataset.processor", "Processor"),
+    ("qlib.data.dataset.processor", "DropnaProcessor"),
+    ("qlib.data.dataset.processor", "DropnaLabel"),
+    ("qlib.data.dataset.processor", "DropCol"),
+    ("qlib.data.dataset.processor", "FilterCol"),
+    ("qlib.data.dataset.processor", "TanhProcess"),
+    ("qlib.data.dataset.processor", "ProcessInf"),
+    ("qlib.data.dataset.processor", "Fillna"),
+    ("qlib.data.dataset.processor", "MinMaxNorm"),
+    ("qlib.data.dataset.processor", "ZScoreNorm"),
+    ("qlib.data.dataset.processor", "RobustZScoreNorm"),
+    ("qlib.data.dataset.processor", "CSZScoreNorm"),
+    ("qlib.data.dataset.processor", "CSRankNorm"),
+    ("qlib.data.dataset.processor", "CSZFillna"),
+    ("qlib.data.dataset.processor", "HashStockFormat"),
+    ("qlib.data.dataset.processor", "TimeRangeFlt"),
 }
 
 
diff --git a/tests/misc/test_pickle_safelist.py b/tests/misc/test_pickle_safelist.py
new file mode 100644
index 00000000000..ffe899c875d
--- /dev/null
+++ b/tests/misc/test_pickle_safelist.py
@@ -0,0 +1,127 @@
+"""Regression tests for issue #2130.
+
+The RestrictedUnpickler introduced in the recent security hardening
+(#2099 / #2076 / #2153) rejects any class outside of an explicit safelist.
+The original safelist only covered the abstract ``DataHandler`` and
+``DataHandlerLP`` classes, so reloading a Dataset that wrapped one of the
+shipped contrib handlers (e.g. ``Alpha158``) crashed
+``Rolling._train_rolling_tasks`` with::
+
+    UnpicklingError: Forbidden class: qlib.contrib.data.handler.Alpha158.
+    Only whitelisted classes are allowed for security reasons. ...
+
+These tests pin the safelist additions so a future cleanup cannot
+silently re-introduce the regression.
+"""
+
+from __future__ import annotations
+
+import io
+import pickle
+import unittest
+
+from qlib.utils.pickle_utils import (
+    SAFE_PICKLE_CLASSES,
+    RestrictedUnpickler,
+    restricted_pickle_loads,
+)
+
+
+def _is_safe(module: str, name: str) -> bool:
+    return (module, name) in SAFE_PICKLE_CLASSES
+
+
+class SafePickleClassesContainAlphaHandlersTest(unittest.TestCase):
+    """Issue #2130: stock-data handlers shipped in ``qlib.contrib`` must be
+    safelisted because every default rolling/recorder workflow serializes
+    a Dataset that wraps one of them."""
+
+    def test_alpha158_is_safelisted(self) -> None:
+        self.assertTrue(_is_safe("qlib.contrib.data.handler", "Alpha158"))
+
+    def test_alpha158_vwap_is_safelisted(self) -> None:
+        self.assertTrue(_is_safe("qlib.contrib.data.handler", "Alpha158vwap"))
+
+    def test_alpha360_is_safelisted(self) -> None:
+        self.assertTrue(_is_safe("qlib.contrib.data.handler", "Alpha360"))
+
+    def test_alpha360_vwap_is_safelisted(self) -> None:
+        self.assertTrue(_is_safe("qlib.contrib.data.handler", "Alpha360vwap"))
+
+
+class SafePickleClassesContainDatasetHierarchyTest(unittest.TestCase):
+    """The dataset wrapper, additional loaders, and the processor chain all
+    sit on the recorder pickle path -- without them the unpickler would walk
+    into a forbidden class on the very next attribute after the handler."""
+
+    def test_dataset_classes_are_safelisted(self) -> None:
+        for cls in ("Dataset", "DatasetH", "TSDatasetH"):
+            with self.subTest(cls=cls):
+                self.assertTrue(_is_safe("qlib.data.dataset", cls))
+
+    def test_loaders_are_safelisted(self) -> None:
+        for cls in (
+            "DataLoader",
+            "DLWParser",
+            "QlibDataLoader",
+            "StaticDataLoader",
+            "NestedDataLoader",
+            "DataLoaderDH",
+        ):
+            with self.subTest(cls=cls):
+                self.assertTrue(_is_safe("qlib.data.dataset.loader", cls))
+
+    def test_processors_are_safelisted(self) -> None:
+        for cls in (
+            "Processor",
+            "DropnaProcessor",
+            "DropnaLabel",
+            "DropCol",
+            "FilterCol",
+            "TanhProcess",
+            "ProcessInf",
+            "Fillna",
+            "MinMaxNorm",
+            "ZScoreNorm",
+            "RobustZScoreNorm",
+            "CSZScoreNorm",
+            "CSRankNorm",
+            "CSZFillna",
+            "HashStockFormat",
+            "TimeRangeFlt",
+        ):
+            with self.subTest(cls=cls):
+                self.assertTrue(_is_safe("qlib.data.dataset.processor", cls))
+
+
+class RestrictedUnpicklerFindClassForAlpha158Test(unittest.TestCase):
+    """End-to-end: ``RestrictedUnpickler.find_class`` must return the real
+    ``Alpha158`` class object, not raise."""
+
+    def test_find_class_returns_alpha158(self) -> None:
+        from qlib.contrib.data.handler import Alpha158
+
+        unpickler = RestrictedUnpickler(io.BytesIO())
+        resolved = unpickler.find_class("qlib.contrib.data.handler", "Alpha158")
+        self.assertIs(resolved, Alpha158)
+
+    def test_restricted_pickle_loads_rejects_unknown_qlib_class(self) -> None:
+        """Defensive: classes not in the safelist must still be rejected so
+        the security model is preserved."""
+
+        # A fake but plausible qlib path that is *not* in the safelist.
+        module, name = "qlib.contrib.data.handler", "NotASafelistedHandler"
+        self.assertFalse(_is_safe(module, name))
+
+        # Protocol-0 GLOBAL opcode: makes the unpickler call
+        # ``find_class(module, name)`` without needing a real class.
+        payload = f"c{module}\n{name}\n.".encode("ascii")
+        with self.assertRaises(pickle.UnpicklingError):
+            restricted_pickle_loads(payload)
+
+        # Sanity: a payload made only of builtin containers still loads.
+        self.assertEqual(restricted_pickle_loads(pickle.dumps({"x": 1})), {"x": 1})
+
+
+if __name__ == "__main__":
+    unittest.main()