Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions qlib/utils/pickle_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,45 @@
("pathlib", "Path"),
("pathlib", "PosixPath"),
("pathlib", "WindowsPath"),
("qlib.data.dataset.handler", "DataHandlerABC"),
("qlib.data.dataset.handler", "DataHandler"),
("qlib.data.dataset.handler", "DataHandlerLP"),
("qlib.data.dataset.loader", "DataLoader"),
("qlib.data.dataset.loader", "DLWParser"),
("qlib.data.dataset.loader", "QlibDataLoader"),
("qlib.data.dataset.loader", "StaticDataLoader"),
("qlib.data.dataset.loader", "NestedDataLoader"),
("qlib.data.dataset.loader", "DataLoaderDH"),
# Dataset hierarchy - needed when a recorder/rolling workflow pickles a
# full dataset and the unpickler walks the wrapped handler/loader graph.
("qlib.data.dataset", "Dataset"),
("qlib.data.dataset", "DatasetH"),
("qlib.data.dataset", "TSDatasetH"),
# Stock-data handlers shipped in qlib.contrib. Without these the
# ``Rolling._train_rolling_tasks`` -> recorder load path fails with
# ``Forbidden class: qlib.contrib.data.handler.Alpha158`` (issue #2130).
("qlib.contrib.data.handler", "Alpha158"),
("qlib.contrib.data.handler", "Alpha158vwap"),
("qlib.contrib.data.handler", "Alpha360"),
("qlib.contrib.data.handler", "Alpha360vwap"),
# Processors are part of every Dataset's processor chain and must be
# restorable when the dataset is reloaded from disk.
("qlib.data.dataset.processor", "Processor"),
("qlib.data.dataset.processor", "DropnaProcessor"),
("qlib.data.dataset.processor", "DropnaLabel"),
("qlib.data.dataset.processor", "DropCol"),
("qlib.data.dataset.processor", "FilterCol"),
("qlib.data.dataset.processor", "TanhProcess"),
("qlib.data.dataset.processor", "ProcessInf"),
("qlib.data.dataset.processor", "Fillna"),
("qlib.data.dataset.processor", "MinMaxNorm"),
("qlib.data.dataset.processor", "ZScoreNorm"),
("qlib.data.dataset.processor", "RobustZScoreNorm"),
("qlib.data.dataset.processor", "CSZScoreNorm"),
("qlib.data.dataset.processor", "CSRankNorm"),
("qlib.data.dataset.processor", "CSZFillna"),
("qlib.data.dataset.processor", "HashStockFormat"),
("qlib.data.dataset.processor", "TimeRangeFlt"),
}


Expand Down
118 changes: 118 additions & 0 deletions tests/misc/test_pickle_safelist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Regression tests for issue #2130.

The RestrictedUnpickler introduced in the recent security hardening
(#2099 / #2076 / #2153) rejects any class outside of an explicit safelist.
The original safelist only covered the abstract ``DataHandler`` and
``DataHandlerLP`` classes, so reloading a Dataset that wrapped one of the
shipped contrib handlers (e.g. ``Alpha158``) crashed
``Rolling._train_rolling_tasks`` with::

UnpicklingError: Forbidden class: qlib.contrib.data.handler.Alpha158.
Only whitelisted classes are allowed for security reasons. ...

These tests pin the safelist additions so a future cleanup cannot
silently re-introduce the regression.
"""

from __future__ import annotations

import pickle
import unittest

from qlib.utils.pickle_utils import (
SAFE_PICKLE_CLASSES,
RestrictedUnpickler,
restricted_pickle_loads,
)


def _is_safe(module: str, name: str) -> bool:
    """Return whether ``(module, name)`` is an entry of ``SAFE_PICKLE_CLASSES``.

    Args:
        module: Dotted module path of the class.
        name: Class name inside ``module``.
    """
    entry = (module, name)
    return entry in SAFE_PICKLE_CLASSES


class SafePickleClassesContainAlphaHandlersTest(unittest.TestCase):
    """Pin the ``qlib.contrib`` stock-data handlers in the safelist (issue #2130).

    These handlers must stay safelisted because every default
    rolling/recorder workflow serializes a Dataset that wraps one of them.
    """

    # All handlers checked here live in the same contrib module.
    _HANDLER_MODULE = "qlib.contrib.data.handler"

    def _assert_handler_safelisted(self, handler_name: str) -> None:
        """Assert that ``handler_name`` from the contrib handler module is safelisted."""
        self.assertTrue(_is_safe(self._HANDLER_MODULE, handler_name))

    def test_alpha158_is_safelisted(self) -> None:
        self._assert_handler_safelisted("Alpha158")

    def test_alpha158_vwap_is_safelisted(self) -> None:
        self._assert_handler_safelisted("Alpha158vwap")

    def test_alpha360_is_safelisted(self) -> None:
        self._assert_handler_safelisted("Alpha360")

    def test_alpha360_vwap_is_safelisted(self) -> None:
        self._assert_handler_safelisted("Alpha360vwap")


class SafePickleClassesContainDatasetHierarchyTest(unittest.TestCase):
    """Pin the dataset wrappers, loaders and processors in the safelist.

    All of these sit on the recorder pickle path; if any were missing, the
    unpickler would hit a forbidden class on the attribute right after the
    handler.
    """

    # Class names expected in the safelist, grouped by the module that owns them.
    _DATASET_NAMES = ("Dataset", "DatasetH", "TSDatasetH")
    _LOADER_NAMES = (
        "DataLoader",
        "DLWParser",
        "QlibDataLoader",
        "StaticDataLoader",
        "NestedDataLoader",
        "DataLoaderDH",
    )
    _PROCESSOR_NAMES = (
        "Processor",
        "DropnaProcessor",
        "DropnaLabel",
        "DropCol",
        "FilterCol",
        "TanhProcess",
        "ProcessInf",
        "Fillna",
        "MinMaxNorm",
        "ZScoreNorm",
        "RobustZScoreNorm",
        "CSZScoreNorm",
        "CSRankNorm",
        "CSZFillna",
        "HashStockFormat",
        "TimeRangeFlt",
    )

    def _assert_all_safelisted(self, module: str, class_names: tuple) -> None:
        """Check each name in ``class_names`` under ``module``, one subTest per name."""
        for class_name in class_names:
            # A subTest per class keeps one missing entry from hiding the others.
            with self.subTest(cls=class_name):
                self.assertTrue(_is_safe(module, class_name))

    def test_dataset_classes_are_safelisted(self) -> None:
        self._assert_all_safelisted("qlib.data.dataset", self._DATASET_NAMES)

    def test_loaders_are_safelisted(self) -> None:
        self._assert_all_safelisted("qlib.data.dataset.loader", self._LOADER_NAMES)

    def test_processors_are_safelisted(self) -> None:
        self._assert_all_safelisted("qlib.data.dataset.processor", self._PROCESSOR_NAMES)


class RestrictedUnpicklerFindClassForAlpha158Test(unittest.TestCase):
    """End-to-end checks against the real unpickler entry points.

    Covers both directions of the safelist: a safelisted handler must resolve
    to its real class object, and a class outside the safelist must still be
    refused.
    """

    def test_find_class_returns_alpha158(self) -> None:
        """``find_class`` must return the real ``Alpha158`` class, not raise."""
        import io

        from qlib.contrib.data.handler import Alpha158

        # An empty stream is enough: find_class is called directly, nothing is read.
        unpickler = RestrictedUnpickler(io.BytesIO())
        resolved = unpickler.find_class("qlib.contrib.data.handler", "Alpha158")
        self.assertIs(resolved, Alpha158)

    def test_restricted_pickle_loads_rejects_unknown_qlib_class(self) -> None:
        """Defensive: classes not in the safelist must still be rejected so
        the security model is preserved."""

        # Use a fake but plausible qlib path that is *not* in the safelist.
        module, name = "qlib.contrib.data.handler", "NotASafelistedHandler"
        # Guard the premise of the test: if someone ever safelists this name
        # the rejection assertion below would be meaningless.
        self.assertNotIn((module, name), SAFE_PICKLE_CLASSES)

        # Hand-built protocol-0 pickle: GLOBAL opcode ("c<module>\n<name>\n")
        # followed by STOP ("."). Loading it forces a find_class lookup for
        # the fake class without needing the class to exist.
        forbidden_payload = f"c{module}\n{name}\n.".encode("ascii")
        with self.assertRaises(pickle.UnpicklingError):
            restricted_pickle_loads(forbidden_payload)

        # Sanity: a trivial dict still loads fine.
        payload = pickle.dumps({"x": 1})
        self.assertEqual(restricted_pickle_loads(payload), {"x": 1})


# Allow running this file directly as a script, outside of a test runner.
if __name__ == "__main__":
    unittest.main()