From e4fb7c92e1b92852fcfc8f41d5f1d2a687c789ae Mon Sep 17 00:00:00 2001 From: anish-devgit Date: Mon, 12 Jan 2026 19:53:47 +0530 Subject: [PATCH 1/2] fix(storage): resolve file paths with correct case in FileFeatureStorage (#2053) - Changed FileFeatureStorage.file_name to a property that checks for existing file paths. - Preserves case sensitivity for new files and existing uppercase directories. - Maintains backward compatibility for lowercase paths. - Added verification test case. --- qlib/data/storage/file_storage.py | 27 +++++++- tests/storage_tests/test_issue_2053.py | 86 ++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 tests/storage_tests/test_issue_2053.py diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py index 8a100a2d19e..293817e83e8 100644 --- a/qlib/data/storage/file_storage.py +++ b/qlib/data/storage/file_storage.py @@ -286,7 +286,32 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage): def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs): super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs) self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri) - self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin" + # self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin" + + @property + def file_name(self) -> str: + # Check if the file exists with the original instrument name + # If not, check if it exists with the lowercase instrument name + # If neither, return the original instrument name (for creating new files) + + # NOTE: This depends on self.dpm and self.storage_name which are properties of FileStorageMixin/BaseStorage. + # self.storage_name for FeatureStorage is likely "feature". + + base_uri = self.dpm.get_data_uri(self.freq).joinpath(f"{self.storage_name}s") + + # Candidate 1: Original Case (Preferred for correct behavior on Linux) + name_orig = f"{self.instrument}/{self.field.lower()}.{self.freq.lower()}.bin" + if (base_uri / name_orig).exists(): + return name_orig + + # Candidate 2: Lowercase (Backward Compatibility) + name_lower = f"{self.instrument.lower()}/{self.field.lower()}.{self.freq.lower()}.bin" + if (base_uri / name_lower).exists(): + return name_lower + + # Default: Original Case (For new files) + return name_orig + def clear(self): with self.uri.open("wb") as _: diff --git a/tests/storage_tests/test_issue_2053.py b/tests/storage_tests/test_issue_2053.py new file mode 100644 index 00000000000..82da522ffaf --- /dev/null +++ b/tests/storage_tests/test_issue_2053.py @@ -0,0 +1,86 @@ +import shutil +import unittest +from pathlib import Path +import numpy as np +import pandas as pd +from qlib.data.storage.file_storage import FileFeatureStorage + +class TestIssue2053(unittest.TestCase): + def setUp(self): + self.data_dir = Path("test_issue_2053_data").absolute() + if self.data_dir.exists(): + shutil.rmtree(self.data_dir) + self.data_dir.mkdir() + + self.day_dir = self.data_dir / "day" + self.features_dir = self.day_dir / "features" + self.features_dir.mkdir(parents=True) + + self.provider_uri = {"day": self.day_dir} + + import qlib + qlib.init(provider_uri=self.provider_uri) + + def tearDown(self): + if self.data_dir.exists(): + shutil.rmtree(self.data_dir) + + def test_case_sensitivity_check(self): + # Case 1: Uppercase Directory Exists + inst_name = "AAPL" + inst_dir = self.features_dir / inst_name + inst_dir.mkdir() + + # Create a dummy binary file to ensure storage object considers it valid + bin_file = inst_dir / "close.day.bin" + data = np.array([1.0, 2.0, 3.0], dtype=" Date: Tue, 13 Jan 2026 16:58:24 +0530 Subject: [PATCH 2/2] Fix(storage): add uppercase fallback for case-sensitive filesystems (fixes #2053) --- qlib/data/storage/file_storage.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py index 293817e83e8..97059ba6192 100644 --- a/qlib/data/storage/file_storage.py +++ b/qlib/data/storage/file_storage.py @@ -303,8 +303,13 @@ def file_name(self) -> str: name_orig = f"{self.instrument}/{self.field.lower()}.{self.freq.lower()}.bin" if (base_uri / name_orig).exists(): return name_orig + + # Candidate 2: Uppercase (Fix for lowercase input finding uppercase folder on case-sensitive OS) + name_upper = f"{self.instrument.upper()}/{self.field.lower()}.{self.freq.lower()}.bin" + if (base_uri / name_upper).exists(): + return name_upper - # Candidate 2: Lowercase (Backward Compatibility) + # Candidate 3: Lowercase (Backward Compatibility) name_lower = f"{self.instrument.lower()}/{self.field.lower()}.{self.freq.lower()}.bin" if (base_uri / name_lower).exists(): return name_lower