diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py
index 8a100a2d19e..97059ba6192 100644
--- a/qlib/data/storage/file_storage.py
+++ b/qlib/data/storage/file_storage.py
@@ -286,7 +286,31 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
     def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs):
         super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
         self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
-        self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
+
+    @property
+    def file_name(self) -> str:
+        """Return the feature file path relative to the features directory.
+
+        The instrument directory is looked up on disk using, in order, the
+        caller's spelling, the uppercase spelling and the lowercase spelling
+        (the historical layout). The first path that exists wins, so data
+        stays reachable on case-sensitive file systems. When nothing exists
+        yet, the caller's spelling is returned so new files are created
+        under it.
+        """
+        # NOTE: self.dpm and self.storage_name are not defined in this class;
+        # they are expected to come from FileStorageMixin/BaseStorage.
+        base_uri = self.dpm.get_data_uri(self.freq).joinpath(f"{self.storage_name}s")
+        suffix = f"{self.field.lower()}.{self.freq.lower()}.bin"
+        # dict.fromkeys keeps the probing order while dropping duplicate
+        # spellings, so e.g. an all-uppercase instrument is not stat-ed twice.
+        spellings = dict.fromkeys((self.instrument, self.instrument.upper(), self.instrument.lower()))
+        candidates = [f"{spelling}/{suffix}" for spelling in spellings]
+        for candidate in candidates:
+            if (base_uri / candidate).exists():
+                return candidate
+        # Nothing on disk yet: fall back to the caller's spelling.
+        return candidates[0]
 
     def clear(self):
         with self.uri.open("wb") as _:
diff --git a/tests/storage_tests/test_issue_2053.py b/tests/storage_tests/test_issue_2053.py
new file mode 100644
index 00000000000..82da522ffaf
--- /dev/null
+++ b/tests/storage_tests/test_issue_2053.py
@@ -0,0 +1,86 @@
+import shutil
+import unittest
+from pathlib import Path
+import numpy as np
+import pandas as pd
+from qlib.data.storage.file_storage import FileFeatureStorage
+
+class TestIssue2053(unittest.TestCase):
+    def setUp(self):
+        self.data_dir = Path("test_issue_2053_data").absolute()
+        if self.data_dir.exists():
+            shutil.rmtree(self.data_dir)
+        self.data_dir.mkdir()
+
+        self.day_dir = self.data_dir / "day"
+        self.features_dir = self.day_dir / "features"
+        self.features_dir.mkdir(parents=True)
+
+        self.provider_uri = {"day": self.day_dir}
+
+        import qlib
+        qlib.init(provider_uri=self.provider_uri)
+
+    def tearDown(self):
+        if self.data_dir.exists():
+            shutil.rmtree(self.data_dir)
+
+    def test_case_sensitivity_check(self):
+        # Case 1: Uppercase Directory Exists
+        inst_name = "AAPL"
+        inst_dir = self.features_dir / inst_name
+        inst_dir.mkdir()
+
+        # Create a dummy binary file to ensure storage object considers
it valid + bin_file = inst_dir / "close.day.bin" + data = np.array([1.0, 2.0, 3.0], dtype="