Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion qlib/data/storage/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,37 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs):
    """Create the file-backed feature storage for one instrument/field/frequency.

    Parameters
    ----------
    instrument : str
        Instrument identifier; forwarded unchanged to the base initialiser.
    field : str
        Feature field name; forwarded unchanged to the base initialiser.
    freq : str
        Data frequency; forwarded unchanged to the base initialiser.
    provider_uri : dict, optional
        Data location(s). When not ``None`` it is normalised with
        ``C.DataPathManager.format_provider_uri``; otherwise ``None`` is stored.
    **kwargs
        Extra keyword arguments forwarded to the base initialiser.
    """
    super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
    self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
    # ``file_name`` must NOT be assigned here: on this class it is a read-only
    # ``@property`` (no setter) that resolves the on-disk name lazily, so an
    # assignment would raise ``AttributeError`` during construction.

@property
def file_name(self) -> str:
    """Return the relative ``<instrument>/<field>.<freq>.bin`` name for this feature.

    The field and frequency parts are always lower-cased. The instrument
    directory is resolved against what is present under
    ``<data uri for freq>/<storage_name>s`` by probing, in order:

    1. the instrument exactly as given,
    2. the upper-cased instrument (lower-case input, upper-case folder on a
       case-sensitive filesystem),
    3. the lower-cased instrument (layout written by older versions).

    The first candidate whose file exists is returned. If none exists, the
    name built from the instrument as given is returned, so that newly
    created files keep the caller's casing.

    Returns
    -------
    str
        Path of the feature file relative to the storage directory.
    """
    # Reads ``self.dpm`` and ``self.storage_name``, which are not defined in
    # this method -- presumably supplied by the storage base classes.
    base_uri = self.dpm.get_data_uri(self.freq).joinpath(f"{self.storage_name}s")

    # The part after the instrument directory is identical for every candidate.
    suffix = f"{self.field.lower()}.{self.freq.lower()}.bin"
    instrument = self.instrument

    # ``dict.fromkeys`` drops duplicate spellings while keeping the probing
    # order, so an already upper- or lower-case instrument is not checked on
    # disk twice.
    for spelling in dict.fromkeys((instrument, instrument.upper(), instrument.lower())):
        candidate = f"{spelling}/{suffix}"
        if (base_uri / candidate).exists():
            return candidate

    # Nothing on disk yet: default to the caller's casing (new-file case).
    return f"{instrument}/{suffix}"


def clear(self):
with self.uri.open("wb") as _:
Expand Down
86 changes: 86 additions & 0 deletions tests/storage_tests/test_issue_2053.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
import pandas as pd

from qlib.data.storage.file_storage import FileFeatureStorage

class TestIssue2053(unittest.TestCase):
    """Regression tests for how ``FileFeatureStorage`` cases the instrument directory.

    Each test builds a throw-away ``<tmp>/day/features`` tree, initialises qlib
    against it, and checks which instrument directory the storage URI resolves to.
    Assertions compare the URI's parent directory *name* rather than searching
    the whole path string, so unrelated path components cannot affect the result.
    """

    def setUp(self):
        # A unique scratch directory avoids touching (or deleting) anything in
        # the current working directory.
        self.data_dir = Path(tempfile.mkdtemp(prefix="test_issue_2053_"))
        # Registered immediately so the directory is removed even when a later
        # step of setUp raises (tearDown would not run in that case).
        self.addCleanup(shutil.rmtree, self.data_dir, ignore_errors=True)

        self.day_dir = self.data_dir / "day"
        self.features_dir = self.day_dir / "features"
        self.features_dir.mkdir(parents=True)

        self.provider_uri = {"day": self.day_dir}

        import qlib

        qlib.init(provider_uri=self.provider_uri)

    @staticmethod
    def _write_feature_bin(inst_dir, values):
        """Write ``close.day.bin`` in *inst_dir*: a start index of 0 followed by *values*, as ``"<f"`` floats."""
        bin_file = inst_dir / "close.day.bin"
        data = np.array(values, dtype="<f")
        index = 0
        with bin_file.open("wb") as fp:
            np.hstack([index, data]).astype("<f").tofile(fp)
        return bin_file

    def _make_storage(self, instrument):
        """Build the ``close``/``day`` storage for *instrument* against the scratch provider."""
        return FileFeatureStorage(instrument=instrument, field="close", freq="day", provider_uri=self.provider_uri)

    def _filesystem_is_case_sensitive(self, existing_name, other_case_name):
        """Return True when *other_case_name* does not alias the existing *existing_name* directory."""
        del existing_name  # only documents which spelling was created
        return not (self.features_dir / other_case_name).exists()

    def test_case_sensitivity_check(self):
        # Case 1: the upper-case directory exists and is requested in upper case.
        inst_name = "AAPL"
        inst_dir = self.features_dir / inst_name
        inst_dir.mkdir()
        self._write_feature_bin(inst_dir, [1.0, 2.0, 3.0])

        storage = self._make_storage(inst_name)

        # The resolved directory must keep the on-disk (upper-case) spelling;
        # an exact comparison also rules out the lower-case variant.
        self.assertEqual(
            Path(storage.uri).parent.name,
            inst_name,
            "Storage URI should preserve case if directory exists",
        )

        # Verify data access still works
        self.assertIsNotNone(storage[0], "Should be able to read data")

    def test_lowercase_input_finds_uppercase_dir(self):
        # Lower-case input while only the upper-case directory exists.
        inst_name = "AAPL"
        inst_dir = self.features_dir / inst_name
        inst_dir.mkdir()
        self._write_feature_bin(inst_dir, [1.0, 2.0, 3.0])

        storage = self._make_storage(inst_name.lower())
        uri = Path(storage.uri)

        self.assertTrue(uri.exists(), "Lowercase input should resolve to the existing uppercase data file")
        if self._filesystem_is_case_sensitive(inst_name, inst_name.lower()):
            # Only a case-sensitive filesystem can tell the two spellings apart.
            self.assertEqual(uri.parent.name, inst_name, "Storage URI should use the uppercase directory on disk")

        self.assertIsNotNone(storage[0], "Should be able to read data via lowercase instrument name")

    def test_backward_compatibility(self):
        # Case 2: only the lower-case directory exists (old layout), requested in upper case.
        inst_name = "MSFT"
        inst_dir = self.features_dir / inst_name.lower()
        inst_dir.mkdir()
        self._write_feature_bin(inst_dir, [4.0, 5.0, 6.0])

        storage = self._make_storage(inst_name)
        uri = Path(storage.uri)

        self.assertTrue(uri.exists(), "Storage URI should resolve to an existing file")
        if self._filesystem_is_case_sensitive(inst_name.lower(), inst_name):
            # On a case-insensitive filesystem the upper-case spelling already
            # matches, so the fallback is only observable when case matters.
            self.assertEqual(
                uri.parent.name,
                inst_name.lower(),
                "Storage URI should resolve to lowercase if upper doesn't exist",
            )

        # Verify data access works
        self.assertIsNotNone(storage[0], "Should be able to read data from fallback lowercase path")

    def test_new_instrument_defaults(self):
        # Case 3: neither spelling exists (new/write scenario).
        inst_name = "GOOG"
        storage = self._make_storage(inst_name)

        # With nothing on disk the input casing is kept, so users can create
        # new upper-case folders.
        self.assertEqual(Path(storage.uri).parent.name, inst_name, "New paths should respect input case")

# Run the test cases in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading