@@ -54,6 +54,7 @@ class FileNames(str, Enum):
5454 NEIGHBOR_INDICES = "neighbor_indices.npy"
5555 NEIGHBOR_INDICES_PTR = "neighbor_indptr.npy"
5656 NEIGHBOR_VALUES = "neighbor_values.npy"
57+ HEADER = "header.sch"
5758
5859
5960class Mode (str , Enum ):
@@ -248,7 +249,6 @@ def __init__(
248249 """
249250 self ._version : str = importlib .metadata .version ("bionemo.scdl" )
250251 self .data_path : str = data_path
251- self .header_file_name : str = "header.sch"
252252 self .header : SCDLHeader = None
253253 self .mode : Mode = mode
254254 self .paginated_load_cutoff = paginated_load_cutoff
@@ -309,6 +309,30 @@ def __init__(
309309 case _:
310310 raise ValueError ("An np.memmap path, an h5ad path, or the number of elements and rows is required" )
311311
312+ def _path_in_archive (self , filename : str | Path ) -> str :
313+ """Returns the full path to a file within the archive, joining self.data_path and the filename.
314+
315+ Args:
316+ filename: The filename or Path object to resolve within the archive.
317+
318+ Returns:
319+ The full path as a string.
320+ """
321+ if isinstance (filename , Path ):
322+ filename = str (filename )
323+ return os .path .join (self .data_path , filename )
324+
@property
def header_path(self) -> str:
    """Return the full path to the header file in the archive.

    The path is ``self.data_path`` joined with ``FileNames.HEADER.value``
    (``"header.sch"``) through ``_path_in_archive``.

    Example (shown with POSIX path separators):
        >>> ds = SingleCellMemMapDataset(data_path="my_data")
        >>> ds.header_path
        'my_data/header.sch'
    """
    return self._path_in_archive(FileNames.HEADER.value)
335+
312336 def _init_neighbor_args (self , neighbor_key , neighbor_sampling_strategy , fallback_to_identity ):
313337 # Neighbor tracking
314338 self ._has_neighbors = False # Track if neighbor data was successfully loaded/found
@@ -686,7 +710,7 @@ def features(self) -> Optional[RowFeatureIndex]:
686710
687711 def _load_mmap_file_if_exists (self , file_path , dtype ):
688712 if os .path .exists (file_path ):
689- return np .memmap (file_path , dtype = dtype , mode = self .mode )
713+ return np .memmap (file_path , dtype = dtype , mode = self .mode . value )
690714 else :
691715 raise FileNotFoundError (f"The mmap file at { file_path } is missing" )
692716
@@ -708,15 +732,15 @@ def load(self, stored_path: str) -> None:
708732 )
709733 self .data_path = stored_path
710734 self .mode = Mode .READ_APPEND
711- # self.header_path = Path(stored_path) / self.header_file_name
712735 # Load header if present; keep None if missing or unreadable
713- if os .path .exists (self .data_path / self . header_file_name ):
736+ if os .path .exists (self .header_path ):
714737 try :
715- self .header = SCDLHeader .load (str (self .data_path / self . header_file_name ))
738+ self .header = SCDLHeader .load (str (self .header_path ))
716739 except Exception as e :
717- warnings .warn (f"Failed to load SCDL header at { Path ( self .data_path ) / self . header_file_name } : { e } " )
740+ warnings .warn (f"Failed to load SCDL header at { self .header_path } : { e } " )
718741 self .header = None
719742 else :
743+ warnings .warn (f"SCDL header missing at { self .header_path } ; continuing without header." )
720744 self .header = None
721745
722746 # Metadata is required, so we must check if it exists and fail if not.
@@ -812,7 +836,12 @@ def regular_load_h5ad(
812836 self .row_index [0 : num_rows + 1 ] = count_data .indptr .astype (int )
813837
814838 vars = adata .var
815- adata .file .close ()
839+ file_handle = getattr (adata , "file" , None )
840+ if file_handle is not None :
841+ try :
842+ file_handle .close ()
843+ except Exception :
844+ pass
816845
817846 return vars , num_rows
818847
@@ -882,7 +911,12 @@ def paginated_load_h5ad(
882911 shape = (n_elements ,),
883912 )
884913 vars = adata .var
885- adata .file .close ()
914+ file_handle = getattr (adata , "file" , None )
915+ if file_handle is not None :
916+ try :
917+ file_handle .close ()
918+ except Exception :
919+ pass
886920
887921 return vars , num_rows
888922
@@ -1024,7 +1058,7 @@ def _write_header(self):
10241058 indexes ,
10251059 )
10261060 )
1027- header .save (Path ( self .data_path ) / self . header_file_name )
1061+ header .save (self .header_path )
10281062
10291063 def save (self , output_path : Optional [str ] = None ) -> None :
10301064 """Saves the class to a given output path.
0 commit comments