1414from pyfive .h5py import Datatype
1515
1616
17-
1817class Group (Mapping ):
1918 """
2019 An HDF5 Group which may hold attributes, datasets, or other groups.
@@ -64,7 +63,6 @@ def __getitem__(self, y):
6463 """
6564 return self .__getitem_lazy_control (y , noindex = False )
6665
67-
6866 def get_lazy_view (self , y ):
6967 """
7068 This instantiates the object y, and if it is a
@@ -81,7 +79,6 @@ def get_lazy_view(self, y):
8179
8280 return self .__getitem_lazy_control (y , noindex = True )
8381
84-
8582 def __getitem_lazy_control (self , y , noindex ):
8683 """
8784 This is the routine which actually does the get item
@@ -130,7 +127,7 @@ def __getitem_lazy_control(self, y, noindex):
130127 if additional_obj != '.' :
131128 raise KeyError ('%s is a dataset, not a group' % (obj_name ))
132129 return Dataset (obj_name , DatasetID (dataobjs , noindex = noindex ), self )
133-
130+
134131 try :
135132 # if true, this may well raise a NotImplementedError, if so, we need
136133 # to warn the user, who may be able to use other parts of the data.
@@ -263,14 +260,35 @@ def __init__(self, filename, mode='r'):
263260 self .userblock_size = 0
264261 super (File , self ).__init__ ('/' , dataobjects , self )
265262
@property
def consolidated_metadata(self):
    """Report whether chunk B-tree nodes precede every dataset chunk.

    Only the direct members of this object are inspected (nested groups
    are not descended into).  For each member that is a ``Dataset`` with
    layout class 2 (chunked), the end of its B-tree range and the address
    of its first chunk are collected.

    Returns:
        True when the largest B-tree end address is strictly smaller than
        the smallest first-chunk address, or when there is no chunked
        dataset to compare; False otherwise.
    """
    max_btree = None
    min_chunk = None

    for name in self:
        # Item access builds a new object on every call, so look each
        # member up exactly once and reuse it.
        member = self[name]
        if not isinstance(member, Dataset):
            continue

        dsid = member.id
        if dsid.layout_class != 2:
            # not a chunked dataset
            continue

        btree_end = dsid.btree_range[1]
        first_chunk = dsid.first_chunk
        if max_btree is None or btree_end > max_btree:
            max_btree = btree_end
        if min_chunk is None or first_chunk < min_chunk:
            min_chunk = first_chunk

    if max_btree is None or min_chunk is None:
        # nothing to compare, treat the file as consolidated
        return True
    return max_btree < min_chunk
283+
def __repr__(self):
    """Return a short description containing the base name of the file."""
    base_name = os.path.basename(self.filename)
    return '<HDF5 file "%s" (mode r)>' % (base_name)
268286
269287 def _get_object_by_address (self , obj_addr ):
270288 """ Return the object pointed to by a given address. """
271289 if self ._dataobjects .offset == obj_addr :
272290 return self
273-
291+
274292 queue = deque ([(self .name .rstrip ('/' ), self )])
275293 while queue :
276294 base , grp = queue .popleft ()
@@ -288,6 +306,7 @@ def close(self):
288306 """ Close the file. """
289307 if self ._close :
290308 self ._fh .close ()
309+
291310 __del__ = close
292311
293312 def __enter__ (self ):
@@ -340,7 +359,6 @@ class Dataset(object):
340359 Group instance containing this dataset.
341360
342361 """
343-
344362
345363 def __init__ (self , name , datasetid , parent ):
346364 """ initalize. """
@@ -349,15 +367,14 @@ def __init__(self, name, datasetid, parent):
349367 self .name = name
350368 self ._attrs = None
351369 self ._astype = None
352-
353- self .id = datasetid
370+
371+ self .id = datasetid
354372 """ This is the DatasetID instance which provides the actual data access methods. """
355373
356- #horrible kludge for now,
357- #https://github.com/NCAS-CMS/pyfive/issues/13#issuecomment-2557121461
358- #we hide stuff we need here
374+ # horrible kludge for now,
375+ # https://github.com/NCAS-CMS/pyfive/issues/13#issuecomment-2557121461
376+ # we hide stuff we need here
359377 self ._dataobjects = self .id ._meta
360-
361378
362379 def __repr__ (self ):
363380 info = (os .path .basename (self .name ), self .shape , self .dtype )
@@ -392,16 +409,15 @@ def astype(self, dtype):
def len(self):
    """Give the extent of the leading axis of ``self.shape``."""
    dims = self.shape
    return dims[0]
395-
412+
def iter_chunks(self, *args):
    """Forward to ``self.id.iter_chunks``, passing the positional arguments as one tuple."""
    chunk_source = self.id
    return chunk_source.iter_chunks(args)
398-
399415
@property
def shape(self):
    """Shape as reported by the underlying ``id`` object."""
    dataset_id = self.id
    return dataset_id.shape
404-
420+
405421 @property
406422 def maxshape (self ):
407423 """ maxshape attribute. (None for unlimited dimensions) """
@@ -473,15 +489,17 @@ def dims(self):
473489 def attrs (self ):
474490 """ attrs attribute. """
475491 return self .id ._meta .attributes
476-
492+
493+
477494class DimensionManager (Sequence ):
478495 """ Represents a collection of dimensions associated with a dataset. """
496+
479497 def __init__ (self , dset ):
480498 ndim = len (dset .shape )
481- dim_list = [[]]* ndim
499+ dim_list = [[]] * ndim
482500 if 'DIMENSION_LIST' in dset .attrs :
483501 dim_list = dset .attrs ['DIMENSION_LIST' ]
484- dim_labels = [b'' ]* ndim
502+ dim_labels = [b'' ] * ndim
485503 if 'DIMENSION_LABELS' in dset .attrs :
486504 dim_labels = dset .attrs ['DIMENSION_LABELS' ]
487505 self ._dims = [
@@ -521,8 +539,9 @@ class AstypeContext(object):
521539 """
522540 Context manager which allows changing the type read from a dataset.
523541 """
524- #FIXME:ENUM should this allow a conversion from enum base types to values using dictionary?
525- #Probably not, as it would be additional functionality to the h5py interface???
542+
543+ # FIXME:ENUM should this allow a conversion from enum base types to values using dictionary?
544+ # Probably not, as it would be additional functionality to the h5py interface???
526545
527546 def __init__ (self , dset , dtype ):
528547 self ._dset = dset
@@ -533,4 +552,3 @@ def __enter__(self):
533552
def __exit__(self, *args):
    """Clear the dataset's ``_astype`` override when the ``with`` block ends.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    target = self._dset
    target._astype = None
536-
0 commit comments