add support for h5 files

taigw · taigw · commit a4634f26262a · 2025-08-02T12:41:51.000+08:00
1, add support for h5 files
2, edit Rescale, Pad and Rotate so that setting the output size to a 2D list is allowed for 3D images
diff --git a/pymic/io/image_read_write.py b/pymic/io/image_read_write.py
@@ -81,8 +81,8 @@ def load_image_as_nd_array(image_name):
     if (image_name.endswith(".nii.gz") or image_name.endswith(".nii") or
         image_name.endswith(".mha")):
         image_dict = load_nifty_volume_as_4d_array(image_name)
-    elif(image_name.endswith(".jpg") or image_name.endswith(".jpeg") or
-         image_name.endswith(".tif") or image_name.endswith(".png")):
+    elif(image_name.lower().endswith(".jpg") or image_name.lower().endswith(".jpeg") or
+         image_name.lower().endswith(".tif") or image_name.lower().endswith(".png")):
         image_dict = load_rgb_image_as_3d_array(image_name)
     else:
         raise ValueError("unsupported image format: {0:}".format(image_name))
diff --git a/pymic/io/nifty_dataset.py b/pymic/io/nifty_dataset.py
@@ -3,50 +3,91 @@
 
 import logging
 import os
+import h5py 
 import pandas as pd
 import numpy as np
 from torch.utils.data import Dataset
 from pymic import TaskType
 from pymic.io.image_read_write import load_image_as_nd_array
 
+def check_and_expand_dim(x, img_dim):
+    """
+    Prepend a channel axis when `x` only has spatial dimensions.
+
+    :param x: (numpy array) Image or label array, with or without a channel axis.
+    :param img_dim: (int) Spatial dimension of the image, i.e., 2 or 3.
+    :return: An array of shape [C, H, W] for 2D images or [C, D, H, W] for 3D images
+        when the channel axis was missing; otherwise `x` is returned unchanged.
+    """
+    if(img_dim in (2, 3) and len(x.shape) == img_dim):
+        x = np.expand_dims(x, axis = 0)
+    return x
+
 class NiftyDataset(Dataset):
     """
     Dataset for loading images for segmentation. It generates 4D tensors with
     dimention order [C, D, H, W] for 3D images, and 3D tensors 
     with dimention order [C, H, W] for 2D images.
 
     :param root_dir: (str) Directory with all the images. 
-    :param csv_file: (str) Path to the csv file with image names.
-    :param modal_num: (int) Number of modalities. 
+    :param csv_file: (str) Path to the csv file with image names. If it is None, 
+        the images will be those under root_dir. This only works for testing with 
+        a single input modality. If the images are stored in h5 files, the *.csv file
+        only has one column, while for other types of images such as .nii.gz and .png, 
+        each column is for an input modality, and the last column is for label.
+    :param modal_num: (int) Number of modalities. This is only used when the images are not h5 files.
+    :param image_dim: (int) Spatial dimension of the input image. This is only used for h5 files.
     :param with_label: (bool) Load the data with segmentation ground truth or not.
     :param transform:  (list) List of transforms to be applied on a sample.
         The built-in transforms can listed in :mod:`pymic.transform.trans_dict`.
     """
-    # def __init__(self, root_dir, csv_file, modal_num = 1, 
-    def __init__(self, root_dir, csv_file, modal_num = 1, allow_missing_modal = False,
-            with_label = False, transform=None, task = TaskType.SEGMENTATION):
+    def __init__(self, root_dir, csv_file, modal_num = 1, image_dim = 3, allow_missing_modal = False,
+            with_label = True, transform=None, task = TaskType.SEGMENTATION):
         self.root_dir   = root_dir
-        self.csv_items  = pd.read_csv(csv_file)
+        if(csv_file is not None):
+            self.csv_items  = pd.read_csv(csv_file)
+        else:
+            img_names = os.listdir(root_dir)
+            img_names = [item for item in img_names if ("nii" in item or "jpg" in item or 
+                "jpeg" in item or "bmp" in item or "png" in item)]
+            csv_dict = {"image":img_names}
+            self.csv_items = pd.DataFrame.from_dict(csv_dict)
+
         self.modal_num  = modal_num
+        self.image_dim  = image_dim
         self.allow_emtpy= allow_missing_modal
         self.with_label = with_label
         self.transform  = transform
         self.task       = task
+        self.h5files    = False
         assert self.task in  [TaskType.SEGMENTATION, TaskType.RECONSTRUCTION]
        
-        csv_keys = list(self.csv_items.keys())
-        if('label' not in csv_keys):
+        # check if the files are h5 images, and if the labels are provided.
+        temp_name = self.csv_items.iloc[0, 0]  # the first item decides whether h5 files are used
+        logging.debug("first item of the dataset: {0:}".format(temp_name))
+        if(temp_name.endswith(".h5")):
+            self.h5files = True
+            temp_full_name = "{0:}/{1:}".format(self.root_dir, temp_name)
+            with h5py.File(temp_full_name, 'r') as h5f:  # close the handle once the keys are probed
+                if('label' not in h5f):
+                    self.with_label = False
+        else:
+            csv_keys = list(self.csv_items.keys())
+            if('label' not in csv_keys):
+                self.with_label = False
+            
+            self.image_weight_idx = None
+            self.pixel_weight_idx = None
+            if('image_weight' in csv_keys):
+                self.image_weight_idx = csv_keys.index('image_weight')
+            if('pixel_weight' in csv_keys):
+                self.pixel_weight_idx = csv_keys.index('pixel_weight')
+        if(not self.with_label):
             logging.warning("`label` section is not found in the csv file {0:}".format(
-                csv_file) + "\n -- This is only allowed for self-supervised learning" + 
+                csv_file) + " or the corresponding h5 file." + 
+                "\n -- This is only allowed for self-supervised learning" + 
                 "\n -- when `SelfSuperviseLabel` is used in the transform, or when" + 
                 "\n -- loading the unlabeled data for preprocessing.")
-            self.with_label = False
-        self.image_weight_idx = None
-        self.pixel_weight_idx = None
-        if('image_weight' in csv_keys):
-            self.image_weight_idx = csv_keys.index('image_weight')
-        if('pixel_weight' in csv_keys):
-            self.pixel_weight_idx = csv_keys.index('pixel_weight')
 
     def __len__(self):
         return len(self.csv_items)
@@ -92,36 +133,46 @@ def __get_pixel_weight__(self, idx):
     def __getitem__(self, idx):
         names_list, image_list = [], []
         image_shape = None 
-        for i in range (self.modal_num):
-            image_name = self.csv_items.iloc[idx, i]
-            image_full_name = "{0:}/{1:}".format(self.root_dir, image_name)
-            if(os.path.exists(image_full_name)):
-                image_dict = load_image_as_nd_array(image_full_name)
-                image_data = image_dict['data_array']
-            elif(self.allow_emtpy and image_shape is not None):
-                image_data = np.zeros(image_shape)
-            else:
-                raise KeyError("File not found: {0:}".format(image_full_name))
-            if(i == 0):
-                image_shape = image_data.shape
-            names_list.append(image_name)
-            image_list.append(image_data)
-        image = np.concatenate(image_list, axis = 0)
-        image = np.asarray(image, np.float32)    
-        
-        sample = {'image': image, 'names' : names_list, 
-                 'origin':image_dict['origin'],
-                 'spacing': image_dict['spacing'],
-                 'direction':image_dict['direction']}
-        if (self.with_label):   
-            sample['label'], label_name = self.__getlabel__(idx) 
-            sample['names'].append(label_name)
-            assert(image.shape[1:] == sample['label'].shape[1:])
-        if (self.image_weight_idx is not None):
-            sample['image_weight'] = self.csv_items.iloc[idx, self.image_weight_idx]
-        if (self.pixel_weight_idx is not None):
-            sample['pixel_weight'] = self.__get_pixel_weight__(idx) 
-            assert(image.shape[1:] == sample['pixel_weight'].shape[1:])
+        if(self.h5files):
+            sample_name = self.csv_items.iloc[idx, 0]
+            with h5py.File(self.root_dir + '/' +  sample_name, 'r') as h5f:
+                # `[:]` reads the dataset into memory, so the file can be closed right after
+                img = check_and_expand_dim(h5f['image'][:], self.image_dim)
+                sample = {'image':img}
+                if(self.with_label):
+                    sample['label'] = check_and_expand_dim(h5f['label'][:], self.image_dim)
+            sample['names'] = [sample_name]
+        else:            
+            for i in range (self.modal_num):
+                image_name = self.csv_items.iloc[idx, i]
+                image_full_name = "{0:}/{1:}".format(self.root_dir, image_name)
+                if(os.path.exists(image_full_name)):
+                    image_dict = load_image_as_nd_array(image_full_name)
+                    image_data = image_dict['data_array']
+                elif(self.allow_emtpy and image_shape is not None):
+                    image_data = np.zeros(image_shape)
+                else:
+                    raise KeyError("File not found: {0:}".format(image_full_name))
+                if(i == 0):
+                    image_shape = image_data.shape
+                names_list.append(image_name)
+                image_list.append(image_data)
+            image = np.concatenate(image_list, axis = 0)
+            image = np.asarray(image, np.float32)    
+            
+            sample = {'image': image, 'names' : names_list, 
+                    'origin':image_dict['origin'],
+                    'spacing': image_dict['spacing'],
+                    'direction':image_dict['direction']}
+            if (self.with_label):   
+                sample['label'], label_name = self.__getlabel__(idx) 
+                sample['names'].append(label_name)
+                assert(image.shape[1:] == sample['label'].shape[1:])
+            if (self.image_weight_idx is not None):
+                sample['image_weight'] = self.csv_items.iloc[idx, self.image_weight_idx]
+            if (self.pixel_weight_idx is not None):
+                sample['pixel_weight'] = self.__get_pixel_weight__(idx) 
+                assert(image.shape[1:] == sample['pixel_weight'].shape[1:])
         if self.transform:
             sample = self.transform(sample)
 
diff --git a/pymic/net_run/agent_seg.py b/pymic/net_run/agent_seg.py
@@ -71,14 +71,18 @@ def get_stage_dataset_from_config(self, stage):
         modal_num  = self.config['dataset'].get('modal_num', 1)
         allow_miss = self.config['dataset'].get('allow_missing_modal', False)
         stage_dir  = self.config['dataset'].get('train_dir', None)
-        if(stage == 'valid' and "valid_dir" in self.config['dataset']):
-            stage_dir = self.config['dataset']['valid_dir']
-        if(stage == 'test' and "test_dir" in self.config['dataset']):
-            stage_dir = self.config['dataset']['test_dir']
+        stage_dim  = self.config['dataset'].get('train_dim', 3)
+        if(stage == 'valid'): # and "valid_dir" in self.config['dataset']):
+            stage_dir = self.config['dataset'].get('valid_dir', stage_dir)
+            stage_dim = self.config['dataset'].get('valid_dim', stage_dim)
+        if(stage == 'test'): # and "test_dir" in self.config['dataset']):
+            stage_dir = self.config['dataset'].get('test_dir', stage_dir)
+            stage_dim = self.config['dataset'].get('test_dim', stage_dim)
         logging.info("Creating dataset for {0:}".format(stage))
         dataset  = NiftyDataset(root_dir  = stage_dir,
                                 csv_file  = csv_file,
                                 modal_num = modal_num,
+                                image_dim = stage_dim,
                                 allow_missing_modal = allow_miss,
                                 with_label= with_label,
                                 transform = data_transform, 
diff --git a/pymic/transform/pad.py b/pymic/transform/pad.py
@@ -38,6 +38,11 @@ def __call__(self, sample):
         image = sample['image']
         input_shape = image.shape
         input_dim = len(input_shape) - 1
+        
+        if(input_dim == 3 and (len(self.output_size) == 2 or getattr(self, '_depth_from_input', False))):
+            # for 3D images, ignore the z-axis: use the depth of the current sample, not of the first one
+            self._depth_from_input = True
+            self.output_size = [input_shape[1]] + list(self.output_size)[-2:]
         assert(len(self.output_size) == input_dim)
         if(self.ceil_mode):
             multiple = [int(math.ceil(float(input_shape[1+i])/self.output_size[i]))\
diff --git a/pymic/transform/rescale.py b/pymic/transform/rescale.py
@@ -17,9 +17,9 @@ class Rescale(AbstractTransform):
     following fields:
 
     :param `Rescale_output_size`: (list/tuple or int) The output size along each spatial axis, 
-        such as [D, H, W] or [H, W].  If D is None, the input image is only reslcaled in 2D.
-        If int, the smallest axis is matched to output_size keeping aspect ratio the same
-        as the input.
+        such as [D, H, W] or [H, W]. For 3D images, if D is None, or the length of the tuple/list is 2,
+        the input image is only rescaled in 2D. If int, the smallest axis is matched to output_size 
+        keeping aspect ratio the same as the input.
     :param `Rescale_inverse`: (optional, bool) 
         Is inverse transform needed for inference. Default is `True`.
     """
@@ -38,6 +38,8 @@ def __call__(self, sample):
             output_size = self.output_size
             if(output_size[0] is None):
                 output_size[0] = input_shape[1]
+            if(input_dim == 3 and len(self.output_size) == 2):
+                output_size = [input_shape[1]] + list(output_size)
             assert(len(output_size) == input_dim)
         else:
             min_edge = min(input_shape[1:])
diff --git a/pymic/transform/rotate.py b/pymic/transform/rotate.py
@@ -19,13 +19,19 @@ class RandomRotate(AbstractTransform):
 
     :param `RandomRotate_angle_range_d`: (list/tuple or None) 
         Rotation angle (degree) range along depth axis (x-y plane), e.g., (-90, 90).
+        The length of the list/tuple can be larger than 2, when `RandomRotate_discrete_mode` is True.
         If None, no rotation along this axis. 
     :param `RandomRotate_angle_range_h`: (list/tuple or None) 
         Rotation angle (degree) range along height axis (x-z plane), e.g., (-90, 90).
+        The length of the list/tuple can be larger than 2, when `RandomRotate_discrete_mode` is True.
         If None, no rotation along this axis. Only used for 3D images. 
     :param `RandomRotate_angle_range_w`: (list/tuple or None) 
         Rotation angle (degree) range along width axis (y-z plane), e.g., (-90, 90).
+        The length of the list/tuple can be larger than 2, when `RandomRotate_discrete_mode` is True.
         If None, no rotation along this axis. Only used for 3D images. 
+    :param `RandomRotate_discrete_mode`: (optional, bool) Whether the rotation angles
+        are discrete values listed in the angle range. For example, if you only want to rotate 
+        the images with a fixed set of angles like (90, 180, 270), then set discrete_mode as True. Default is `False`.
     :param `RandomRotate_probability`: (optional, float) 
         The probability of applying RandomRotate. Default is 0.5.
     :param `RandomRotate_inverse`: (optional, bool) 
@@ -36,8 +42,11 @@ def __init__(self, params):
         self.angle_range_d  = params['RandomRotate_angle_range_d'.lower()]
         self.angle_range_h  = params.get('RandomRotate_angle_range_h'.lower(), None)
         self.angle_range_w  = params.get('RandomRotate_angle_range_w'.lower(), None)
+        self.discrete_mode  = params.get('RandomRotate_discrete_mode'.lower(), False)
         self.prob = params.get('RandomRotate_probability'.lower(), 0.5)
         self.inverse = params.get('RandomRotate_inverse'.lower(), True)
+        if(self.angle_range_d is not None and len(self.angle_range_d) > 2):
+            assert(self.discrete_mode)  # more than two values are only meaningful as a discrete angle set
 
     def __apply_transformation(self, image, transform_param_list, order = 1):
         """
@@ -63,15 +72,27 @@ def __call__(self, sample):
         
         transform_param_list = []
         if(self.angle_range_d is not None):
-            angle_d = np.random.uniform(self.angle_range_d[0], self.angle_range_d[1])
+            if(self.discrete_mode):
+                idx = random.randint(0, len(self.angle_range_d) - 1)
+                angle_d = self.angle_range_d[idx]
+            else:
+                angle_d = np.random.uniform(self.angle_range_d[0], self.angle_range_d[1])
             transform_param_list.append([angle_d, (-1, -2)])
         if(input_dim == 3):
             if(self.angle_range_h is not None):
-                angle_h = np.random.uniform(self.angle_range_h[0], self.angle_range_h[1])
-                transform_param_list.append([angle_h, (-1, -3)])
+                if(self.discrete_mode):
+                    idx = random.randint(0, len(self.angle_range_h) - 1)
+                    angle_h = self.angle_range_h[idx]
+                else:
+                    angle_h = np.random.uniform(self.angle_range_h[0], self.angle_range_h[1])
+                transform_param_list.append([angle_h, (-1, -3)])  # registered in both modes
             if(self.angle_range_w is not None):
-                angle_w = np.random.uniform(self.angle_range_w[0], self.angle_range_w[1])
-                transform_param_list.append([angle_w, (-2, -3)])
+                if(self.discrete_mode):
+                    idx = random.randint(0, len(self.angle_range_w) - 1)
+                    angle_w = self.angle_range_w[idx]
+                else:
+                    angle_w = np.random.uniform(self.angle_range_w[0], self.angle_range_w[1])
+                transform_param_list.append([angle_w, (-2, -3)])  # registered in both modes
         assert(len(transform_param_list) > 0)
         # select a random transform from the possible list rather than 
         # use a combination for higher efficiency