Skip to content

Commit c8c455b

Browse files
authored
Merge pull request #1229 from zlheui/add-bloodmnist-dataset
Add the bloodmnist dataset to the healthcare model zoo
2 parents 5482fe7 + 572f3d2 commit c8c455b

1 file changed

Lines changed: 240 additions & 0 deletions

File tree

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
import os
21+
import json
22+
from glob import glob
23+
import numpy as np
24+
from PIL import Image
25+
26+
27+
class Compose(object):
    """Chain several transforms into a single transform.

    The transforms are applied in list order: the output of one transform
    is handed to the next one as its input.

    Args:
        transforms: list of transforms to compose. Each entry must provide
            a ``forward(img)`` method.

    Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        >>> ])

    """

    def __init__(self, transforms):
        self.transforms = transforms

    def forward(self, img):
        """Run ``img`` through every transform in order.

        Args:
            img (PIL Image or numpy array): Image to be processed.

        Returns:
            PIL Image or numpy array: Output of the last transform, or
            ``img`` itself when the transform list is empty.
        """
        result = img
        for transform in self.transforms:
            result = transform.forward(result)
        return result

    def __repr__(self):
        # One line per transform, framed by "ClassName(" and ")".
        entries = ''.join('\n' + ' {0}'.format(t) for t in self.transforms)
        return self.__class__.__name__ + '(' + entries + '\n)'
64+
65+
class ToTensor(object):
    """Convert a ``PIL Image`` to a ``numpy.ndarray`` in channel-first layout.

    The image is read as an (H x W x C) array and returned as a
    (C x H x W) ``float64`` array. Single-channel images, which PIL exposes
    as a 2-D (H x W) array, get a channel axis of length 1.

    Images whose pixel data is read as ``uint8`` (every mode except ``I``,
    ``I;16`` and ``F``) are scaled from [0, 255] to [0.0, 1.0]. The other
    modes are converted to ``float64`` without scaling.

    .. note::
        Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
        transforming target image masks.
    """

    def forward(self, pic):
        """
        Args:
            pic (PIL Image): Image to be converted to array.

        Returns:
            numpy.ndarray: Converted image of shape (C, H, W), dtype ``float64``.

        Raises:
            TypeError: If ``pic`` is not a PIL Image.
        """
        if not isinstance(pic, Image.Image):
            raise TypeError('pic should be PIL Image. Got {}'.format(type(pic)))

        # Modes listed here are read with their native dtype; everything
        # else is read as uint8.
        mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
        img = np.array(pic, dtype=mode_to_nptype.get(pic.mode, np.uint8), copy=True)

        if pic.mode == '1':
            img = 255 * img

        # Single-channel images come back as (H, W); add the channel axis so
        # the HWC -> CHW transpose below is valid.
        if img.ndim == 2:
            img = img[:, :, None]

        # Put it from HWC to CHW format
        img = np.transpose(img, (2, 0, 1))

        # ``np.float`` was removed in NumPy 1.24; it was an alias of the
        # builtin ``float`` (i.e. float64), so ``np.float64`` is used here.
        if img.dtype == np.uint8:
            # Divide in float32 first, then widen, to keep the same numeric
            # values the original expression produced.
            return (img.astype(np.float32) / 255.0).astype(np.float64)
        return img.astype(np.float64)

    def __repr__(self):
        return self.__class__.__name__ + '()'
108+
109+
class Normalize(object):
    """Normalize a ``numpy.ndarray`` image with mean and standard deviation.

    This transform does not support PIL Image.
    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
    channels, this transform will normalize each channel of the input
    ``numpy.ndarray`` i.e.,
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
        By default the input array is left untouched and a new array is
        returned. With ``inplace=True`` the input array is overwritten and
        returned.

    Args:
        mean (Sequence): Sequence of means for each channel.
        std (Sequence): Sequence of standard deviations for each channel.
        inplace(bool, optional): Bool to make this operation in-place.

    """

    def __init__(self, mean, std, inplace=False):
        super().__init__()
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def forward(self, img: np.ndarray):
        """
        Args:
            img (Numpy ndarray): Floating-point array image of shape
                (..., C, H, W) to be normalized.

        Returns:
            d_res (Numpy ndarray): Normalized image with the same shape and
            dtype as ``img``. It is ``img`` itself when ``inplace`` is true.

        Raises:
            TypeError: If ``img`` is not a numpy array or not a float array.
            ValueError: If ``img`` has fewer than 3 dimensions, or any
                ``std`` entry is zero after conversion to ``img.dtype``.
        """
        if not isinstance(img, np.ndarray):
            raise TypeError('Input img should be a numpy array. Got {}.'.format(type(img)))

        # ``np.float`` was removed in NumPy 1.24, so the dtype is checked
        # against the abstract floating type instead (float64 still passes).
        if not np.issubdtype(img.dtype, np.floating):
            raise TypeError('Input array should be a float array. Got {}.'.format(img.dtype))

        if img.ndim < 3:
            raise ValueError('Expected array to be an array image of size (..., C, H, W). Got img.shape = '
                             '{}.'.format(img.shape))

        dtype = img.dtype
        mean = np.array(self.mean, dtype=dtype)
        std = np.array(self.std, dtype=dtype)
        if (std == 0).any():
            raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))

        # Reshape (C,) -> (C, 1, 1) so the statistics broadcast over H and W.
        mean = mean[:, None, None]
        std = std[:, None, None]

        if self.inplace:
            np.subtract(img, mean, out=img)
            np.divide(img, std, out=img)
            return img

        # np.subtract already allocates a fresh array, so no defensive copy
        # of the input is needed on the out-of-place path.
        d_res = np.divide(np.subtract(img, mean), std)
        return d_res

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
168+
169+
class ClassDataset(object):
    """Fetch data from file and generate batches.

    Load data from folder as PIL.Images and convert them into batch array.
    ``img_folder`` is expected to contain one sub-folder per class; the
    sub-folder name is used as the label and must parse as an integer.

    Args:
        img_folder (Str): Folder path of the training/validation images.
        transforms (Transform): Preprocess transforms.
    """

    def __init__(self, img_folder, transforms):
        super(ClassDataset, self).__init__()

        self.img_list = list()
        self.transforms = transforms

        # os.listdir and glob return entries in arbitrary order; sorting
        # makes the sample order reproducible across runs and machines.
        for class_name in sorted(os.listdir(img_folder)):
            pattern = os.path.join(img_folder, class_name, "*")
            for img_path in sorted(glob(pattern)):
                self.img_list.append((img_path, class_name))

    def __len__(self) -> int:
        return len(self.img_list)

    def __getitem__(self, index: int):
        """Load and transform one sample.

        Args:
            index (int): Position of the sample in ``img_list``.

        Returns:
            Tuple: ``(img, label)`` where ``img`` is the output of
            ``transforms.forward`` and ``label`` is a 0-d ``int32`` array.
        """
        img_path, label_str = self.img_list[index]
        # Scope the file handle so it is released even if a transform raises.
        with Image.open(img_path) as pil_img:
            # Force the lazy pixel read while the file is still open, so the
            # image stays usable even if a transform returns it unchanged.
            pil_img.load()
            img = self.transforms.forward(pil_img)
        label = np.array(int(label_str), dtype=np.int32)

        return img, label

    def batchgenerator(self, indexes, batch_size, data_size):
        """Generate batch arrays from transformed image list.

        Rows beyond ``len(indexes)`` are left as zeros, so a short final
        batch is zero-padded up to ``batch_size``.

        Args:
            indexes (Sequence): current batch indexes list, e.g. [n, n + 1, ..., n + batch_size]
            batch_size (int): number of rows in the returned batch arrays
            data_size (Tuple): input image size of shape (C, H, W)

        Return:
            batch_x (Numpy ndarray): batch array of input images (B, C, H, W)
            batch_y (Numpy ndarray): batch array of ground truth labels (B, 1)
        """
        batch_x = np.zeros((batch_size,) + tuple(data_size))
        batch_y = np.zeros((batch_size, 1), dtype=np.int32)
        for row, sample_index in enumerate(indexes):
            sample_x, sample_y = self[sample_index]
            batch_x[row, :, :, :] = sample_x
            batch_y[row, :] = sample_y

        return batch_x, batch_y
222+
223+
224+
def load(dir_path="tmp/bloodmnist"):
    """Build the training and validation datasets found under ``dir_path``.

    Reads ``num_classes`` from ``param.json`` in ``dir_path`` and wraps the
    ``train`` and ``val`` sub-folders in ``ClassDataset`` objects that share
    one ``ToTensor`` + ``Normalize`` preprocessing pipeline.

    Args:
        dir_path (str): Root folder of the dataset.

    Returns:
        Tuple: ``(train_dataset, val_dataset, num_class)``.
    """
    # Read the dataset configuration
    with open(os.path.join(dir_path, "param.json"), 'r') as cfg_file:
        config = json.load(cfg_file)
    num_class = config["num_classes"]

    # Pre-processing pipeline shared by both splits
    preprocessing = Compose([
        ToTensor(),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    train_dataset = ClassDataset(os.path.join(dir_path, "train"), preprocessing)
    val_dataset = ClassDataset(os.path.join(dir_path, "val"), preprocessing)
    return train_dataset, val_dataset, num_class

0 commit comments

Comments
 (0)