-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataLoader.py
More file actions
154 lines (138 loc) · 5.79 KB
/
DataLoader.py
File metadata and controls
154 lines (138 loc) · 5.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import random
import numpy as np
from scipy.misc import imread, imresize
from keras.utils.np_utils import to_categorical
from DatasetGenerator import rawImageLoader
def rawImageParser(datasetPath, binPath):
    """
    Adapts rawImageLoader into a (filePath, classLabel) generator.

    Params:
        datasetPath: root directory of the dataset images
        binPath: path of the binary index consumed by rawImageLoader
    Yields:
        [0]: the full path of the next image file
        [1]: the class label for that image
    """
    for _, classNum, suffix in rawImageLoader(datasetPath, binPath):
        yield os.path.join(datasetPath, suffix), classNum
"""
loads file and class information from the index file in the generated image directory
generator will keep loading files in order infinitely
Params:
folderPath: the path of the generated images folder we want to load
the folder is assumed to have an index file called index.tsv
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def generatedDatasetParser(folderPath):
print("loading data from %s" % folderPath)
indexFilePath = os.path.join(folderPath, "index.tsv")
while True:
with open(indexFilePath, 'r') as f:
for thisLine in f:
classVal, fileName = thisLine.rstrip().split()
filePath = os.path.join(folderPath, fileName)
yield filePath, classVal
"""
loads file and class information from the index file in the segmented image directory
generator will keep loading files in order infinitely
Params:
rootDir: the root path of the segmenteeed images we want to load
the folder is assumed to be structured with a set of 7 inner folders,
each containing an index called "px_groundtruth.txt" that has class information
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def segmentedDatasetParser(rootDir):
print("loading data from %s" % rootDir)
while True:
for dirNum in range(1, 7):
dirName = "p" + str(dirNum) + "_45_first"
dirPath = os.path.join(rootDir, dirName)
indexName = 'p' + str(dirNum) + '_groundtruth.txt'
indexPath = os.path.join(dirPath, indexName)
with open(indexPath, 'r') as f:
for thisLine in f:
thisLine = thisLine.rstrip().split()
fileName = thisLine[0].zfill(6) + ".png"
className = thisLine[2]
filePath = os.path.join(dirPath, fileName)
if os.path.exists(filePath):
yield filePath, className
"""
loads file and class information from the testing image directory
generator will keep loading files in order infinitely
Params:
rootDir: the root path of the segmenteeed images we want to load
the folder is assumed to be structured with a set of 7 inner folders,
each containing an index called "px_groundtruth.txt" that has class information
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def testDatasetParser(rootDir):
print("loading data from %s" % rootDir)
while True:
for dirNum in range(1, 7):
dirName = "p" + str(dirNum) + "_45_first"
dirPath = os.path.join(rootDir, dirName)
indexName = 'p' + str(dirNum) + '_groundtruth.txt'
indexPath = os.path.join(dirPath, indexName)
with open(indexPath, 'r') as f:
for thisLine in f:
thisLine = thisLine.rstrip().split()
className = thisLine[2]
folderName = thisLine[0].zfill(6)
folderPath = os.path.join(dirPath, folderName)
if os.path.exists(folderPath):
for imgFile in os.listdir(folderPath):
if ".png" in imgFile:
imgPath = os.path.join(folderPath, imgFile)
yield imgPath, className
"""
loads batches of images from the generated images directory
Params:
imageDir: the directory of the generated images
dataParser: an index parsergenerator, that yeilds file names and class values
batchSize: the number of images in each batch
imageSize: the size of the images being extracted
Yields:
[0]: a numpy matrix containing a batch of images
[1]: a numpy array of label values for each image
"""
def batchLoader(dataParser, imageSize=224, batchSize=64):
#find the file size
while True:
thisBatch = np.zeros([batchSize, imageSize, imageSize, 3])
labelBatch = np.zeros([batchSize, 1], dtype=int)
for i in range(batchSize):
filePath, classVal = next(dataParser)
thisImage = imread(filePath)
if thisImage.shape[0] != imageSize or thisImage.shape[1] != imageSize:
thisImage = imresize(thisImage, (imageSize, imageSize))
thisBatch[i,:,:,:] = thisImage
labelBatch[i] = classVal
yield thisBatch, labelBatch
"""
Finds the number of results in a generator object
Params:
generator: the generator to query
Returns:
[0]: the number of results in the generator
"""
def countResultsInGenerator(generator):
count = 0
for _ in generator:
count += 1
if count > 1000000:
return float("inf")
return count
"""
takes in a batch loader generator, and changes the labels to be in the one-hot format
Params:
batchLoader: a batch loader generator that yeilds imageBatch, labelBath pairs
numClasses: the number of classes possible for the labels
Yields:
[0]: a numpy matrix containing a batch of images
[1]: a numpy array of one-hot labels for each image
"""
def oneHotWrapper(batchLoader, numClasses=30):
for img, label in batchLoader:
label = label.reshape([img.shape[0],])
oneHotLabels = to_categorical(label, nb_classes=numClasses)
yield img, oneHotLabels