-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataLoader.py
More file actions
154 lines (138 loc) · 5.79 KB
/
DataLoader.py
File metadata and controls
154 lines (138 loc) · 5.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import random
import numpy as np
from scipy.misc import imread, imresize
from keras.utils.np_utils import to_categorical
from DatasetGenerator import rawImageLoader
def rawImageParser(datasetPath, binPath):
    """
    Adapts rawImageLoader into a (filePath, classLabel) generator.

    Params:
        datasetPath: root directory of the dataset images
        binPath: path of the binary index consumed by rawImageLoader
    Yields:
        [0]: the full path of the next image file
        [1]: the class label for that image
    """
    for _, classNum, suffix in rawImageLoader(datasetPath, binPath):
        yield os.path.join(datasetPath, suffix), classNum
"""
loads file and class information from the index file in the generated image directory
generator will keep loading files in order infinitely
Params:
folderPath: the path of the generated images folder we want to load
the folder is assumed to have an index file called index.tsv
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def generatedDatasetParser(folderPath):
print("loading data from %s" % folderPath)
indexFilePath = os.path.join(folderPath, "index.tsv")
while True:
with open(indexFilePath, 'r') as f:
for thisLine in f:
classVal, fileName = thisLine.rstrip().split()
filePath = os.path.join(folderPath, fileName)
yield filePath, classVal
"""
loads file and class information from the index file in the segmented image directory
generator will keep loading files in order infinitely
Params:
rootDir: the root path of the segmenteeed images we want to load
the folder is assumed to be structured with a set of 7 inner folders,
each containing an index called "px_groundtruth.txt" that has class information
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def segmentedDatasetParser(rootDir):
print("loading data from %s" % rootDir)
while True:
for dirNum in range(1, 7):
dirName = "p" + str(dirNum) + "_45_first"
dirPath = os.path.join(rootDir, dirName)
indexName = 'p' + str(dirNum) + '_groundtruth.txt'
indexPath = os.path.join(dirPath, indexName)
with open(indexPath, 'r') as f:
for thisLine in f:
thisLine = thisLine.rstrip().split()
fileName = thisLine[0].zfill(6) + ".png"
className = thisLine[2]
filePath = os.path.join(dirPath, fileName)
if os.path.exists(filePath):
yield filePath, className
"""
loads file and class information from the testing image directory
generator will keep loading files in order infinitely
Params:
rootDir: the root path of the segmenteeed images we want to load
the folder is assumed to be structured with a set of 7 inner folders,
each containing an index called "px_groundtruth.txt" that has class information
Yeilds:
[0]: the path of the next file to read
[1]: the class label for the next file to read
"""
def testDatasetParser(rootDir):
print("loading data from %s" % rootDir)
while True:
for dirNum in range(1, 7):
dirName = "p" + str(dirNum) + "_45_first"
dirPath = os.path.join(rootDir, dirName)
indexName = 'p' + str(dirNum) + '_groundtruth.txt'
indexPath = os.path.join(dirPath, indexName)
with open(indexPath, 'r') as f:
for thisLine in f:
thisLine = thisLine.rstrip().split()
className = thisLine[2]
folderName = thisLine[0].zfill(6)
folderPath = os.path.join(dirPath, folderName)
if os.path.exists(folderPath):
for imgFile in os.listdir(folderPath):
if ".png" in imgFile:
imgPath = os.path.join(folderPath, imgFile)
yield imgPath, className
"""
loads batches of images from the generated images directory
Params:
imageDir: the directory of the generated images
dataParser: an index parsergenerator, that yeilds file names and class values
batchSize: the number of images in each batch
imageSize: the size of the images being extracted
Yields:
[0]: a numpy matrix containing a batch of images
[1]: a numpy array of label values for each image
"""
def batchLoader(dataParser, imageSize=224, batchSize=64):
#find the file size
while True:
thisBatch = np.zeros([batchSize, imageSize, imageSize, 3])
labelBatch = np.zeros([batchSize, 1], dtype=int)
for i in range(batchSize):
filePath, classVal = next(dataParser)
thisImage = imread(filePath)
if thisImage.shape[0] != imageSize or thisImage.shape[1] != imageSize:
thisImage = imresize(thisImage, (imageSize, imageSize))
thisBatch[i,:,:,:] = thisImage
labelBatch[i] = classVal
yield thisBatch, labelBatch
"""
Finds the number of results in a generator object
Params:
generator: the generator to query
Returns:
[0]: the number of results in the generator
"""
def countResultsInGenerator(generator):
count = 0
for _ in generator:
count += 1
if count > 1000000:
return float("inf")
return count
"""
takes in a batch loader generator, and changes the labels to be in the one-hot format
Params:
batchLoader: a batch loader generator that yeilds imageBatch, labelBath pairs
numClasses: the number of classes possible for the labels
Yields:
[0]: a numpy matrix containing a batch of images
[1]: a numpy array of one-hot labels for each image
"""
def oneHotWrapper(batchLoader, numClasses=30):
for img, label in batchLoader:
label = label.reshape([img.shape[0],])
oneHotLabels = to_categorical(label, nb_classes=numClasses)
yield img, oneHotLabels