Skip to content

Commit 67bf264

Browse files
kvngvikram and Vikram K V N G authored
bug fix for Relab library: Sometimes data files don't have description (#182)
* bug fix for Relab library: Sometimes data files don't have description
* Added more test data for relab
* changed the read_relab_file function
* remove redundant lines

Removed a block that checked whether a sufficient number of lines is present. The check is unnecessary: if lines were missing, an error would already have occurred before this block. Also, this kind of issue is caused by errors in the dataset, not in the code.

---------

Co-authored-by: Vikram K V N G <k.v.n.g.vikram@gmail.com>
1 parent bbf4161 commit 67bf264

16 files changed

Lines changed: 46375 additions & 103 deletions

File tree

spectral/database/relab.py

Lines changed: 76 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -49,119 +49,94 @@ def __init__(self):
4949

5050

5151
def read_relab_file(filename):
52-
'''Reads a relab spectrum file.'''
52+
'''Reads a relab spectrum file.
53+
.asc files are structured as:
54+
Number of data lines
55+
Data lines (Wavelength in nm, Reflectance, and SD if any)
56+
Three blank lines
57+
File name
58+
A blank line
59+
Sample ID
60+
Comment lines
61+
62+
Note: Not considering any Standard Deviation (SD) data.
63+
'''
5364
with open_file(filename) as fin:
5465
lines = [line.rstrip('\n') for line in fin]
5566

56-
s = Signature()
67+
number_of_data = int(lines[0])
5768

58-
# Read signature spectrum
59-
pairs = []
60-
# Start line counter
61-
count = 0
62-
# Extract ReLab ID and store it
63-
relab_id = int(lines[0])
64-
s.sample["relab_id"] = relab_id
65-
s.measurement["relab_id"] = relab_id
66-
count = count + 1
67-
# Extract central wavelengths and reflectances
68-
for c in range(count, len(lines)):
69-
if (lines[c] != ""):
70-
out = lines[c].strip().split(" ")
71-
# Remove empty slots
72-
out1 = list(filter(None, out))
73-
#print(out1[0].strip(), out1[1].strip())
74-
pair = [float(out1[0].strip()), float(out1[1].strip())]
75-
pairs.append(pair)
76-
else:
77-
break
78-
79-
# Update line count
80-
count = count + c
81-
82-
[x, y] = [list(v) for v in zip(*pairs)]
69+
x = []
70+
y = []
71+
for i in range(1, 1+number_of_data):
72+
current_line = lines[i]
73+
parse1 = current_line.strip().split()
74+
x_val = float(parse1[0])
75+
y_val = float(parse1[1])
76+
x.append(x_val)
77+
y.append(y_val)
8378

8479
# Make sure wavelengths are ascending
8580
if float(x[0]) > float(x[-1]):
8681
x.reverse()
8782
y.reverse()
88-
s.x = [float(val) for val in x]
89-
s.y = [float(val) for val in y]
83+
84+
s = Signature()
85+
86+
s.x = x
87+
s.y = y
9088
s.measurement['first x value'] = x[0]
9189
s.measurement['last x value'] = x[-1]
92-
s.measurement['number of x values'] = len(x)
93-
94-
# Extract Metadata
95-
# Read sample metadata
96-
#pair = read_pair(fin, lpv[i])
97-
#s.sample[pair[0].lower()] = pair[1]
98-
99-
# Read measurement metadata
100-
#pair = read_pair(fin, lpv[i])
101-
#s.measurement[pair[0].lower()] = pair[1]
102-
103-
m = []
104-
description = ""
105-
stage = None
106-
107-
while (lines[count].strip() == ""):
108-
count = count + 1
109-
110-
# Filename extraction
111-
if(stage == None and lines[count].strip() != ""):
112-
# Remove heading and trailing spaces
113-
ml = lines[count].strip()
114-
if '.ASC' in ml:
115-
fname = ml.replace(' ','')
116-
m.append(fname)
117-
s.sample["name"] = str(fname)
118-
#print("File NAME %s" % (fname))
119-
stage = "ASC"
120-
121-
count = count + 1
122-
ml = lines[count].strip()
123-
124-
while (lines[count].strip() == ""):
125-
count = count + 1
126-
127-
# Extract Material Name after Filename extraction
128-
if(stage == "ASC" and lines[count].strip() != ""):
129-
ml = lines[count].strip()
130-
#print("Material NAME %s" % (ml))
131-
s.measurement['name'] = str(ml)
132-
stage = "name"
133-
134-
count = count + 1
135-
ml = lines[count].strip()
136-
137-
while (lines[count].strip() == ""):
138-
count = count + 1
139-
140-
if(lines[count].strip() != ""):
141-
ml = lines[count].strip()
142-
# Extract date and time separately
143-
if 'Date' in ml:
144-
date = ml.split('Time:')[0]
145-
time = ml.split('Time:')[-1]
146-
s.sample["date"] = date.replace('Date:',"").replace(" "," ").strip()
147-
s.sample["time"] = time
148-
# Extract Source and Detection Angles & Voltage
149-
elif 'Volt' in ml:
150-
volt = ml.split('Volt:')[-1]
151-
dang = ml.split('Volt:')[-2]
152-
dang1 = dang.split('Detect Ang:')[-1]
153-
sang = dang.split('Detect Ang:')[-2]
154-
s.measurement['source_angle'] = sang.replace("Source Ang:","").strip()
155-
s.measurement['detect_angle'] = dang1.strip()
156-
s.measurement['volt'] = volt
157-
# All other cases
158-
else:
159-
description += ml + " "
160-
s.sample['description'] = description + " " + s.measurement['name']
90+
s.measurement['number of x values'] = number_of_data
91+
92+
# Extract ReLab ID and store it
93+
relab_id = lines[number_of_data+6].strip() # a string
94+
s.sample["relab_id"] = relab_id
95+
s.measurement["relab_id"] = relab_id
96+
97+
# ideally can be taken from argument to this function as well
98+
# converting to lower because actual names are in lowercase but stored as
99+
# uppercase in files
100+
filename_from_content = lines[number_of_data+4
101+
].strip().replace(' ', '').lower()
102+
103+
s.sample['name'] = filename_from_content
104+
105+
# The parsing of comment lines
106+
comment_lines = lines[number_of_data+7:]
107+
108+
if len(comment_lines) >= 2: # what if there are no comment lines
109+
last_line = comment_lines[-1]
110+
last_but_one_line = comment_lines[-2]
161111
else:
162-
while (lines[count].strip() == ""):
163-
count = count + 1
164-
ml = lines[count].strip()
112+
last_line = ''
113+
last_but_one_line = ''
114+
115+
if last_line.strip() and last_but_one_line.strip(): # both non-blank lines
116+
if last_line.strip().split()[0] == 'Date:' and \
117+
last_but_one_line.strip().split()[0] == 'Source':
118+
# all these modifications are to get rid of edge cases
119+
modified_line = last_but_one_line.strip().replace(':', ' ')
120+
modified_line = modified_line.replace(',', ' ')
121+
split = modified_line.split()
122+
if len(split) == 8:
123+
s.measurement['source_angle'] = float(split[2])
124+
s.measurement['detect_angle'] = float(split[5])
125+
s.measurement['volt'] = float(split[7])
126+
modified_line = last_line.strip().replace(',', ' ')
127+
split = modified_line.split()
128+
if len(split) == 4:
129+
s.sample['date'] = split[1]
130+
s.sample['time'] = split[3]
131+
132+
# delete the last two lines now
133+
comment_lines = comment_lines[:-2]
134+
135+
description = relab_id
136+
for i in comment_lines:
137+
description += ' ' + i.strip() if i.strip() else ''
138+
139+
s.sample['description'] = description
165140

166141
return s
167142

0 commit comments

Comments
 (0)