Skip to content

Commit a2894d9

Browse files
committed
check None-ness of slots, rather than boolean. D/M/S values can be 0
1 parent 8de1803 commit a2894d9

2 files changed

Lines changed: 63 additions & 30 deletions

File tree

src/main/python/opensextant/extractors/xcoord.py

Lines changed: 56 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,17 @@ def hemisphere_factor(sym: str) -> int:
6161
return HEMISPHERES.get(None)
6262

6363

64+
def one_value(*args):
    """
    Pick the first argument that is not None.

    Unlike an ``a or b or c`` chain, a falsy value such as ``0`` is
    returned rather than skipped, because only None-ness is tested.

    :param args: candidate values, examined in the order given.
    :return: the first argument that is not None; None when every
        argument is None or when no arguments are passed.
    """
    return next((candidate for candidate in args if candidate is not None), None)
73+
74+
6475
class Hemisphere:
6576
def __init__(self, axis, slots=None):
6677
self.axis = axis
@@ -180,17 +191,18 @@ def normalize(self):
180191
def decimal(self):
    """
    Convert the degree / minute / second fields to signed decimal degrees.

    Fields are tested for None-ness rather than truthiness, so a value
    of 0 counts as present.

    :return: the signed decimal value, or None when degrees is None.
        Seconds contribute only when degrees, minutes and seconds are all
        present and seconds < 60. Failing that, minutes contribute only
        when minutes < 60. Otherwise degrees alone are used.
    :raises Exception: when a hemisphere is set but its polarity is falsy.
    """
    sign = 1
    if self.hemi:
        # Validity check of presence of Hemisphere symbol is separate.
        sign = self.hemi.polarity
        if not sign:
            raise Exception("logic error - hemisphere was not resolved")

    deg, minutes, sec = self.degrees, self.min, self.seconds
    if deg is None:
        return None

    if minutes is not None:
        # Most specific form first; out-of-range seconds fall through
        # to the degrees+minutes form.
        if sec is not None and sec < 60:
            return sign * (deg + minutes / 60 + sec / 3600)
        if minutes < 60:
            return sign * (deg + minutes / 60)

    # No usable minutes: degrees only.
    return sign * deg
196208

@@ -208,10 +220,10 @@ def _digest_slots(self, axis):
208220
deg = self.get_int(f"deg{axis}", "deg")
209221
deg2 = self.get_int(f"dmsDeg{axis}", "deg")
210222
deg3 = self.get_decimal(f"decDeg{axis}", "deg")
211-
self.degrees = deg or deg2 or deg3
212-
if self.degrees:
223+
self.degrees = one_value(deg, deg2, deg3)
224+
if self.degrees is not None:
213225
self.specificity = Specificity.DEG
214-
if deg3:
226+
if deg3 is not None:
215227
self.specificity = Specificity.SUBDEG
216228
else:
217229
return
@@ -222,33 +234,35 @@ def _digest_slots(self, axis):
222234
minutes3 = self.get_decimal(f"decMin{axis}", "min")
223235
mindash = self.get_decimal(f"decMin{axis}3", "min")
224236

225-
self.min = minutes or minutes2 or minutes3 or mindash
226-
if self.min:
237+
self.min = one_value(minutes, minutes2, minutes3, mindash)
238+
if self.min is not None:
227239
self.specificity = Specificity.MINUTE
228240

229241
min_fract = self.get_fractional(f"fractMin{axis}", "fmin")
230242
min_fract2 = self.get_fractional(f"fractMin{axis}3", "fmin")
231243
# variation 2, is a 3-digit or longer fraction
232244

233-
if min_fract or min_fract2:
245+
fmin = one_value(min_fract, min_fract2)
246+
if fmin is not None:
234247
self.specificity = Specificity.SUBMINUTE
235-
self.min += min_fract or min_fract2
248+
self.min += fmin
236249

237250
else:
238251
return
239252

240253
# SECONDS
241254
sec = self.get_int(f"sec{axis}", "sec")
242255
sec2 = self.get_int(f"dmsSec{axis}", "sec")
243-
self.seconds = sec or sec2
244-
if self.seconds:
256+
self.seconds = one_value(sec, sec2)
257+
if self.seconds is not None:
245258
self.specificity = Specificity.SECOND
246259

247260
fsec = self.get_fractional(f"fractSec{axis}", "fsec")
248261
fsec2 = self.get_fractional(f"fractSec{axis}Opt", "fsec")
249-
if fsec or fsec2:
262+
fseconds = one_value(fsec, fsec2)
263+
if fseconds is not None:
250264
self.specificity = Specificity.SUBSECOND
251-
self.seconds += fsec or fsec2
265+
self.seconds += fseconds
252266
return
253267

254268
def get_int(self, f, fnorm):
@@ -330,11 +344,12 @@ def filter_out(self, m: GeocoordMatch) -> tuple:
330344
class MGRSFilter(GeocoordFilter):
331345
def __init__(self):
    """
    Set up the date layouts, digit sequences and current-year values
    that this filter's checks rely on.
    """
    GeocoordFilter.__init__(self)

    # Date layouts handed to arrow.get() by filter_out(), tried in this order.
    self.date_formats = [
        "DDMMMYYYY",
        "DMMMYYHHmm",
        "DDMMMYYHHmm",
        "DDMMMYY",
        "DMMMYY",
        "HHZZZYYYY",
    ]
    # NOTE(review): presumably the digit runs behind the "digit-seq"
    # rejection; the code that reads this list is not visible here.
    self.sequences = [
        "1234",
        "123456",
        "12345678",
        "1234567890",
    ]

    now = arrow.utcnow()
    self.today = now
    self.YEAR = now.date().year
    # Current year expressed as an offset from 2000.
    self.YY = self.YEAR - 2000
    # Largest year distance from YEAR that _is_recent() still accepts.
    self.RECENT_YEAR_THRESHOLD = 30
338353

339354
def filter_out(self, mgrs: GeocoordMatch) -> tuple:
340355
"""
@@ -346,6 +361,8 @@ def filter_out(self, mgrs: GeocoordMatch) -> tuple:
346361
# - is not a recent date;
347362
# - is not a rate ('NNN per LB');
348363
# - is not time with 'sec'
364+
if not mgrs.is_valid:
365+
return True, "invalid"
349366

350367
# Lexical filters:
351368
if not (mgrs.text.isupper() and len(mgrs.text.replace(" ", "")) > 6):
@@ -358,19 +375,25 @@ def filter_out(self, mgrs: GeocoordMatch) -> tuple:
358375
return True, "digit-seq"
359376

360377
# Date Filter
361-
date_test = mgrs.textnorm[0:10]
362378
for fmt in self.date_formats:
379+
fmtlen = len(fmt)
380+
date_test = mgrs.textnorm[0:fmtlen]
363381
try:
364382
dt = arrow.get(date_test, fmt)
365-
recent_year = abs(dt.date().year - self.YEAR) < 25
366-
if recent_year:
383+
if self._is_recent(dt):
367384
return True, "date"
368385
except Exception as parse_err:
369386
pass
370387

371388
# Not filtered out
372389
return False, None
373390

391+
def _is_recent(self, dt: "arrow.Arrow") -> bool:
    """
    Tell whether a parsed date lies within a window of years around the
    current year.

    :param dt: parsed date; only ``dt.date().year`` is read.
    :return: True when the year of ``dt`` is at most
        ``self.RECENT_YEAR_THRESHOLD`` years before or after ``self.YEAR``.
    """
    year_gap = abs(dt.date().year - self.YEAR)
    return year_gap <= self.RECENT_YEAR_THRESHOLD
396+
374397

375398
class DMSFilter(GeocoordFilter):
376399
def __init__(self):
@@ -382,11 +405,14 @@ def filter_out(self, dms: GeocoordMatch) -> tuple:
382405
Easy filter -- if punctuation matches, this is an easy pattern to ignore.
383406
:return: True if filtered out, false positive.
384407
"""
408+
if not dms.is_valid:
409+
return True, "invalid"
385410
if dms.text[0].isalpha():
386411
return False, None
387412
for fmt in self.date_formats:
388413
try:
389414
dt = arrow.get(dms.text, fmt)
415+
# Recency matters not. Tests are literal date formats
390416
return True, "date"
391417
except Exception as err:
392418
pass
@@ -450,20 +476,20 @@ def normalize(self):
450476
z1 = slots.get("UTMZoneZZ") # 0-5\d
451477
z2 = slots.get("UTMZoneZ") # \d
452478

453-
ZZ = int(z or z1 or z2)
454-
band = slots.get("UTMBand")
455-
if not band:
456-
return
479+
try:
480+
ZZ = int(one_value(z, z1, z2))
481+
band = slots.get("UTMBand")
482+
if not band:
483+
return
457484

458-
hemi = band[0]
459-
e = slots.get("UTMEasting")
460-
n = slots.get("UTMNorthing")
461-
if e and n:
462-
try:
485+
hemi = band[0]
486+
e = slots.get("UTMEasting")
487+
n = slots.get("UTMNorthing")
488+
if e and n:
463489
self.geodetic = Utm(zone=ZZ, hemisphere=hemi, band=band, easting=int(e), northing=int(n))
464490
self._make_coordinate()
465-
except Exception as err:
466-
self.parsing_err = str(err)
491+
except Exception as err:
492+
self.parsing_err = str(err)
467493

468494

469495
class DegMinMatch(GeocoordMatch):
@@ -537,7 +563,7 @@ def validate(self):
537563
return
538564
lath = self.lat_ordinate.hemi
539565
lonh = self.lon_ordinate.hemi
540-
valid_hemi = lath and lonh and lath.is_alpha() and lonh.is_alpha()
566+
valid_hemi = lath and lonh and lath.is_alpha() and lonh.is_alpha()
541567
valid_sym = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
542568
self.is_valid = valid_hemi or valid_sym
543569

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11

22
from opensextant.extractors.xcoord import XCoord
33

4+
tester = XCoord(debug=True)

# Focused test: a date-like string run through the extractor; print each
# match together with its filtered_out flag.
mgrs = "10 JAN 94"
matches = tester.extract(mgrs)
for match in matches:
    print(match, match.filtered_out)

# Full built-in test suite, run on a fresh extractor instance.
results = XCoord(debug=True).default_tests()
for result in results:
    print(result)

0 commit comments

Comments
 (0)