@@ -119,9 +119,10 @@ def normalize(self):
119119class DMSOrdinate :
120120 SYMBOLS = {"°" , "º" , "'" , "\" " , ":" , "lat" , "lon" , "geo" , "coord" , "deg" }
121121
122- def __init__ (self , axis : str , text : str , slots = None ):
122+ def __init__ (self , axis : str , text : str , fam : str , slots = None ):
123123 self .axis = axis
124124 self .text = text
125+ self .pattern_family = fam
125126 self .slots = slots
126127 self .degrees = None
127128 self .min = None
@@ -134,7 +135,7 @@ def __init__(self, axis: str, text: str, slots=None):
134135 self .normalize ()
135136
136137 def is_valid (self ):
137- if not self .degrees :
138+ if self .degrees is None :
138139 return False
139140 # Must have degrees, in range for the axis
140141 if self .axis == "lat" :
@@ -144,9 +145,9 @@ def is_valid(self):
144145 if not - 180 < self .degrees < 180 :
145146 return False
146147 # Min and Secs must be in range if specified
147- if self .min and not 0 < self .min < 60 :
148+ if self .min is not None and not 0 <= self .min < 60 :
148149 return False
149- if self .seconds and not 0 < self .seconds < 60 :
150+ if self .seconds is not None and not 0 <= self .seconds < 60 :
150151 return False
151152
152153 return True
@@ -216,6 +217,14 @@ def _digest_slots(self, axis):
216217 """
217218 Fields or slots are named xxxLatxx or xxxLonxx
218219 """
220+ if self .pattern_family == "DMS" :
221+ min_sec_sep = self .slots .get (f"ms{ axis } Sep" )
222+ deg_min_sep = self .slots .get (f"dm{ axis } Sep" )
223+ if min_sec_sep and deg_min_sep and min_sec_sep == "." and min_sec_sep != deg_min_sep :
224+ # Valid coordinate, but mixed separators like "DD MM.ss" suggest a DM (degrees-minutes) pattern,
225+ # whereas "DD.MM.SS" with consistent separators is DMS.
226+ return
227+
219228 # DEGREES
220229 deg = self .get_int (f"deg{ axis } " , "deg" )
221230 deg2 = self .get_int (f"dmsDeg{ axis } " , "deg" )
@@ -310,6 +319,7 @@ def __init__(self, *args, **kwargs):
310319 self .lat_ordinate = None
311320 self .lon_ordinate = None
312321 self .filter = None
322+ self .pattern_family = self .pattern_id .split ("-" , 1 )[0 ]
313323
314324 def __str__ (self ):
315325 return f"{ self .text } "
@@ -361,10 +371,11 @@ def filter_out(self, mgrs: GeocoordMatch) -> tuple:
361371 # - is not a recent date;
362372 # - is not a rate ('NNN per LB');
363373 # - is not time with 'sec'
374+ # Lexical filters:
364375 if not mgrs .is_valid :
376+ # parsed earlier as invalid.
365377 return True , "invalid"
366378
367- # Lexical filters:
368379 if not (mgrs .text .isupper () and len (mgrs .text .replace (" " , "" )) > 6 ):
369380 return True , "lexical"
370381 parts = set (mgrs .text .split ())
@@ -405,18 +416,20 @@ def filter_out(self, dms: GeocoordMatch) -> tuple:
405416 Easy filter -- if punctuation matches, this is an easy pattern to ignore.
406417 :return: True if filtered out, false positive.
407418 """
408- if not dms .is_valid :
409- return True , "invalid"
410- if dms .text [0 ].isalpha ():
419+ if dms .is_valid :
420+ if dms .text [0 ].isalpha ():
421+ return False , None
422+ for fmt in self .date_formats :
423+ try :
424+ dt = arrow .get (dms .text , fmt )
425+ # Recency matters not. Tests are literal date formats
426+ return True , "date"
427+ except Exception as err :
428+ pass
429+ # Not filtered. Is valid.
411430 return False , None
412- for fmt in self .date_formats :
413- try :
414- dt = arrow .get (dms .text , fmt )
415- # Recency matters not. Tests are literal date formats
416- return True , "date"
417- except Exception as err :
418- pass
419- return False , None
431+ # Filter out. invalid.
432+ return True , "invalid"
420433
421434
422435mgrs_filter = MGRSFilter ()
@@ -524,8 +537,8 @@ def normalize(self):
524537 # < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?
525538
526539 # TODO: conditions that invalidate this pattern?
527- self .lat_ordinate = DMSOrdinate ("lat" , self .text , slots = self .attributes ())
528- self .lon_ordinate = DMSOrdinate ("lon" , self .text , slots = self .attributes ())
540+ self .lat_ordinate = DMSOrdinate ("lat" , self .text , self . pattern_family , slots = self .attributes ())
541+ self .lon_ordinate = DMSOrdinate ("lon" , self .text , self . pattern_family , slots = self .attributes ())
529542 self ._make_coordinate ()
530543 self .validate ()
531544
@@ -537,8 +550,8 @@ def __init__(self, *args, **kwargs):
537550
538551 def normalize (self ):
539552 GeocoordMatch .normalize (self )
540- self .lat_ordinate = DMSOrdinate ("lat" , self .text , slots = self .attributes ())
541- self .lon_ordinate = DMSOrdinate ("lon" , self .text , slots = self .attributes ())
553+ self .lat_ordinate = DMSOrdinate ("lat" , self .text , self . pattern_family , slots = self .attributes ())
554+ self .lon_ordinate = DMSOrdinate ("lon" , self .text , self . pattern_family , slots = self .attributes ())
542555 self ._make_coordinate ()
543556 self .validate ()
544557
@@ -571,7 +584,7 @@ def validate(self):
571584
572585 def normalize (self ):
573586 GeocoordMatch .normalize (self )
574- self .lat_ordinate = DMSOrdinate ("lat" , self .text , slots = self .attributes ())
575- self .lon_ordinate = DMSOrdinate ("lon" , self .text , slots = self .attributes ())
587+ self .lat_ordinate = DMSOrdinate ("lat" , self .text , self . pattern_family , slots = self .attributes ())
588+ self .lon_ordinate = DMSOrdinate ("lon" , self .text , self . pattern_family , slots = self .attributes ())
576589 self ._make_coordinate ()
577590 self .validate ()
0 commit comments