@@ -29,6 +29,14 @@ def identify_standard_columns(labels):
2929 s = 'hemisphere'
3030 elif regex .search ('(^k$)|(mm.*?3)|volume|voxels|size|extent' , lab ):
3131 s = 'size'
32+ elif (
33+ regex .search (r'\bx\b.*\by\b.*\bz\b' , lab )
34+ or regex .search (r'(peak\s*voxel\s*coordinate|talairach\s*coordinates?|mni\s*coordinates?)' , lab )
35+ or (regex .search (r'coordinates?' , lab ) and not regex .search (r'cluster|score|value' , lab ))
36+ ):
37+ # Some tables store x/y/z in one combined coordinate column.
38+ s = 'coord_triplet'
39+ found_coords = True
3240
3341 # --- START OF FIX ---
3442 # OLD: elif regex.match('\s*[xy]\s*$', lab):
@@ -59,7 +67,8 @@ def identify_standard_columns(labels):
5967 # --- END OF FIX ---
6068
6169 elif regex .search ('rdinate' , lab ):
62- continue
70+ s = 'coord_triplet'
71+ found_coords = True
6372 elif lab == 't' or regex .search ('^(max.*(z|t).*|.*(z|t).*(score|value|max))$' , lab ):
6473 s = 'statistic'
6574 elif regex .search ('p[\-\s]+.*val' , lab ):
@@ -158,6 +167,15 @@ def identify_repeating_groups(labels):
158167def create_activation (data , labels , standard_cols , group_labels = []):
159168
160169 activation = Activation ()
170+ coords_from_triplet = False
171+
def _extract_triplet(value):
    """Pull an x/y/z coordinate triplet out of a single table cell.

    Args:
        value: The cell content; it is converted with ``str()`` before
            matching, so non-string values are accepted.

    Returns:
        A list of three coordinate strings ``[x, y, z]`` with any gap between
        a minus sign and its digits removed (``"- 35"`` -> ``"-35"``), or
        ``None`` when no triplet is found.

    NOTE(review): the pattern requires a newline before the first number, so
    a cell that holds only ``"-42, 10, 6"`` yields ``None``. Confirm that this
    is intended for columns labelled as combined coordinates.
    """
    # Drop dots that have no digit on either side; they are not decimal points.
    clean_val = regex.sub(r'(?<!\d)\.(?!\d)', '', str(value))
    # One coordinate: optional minus sign, optionally separated from the digits
    # by whitespace (some ScienceDirect journals leave a gap), then up to three
    # integer digits and an optional fraction of up to two digits.
    cs = r'((?:-\s*)?\d{1,3}\.?\d{0,2})'
    match = regex.search(r'\n *%s[,;\s]+%s[,;\s]+%s' % (cs, cs, cs), clean_val)
    if not match:
        return None
    # Close the gap between the minus sign and the number, if any.
    return [regex.sub(r'-\s+', '-', c.strip()) for c in match.groups()]
161179
162180 for i , col in enumerate (data ):
163181
@@ -181,10 +199,26 @@ def create_activation(data, labels, standard_cols, group_labels=[]):
181199
182200 sc = standard_cols [i ]
183201
202+ if sc in ['coord_triplet' , 'x' , 'y' , 'z' ]:
203+ triplet = _extract_triplet (col )
204+ if triplet is not None :
205+ x , y , z = triplet
206+ logger .info ("Found coordinate triplet in %s column: %s -> %s, %s, %s" % (sc , col , x , y , z ))
207+ activation .set_coords (x , y , z )
208+ coords_from_triplet = True
209+ activation .add_col (labels [i ], col )
210+ continue
211+ if sc == 'coord_triplet' :
212+ activation .add_col (labels [i ], col )
213+ continue
214+
184215 # Validate XYZ columns: Should only be integers (and possible trailing decimals).
185216 # If they're not, keep only leading numbers. The exception is that ScienceDirect
186217 # journals often follow the minus sign with a space (e.g., - 35), which we strip.
187218 if regex .match ('[xyz]$' , sc ):
219+ if coords_from_triplet and str (col ).strip () == '' :
220+ activation .add_col (labels [i ], col )
221+ continue
188222 m = regex .match ('([-])\s?(\d+\.*\d*)$' , col )
189223 if m :
190224 col = "%s%s" % (m .group (1 ), m .group (2 ))
@@ -210,11 +244,9 @@ def create_activation(data, labels, standard_cols, group_labels=[]):
210244 # Also need to remove space between minus sign and numbers; some ScienceDirect
211245 # journals leave a gap.
212246 if not i in standard_cols :
213- cs = '([-]?\d{1,3}\.?\d{0,2})'
214- clean_col = regex .sub (r'(?<!\d)\.(?!\d)' , '' , str (col )) # Remove dots not part of numbers
215- m = regex .search ('\n *%s[,;\s]+%s[,;\s]+%s' % (cs , cs , cs ), clean_col )
216- if m :
217- x , y , z = [regex .sub ('-\s+' , '-' , c .strip ()) for c in [m .group (1 ), m .group (2 ), m .group (3 )]]
247+ triplet = _extract_triplet (col )
248+ if triplet is not None :
249+ x , y , z = triplet
218250 logger .info ("Found multi-coordinate column: %s\n ...and extracted: %s, %s, %s" % (col , x , y , z ))
219251 activation .set_coords (x , y , z )
220252
@@ -390,6 +422,3 @@ def parse_table(data, html=None):
390422
391423 table .finalize ()
392424 return table if len (table .activations ) else None
393-
394-
395-
0 commit comments