@@ -130,12 +130,12 @@ def get_orcid_token():
130130 return orcid_token
131131
132132
133- def record_search (url , email , interactive = False , search_type = "" ):
134- """Given a url (with a name or email ) do a record search looking for an orcid id.
133+ def record_search (url , terms , interactive = False , search_type = "" ):
134+ """Given a url (with a name or terms ) do a record search looking for an orcid id.
135135
136136 Arguments:
137137 - url (str) : url to perform request
138- - email (str) : email , used just for logging
138+ - terms (str) : terms , used just for logging
139139 - interactive (bool) : if True, ask user if there is more than a single response
140140 - search_type (str) : description on what search is based on, used just for logging
141141 """
@@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""):
152152 if len (results ) == 1 :
153153 return results [0 ]["orcid-id" ]
154154
155+ term_str = terms [0 ] % terms [1 :]
155156 # Only stream results to screen in interactive mode
156157 if not interactive :
157158 bot .info (
158- f"{ email } : found more than 1 ({ len (results )} ) result for ORCID search { search_type } , "
159+ f"{ term_str } : found more than one ({ len (results )} ) result for ORCID search { search_type } , "
159160 "run with --interactive mode to select."
160161 )
161- return
162+ return Ellipsis
162163
163164 # One or more results
164165 if len (results ) > 10 :
165166 bot .warning ("Found more than 10 results, will only show top 10." )
166167
167- print ("\n \n %s\n ======================================================" % email )
168+ print ("\n \n %s\n ======================================================" % term_str )
168169 for idx , r in enumerate (results ):
169170 # Limit is ten results, count starting at 0
170171 idx = idx + 1
@@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""):
191192 else :
192193 print ("[%s]\n %s\n " % (idx , record ))
193194
195+ # TODO: here we should remember for a person on what we already presented as
196+ # options and not to show them again.
197+ #
194198 # If interactive, ask for choice prompt
195199 if interactive :
196200 skip_choices = ["s" , "S" , "skip" ]
@@ -216,7 +220,7 @@ def record_search(url, email, interactive=False, search_type=""):
216220
217221 if choice in enter_choices :
218222 return entry_prompt (
219- f"Please enter the ORCID for { email } ." ,
223+ f"Please enter the ORCID for { term_str } ." ,
220224 regex = "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$" ,
221225 )
222226
@@ -227,53 +231,88 @@ def record_search(url, email, interactive=False, search_type=""):
227231 return results [int (choice ) - 1 ]["orcid-id" ]
228232
229233
230- def get_orcid (email , name = None , interactive = False ):
231- """Get an orcid identifier for a given email or name."""
232- # We must have an email OR name
233- if not email and not name :
234- return
def extended_search_url(q, *args):
    """Build an ORCID expanded-search URL for the query template ``q``.

    Arguments:
     - q (str) : query template; may contain ``%s`` placeholders, one per
       value in ``args``
     - args (str) : values substituted into ``q`` after being percent-quoted
       with ``urllib.parse.quote``

    Returns the complete request URL as a string.
    """
    # At most 10 results are ever shown, so request 11 (enough to detect
    # "more than 10") instead of the API default of 1000. The ORCID paging
    # parameter that controls the result count is ``rows``.
    url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}&rows=11"
    if args:
        # Quote each value separately so the query syntax in the template
        # (field names, quotes, +AND+ / +OR+) is left untouched.
        url %= tuple(map(urllib.parse.quote, args))
    return url
241+
235242
236- def extended_search_url (q , * args ):
237- """Helper to properly quote args and avoid duplicating URL etc"""
238- url = f"https://pub.orcid.org/v3.0/expanded-search?q={ q } "
239- if args :
240- url %= tuple (map (urllib .parse .quote , args ))
241- return url
243+ strict , loose = True , False
242244
243- # First look for records based on email
244- orcid_id = None
245+
246+ def gen_searches ( email , name ):
245247 if email :
246- url = extended_search_url ("email:%s" , email )
247- orcid_id = record_search (url , email , interactive , "by email" )
248+ yield (("email:%s" , email ), "by email" , strict )
248249
249- # Attempt # 2 will use the first and last name
250- if not orcid_id and name is not None :
250+ # Next attempts will use name
251+ if name is not None :
251252 delim = "," if "," in name else " "
252253 cleaner = "," if delim == " " else " "
253254
254- parts = name .split (delim )
255+ parts = [ _ . strip ( cleaner ) for _ in name .split (delim )]
255256
256257 # No go if only a first or last name
257258 if len (parts ) == 1 :
258259 bot .debug (f"Skipping { name } , first and last are required for search." )
259- return orcid_id
260+ return
261+
262+ # Just as is
263+ yield (
264+ ('credit-name:"%s"+OR+other-names:"%s"' , name , name ),
265+ "by full credit or other names" ,
266+ strict ,
267+ )
260268
261- last , first = parts [0 ].strip (cleaner ), " " .join (parts [1 :]).strip (cleaner )
262- url = extended_search_url ("%s+AND+%s" , first , last )
263- orcid_id = record_search (url , name , interactive , "by name" )
269+ if delim == "," :
270+ # Last, First Middle
271+ last , given = parts [0 ], " " .join (parts [1 :])
272+ else :
273+ # First Middle Last
274+ given , last = " " .join (parts [:- 1 ]), parts [- 1 ]
275+
276+ yield (
277+ ('given-names:"%s"+AND+family-name:"%s"' , given , last ),
278+ "by name" ,
279+ strict ,
280+ )
264281
265282 # Attempt # 3 will try removing the middle name
266- if not orcid_id and " " in first :
267- url = extended_search_url (
268- "%s+AND+%s" ,
269- first .split (" " )[0 ].strip (),
270- last ,
283+ if " " in given :
284+ yield (
285+ (
286+ 'given-names:"%s"+AND+family-name:"%s"' ,
287+ given .split (" " )[0 ].strip (),
288+ last ,
289+ ),
290+ "by name" ,
291+ loose ,
271292 )
272- orcid_id = record_search (url , name , interactive , "by name without middle" )
273293
274- # Last attempt tries full name "as is"
275- if not orcid_id :
276- url = extended_search_url ("%s" , name )
277- orcid_id = record_search (url , name , interactive , "full name" )
294+ # Just a combination of all parts of the name
295+ yield (
296+ ("+AND+" .join (["%s" ] * len (parts )),) + tuple (parts ),
297+ "by name parts" ,
298+ loose ,
299+ )
300+
278301
279- return orcid_id
302+ def get_orcid (email : str | None , name : str | None = None , interactive = False ):
303+ """Get an orcid identifier for a given email or name."""
304+ # We must have an email OR name
305+ if not email and not name :
306+ return
307+
308+ for search_args , search_desc , strictness in gen_searches (email , name ):
309+ url = extended_search_url (* search_args )
310+ if (
311+ orcid_id := record_search (url , search_args , interactive , search_desc )
312+ ) is not Ellipsis and orcid_id :
313+ return orcid_id
314+ if orcid_id is Ellipsis :
315+ orcid_id = None
316+ if strict :
317+ break
318+ # if loose, and still got multiple results, continue
0 commit comments