Skip to content

Commit ca00dbe

Browse files
authored
Merge pull request #88 from con/enh-orcid2
Further improve search through ORCID
2 parents a973e67 + e04d4be commit ca00dbe

9 files changed

Lines changed: 92 additions & 53 deletions

File tree

.github/workflows/codespell.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@ jobs:
1717

1818
steps:
1919
- name: Checkout
20-
uses: actions/checkout@v3
20+
uses: actions/checkout@v4
2121
- name: Codespell
2222
uses: codespell-project/actions-codespell@v2

.github/workflows/deploy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
env:
1212
CONTAINER: quay.io/con/tributors
1313
steps:
14-
- uses: actions/checkout@v3
14+
- uses: actions/checkout@v4
1515
- name: Build Docker Image
1616
run: docker build -t "${CONTAINER}" .
1717
- name: Log In to Quay.io

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
runs-on: ubuntu-latest
1313
steps:
1414
- name: Checkout source
15-
uses: actions/checkout@v3
15+
uses: actions/checkout@v4
1616
with:
1717
fetch-depth: 0
1818

.github/workflows/shellcheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
run: |
1414
sudo apt-get update -qq
1515
sudo apt-get install shellcheck
16-
- uses: actions/checkout@v3
16+
- uses: actions/checkout@v4
1717
- name: Run shellcheck
1818
run: |
1919
# I: running only on a subset of scripts which are shellcheck clean ATM

.github/workflows/test-action.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
runs-on: ubuntu-latest
1010
steps:
1111
- name: Checkout Repository
12-
uses: actions/checkout@v3
12+
uses: actions/checkout@v4
1313
- name: Generate Updated Zenodo and Contributors
1414

1515
# Important! Update to release https://github.com/con/tributors
@@ -55,21 +55,21 @@ jobs:
5555
allcontrib_skip_generate: false
5656

5757
- name: Upload zenodo data as artifact
58-
uses: actions/upload-artifact@v3
58+
uses: actions/upload-artifact@v4
5959

6060
# Path is relative to GITHUB_WORKSPACE
6161
with:
6262
name: zenodo
6363
path: .zenodo.json
6464

6565
- name: Upload allcontributors data as artifact
66-
uses: actions/upload-artifact@v3
66+
uses: actions/upload-artifact@v4
6767
with:
6868
name: allcontrib
6969
path: .all-contributorsrc
7070

7171
- name: Upload README as artifact
72-
uses: actions/upload-artifact@v3
72+
uses: actions/upload-artifact@v4
7373
with:
7474
name: readme
7575
path: README.md

.github/workflows/test-tributors.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
formatting:
99
runs-on: ubuntu-latest
1010
steps:
11-
- uses: actions/checkout@v3
11+
- uses: actions/checkout@v4
1212
- name: Setup black environment
1313
run: conda create --quiet --name black pyflakes
1414

@@ -29,7 +29,7 @@ jobs:
2929
needs: formatting
3030
runs-on: ubuntu-latest
3131
steps:
32-
- uses: actions/checkout@v3
32+
- uses: actions/checkout@v4
3333
- name: Setup testing environment
3434
run: conda create --quiet --name testing pytest
3535

@@ -49,7 +49,7 @@ jobs:
4949
env:
5050
CONTAINER: quay.io/con/tributors
5151
steps:
52-
- uses: actions/checkout@v3
52+
- uses: actions/checkout@v4
5353
- name: Build Docker Image
5454
run: docker build -t "${CONTAINER}" .
5555
- name: Tag and Preview Container

.github/workflows/update-contributors.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Checkout Repository
14-
uses: actions/checkout@v3
14+
uses: actions/checkout@v4
1515
- name: Tributors Update
1616

1717
# Important! Update to release https://github.com/con/tributors

tributors/main/github.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import requests
1616
import sys
1717

18-
repository_regex = "(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"
18+
repository_regex = r"(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"
1919

2020
bot = logging.getLogger("github")
2121

tributors/main/orcid.py

Lines changed: 79 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,12 @@ def get_orcid_token():
130130
return orcid_token
131131

132132

133-
def record_search(url, email, interactive=False, search_type=""):
134-
"""Given a url (with a name or email) do a record search looking for an orcid id.
133+
def record_search(url, terms, interactive=False, search_type=""):
134+
"""Given a url (with a name or terms) do a record search looking for an orcid id.
135135
136136
Arguments:
137137
- url (str) : url to perform request
138-
- email (str) : email, used just for logging
138+
- terms (tuple) : search format string followed by its arguments, used just for logging
139139
- interactive (bool) : if True, ask user if there is more than a single response
140140
- search_type (str) : description on what search is based on, used just for logging
141141
"""
@@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""):
152152
if len(results) == 1:
153153
return results[0]["orcid-id"]
154154

155+
term_str = terms[0] % terms[1:]
155156
# Only stream results to screen in interactive mode
156157
if not interactive:
157158
bot.info(
158-
f"{email}: found more than 1 ({len(results)}) result for ORCID search {search_type}, "
159+
f"{term_str}: found more than one ({len(results)}) result for ORCID search {search_type}, "
159160
"run with --interactive mode to select."
160161
)
161-
return
162+
return Ellipsis
162163

163164
# One or more results
164165
if len(results) > 10:
165166
bot.warning("Found more than 10 results, will only show top 10.")
166167

167-
print("\n\n%s\n======================================================" % email)
168+
print("\n\n%s\n======================================================" % term_str)
168169
for idx, r in enumerate(results):
169170
# Limit is ten results, count starting at 0
170171
idx = idx + 1
@@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""):
191192
else:
192193
print("[%s]\n%s\n" % (idx, record))
193194

195+
# TODO: here we should remember for a person on what we already presented as
196+
# options and not to show them again.
197+
#
194198
# If interactive, ask for choice prompt
195199
if interactive:
196200
skip_choices = ["s", "S", "skip"]
@@ -216,7 +220,7 @@ def record_search(url, email, interactive=False, search_type=""):
216220

217221
if choice in enter_choices:
218222
return entry_prompt(
219-
f"Please enter the ORCID for {email}.",
223+
f"Please enter the ORCID for {term_str}.",
220224
regex="[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$",
221225
)
222226

@@ -227,53 +231,88 @@ def record_search(url, email, interactive=False, search_type=""):
227231
return results[int(choice) - 1]["orcid-id"]
228232

229233

230-
def get_orcid(email, name=None, interactive=False):
231-
"""Get an orcid identifier for a given email or name."""
232-
# We must have an email OR name
233-
if not email and not name:
234-
return
234+
def extended_search_url(q, *args):
    """Build an ORCID expanded-search URL for the query template ``q``.

    Arguments:
     - q (str) : query, optionally containing ``%s`` placeholders
     - args (str) : values for the placeholders, each one is URL-quoted
                    before being substituted into ``q``

    Returns the full request URL (str).
    """
    # Interpolate into the query only, so the fixed part of the URL is never
    # subject to %-formatting.
    if args:
        q %= tuple(map(urllib.parse.quote, args))
    # We will show only up to 10, so requesting 11, no need to get all default 1000.
    # The ORCID search API caps the number of returned records via ``rows``.
    return f"https://pub.orcid.org/v3.0/expanded-search?q={q}&rows=11"
241+
235242

236-
def extended_search_url(q, *args):
237-
"""Helper to properly quote args and avoid duplicating URL etc"""
238-
url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}"
239-
if args:
240-
url %= tuple(map(urllib.parse.quote, args))
241-
return url
243+
strict, loose = True, False
242244

243-
# First look for records based on email
244-
orcid_id = None
245+
246+
def gen_searches(email, name):
245247
if email:
246-
url = extended_search_url("email:%s", email)
247-
orcid_id = record_search(url, email, interactive, "by email")
248+
yield (("email:%s", email), "by email", strict)
248249

249-
# Attempt # 2 will use the first and last name
250-
if not orcid_id and name is not None:
250+
# Next attempts will use name
251+
if name is not None:
251252
delim = "," if "," in name else " "
252253
cleaner = "," if delim == " " else " "
253254

254-
parts = name.split(delim)
255+
parts = [_.strip(cleaner) for _ in name.split(delim)]
255256

256257
# No go if only a first or last name
257258
if len(parts) == 1:
258259
bot.debug(f"Skipping {name}, first and last are required for search.")
259-
return orcid_id
260+
return
261+
262+
# Just as is
263+
yield (
264+
('credit-name:"%s"+OR+other-names:"%s"', name, name),
265+
"by full credit or other names",
266+
strict,
267+
)
260268

261-
last, first = parts[0].strip(cleaner), " ".join(parts[1:]).strip(cleaner)
262-
url = extended_search_url("%s+AND+%s", first, last)
263-
orcid_id = record_search(url, name, interactive, "by name")
269+
if delim == ",":
270+
# Last, First Middle
271+
last, given = parts[0], " ".join(parts[1:])
272+
else:
273+
# First Middle Last
274+
given, last = " ".join(parts[:-1]), parts[-1]
275+
276+
yield (
277+
('given-names:"%s"+AND+family-name:"%s"', given, last),
278+
"by name",
279+
strict,
280+
)
264281

265282
# Attempt # 3 will try removing the middle name
266-
if not orcid_id and " " in first:
267-
url = extended_search_url(
268-
"%s+AND+%s",
269-
first.split(" ")[0].strip(),
270-
last,
283+
if " " in given:
284+
yield (
285+
(
286+
'given-names:"%s"+AND+family-name:"%s"',
287+
given.split(" ")[0].strip(),
288+
last,
289+
),
290+
"by name",
291+
loose,
271292
)
272-
orcid_id = record_search(url, name, interactive, "by name without middle")
273293

274-
# Last attempt tries full name "as is"
275-
if not orcid_id:
276-
url = extended_search_url("%s", name)
277-
orcid_id = record_search(url, name, interactive, "full name")
294+
# Just a combination of all parts of the name
295+
yield (
296+
("+AND+".join(["%s"] * len(parts)),) + tuple(parts),
297+
"by name parts",
298+
loose,
299+
)
300+
278301

279-
return orcid_id
302+
def get_orcid(email: str | None, name: str | None = None, interactive=False):
303+
"""Get an orcid identifier for a given email or name."""
304+
# We must have an email OR name
305+
if not email and not name:
306+
return
307+
308+
for search_args, search_desc, strictness in gen_searches(email, name):
309+
url = extended_search_url(*search_args)
310+
if (
311+
orcid_id := record_search(url, search_args, interactive, search_desc)
312+
) is not Ellipsis and orcid_id:
313+
return orcid_id
314+
if orcid_id is Ellipsis:
315+
orcid_id = None
316+
if strict:
317+
break
318+
# if loose, and still got multiple results, continue

0 commit comments

Comments
 (0)