Skip to content

Commit 53840c6

Browse files
authored
Merge branch 'master' into fix/babyru-false-positive
2 parents 068fff8 + f60de0d commit 53840c6

2 files changed

Lines changed: 105 additions & 22 deletions

File tree

.github/workflows/validate_modified_targets.yml

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,38 +17,57 @@ jobs:
1717
- name: Checkout repository
1818
uses: actions/checkout@v5
1919
with:
20+
# Check out the base branch with full history so the merge-base with the PR head can be computed later
2021
ref: ${{ github.base_ref }}
21-
fetch-depth: 1
22+
fetch-depth: 0
2223

2324
- name: Set up Python
2425
uses: actions/setup-python@v6
2526
with:
26-
python-version: '3.13'
27+
python-version: "3.13"
2728

2829
- name: Install Poetry
2930
uses: abatilo/actions-poetry@v4
3031
with:
31-
poetry-version: 'latest'
32+
poetry-version: "latest"
3233

3334
- name: Install dependencies
3435
run: |
3536
poetry install --no-interaction --with dev
3637
37-
- name: Drop in place updated manifest from base
38+
- name: Prepare JSON versions for comparison
3839
run: |
39-
cp sherlock_project/resources/data.json data.json.base
40-
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
41-
git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
42-
cp sherlock_project/resources/data.json data.json.head
40+
# Fetch only the PR's branch head (single network call in this step)
41+
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
42+
43+
# Find the merge-base commit between the target branch and the PR branch
44+
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
45+
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
46+
47+
# Safely extract the file from the PR's head and the merge-base commit
48+
git show pr:sherlock_project/resources/data.json > data.json.head
49+
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
50+
51+
# Overwrite the checked-out (base-branch) data.json with the PR's version.
52+
# This ensures that pytest runs against the new, updated file.
53+
cp data.json.head sherlock_project/resources/data.json
4354
4455
- name: Discover modified targets
4556
id: discover-modified
4657
run: |
4758
CHANGED=$(
4859
python - <<'EOF'
4960
import json
50-
with open("data.json.base") as f: base = json.load(f)
51-
with open("data.json.head") as f: head = json.load(f)
61+
import sys
62+
try:
63+
with open("data.json.base") as f: base = json.load(f)
64+
with open("data.json.head") as f: head = json.load(f)
65+
except FileNotFoundError as e:
66+
print(f"Error: Could not find {e.filename}", file=sys.stderr)
67+
sys.exit(1)
68+
except json.JSONDecodeError as e:
69+
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
70+
sys.exit(1)
5271
5372
changed = []
5473
for k, v in head.items():
@@ -63,6 +82,8 @@ jobs:
6382
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
6483
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
6584
85+
# --- The rest of the steps below are unchanged ---
86+
6687
- name: Validate modified targets
6788
if: steps.discover-modified.outputs.changed_targets != ''
6889
continue-on-error: true

sherlock_project/resources/data.json

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@
259259
"username_claimed": "blue"
260260
},
261261
"Blitz Tactics": {
262-
"errorType": "status_code",
262+
"errorMsg": "That page doesn't exist",
263+
"errorType": "message",
263264
"url": "https://blitztactics.com/{}",
264265
"urlMain": "https://blitztactics.com/",
265266
"username_claimed": "Lance5500"
@@ -278,14 +279,7 @@
278279
"urlMain": "https://bsky.app/",
279280
"username_claimed": "mcuban"
280281
},
281-
"BoardGameGeek": {
282-
"errorType": "message",
283-
"regexCheck": "^[a-zA-Z0-9_]*$",
284-
"errorMsg": "User not found",
285-
"url": "https://boardgamegeek.com/user/{}",
286-
"urlMain": "https://boardgamegeek.com",
287-
"username_claimed": "blue"
288-
},
282+
289283
"BongaCams": {
290284
"errorType": "status_code",
291285
"isNSFW": true,
@@ -299,6 +293,14 @@
299293
"urlMain": "https://www.bookcrossing.com/",
300294
"username_claimed": "blue"
301295
},
296+
"BoardGameGeek": {
297+
"errorMsg": "\"isValid\":true",
298+
"errorType": "message",
299+
"url": "https://boardgamegeek.com/user/{}",
300+
"urlMain": "https://boardgamegeek.com/",
301+
"urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}",
302+
"username_claimed": "blue"
303+
},
302304
"BraveCommunity": {
303305
"errorType": "status_code",
304306
"url": "https://community.brave.com/u/{}/",
@@ -505,6 +507,15 @@
505507
"urlMain": "https://coderwall.com",
506508
"username_claimed": "hacker"
507509
},
510+
"Code Sandbox": {
511+
"errorType": "message",
512+
"errorMsg": "Could not find user with username",
513+
"regexCheck": "^[a-zA-Z0-9_-]{3,30}$",
514+
"url": "https://codesandbox.io/u/{}",
515+
"urlProbe": "https://codesandbox.io/api/v1/users/{}",
516+
"urlMain": "https://codesandbox.io",
517+
"username_claimed": "icyjoseph"
518+
},
508519
"Codewars": {
509520
"errorType": "status_code",
510521
"url": "https://www.codewars.com/users/{}",
@@ -537,6 +548,13 @@
537548
"urlMain": "https://coroflot.com/",
538549
"username_claimed": "blue"
539550
},
551+
"Cplusplus": {
552+
"errorType": "message",
553+
"errorMsg": "<title>404 Page Not Found</title>",
554+
"url": "https://cplusplus.com/user/{}",
555+
"urlMain": "https://cplusplus.com",
556+
"username_claimed": "mbozzi"
557+
},
540558
"Cracked": {
541559
"errorType": "response_url",
542560
"errorUrl": "https://www.cracked.com/",
@@ -683,7 +701,6 @@
683701
"Duolingo": {
684702
"errorMsg": "{\"users\":[]}",
685703
"errorType": "message",
686-
687704
"url": "https://www.duolingo.com/profile/{}",
688705
"urlMain": "https://duolingo.com/",
689706
"urlProbe": "https://www.duolingo.com/2017-06-30/users?username={}",
@@ -1056,6 +1073,13 @@
10561073
"urlMain": "https://www.heavy-r.com/",
10571074
"username_claimed": "kilroy222"
10581075
},
1076+
"Hive Blog": {
1077+
"errorMsg": "<title>User Not Found - Hive</title>",
1078+
"errorType": "message",
1079+
"url": "https://hive.blog/@{}",
1080+
"urlMain": "https://hive.blog/",
1081+
"username_claimed": "mango-juice"
1082+
},
10591083
"Holopin": {
10601084
"errorMsg": "true",
10611085
"errorType": "message",
@@ -1217,6 +1241,13 @@
12171241
"urlMain": "https://discourse.joplinapp.org/",
12181242
"username_claimed": "laurent"
12191243
},
1244+
"Jupyter Community Forum": {
1245+
"errorMsg": "Oops! That page doesn’t exist or is private.",
1246+
"errorType": "message",
1247+
"url": "https://discourse.jupyter.org/u/{}/summary",
1248+
"urlMain": "https://discourse.jupyter.org",
1249+
"username_claimed": "choldgraf"
1250+
},
12201251
"Kaggle": {
12211252
"errorType": "status_code",
12221253
"url": "https://www.kaggle.com/{}",
@@ -1739,13 +1770,26 @@
17391770
"urlMain": "http://promodj.com/",
17401771
"username_claimed": "blue"
17411772
},
1773+
"Pronouns.page": {
1774+
"errorType": "status_code",
1775+
"url": "https://pronouns.page/@{}",
1776+
"urlMain": "https://pronouns.page/",
1777+
"username_claimed": "andrea"
1778+
},
17421779
"PyPi": {
17431780
"errorType": "status_code",
17441781
"url": "https://pypi.org/user/{}",
17451782
"urlProbe": "https://pypi.org/_includes/administer-user-include/{}",
17461783
"urlMain": "https://pypi.org",
17471784
"username_claimed": "Blue"
17481785
},
1786+
"Python.org Discussions": {
1787+
"errorMsg": "Oops! That page doesn’t exist or is private.",
1788+
"errorType": "message",
1789+
"url": "https://discuss.python.org/u/{}/summary",
1790+
"urlMain": "https://discuss.python.org",
1791+
"username_claimed": "pablogsal"
1792+
},
17491793
"Rajce.net": {
17501794
"errorType": "status_code",
17511795
"regexCheck": "^[\\w@-]+?$",
@@ -1841,6 +1885,13 @@
18411885
"urlMain": "https://royalcams.com",
18421886
"username_claimed": "asuna-black"
18431887
},
1888+
"Ruby Forums": {
1889+
"errorMsg": "Oops! That page doesn’t exist or is private.",
1890+
"errorType": "message",
1891+
"url": "https://ruby-forum.com/u/{}/summary",
1892+
"urlMain": "https://ruby-forum.com",
1893+
"username_claimed": "rishard"
1894+
},
18441895
"RubyGems": {
18451896
"errorType": "status_code",
18461897
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]{1,40}",
@@ -2041,7 +2092,6 @@
20412092
},
20422093
"Spotify": {
20432094
"errorType": "status_code",
2044-
20452095
"url": "https://open.spotify.com/user/{}",
20462096
"urlMain": "https://open.spotify.com/",
20472097
"username_claimed": "blue"
@@ -2246,6 +2296,13 @@
22462296
"urlMain": "https://untappd.com/",
22472297
"username_claimed": "untappd"
22482298
},
2299+
"Valorant Forums": {
2300+
"errorMsg": "The page you requested could not be found.",
2301+
"errorType": "message",
2302+
"url": "https://valorantforums.com/u/{}",
2303+
"urlMain": "https://valorantforums.com",
2304+
"username_claimed": "Wolves"
2305+
},
22492306
"VK": {
22502307
"errorType": "response_url",
22512308
"errorUrl": "https://www.quora.com/profile/{}",
@@ -2320,6 +2377,12 @@
23202377
"urlMain": "https://discourse.wicg.io/",
23212378
"username_claimed": "stefano"
23222379
},
2380+
"Wakatime": {
2381+
"errorType": "status_code",
2382+
"url": "https://wakatime.com/@{}",
2383+
"urlMain": "https://wakatime.com/",
2384+
"username_claimed": "blue"
2385+
},
23232386
"Warrior Forum": {
23242387
"errorType": "status_code",
23252388
"url": "https://www.warriorforum.com/members/{}.html",
@@ -2464,7 +2527,6 @@
24642527
},
24652528
"YouTube": {
24662529
"errorType": "status_code",
2467-
24682530
"url": "https://www.youtube.com/@{}",
24692531
"urlMain": "https://www.youtube.com/",
24702532
"username_claimed": "youtube"

0 commit comments

Comments
 (0)