Skip to content

Commit 3b45d8b

Browse files
authored
Merge pull request #283 from creativecommons/update-fetch-action
Update fetch action/workflow
2 parents 34c1caa + 40dda66 commit 3b45d8b

File tree

1 file changed

+41
-19
lines changed

1 file changed

+41
-19
lines changed

.github/workflows/1-fetch.yml

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,10 @@ name: Fetch Data
33
on:
44
schedule:
55
# Normal schedule
6-
# # at 01:15 on all days in first month of each quarter
7-
# - cron: '15 1 * 1,4,7,10 *'
8-
# # at 01:15 on days 1-14 in second month of each quarter
9-
# - cron: '15 1 1-14 2,5,8,11 *'
10-
# Temp schedule
11-
# at 01:15 on all days in all months
12-
- cron: '15 1 * * *'
6+
# at 03:15 UTC on all days in first month of each quarter
7+
- cron: '15 3 * 1,4,7,10 *'
8+
# at 03:15 UTC on days 1-14 in second month of each quarter
9+
- cron: '15 3 1-14 2,5,8,11 *'
1310

1411
workflow_dispatch:
1512

@@ -29,7 +26,7 @@ jobs:
2926
git config --global user.email "${{ secrets.BOT_EMAIL }}"
3027
3128
- name: Checkout repository
32-
uses: actions/checkout@v4
29+
uses: actions/checkout@v6
3330
with:
3431
# Default fetch-depth is 1, however that value results in errors
3532
# when GitPython attempts to push changes:
@@ -38,7 +35,7 @@ jobs:
3835
token: ${{ secrets.BOT_TOKEN }}
3936

4037
- name: Set up Python
41-
uses: actions/setup-python@v5
38+
uses: actions/setup-python@v6
4239
with:
4340
python-version: '3.11'
4441

@@ -50,20 +47,45 @@ jobs:
5047
run: |
5148
pipenv sync --system
5249
53-
# CC Technology team members:
54-
# See cc-quantifying-bot Google Workspace entry in Bitwarden for
55-
# information on GCS_ secrets
56-
- name: Fetch from Google Custom Search (GCS)
57-
run: |
58-
./scripts/1-fetch/gcs_fetch.py \
59-
--limit=100 --enable-save --enable-git
60-
env:
61-
GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
62-
GCS_CX: ${{ secrets.GCS_CX }}
50+
# Fetch from arXiv disabled due to long run time (~6 hours)
51+
#
52+
# For now, data is fetched manually :/
53+
54+
# Fetch from Europeana disabled because it is considered incomplete:
55+
# https://github.com/creativecommons/quantifying/issues/224
56+
57+
# Fetch from GCS disabled due to Google blocking GitHub Action runners
58+
# # CC Technology team members:
59+
# # See cc-quantifying-bot Google Workspace entry in Bitwarden for
60+
# # information on GCS_ secrets
61+
# - name: Fetch from Google Custom Search (GCS)
62+
# run: |
63+
# ./scripts/1-fetch/gcs_fetch.py \
64+
# --limit=100 --enable-save --enable-git
65+
# env:
66+
# GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
67+
# GCS_CX: ${{ secrets.GCS_CX }}
68+
#
69+
# For now, data is fetched manually :/
6370

6471
- name: Fetch from GitHub
6572
run: |
6673
./scripts/1-fetch/github_fetch.py \
6774
--enable-save --enable-git
6875
env:
6976
GH_TOKEN: ${{ secrets.BOT_TOKEN }}
77+
78+
# Fetch from Openverse disabled due to limitations of anonymous API
79+
# access
80+
81+
- name: Fetch from Smithsonian
82+
run: |
83+
./scripts/1-fetch/smithsonian_fetch.py \
84+
--enable-save --enable-git
85+
env:
86+
DATA_GOV_API_KEY: ${{ secrets.DATA_GOV_API_KEY }}
87+
88+
- name: Fetch from Wikipedia
89+
run: |
90+
./scripts/1-fetch/wikipedia_fetch.py \
91+
--enable-save --enable-git

0 commit comments

Comments
 (0)