Skip to content

Commit b9e16a5

Browse files
Copilot and vgoehler authored
fix: re-download PDFs when upstream release URL changes (new version)
- Manifest format changed from "filename\n" to "filename\turl\n" so the cached URL for each asset is persisted across runs.
- download_upstream_pdfs.sh now compares the upstream URL against the cached URL; a changed URL (i.e. a new release was published) triggers a fresh download even if the local file already exists.
- prune_pdfs.sh updated to parse the new tab-separated manifest format (reads only the first field, ignoring the URL column).
- generate_courses.sh and Makefile use wc -l on the manifest — still correct since the line count per entry is unchanged.

Agent-Logs-Url: https://github.com/TUBAF-IfI-LiaScript/TUBAF-IfI-LiaScript.github.io/sessions/8c4a2c8f-bd76-4f8a-901b-9c172ffdee6c
Co-authored-by: vgoehler <1705385+vgoehler@users.noreply.github.com>
1 parent 1487040 commit b9e16a5

2 files changed

Lines changed: 36 additions & 16 deletions

File tree

scripts/download_upstream_pdfs.sh

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
# Env: GITHUB_TOKEN – optional, used to authenticate API calls and avoid rate-limiting
66
#
77
# Outputs:
8-
# assets/<course>/pdf/<name>.pdf – downloaded files (only when not already present)
9-
# .cache/<course>_upstream_pdfs – manifest: one PDF filename per line
8+
# assets/<course>/pdf/<name>.pdf – downloaded files (re-downloaded when upstream URL changes)
9+
# .cache/<course>_upstream_pdfs – manifest: one "filename<TAB>url" line per PDF
1010
#
1111
# Exit codes:
1212
# 0 – at least one upstream PDF was found / downloaded
@@ -22,10 +22,10 @@ fi
2222

2323
# Map course name → upstream repository name
2424
case "$COURSE" in
25-
digitalesysteme) REPO_NAME="EingebetteteSysteme" ;;
26-
prozprog) REPO_NAME="ProzeduraleProgrammierung" ;;
25+
digitalesysteme) REPO_NAME="EingebetteteSysteme" ;;
26+
prozprog) REPO_NAME="ProzeduraleProgrammierung" ;;
2727
softwareentwicklung) REPO_NAME="Softwareentwicklung" ;;
28-
robotikprojekt) REPO_NAME="SoftwareprojektRobotik" ;;
28+
robotikprojekt) REPO_NAME="SoftwareprojektRobotik" ;;
2929
*)
3030
echo "ℹ️ No upstream repo mapped for course '$COURSE'" >&2
3131
exit 1
@@ -55,13 +55,14 @@ if echo "$API_RESPONSE" | grep -q '"message"' && ! echo "$API_RESPONSE" | grep -
5555
exit 1
5656
fi
5757

58-
# Extract PDF asset names and their download URLs.
59-
# Deduplicate by name (keep first occurrence = most-recent release wins).
6058
if ! command -v jq >/dev/null 2>&1; then
6159
echo "⚠️ jq is not installed – cannot parse GitHub release assets" >&2
6260
exit 1
6361
fi
6462

63+
# Extract PDF asset names and their download URLs.
64+
# Releases API returns newest-first; deduplicate by filename so the newest
65+
# release's URL wins for each lesson PDF.
6566
mapfile -t ALL_NAMES < <(
6667
echo "$API_RESPONSE" \
6768
| jq -r '.[].assets[] | select(.name | endswith(".pdf")) | .name' 2>/dev/null \
@@ -79,7 +80,7 @@ if [ "${#ALL_NAMES[@]}" -eq 0 ]; then
7980
exit 1
8081
fi
8182

82-
# Deduplicate: keep the first occurrence of each filename
83+
# Deduplicate: keep the first occurrence of each filename (= most-recent release)
8384
declare -A SEEN
8485
NAMES=()
8586
URLS=()
@@ -94,8 +95,19 @@ done
9495

9596
echo "📦 Found ${#NAMES[@]} unique upstream PDF(s) for ${COURSE}"
9697

97-
# Download missing PDFs and build manifest
98+
# Load previously cached URLs so we can detect version updates.
99+
# Manifest format: "<filename>\t<url>"
100+
declare -A CACHED_URL
101+
if [ -f "$MANIFEST" ]; then
102+
while IFS=$'\t' read -r cached_name cached_url; do
103+
[[ -z "$cached_name" ]] && continue
104+
CACHED_URL["$cached_name"]="$cached_url"
105+
done < "$MANIFEST"
106+
fi
107+
108+
# Download PDFs that are missing or whose upstream URL has changed (new version).
98109
downloaded=0
110+
updated=0
99111
already_present=0
100112
> "${MANIFEST}.tmp"
101113

@@ -104,23 +116,30 @@ for i in "${!NAMES[@]}"; do
104116
url="${URLS[$i]}"
105117
target="${PDF_DIR}/${name}"
106118

107-
echo "${name}" >> "${MANIFEST}.tmp"
119+
printf '%s\t%s\n' "${name}" "${url}" >> "${MANIFEST}.tmp"
108120

109-
if [ -f "$target" ]; then
121+
if [ -f "$target" ] && [ "${CACHED_URL[$name]+x}" ] && [ "${CACHED_URL[$name]}" = "$url" ]; then
122+
# File exists and URL hasn't changed → already up-to-date
110123
already_present=$((already_present + 1))
111124
else
112-
echo " ⬇️ Downloading ${name}..."
125+
if [ -f "$target" ]; then
126+
echo " 🔄 Updating ${name} (new release available)..."
127+
updated=$((updated + 1))
128+
else
129+
echo " ⬇️ Downloading ${name}..."
130+
downloaded=$((downloaded + 1))
131+
fi
113132
if ! curl -fsSL --connect-timeout 30 "${CURL_AUTH[@]}" -o "$target" "$url"; then
114133
echo " ⚠️ Failed to download ${name} from ${url}" >&2
115134
rm -f "$target"
116-
else
117-
downloaded=$((downloaded + 1))
135+
# Remove from manifest so the next run retries
136+
grep -v "^${name} " "${MANIFEST}.tmp" > "${MANIFEST}.tmp2" && mv "${MANIFEST}.tmp2" "${MANIFEST}.tmp" || true
118137
fi
119138
fi
120139
done
121140

122141
# Atomically replace manifest
123142
mv "${MANIFEST}.tmp" "$MANIFEST"
124143

125-
echo "✅ Upstream PDFs: ${downloaded} downloaded, ${already_present} already present"
144+
echo "✅ Upstream PDFs: ${downloaded} new, ${updated} updated, ${already_present} already up-to-date"
126145
exit 0

scripts/prune_pdfs.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ while IFS= read -r rel; do
3131
done < <(printf "%s\n" "$referenced")
3232

3333
# Also protect upstream PDFs listed in .cache/*_upstream_pdfs manifests
34+
# Manifest format: "<filename>\t<url>"
3435
for manifest in .cache/*_upstream_pdfs; do
3536
[ -f "$manifest" ] || continue
3637
course="$(basename "$manifest" _upstream_pdfs)"
37-
while IFS= read -r pdf_name; do
38+
while IFS=$'\t' read -r pdf_name _url; do
3839
[[ -z "$pdf_name" ]] && continue
3940
abs="$repo_root/assets/${course}/pdf/${pdf_name}"
4041
keep["$abs"]=1

0 commit comments

Comments
 (0)