|
1 | 1 | import os |
2 | 2 | import xml.etree.ElementTree as ET |
3 | | -from datetime import datetime |
| 3 | +from datetime import datetime, timezone |
4 | 4 | from email.utils import parsedate_to_datetime |
5 | 5 |
|
6 | | -# Path to your docs folder |
| 6 | +# Path to your docs folder and RSS file |
7 | 7 | docs_folder = 'docs' |
8 | | -rss_file = 'docs/rss.xml' |
| 8 | +rss_file = os.path.join(docs_folder, 'rss.xml') |
9 | 9 |
|
10 | | -# Get a list of all PDF files in docs and its subdirectories |
| 10 | +# Collect all PDF files in docs/ |
11 | 11 | pdf_files = [] |
12 | 12 | for root, dirs, files in os.walk(docs_folder): |
13 | 13 | for file in files: |
|
16 | 16 |
|
17 | 17 | print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.") |
18 | 18 |
|
19 | | -# Create or load RSS XML |
| 19 | +# Load existing RSS or create a new one |
20 | 20 | if os.path.exists(rss_file): |
21 | 21 | tree = ET.parse(rss_file) |
22 | 22 | root = tree.getroot() |
|
28 | 28 | ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials" |
29 | 29 | ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder" |
30 | 30 |
|
# Map each existing item's GUID to its pubDate text so that articles already
# in the feed keep their original publication timestamp.
#
# Items that lack a <guid> or a <pubDate> (or whose text is empty) are left
# out of the map instead of raising AttributeError on `.text`; the loop below
# then treats them like new articles and stamps them with the current date.
# If the feed contains duplicate GUIDs, the last occurrence wins.
existing_items = {}
for feed_item in channel.findall("item"):
    guid_text = feed_item.findtext("guid")
    pub_date_text = feed_item.findtext("pubDate")
    if guid_text and pub_date_text:
        existing_items[guid_text] = pub_date_text
37 | 36 |
|
# Build one <item> per PDF currently on disk.
#
# `new_items` collects (publication datetime, <item> element) pairs so the
# feed can later be sorted newest-first. `changes_made` records whether the
# rebuilt feed would differ from the one that was loaded; it stays False when
# every PDF is already listed and nothing has disappeared, which lets the
# script skip rewriting the file.
new_items = []
current_date = datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")
changes_made = False

for pdf in sorted(pdf_files):
    relative_path = os.path.relpath(pdf, docs_folder)
    # URLs always use forward slashes, whatever separator the OS uses.
    url_path = relative_path.replace(os.sep, "/")
    # NOTE(review): the path is relative to docs_folder, so the URL does not
    # contain the "docs/" segment -- confirm this is the intended target.
    # Changing it would change every GUID and reset the preserved dates.
    link = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{url_path}"

    # Reuse the stored pubDate when the article is already in the feed and
    # its date is parseable; otherwise treat it as new.
    pub_date = existing_items.get(link)
    pub_datetime = None
    if pub_date:
        try:
            pub_datetime = parsedate_to_datetime(pub_date)
        except (TypeError, ValueError):
            # Hand-edited or corrupt date: fall through and re-stamp it.
            pub_datetime = None

    if pub_datetime is None:
        pub_date = current_date
        pub_datetime = parsedate_to_datetime(current_date)
        changes_made = True
    elif pub_datetime.tzinfo is None:
        # A "-0000" zone parses to a naive datetime; make it aware so it can
        # be compared with the UTC-aware datetimes of the other items.
        pub_datetime = pub_datetime.replace(tzinfo=timezone.utc)

    item = ET.Element("item")
    ET.SubElement(item, "title").text = relative_path
    ET.SubElement(item, "link").text = link
    ET.SubElement(item, "guid").text = link
    ET.SubElement(item, "pubDate").text = pub_date

    new_items.append((pub_datetime, item))

# With no new articles, every rebuilt item matches a distinct existing GUID.
# A different item count then means the old feed held stale, duplicate or
# GUID-less entries that must be dropped, so the file has to be rewritten.
if len(channel.findall("item")) != len(new_items):
    changes_made = True
65 | 73 |
|
# Nothing was added or updated: leave the feed file on disk untouched.
if not changes_made:
    print("No new or updated items found. Exiting without writing.")
    # `exit()` is a helper installed by the `site` module for interactive
    # sessions and may be missing (e.g. `python -S`); raising SystemExit is
    # the dependable way to stop a script with status 0.
    raise SystemExit(0)

# Newest articles first. The sort is stable, so items sharing a timestamp
# keep the order in which they were collected.
new_items.sort(key=lambda tup: tup[0], reverse=True)

# Replace the channel's <item> children with the freshly built, sorted ones.
# Other children of the channel are not touched.
for old_item in channel.findall("item"):
    channel.remove(old_item)
channel.extend([item for _, item in new_items])

# Save the updated RSS feed
tree = ET.ElementTree(root)
tree.write(rss_file, encoding="UTF-8", xml_declaration=True)
print(f"Generated RSS feed with {len(new_items)} articles.")
0 commit comments