Skip to content

Commit a0596e6

Browse files
author
dalj8690
committed
Updated the script for generating RSS feeds
1 parent 85736af commit a0596e6

1 file changed

Lines changed: 48 additions & 32 deletions

File tree

generate_rss.py

Lines changed: 48 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
import os
22
import xml.etree.ElementTree as ET
3-
from datetime import datetime
3+
from datetime import datetime, timezone
44
from email.utils import parsedate_to_datetime
55

6-
# Path to your docs folder
6+
# Path to your docs folder and RSS file
77
docs_folder = 'docs'
8-
rss_file = 'docs/rss.xml'
8+
rss_file = os.path.join(docs_folder, 'rss.xml')
99

10-
# Get a list of all PDF files in docs and its subdirectories
10+
# Collect all PDF files in docs/
1111
pdf_files = []
1212
for root, dirs, files in os.walk(docs_folder):
1313
for file in files:
@@ -16,7 +16,7 @@
1616

1717
print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.")
1818

19-
# Create or load RSS XML
19+
# Load existing RSS or create a new one
2020
if os.path.exists(rss_file):
2121
tree = ET.parse(rss_file)
2222
root = tree.getroot()
@@ -28,49 +28,65 @@
2828
ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials"
2929
ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder"
3030

31-
# Map from GUID to <item> for quick lookup
32-
existing_items = {}
33-
for item in channel.findall("item"):
34-
guid = item.find("guid").text if item.find("guid") is not None else None
35-
if guid:
36-
existing_items[guid] = item
31+
# Map existing GUIDs to pubDate
32+
existing_items = {
33+
item.find("guid").text: item.find("pubDate").text
34+
for item in channel.findall("item") if item.find("guid") is not None
35+
}
3736

38-
# Track updated list of items
37+
# Prepare new items
3938
new_items = []
39+
current_date = datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")
40+
changes_made = False
4041

41-
# Generate items from current PDF list
42-
for pdf in pdf_files:
42+
for pdf in sorted(pdf_files):
4343
relative_path = os.path.relpath(pdf, docs_folder)
44-
commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"
45-
46-
# If this article already exists, preserve pubDate
47-
if commit_url in existing_items:
48-
old_item = existing_items[commit_url]
49-
pub_date = old_item.find("pubDate").text
50-
pub_datetime = parsedate_to_datetime(pub_date)
51-
else:
52-
# New item - use current time
53-
pub_datetime = datetime.utcnow()
44+
link = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"
5445

46+
# Build the <item> element for this PDF
5547
item = ET.Element("item")
5648
ET.SubElement(item, "title").text = relative_path
57-
ET.SubElement(item, "link").text = commit_url
58-
ET.SubElement(item, "guid").text = commit_url
59-
ET.SubElement(item, "pubDate").text = pub_datetime.strftime("%a, %d %b %Y %H:%M:%S GMT")
49+
ET.SubElement(item, "link").text = link
50+
ET.SubElement(item, "guid").text = link
6051

61-
new_items.append((pub_datetime, item))
52+
# Preserve existing pubDate or assign new one
53+
if link in existing_items:
54+
pub_date = existing_items[link] # Preserve timestamp of existing item
55+
# Remove the old version if it's updated (duplicate)
56+
for old_item in channel.findall("item"):
57+
if old_item.find("guid").text == link:
58+
channel.remove(old_item)
59+
changes_made = True
60+
break
61+
else:
62+
pub_date = current_date # New items get the current date
6263

63-
# Sort items newest-first by pubDate
64-
new_items.sort(key=lambda x: x[0], reverse=True)
64+
ET.SubElement(item, "pubDate").text = current_date if link not in existing_items else pub_date
65+
66+
# If the item is new, add it to the list of new items
67+
if link not in existing_items:
68+
new_items.append((parsedate_to_datetime(pub_date), item))
69+
changes_made = True
70+
else:
71+
# For updated items, we already removed the old item and added the new one
72+
new_items.append((parsedate_to_datetime(pub_date), item))
6573

66-
# Clear old items and re-append in sorted order
74+
# If no changes were made (no new items and no updates), exit early
75+
if not changes_made:
76+
print("No new or updated items found. Exiting without writing.")
77+
exit(0)
78+
79+
# Sort with newest items at the top
80+
new_items.sort(key=lambda tup: tup[0], reverse=True)
81+
82+
# Clear old items and append new ones (sorted)
6783
for old_item in channel.findall("item"):
6884
channel.remove(old_item)
85+
6986
for _, item in new_items:
7087
channel.append(item)
7188

7289
# Save the updated RSS feed
7390
tree = ET.ElementTree(root)
7491
tree.write(rss_file, encoding="UTF-8", xml_declaration=True)
75-
7692
print(f"Generated RSS feed with {len(new_items)} articles.")

0 commit comments

Comments
 (0)