|
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from email.utils import format_datetime, parsedate_to_datetime
5 | 5 |
|
# Folder that holds the published PDFs; the feed file is stored inside it.
docs_folder = 'docs'
rss_file = os.path.join(docs_folder, 'rss.xml')

# Collect every PDF under docs_folder, including its subdirectories.
# The walk variable is deliberately not called `root`: that name is passed to
# ET.ElementTree() as the XML root element when the feed is saved.
pdf_files = []
for dir_path, _dir_names, file_names in os.walk(docs_folder):
    for file_name in file_names:
        if file_name.endswith(".pdf"):
            pdf_files.append(os.path.join(dir_path, file_name))

# os.walk() gives no ordering guarantee; sort so that repeated runs add new
# articles to the feed in a stable order.
pdf_files.sort()

print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.")
18 | 18 |
|
|
28 | 28 | ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials" |
29 | 29 | ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder" |
30 | 30 |
|
# Index the items already present in the feed by their GUID (the article URL).
# Items without a <guid> element are skipped: find() returns None for them and
# reading .text from None would raise AttributeError and abort the run.
existing_items = {}
for item in channel.findall("item"):
    guid = item.find("guid")
    if guid is not None:
        existing_items[guid.text] = item
33 | 33 |
|
# Timestamp of this run as an RFC 2822 date string for the <pubDate> elements.
# format_datetime() always emits English day/month names, whereas strftime's
# %a/%b follow the process locale; usegmt=True renders the UTC zone as "GMT".
current_date = format_datetime(datetime.now(timezone.utc), usegmt=True)
36 | 36 |
|
# Add a feed item for every new PDF and refresh the date of the items that
# are already listed.
for pdf in pdf_files:
    # Path relative to docs_folder, forced to "/" separators so the URL, GUID
    # and title stay the same when the script runs on Windows.
    relative_path = os.path.relpath(pdf, docs_folder).replace(os.sep, "/")
    # NOTE(review): the URL does not contain the docs folder itself - confirm
    # it matches the repository layout before changing it, because the same
    # URL is used as the item GUID.
    commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"

    item = existing_items.get(commit_url)
    if item is None:
        # First time this PDF is seen: create its feed entry.
        item = ET.Element("item")
        ET.SubElement(item, "title").text = relative_path
        ET.SubElement(item, "link").text = commit_url
        ET.SubElement(item, "guid").text = commit_url
        channel.append(item)

    # New and existing items both get the date of this run. An existing item
    # that lacks <pubDate> gets one created instead of crashing on None.
    pub_date = item.find("pubDate")
    if pub_date is None:
        pub_date = ET.SubElement(item, "pubDate")
    pub_date.text = current_date
53 | | - |
54 | | -# Sort items by pubDate descending |
55 | | -items = channel.findall("item") |
56 | | - |
57 | | -# Parse pubDate strings to datetime objects for sorting |
# Sort key used to order feed items by publication date.
def get_pub_date(item):
    """Return the publication date of a feed item as an aware datetime.

    Args:
        item: An ``<item>`` element that may contain a ``<pubDate>`` child
            holding an RFC 2822 date string.

    Returns:
        The parsed date. A date parsed without timezone information is
        treated as UTC so that every key is comparable. If ``<pubDate>`` is
        missing, empty or cannot be parsed, the earliest representable UTC
        datetime is returned, which places the item last in a descending
        sort instead of aborting it.
    """
    oldest = datetime.min.replace(tzinfo=timezone.utc)

    pub_date = item.find("pubDate")
    if pub_date is None:
        return oldest
    pub_date_text = (pub_date.text or "").strip()
    if not pub_date_text:
        return oldest

    try:
        dt = parsedate_to_datetime(pub_date_text)
    except (TypeError, ValueError):
        # Depending on the Python version an invalid date raises either one.
        return oldest

    # Naive and aware datetimes cannot be compared with each other.
    return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
61 | 58 |
|
# Put the newest items first: take every item out of the channel, order them
# by publication date (descending) and attach them again in that order.
items = sorted(channel.findall("item"), key=get_pub_date, reverse=True)
for item in items:
    channel.remove(item)
channel.extend(items)
70 | 67 |
|
# Write the feed to disk as UTF-8, including the XML declaration.
tree = ET.ElementTree(root)
tree.write(
    rss_file,
    xml_declaration=True,
    encoding="UTF-8",
)

# Report how many articles the saved feed contains.
article_count = len(items)
print(f"Generated RSS feed with {article_count} article(s).")
0 commit comments