|
| 1 | +"""This module contains functions to generate and manage the sitemap.xml file.""" |
| 2 | + |
| 3 | +from pathlib import Path |
| 4 | +from typing import Dict, List |
| 5 | +from xml.dom import minidom |
| 6 | +from xml.etree.ElementTree import Element, SubElement, tostring |
| 7 | + |
| 8 | +from reflex import constants |
| 9 | +from reflex.config import get_config |
| 10 | +from reflex.utils import prerequisites |
| 11 | + |
# Directory that holds sitemap.xml: <cwd>/<web dir>/<static dir>.
# Resolved once, at import time, against the current working directory.
_sitemap_folder_path: Path = Path.cwd().joinpath(
    prerequisites.get_web_dir(), constants.Dirs.STATIC
)

# Full path of the sitemap file inside that directory.
_sitemap_file_path: Path = _sitemap_folder_path.joinpath("sitemap.xml")
| 19 | + |
| 20 | + |
def check_sitemap_file_exists() -> bool:
    """Check if the sitemap file exists.

    Returns:
        bool: True if ``_sitemap_file_path`` exists on disk, False otherwise.
    """
    # A path can only exist if its parent directory exists, so testing the
    # file alone is equivalent to testing folder-and-file (and avoids using
    # the bitwise ``&`` operator as a logical "and").
    return _sitemap_file_path.exists()
| 28 | + |
| 29 | + |
def read_sitemap_file() -> str:
    """Read the sitemap file.

    Returns:
        str: The contents of the sitemap file.

    Raises:
        OSError: If the file at ``_sitemap_file_path`` cannot be opened
            (e.g. ``FileNotFoundError`` when it does not exist).
    """
    # An XML document without an encoding declaration is UTF-8, so decode it
    # explicitly instead of relying on the platform's locale encoding.
    return _sitemap_file_path.read_text(encoding="utf-8")
| 38 | + |
| 39 | + |
def generate_xml(links: List[Dict[str, str]]) -> str:
    """Generate an XML sitemap from a list of links.

    Args:
        links: A list of dictionaries where each dictionary contains
            'loc' (URL of the page), 'changefreq' (frequency of changes), and
            'priority' (priority of the page). The priority is converted with
            ``str()``, so numeric values are accepted.

    Returns:
        str: A pretty-printed XML string representing the sitemap.

    Raises:
        KeyError: If a link is missing 'loc', 'changefreq' or 'priority'.
    """
    # The sitemap protocol namespace is an identifier that consumers compare
    # literally; it is defined with the "http" scheme, not "https".
    urlset = Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for link in links:
        url = SubElement(urlset, "url")
        SubElement(url, "loc").text = link["loc"]
        SubElement(url, "changefreq").text = link["changefreq"]
        SubElement(url, "priority").text = str(link["priority"])

    # ElementTree has no pretty-printer on older Pythons, so round-trip the
    # serialized bytes through minidom to get indented output.
    rough_string = tostring(urlset, "utf-8")
    return minidom.parseString(rough_string).toprettyxml(indent=" ")
| 62 | + |
| 63 | + |
def generate_sitemaps(sitemap_config: Dict[str, Dict[str, str]]) -> None:
    """Generate the sitemap.xml file.

    Builds the list of sitemap entries (location, change frequency and
    priority) from ``sitemap_config`` via ``generate_links_for_sitemap`` and
    hands that list to ``generate_static_sitemap``, which writes sitemap.xml.
    Dynamic routes and the 404 page are excluded by the link-generation step.

    Args:
        sitemap_config: A dictionary containing the sitemap properties for each route.
    """
    generate_static_sitemap(generate_links_for_sitemap(sitemap_config))
| 76 | + |
| 77 | + |
def generate_links_for_sitemap(
    sitemap_config: Dict[str, Dict[str, str]],
) -> List[Dict[str, str]]:
    """Generate a list of links for which sitemaps are generated.

    Loops through ``sitemap_config`` and builds one entry per route with its
    location (URL), change frequency and priority. Routes containing both
    "[" and "]" (dynamic routes) and the "404" route are skipped.

    Args:
        sitemap_config: A dictionary containing the sitemap properties for each
            route. Every included route must provide 'changefreq' and 'priority'.

    Returns:
        List: A list of dictionaries where each dictionary contains the 'loc'
            (URL of the page), 'changefreq' and 'priority' of each route. The
            priority may be a float when it is derived from the route depth.

    Raises:
        KeyError: If an included route lacks 'changefreq' or 'priority'.
    """
    # The deploy URL is the same for every route, so read it once. A trailing
    # slash is dropped so that joining with "/route" never produces "//".
    # NOTE(review): assumes deploy_url is a string or None; None is treated as
    # an empty prefix rather than rendering the literal text "None".
    deploy_url = (get_config().deploy_url or "").rstrip("/")

    links = []
    for route, properties in sitemap_config.items():
        # Ignore dynamic routes and 404
        if ("[" in route and "]" in route) or route == "404":
            continue

        sitemap_changefreq = properties["changefreq"]
        sitemap_priority = properties["priority"]

        # Normalize the route into a path with a leading slash; the index
        # route maps to the site root.
        if route == "index":
            path = "/"
        elif route.startswith("/"):
            path = route
        else:
            path = f"/{route}"

        # The default constant signals that the user didn't set a priority,
        # so derive one from the depth: 0.1 less per "/" in the path, never
        # below 0.5.
        if sitemap_priority == constants.DefaultPage.SITEMAP_PRIORITY:
            depth = path.count("/")
            sitemap_priority = max(0.5, 1.0 - (depth * 0.1))

        links.append(
            {
                "loc": f"{deploy_url}{path}",
                "changefreq": sitemap_changefreq,
                "priority": sitemap_priority,
            }
        )
    return links
| 128 | + |
| 129 | + |
def generate_static_sitemap(links: List[Dict[str, str]]) -> None:
    """Generate the sitemap XML for the given links and store it in sitemap.xml.

    The XML is produced by ``generate_xml`` and written to
    ``_sitemap_file_path``; the containing folder is created if missing.

    Per the original author's note, this is invoked both when the web app is
    deployed and when a user (or crawler) requests sitemap.xml.

    Args:
        links: The list of urls for which the sitemap is to be generated.
    """
    sitemap = generate_xml(links)
    _sitemap_folder_path.mkdir(parents=True, exist_ok=True)

    # Per the original note, this is only reached when an existing sitemap.xml
    # was not served, so overwriting any existing file is intended.
    # The XML has no encoding declaration and must therefore be stored as
    # UTF-8, not in the platform's locale encoding.
    _sitemap_file_path.write_text(sitemap, encoding="utf-8")
0 commit comments