Skip to content

Commit 7d7be04

Browse files
committed
sitemap and its test methods
1 parent fae7b3c commit 7d7be04

File tree

7 files changed

+353
-9
lines changed

7 files changed

+353
-9
lines changed

reflex/app.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from types import SimpleNamespace
2121
from typing import TYPE_CHECKING, Any, BinaryIO, get_args, get_type_hints
2222

23-
from fastapi import FastAPI, HTTPException, Request
23+
from fastapi import FastAPI, HTTPException, Request, Response
2424
from fastapi import UploadFile as FastAPIUploadFile
2525
from fastapi.middleware import cors
2626
from fastapi.responses import JSONResponse, StreamingResponse
@@ -80,6 +80,7 @@
8080
replace_brackets_with_keywords,
8181
verify_route_validity,
8282
)
83+
from reflex.sitemap import generate_sitemaps, read_sitemap_file
8384
from reflex.state import (
8485
BaseState,
8586
RouterData,
@@ -423,6 +424,8 @@ class App(MiddlewareMixin, LifespanMixin):
423424
# Put the toast provider in the app wrap.
424425
toaster: Component | None = dataclasses.field(default_factory=toast.provider)
425426

427+
_sitemap_properties: dict[str, dict] = dataclasses.field(default_factory=dict)
428+
426429
@property
427430
def api(self) -> FastAPI | None:
428431
"""Get the backend api.
@@ -617,6 +620,19 @@ def _add_default_endpoints(self):
617620

618621
self.api.get(str(constants.Endpoint.PING))(ping)
619622
self.api.get(str(constants.Endpoint.HEALTH))(health)
623+
self.api.get(str(constants.Endpoint.SITEMAP))(self.serve_sitemap)
624+
625+
async def serve_sitemap(self) -> Response:
    """Serve the sitemap as an XML response.

    The sitemap is read from the sitemap.xml file. If that file does not
    exist yet, it is first generated from the sitemap properties registered
    on this app and then read.

    Returns:
        An HTTP response with the XML sitemap content and the media type set
        to "application/xml".
    """
    try:
        sitemap_xml = read_sitemap_file()
    except FileNotFoundError:
        # No sitemap has been written yet: build it from the registered
        # pages instead of failing the request.
        generate_sitemaps(self._sitemap_properties)
        sitemap_xml = read_sitemap_file()
    return Response(content=sitemap_xml, media_type="application/xml")
620636

621637
def _add_optional_endpoints(self):
622638
"""Add optional api endpoints (_upload)."""
@@ -695,6 +711,8 @@ def add_page(
695711
on_load: EventType[()] | None = None,
696712
meta: list[dict[str, str]] = constants.DefaultPage.META_LIST,
697713
context: dict[str, Any] | None = None,
714+
sitemap_priority: float = constants.DefaultPage.SITEMAP_PRIORITY,
715+
sitemap_changefreq: str = constants.DefaultPage.SITEMAP_CHANGEFREQ,
698716
):
699717
"""Add a page to the app.
700718
@@ -710,6 +728,9 @@ def add_page(
710728
on_load: The event handler(s) that will be called each time the page load.
711729
meta: The metadata of the page.
712730
context: Values passed to page for custom page-specific logic.
731+
sitemap_priority: The priority of the page in the sitemap. If left at the default value, the priority is calculated based on the
732+
depth of the route.
733+
sitemap_changefreq: The change frequency of the page in the sitemap. Defaults to 'weekly'.
713734
714735
Raises:
715736
PageValueError: When the component is not set for a non-404 page.
@@ -775,7 +796,7 @@ def add_page(
775796
)
776797

777798
# Setup dynamic args for the route.
778-
# this state assignment is only required for tests using the deprecated state kwarg for App
799+
# This state assignment is only required for tests using the deprecated state kwarg for App
779800
state = self._state if self._state else State
780801
state.setup_dynamic_args(get_route_args(route))
781802

@@ -784,6 +805,11 @@ def add_page(
784805
on_load if isinstance(on_load, list) else [on_load]
785806
)
786807

808+
self._sitemap_properties[route] = {
809+
"priority": sitemap_priority,
810+
"changefreq": sitemap_changefreq,
811+
}
812+
787813
self._unevaluated_pages[route] = unevaluated_page
788814

789815
def _compile_page(self, route: str, save_page: bool = True):
@@ -811,6 +837,10 @@ def _compile_page(self, route: str, save_page: bool = True):
811837
if save_page:
812838
self._pages[route] = component
813839

840+
def get_sitemap_properties(self) -> dict[str, dict]:
    """Return the sitemap settings stored on this app.

    Returns:
        The mapping held in ``_sitemap_properties``. The stored dict itself
        is returned, not a copy.
    """
    properties = self._sitemap_properties
    return properties
843+
814844
def get_load_events(self, route: str) -> list[IndividualEventType[()]]:
815845
"""Get the load events for a route.
816846
@@ -1103,6 +1133,9 @@ def _compile(self, export: bool = False):
11031133

11041134
self._pages = {}
11051135

1136+
# generate sitemaps from sitemap properties
1137+
generate_sitemaps(self._sitemap_properties)
1138+
11061139
def get_compilation_time() -> str:
11071140
return str(datetime.now().time()).split(".")[0]
11081141

reflex/constants/event.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Endpoint(Enum):
1313
AUTH_CODESPACE = "auth-codespace"
1414
HEALTH = "_health"
1515
ALL_ROUTES = "_all_routes"
16+
SITEMAP = "sitemap.xml"
1617

1718
def __str__(self) -> str:
1819
"""Get the string representation of the endpoint.

reflex/constants/route.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ class DefaultPage(SimpleNamespace):
6060
IMAGE = "favicon.ico"
6161
# The default meta list to show for Reflex apps.
6262
META_LIST = []
63+
# The default change frequency for sitemap generation.
64+
SITEMAP_CHANGEFREQ = "weekly"
65+
# The default priority for sitemap generation.
66+
SITEMAP_PRIORITY = 10.0
6367

6468

6569
# 404 variables

reflex/sitemap.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""This module contains functions to generate and manage the sitemap.xml file."""
2+
3+
from pathlib import Path
4+
from typing import Dict, List
5+
from xml.dom import minidom
6+
from xml.etree.ElementTree import Element, SubElement, tostring
7+
8+
from reflex import constants
9+
from reflex.config import get_config
10+
from reflex.utils import prerequisites
11+
12+
# Folder that holds sitemap.xml: the static directory inside the web
# directory, resolved against the working directory at import time.
_sitemap_folder_path: Path = Path.cwd().joinpath(
    prerequisites.get_web_dir(), constants.Dirs.STATIC
)

# Full path of the sitemap file inside that folder.
_sitemap_file_path: Path = _sitemap_folder_path.joinpath("sitemap.xml")
19+
20+
21+
def check_sitemap_file_exists() -> bool:
    """Check if the sitemap file exists.

    Returns:
        True if sitemap.xml exists at the module's sitemap file path.
    """
    # The file lives inside the sitemap folder, so its existence already
    # implies the folder exists; no separate folder check is needed.
    return _sitemap_file_path.exists()
28+
29+
30+
def read_sitemap_file() -> str:
    """Return the text stored in the sitemap.xml file.

    Returns:
        The full contents of the sitemap file.

    Raises:
        FileNotFoundError: If the sitemap file has not been written yet.
    """
    return _sitemap_file_path.read_text()
38+
39+
40+
def generate_xml(links: List[Dict[str, str]]) -> str:
    """Generate an XML sitemap from a list of links.

    Args:
        links: A list of dictionaries where each dictionary contains 'loc'
            (URL of the page), 'changefreq' (frequency of changes), and
            'priority' (priority of the page). Values are converted to text
            with ``str`` before being written.

    Returns:
        A pretty-printed XML string representing the sitemap.

    Raises:
        KeyError: If a link is missing 'loc', 'changefreq' or 'priority'.
    """
    # The sitemap protocol namespace is an identifier, not a link to fetch:
    # it is defined with the "http" scheme and must be written exactly so.
    urlset = Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for link in links:
        url = SubElement(urlset, "url")
        for tag in ("loc", "changefreq", "priority"):
            SubElement(url, tag).text = str(link[tag])
    rough_string = tostring(urlset, "utf-8")
    # Round-trip through minidom only to get indented output.
    return minidom.parseString(rough_string).toprettyxml(indent=" ")
62+
63+
64+
def generate_sitemaps(sitemap_config: Dict[str, Dict[str, str]]) -> None:
    """Build the sitemap links from the config and write sitemap.xml.

    The routes in ``sitemap_config`` are turned into a list of links carrying
    their location (URL), change frequency and priority, and that list is
    written out as the sitemap.xml file. Dynamic routes and the 404 page are
    left out of the sitemap.

    Args:
        sitemap_config: A dictionary containing the sitemap properties for each route.
    """
    generate_static_sitemap(generate_links_for_sitemap(sitemap_config))
76+
77+
78+
def generate_links_for_sitemap(
    sitemap_config: Dict[str, Dict[str, str]],
) -> List[dict[str, str]]:
    """Generate the list of links to be written to the sitemap.

    Each route in ``sitemap_config`` becomes one link with its location (URL),
    change frequency and priority. Dynamic routes (containing both "[" and
    "]") and the "404" route are excluded.

    A priority equal to ``constants.DefaultPage.SITEMAP_PRIORITY`` is treated
    as "not set by the user" and replaced by a value derived from the route
    depth: 1.0 minus 0.1 per "/" in the path, never lower than 0.5.

    Args:
        sitemap_config: A dictionary containing the sitemap properties
            ('priority' and 'changefreq') for each route.

    Returns:
        A list of dictionaries, each containing the 'loc' (URL of the page),
        'changefreq' and 'priority' of one route.

    Raises:
        KeyError: If a route's properties lack 'changefreq' or 'priority'.
    """
    unset_priority = constants.DefaultPage.SITEMAP_PRIORITY

    # Read the deploy URL once instead of on every iteration. A missing URL
    # falls back to "" rather than the literal text "None", and a trailing
    # slash is dropped so joining with "/route" never yields "//".
    base_url = (get_config().deploy_url or "").rstrip("/")

    links = []
    for route, properties in sitemap_config.items():
        # Ignore dynamic routes and 404.
        if ("[" in route and "]" in route) or route == "404":
            continue

        changefreq = properties["changefreq"]
        priority = properties["priority"]

        # Normalise the route into an absolute path; "index" is the root.
        if route == "index":
            path = "/"
        elif route.startswith("/"):
            path = route
        else:
            path = f"/{route}"

        if priority == unset_priority:
            depth = path.count("/")
            # Round so float arithmetic cannot leak digits like 0.7999999.
            priority = max(0.5, round(1.0 - (depth * 0.1), 1))

        links.append(
            {
                "loc": f"{base_url}{path}",
                "changefreq": changefreq,
                "priority": priority,
            }
        )
    return links
128+
129+
130+
def generate_static_sitemap(links: List[Dict[str, str]]) -> None:
    """Render the links as XML and store the result in sitemap.xml.

    The sitemap folder is created when missing, and any sitemap.xml already
    present there is overwritten.

    Args:
        links: The list of urls for which the sitemap is to be generated.
    """
    xml_content = generate_xml(links)
    _sitemap_folder_path.mkdir(parents=True, exist_ok=True)

    # Overwriting is intended: the file is always rebuilt from the given links.
    _sitemap_file_path.write_text(xml_content)

reflex/utils/build.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,6 @@ def build(
203203
"Collecting build traces",
204204
]
205205

206-
# Generate a sitemap if a deploy URL is provided.
207-
if deploy_url is not None:
208-
generate_sitemap_config(deploy_url, export=for_export)
209-
command = "export-sitemap"
210-
211-
checkpoints.extend(["Loading next-sitemap", "Generation completed"])
212-
213206
# Start the subprocess with the progress bar.
214207
process = processes.new_process(
215208
[*prerequisites.get_js_package_executor(raise_on_none=True)[0], "run", command],

tests/units/test_app.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,23 @@ def test_add_duplicate_page_route_error(app: App, first_page, second_page, route
372372
app.add_page(second_page, route="/" + route.strip("/") if route else None)
373373

374374

375+
def test_add_page_with_sitemap_properties(app):
    """Test that add_page stores the sitemap properties on the app instance.

    Args:
        app: The app fixture to add pages to.
    """
    # Explicit values are stored as given.
    app.add_page(
        page1, route="/page1", sitemap_priority=0.9, sitemap_changefreq="daily"
    )
    assert app._sitemap_properties["page1"] == {"priority": 0.9, "changefreq": "daily"}

    # Omitted values fall back to the defaults.
    app.add_page(page2, route="/page2")
    assert app._sitemap_properties["page2"] == {
        "priority": 10.0,
        "changefreq": "weekly",
    }
390+
391+
375392
def test_initialize_with_admin_dashboard(test_model):
376393
"""Test setting the admin dashboard of an app.
377394

0 commit comments

Comments
 (0)