|
2 | 2 | import base64 |
3 | 3 | import gzip |
4 | 4 | from typing import TYPE_CHECKING |
| 5 | +from unittest.mock import patch |
5 | 6 |
|
6 | 7 | from yarl import URL |
7 | 8 |
|
@@ -216,3 +217,79 @@ def transform_request(request_options: RequestOptions) -> RequestOptions | Reque |
216 | 217 | 'http://not-exists.com/catalog?item=74&desc=vacation_newfoundland', |
217 | 218 | 'http://not-exists.com/catalog?item=83&desc=vacation_usa', |
218 | 219 | } |
| 220 | + |
| 221 | + |
async def test_transform_request_function_with_skip(server_url: URL, http_client: HttpClient) -> None:
    """Check the loader's state when the transform function answers 'skip' for every request.

    After the loader is drained it is expected to be empty and finished, with both the total
    and the handled counters equal to zero.
    """

    def skip_everything(_request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
        return 'skip'

    encoded_sitemap = encode_base64(BASIC_SITEMAP.encode())
    url = (server_url / 'sitemap.xml').with_query(base64=encoded_sitemap)

    loader = SitemapRequestLoader(
        [str(url)],
        http_client=http_client,
        transform_request_function=skip_everything,
    )

    # Drain the loader; anything it still hands out is acknowledged right away.
    while not (await loader.is_finished()):
        next_request = await loader.fetch_next_request()
        if not next_request:
            continue
        await loader.mark_request_as_handled(next_request)

    # The sitemap itself contains URLs, but each one was skipped, so nothing should have been
    # counted or handled.
    assert await loader.is_empty()
    assert await loader.is_finished()
    assert await loader.get_total_count() == 0
    assert await loader.get_handled_count() == 0
| 246 | + |
| 247 | + |
async def test_sitemap_loader_to_tandem(
    server_url: URL,
    http_client: HttpClient,
) -> None:
    """Drain a sitemap loader through the manager returned by `to_tandem()`.

    Requests are fetched and marked as handled via the tandem manager until the loader reports
    that it is finished; afterwards both objects are expected to be empty and finished.
    """
    encoded_sitemap = encode_base64(BASIC_SITEMAP.encode())
    url = (server_url / 'sitemap.xml').with_query(base64=encoded_sitemap)

    loader = SitemapRequestLoader([str(url)], http_client=http_client)
    tandem = await loader.to_tandem()

    # The loop condition watches the loader, while the requests themselves go through the tandem.
    while not (await loader.is_finished()):
        next_request = await tandem.fetch_next_request()
        if not next_request:
            continue
        await tandem.mark_request_as_handled(next_request)

    assert await loader.is_empty()
    assert await loader.is_finished()

    assert await tandem.is_empty()
    assert await tandem.is_finished()
| 268 | + |
| 269 | + |
async def test_sitemap_loader_to_tandem_with_request_dropped(
    server_url: URL,
    http_client: HttpClient,
) -> None:
    """Drain a tandem whose read-write manager raises on every `add_request` call.

    `add_request` on the tandem's `_read_write_manager` is patched to raise for the whole drain
    loop. The test expects the loop to terminate regardless, leaving both the loader and the
    tandem manager empty and finished.
    """
    encoded_sitemap = encode_base64(BASIC_SITEMAP.encode())
    url = (server_url / 'sitemap.xml').with_query(base64=encoded_sitemap)

    loader = SitemapRequestLoader(
        [str(url)],
        http_client=http_client,
    )
    tandem = await loader.to_tandem()

    # Make every attempt to add a request to the underlying read-write manager fail.
    read_write_manager = tandem._read_write_manager
    failing_add = patch.object(
        read_write_manager, 'add_request', side_effect=Exception('Failed to add request')
    )

    with failing_add:
        while not (await loader.is_finished()):
            next_request = await tandem.fetch_next_request()
            if not next_request:
                continue
            await tandem.mark_request_as_handled(next_request)

    assert await loader.is_empty()
    assert await loader.is_finished()

    assert await tandem.is_empty()
    assert await tandem.is_finished()
0 commit comments