|
57 | 57 | import json |
58 | 58 | import logging |
59 | 59 | import os |
60 | | -from typing import Any, Dict, List, Optional |
| 60 | +from typing import Any, Dict, List, Literal, Optional, get_args |
61 | 61 | from urllib.parse import urlparse |
62 | 62 |
|
63 | 63 | from rich.panel import Panel |
|
84 | 84 | DEFAULT_DATASET_ITEMS_LIMIT = 100 |
85 | 85 |
|
86 | 86 | WEBSITE_CONTENT_CRAWLER = "apify/website-content-crawler" |
87 | | -WEBSITE_CONTENT_CRAWLER_TYPES = ("playwright:adaptive", "playwright:firefox", "cheerio") |
| 87 | +CrawlerType = Literal["playwright:adaptive", "playwright:firefox", "cheerio"] |
| 88 | +WEBSITE_CONTENT_CRAWLER_TYPES = get_args(CrawlerType) |
88 | 89 |
|
89 | 90 |
|
90 | 91 | # --- Helper functions --- |
@@ -322,7 +323,7 @@ def scrape_url( |
322 | 323 | self, |
323 | 324 | url: str, |
324 | 325 | timeout_secs: int = DEFAULT_SCRAPE_TIMEOUT_SECS, |
325 | | - crawler_type: str = "cheerio", |
| 326 | + crawler_type: CrawlerType = "cheerio", |
326 | 327 | ) -> str: |
327 | 328 | """Scrape a single URL using Website Content Crawler and return markdown.""" |
328 | 329 | self._validate_url(url) |
@@ -608,7 +609,7 @@ def apify_run_task_and_get_dataset( |
608 | 609 | def apify_scrape_url( |
609 | 610 | url: str, |
610 | 611 | timeout_secs: int = DEFAULT_SCRAPE_TIMEOUT_SECS, |
611 | | - crawler_type: str = "cheerio", |
| 612 | + crawler_type: CrawlerType = "cheerio", |
612 | 613 | ) -> Dict[str, Any]: |
613 | 614 | """Scrape a single URL and return its content as markdown. |
614 | 615 |
|
|
0 commit comments