File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -134,14 +134,14 @@ res = sgai.search(
134134Crawl a website and its linked pages.
135135
136136``` python
137- from scrapegraph_py import ScrapeGraphAI, MarkdownFormatConfig
137+ from scrapegraph_py import ScrapeGraphAI, ScrapeMarkdownFormatEntry
138138
139139sgai = ScrapeGraphAI()
140140
141141# Start a crawl
142142start = sgai.crawl.start(
143143 "https://example.com",
144- formats = [MarkdownFormatConfig ()],
144+ formats = [ScrapeMarkdownFormatEntry ()],
145145 max_pages = 50 ,
146146 max_depth = 2 ,
147147 max_links_per_page = 10 ,
@@ -151,6 +151,7 @@ start = sgai.crawl.start(
151151
152152# Check status
153153status = sgai.crawl.get(start.data["id"])
154+ pages = sgai.crawl.pages(start.data["id"], cursor=0, limit=50)
154155
155156# Control
156157sgai.crawl.stop(crawl_id)
@@ -259,6 +260,7 @@ async with AsyncScrapeGraphAI() as sgai:
259260async with AsyncScrapeGraphAI() as sgai:
260261 start = await sgai.crawl.start("https://example.com", max_pages=50)
261262 status = await sgai.crawl.get(start.data["id"])
263+ pages = await sgai.crawl.pages(start.data["id"], cursor=0, limit=50)
262264```
263265
264266### Async Monitor
Original file line number Diff line number Diff line change 44 ApiResult ,
55 BrandingFormatConfig ,
66 CrawlPage ,
7+ CrawlPagesQuery ,
8+ CrawlPagesResponse ,
79 CrawlPageStatus ,
810 CrawlRequest ,
911 CrawlResponse ,
1012 CrawlStatus ,
1113 CreditsResponse ,
1214 ExtractRequest ,
15+ ExtractRequestBase ,
1316 ExtractResponse ,
1417 FetchConfig ,
1518 FetchContentType ,
2629 JsonFormatConfig ,
2730 LinksFormatConfig ,
2831 MarkdownFormatConfig ,
32+ MonitorActivityQuery ,
2933 MonitorActivityRequest ,
3034 MonitorActivityResponse ,
3135 MonitorCreateRequest ,
3539 MonitorTickEntry ,
3640 MonitorTickStatus ,
3741 MonitorUpdateRequest ,
42+ ScrapeBrandingFormatEntry ,
43+ ScrapeCaptureFormat ,
44+ ScrapeContentFormat ,
45+ ScrapeFormat ,
46+ ScrapeFormatEntry ,
47+ ScrapeFormatError ,
48+ ScrapeHtmlFormatEntry ,
49+ ScrapeImagesFormatEntry ,
50+ ScrapeJsonFormatEntry ,
51+ ScrapeLinksFormatEntry ,
52+ ScrapeMarkdownFormatEntry ,
3853 ScrapeRequest ,
3954 ScrapeResponse ,
55+ ScrapeScreenshotData ,
56+ ScrapeScreenshotFormatEntry ,
57+ ScrapeSummaryFormatEntry ,
4058 ScreenshotFormatConfig ,
4159 SearchRequest ,
4260 SearchResponse ,
5472 "ScrapeRequest" ,
5573 "ScrapeResponse" ,
5674 "ExtractRequest" ,
75+ "ExtractRequestBase" ,
5776 "ExtractResponse" ,
5877 "SearchRequest" ,
5978 "SearchResponse" ,
6079 "SearchResult" ,
6180 "CrawlRequest" ,
6281 "CrawlResponse" ,
6382 "CrawlPage" ,
83+ "CrawlPagesQuery" ,
84+ "CrawlPagesResponse" ,
6485 "CrawlPageStatus" ,
6586 "CrawlStatus" ,
6687 "MonitorCreateRequest" ,
6990 "MonitorResult" ,
7091 "MonitorDiffs" ,
7192 "MonitorActivityRequest" ,
93+ "MonitorActivityQuery" ,
7294 "MonitorActivityResponse" ,
7395 "MonitorTickEntry" ,
7496 "MonitorTickStatus" ,
83105 "FetchContentType" ,
84106 "FetchMode" ,
85107 "FormatConfig" ,
108+ "ScrapeFormat" ,
109+ "ScrapeContentFormat" ,
110+ "ScrapeCaptureFormat" ,
111+ "ScrapeFormatEntry" ,
112+ "ScrapeMarkdownFormatEntry" ,
113+ "ScrapeHtmlFormatEntry" ,
114+ "ScrapeScreenshotFormatEntry" ,
115+ "ScrapeJsonFormatEntry" ,
116+ "ScrapeLinksFormatEntry" ,
117+ "ScrapeImagesFormatEntry" ,
118+ "ScrapeSummaryFormatEntry" ,
119+ "ScrapeBrandingFormatEntry" ,
120+ "ScrapeFormatError" ,
121+ "ScrapeScreenshotData" ,
86122 "HtmlMode" ,
87123 "Service" ,
88124 "TimeRange" ,
Original file line number Diff line number Diff line change 1414from .env import env
1515from .schemas import (
1616 ApiResult ,
17+ CrawlPagesQuery ,
18+ CrawlPagesResponse ,
1719 CrawlRequest ,
1820 CrawlResponse ,
1921 CreditsResponse ,
@@ -116,6 +118,18 @@ async def start(
116118 async def get (self , id : str ) -> ApiResult [CrawlResponse ]:
117119 return await self._client._get(f"/crawl/{id}", CrawlResponse)
118120
121+ async def pages (
122+ self ,
123+ id : str ,
124+ * ,
125+ cursor : int | None = None ,
126+ limit : int | None = None ,
127+ ) -> ApiResult [CrawlPagesResponse ]:
128+ kwargs = _compact (cursor = cursor , limit = limit )
129+ query = CrawlPagesQuery (** kwargs ) if kwargs else None
130+ qs = {key : getattr (query , key ) for key in kwargs } if query else None
131+ return await self._client._get(f"/crawl/{id}/pages", CrawlPagesResponse, params=qs or None)
132+
119133 async def stop (self , id : str ) -> ApiResult [dict ]:
120134 return await self._client._post_empty(f"/crawl/{id}/stop")
121135
Original file line number Diff line number Diff line change 1414from .env import env
1515from .schemas import (
1616 ApiResult ,
17+ CrawlPagesQuery ,
18+ CrawlPagesResponse ,
1719 CrawlRequest ,
1820 CrawlResponse ,
1921 CreditsResponse ,
@@ -116,6 +118,18 @@ def start(
116118 def get (self , id : str ) -> ApiResult [CrawlResponse ]:
117119 return self._client._get(f"/crawl/{id}", CrawlResponse)
118120
121+ def pages (
122+ self ,
123+ id : str ,
124+ * ,
125+ cursor : int | None = None ,
126+ limit : int | None = None ,
127+ ) -> ApiResult [CrawlPagesResponse ]:
128+ kwargs = _compact (cursor = cursor , limit = limit )
129+ query = CrawlPagesQuery (** kwargs ) if kwargs else None
130+ qs = {key : getattr (query , key ) for key in kwargs } if query else None
131+ return self._client._get(f"/crawl/{id}/pages", CrawlPagesResponse, params=qs or None)
132+
119133 def stop (self , id : str ) -> ApiResult [dict ]:
120134 return self._client._post_empty(f"/crawl/{id}/stop")
121135
You can’t perform that action at this time.
0 commit comments