@@ -58,7 +58,7 @@ pub struct ScrapeCommand {
5858
5959 /// HTTP method to use for the request.
6060 /// Use HEAD to check headers without downloading content.
61- /// Supported values: GET (default), HEAD.
61+ /// Supported values: GET (default), HEAD, POST.
6262 #[ arg( long, default_value = "GET" , value_name = "METHOD" ) ]
6363 pub method : String ,
6464
@@ -80,6 +80,17 @@ pub struct ScrapeCommand {
8080 #[ arg( long = "header" , short = 'H' , value_name = "HEADER" ) ]
8181 pub headers : Vec < String > ,
8282
83+ /// Cookie to send with the request (format: "name=value").
84+ /// Can be specified multiple times for multiple cookies.
85+ /// Example: --cookie "session=abc123" --cookie "user=john"
86+ #[ arg( long = "cookie" , value_name = "COOKIE" ) ]
87+ pub cookies : Vec < String > ,
88+
89+ /// Disable following HTTP redirects.
90+ /// By default, up to 10 redirects are followed.
91+ #[ arg( long) ]
92+ pub no_follow_redirects : bool ,
93+
8394 /// Strip images from output.
8495 #[ arg( long) ]
8596 pub no_images : bool ,
@@ -227,8 +238,13 @@ impl ScrapeCommand {
227238 // Build HTTP client with redirect policy and cookie store
228239 // Cookie store is enabled to persist cookies across redirects, which is
229240 // required for auth-gated pages that set cookies then redirect.
241+ let redirect_policy = if self . no_follow_redirects {
242+ reqwest:: redirect:: Policy :: none ( )
243+ } else {
244+ reqwest:: redirect:: Policy :: limited ( 10 )
245+ } ;
230246 let mut client_builder = create_client_builder ( )
231- . redirect ( reqwest :: redirect :: Policy :: limited ( 10 ) )
247+ . redirect ( redirect_policy )
232248 . cookie_store ( true ) ;
233249
234250 // Override timeout if specified (0 means no timeout)
@@ -249,33 +265,46 @@ impl ScrapeCommand {
249265
250266 // Parse the HTTP method
251267 let method_upper = self . method . to_uppercase ( ) ;
252- let use_head = match method_upper. as_str ( ) {
253- "GET" => false ,
254- "HEAD" => true ,
255- _ => bail ! ( "Unsupported HTTP method: {}. Use GET or HEAD." , self . method) ,
256- } ;
257268
258269 // Parse custom headers (they are attached to the request further below)
259270 let parsed_headers = parse_headers ( & self . headers ) ?;
260271
272+ // Build cookie header from --cookie flags
273+ let cookie_header = if !self . cookies . is_empty ( ) {
274+ Some ( self . cookies . join ( "; " ) )
275+ } else {
276+ None
277+ } ;
278+
261279 if self . verbose {
262- eprintln ! (
263- "Fetching: {} (method: {})" ,
264- self . url,
265- if use_head { "HEAD" } else { "GET" }
266- ) ;
280+ eprintln ! ( "Fetching: {} (method: {})" , self . url, method_upper) ;
267281 }
268282
269- // Handle HEAD and GET requests separately for proper type inference
270- if use_head {
271- // HEAD request - just return headers
272- let mut request = client. head ( & self . url ) ;
273- for ( name, value) in & parsed_headers {
274- request = request. header ( name. as_str ( ) , value. as_str ( ) ) ;
275- }
283+ // Build request based on HTTP method
284+ let mut request = match method_upper. as_str ( ) {
285+ "GET" => client. get ( & self . url ) ,
286+ "HEAD" => client. head ( & self . url ) ,
287+ "POST" => client. post ( & self . url ) ,
288+ _ => bail ! (
289+ "Unsupported HTTP method: {}. Use GET, HEAD, or POST." ,
290+ self . method
291+ ) ,
292+ } ;
276293
277- let response = request. send ( ) . await . context ( "Failed to fetch URL" ) ?;
294+ // Add custom headers
295+ for ( name, value) in & parsed_headers {
296+ request = request. header ( name. as_str ( ) , value. as_str ( ) ) ;
297+ }
298+
299+ // Add cookies if specified
300+ if let Some ( ref cookies) = cookie_header {
301+ request = request. header ( "Cookie" , cookies. as_str ( ) ) ;
302+ }
303+
304+ let response = request. send ( ) . await . context ( "Failed to fetch URL" ) ?;
278305
306+ // For HEAD requests, just show headers and return
307+ if method_upper == "HEAD" {
279308 if !response. status ( ) . is_success ( ) {
280309 bail ! (
281310 "HTTP error: {} {}" ,
@@ -307,27 +336,7 @@ impl ScrapeCommand {
307336 return Ok ( ( ) ) ;
308337 }
309338
310- // GET request - fetch content
311- let mut request = client. get ( & self . url ) ;
312- for ( name, value) in & parsed_headers {
313- request = request. header ( name. as_str ( ) , value. as_str ( ) ) ;
314- }
315-
316- // Send request with improved timeout error message (Issue #1985)
317- let timeout_secs = self . timeout ;
318- let response = request. send ( ) . await . map_err ( |e| {
319- if e. is_timeout ( ) {
320- anyhow:: anyhow!(
321- "Request timed out after {} second{}. Use --timeout to increase the timeout." ,
322- timeout_secs,
323- if timeout_secs == 1 { "" } else { "s" }
324- )
325- } else if e. is_connect ( ) {
326- anyhow:: anyhow!( "Failed to connect to URL: {}" , e)
327- } else {
328- anyhow:: anyhow!( "Failed to fetch URL: {}" , e)
329- }
330- } ) ?;
339+ // For GET/POST, check response status
331340
332341 if !response. status ( ) . is_success ( ) {
333342 bail ! (
@@ -1420,9 +1429,12 @@ mod tests {
14201429 url : String :: new ( ) ,
14211430 output : None ,
14221431 format : "markdown" . to_string ( ) ,
1432+ method : "GET" . to_string ( ) ,
14231433 timeout : 30 ,
14241434 user_agent : None ,
14251435 headers : vec ! [ ] ,
1436+ cookies : vec ! [ ] ,
1437+ no_follow_redirects : false ,
14261438 no_images : false ,
14271439 no_links : false ,
14281440 selector : None ,
@@ -1445,9 +1457,12 @@ mod tests {
14451457 url : " " . to_string ( ) ,
14461458 output : None ,
14471459 format : "markdown" . to_string ( ) ,
1460+ method : "GET" . to_string ( ) ,
14481461 timeout : 30 ,
14491462 user_agent : None ,
14501463 headers : vec ! [ ] ,
1464+ cookies : vec ! [ ] ,
1465+ no_follow_redirects : false ,
14511466 no_images : false ,
14521467 no_links : false ,
14531468 selector : None ,
@@ -1472,9 +1487,12 @@ mod tests {
14721487 url : "https://example.com" . to_string ( ) ,
14731488 output : None ,
14741489 format : "markdown" . to_string ( ) ,
1490+ method : "GET" . to_string ( ) ,
14751491 timeout : 0 ,
14761492 user_agent : None ,
14771493 headers : vec ! [ ] ,
1494+ cookies : vec ! [ ] ,
1495+ no_follow_redirects : false ,
14781496 no_images : false ,
14791497 no_links : false ,
14801498 selector : None ,
0 commit comments