Skip to content

Commit cbabfd0

Browse files
HavenDVclaude
andcommitted
feat: parity with official Firecrawl .NET SDK + v2 endpoint coverage
Tier 1 — SDK-level fixes for parity with firecrawl/firecrawl dot-net-sdk: - Fix terminal-state polling bug: WaitJobAsync/WaitBatchAsync now treat "cancelled" as terminal instead of spinning until timeout. - Typed exception hierarchy (FirecrawlAuthenticationException 401, FirecrawlPaymentRequiredException 402, FirecrawlRateLimitException 429 with Retry-After parsing, FirecrawlServerException 5xx) wired via ProcessResponse partial on every sub-client. - FirecrawlClient.FromEnvironment() reads FIRECRAWL_API_KEY / FIRECRAWL_API_URL. - IdempotencyKeyExtensions adds x-idempotency-key header overloads for CrawlUrlsAsync and batch ScrapeAndExtractFromUrlsAsync. - Auto-pagination: CrawlingClient.PaginateAsync / WaitJobAndPaginateAsync and ScrapingClient.PaginateBatchAsync / WaitBatchAndPaginateAsync walk the next-URL chain with same-origin validation to prevent API-key exfiltration via hostile pagination pointers. Tier 2 — hand-written v2 endpoint layer (FirecrawlClient.V2): - Firecrawl has not published a v2 OpenAPI spec, so this layer is manual. - POST /v2/parse (multipart file upload + ParseFile.FromPath/FromBytes with MIME-type guessing). - GET /v2/concurrency-check. - Monitor CRUD: POST/GET/PATCH/DELETE /v2/monitor + monitor/{id}/run + /checks + /checks/{id} (with auto-pagination of MonitorCheckDetail.Next). - GET /v2/team/credit-usage/historical + /v2/team/token-usage/historical. - Dedicated V2SourceGenerationContext for AOT-safe System.Text.Json. Tier 3 — filed 6 generator improvement requests upstream at tryAGI/AutoSDK: - #332 typed HTTP exception hierarchy - #333 idempotency-key parameter on POST operations - #334 extend --generate-pageable-helpers to follow absolute 'next' URLs - #335 built-in retry handler with exponential backoff - #336 first-class multipart file-upload helpers - #337 polymorphic-array discriminator helpers (oneOf{string,object}) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2c15342 commit cbabfd0

20 files changed

Lines changed: 1484 additions & 3 deletions
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
namespace Firecrawl;
2+
3+
public partial class CrawlingClient
{
    /// <summary>
    /// Follows the <c>next</c>-URL chain of a crawl status response and appends
    /// every item of each following page's <see cref="CrawlStatusResponseObj.Data"/>
    /// to the first response. When the chain ends,
    /// <see cref="CrawlStatusResponseObj.Next"/> is cleared.
    /// The supplied <paramref name="response"/> is mutated and returned.
    /// </summary>
    /// <remarks>
    /// Firecrawl's <c>next</c> pointers are absolute URLs; we refuse to
    /// forward the <c>Authorization</c> header across origins to avoid leaking
    /// the API key. Page fetching is delegated to
    /// <c>PaginationHelper.FetchNextPageJsonAsync</c>.
    /// If an exception is thrown part-way through, <paramref name="response"/>
    /// keeps the items appended so far and its original <c>Next</c> value.
    /// </remarks>
    /// <param name="response">The first page of results; must not be null.</param>
    /// <param name="cancellationToken">Checked before each page fetch and passed to the fetch.</param>
    /// <returns>The same <paramref name="response"/> instance, now holding all pages.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// A page deserialized to null, or the <c>next</c> chain pointed back at a
    /// URL that was already fetched (which would otherwise loop forever).
    /// </exception>
    public async Task<CrawlStatusResponseObj> PaginateAsync(
        CrawlStatusResponseObj response,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(response);

        response.Data ??= new List<CrawlStatusResponseObjDataItem>();
        var next = response.Next;

        // Every URL already requested. A server that repeats a pointer would
        // otherwise keep this loop (and the Data list) growing without bound.
        var visited = new HashSet<string>(StringComparer.Ordinal);

        while (!string.IsNullOrEmpty(next))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!visited.Add(next))
            {
                throw new InvalidOperationException(
                    "Pagination aborted: the 'next' URL chain refers back to a page that was already fetched.");
            }

            var content = await PaginationHelper
                .FetchNextPageJsonAsync(HttpClient, next, cancellationToken)
                .ConfigureAwait(false);

            var page = CrawlStatusResponseObj.FromJson(content, JsonSerializerContext)
                ?? throw new InvalidOperationException("Pagination response deserialization returned null.");

            if (page.Data is { Count: > 0 })
            {
                foreach (var item in page.Data)
                {
                    response.Data.Add(item);
                }
            }

            next = page.Next;
        }

        response.Next = null;
        return response;
    }

    /// <summary>
    /// Convenience helper: waits for the job via <c>WaitJobAsync</c> and then
    /// passes the resulting status response to <see cref="PaginateAsync"/> so
    /// the full result set is returned from one call.
    /// </summary>
    /// <param name="jobId">Identifier of the crawl job to wait for.</param>
    /// <param name="pollingInterval">Forwarded unchanged to <c>WaitJobAsync</c>.</param>
    /// <param name="progress">Forwarded unchanged to <c>WaitJobAsync</c>.</param>
    /// <param name="timeout">Forwarded unchanged to <c>WaitJobAsync</c>.</param>
    /// <param name="cancellationToken">Used for both the wait and the pagination.</param>
    /// <returns>The job's status response with every page of data merged in.</returns>
    public async Task<CrawlStatusResponseObj> WaitJobAndPaginateAsync(
        string jobId,
        TimeSpan? pollingInterval = null,
        IProgress<CrawlStatusResponseObj>? progress = null,
        TimeSpan? timeout = null,
        CancellationToken cancellationToken = default)
    {
        var job = await WaitJobAsync(jobId, pollingInterval, progress, timeout, cancellationToken)
            .ConfigureAwait(false);

        return await PaginateAsync(job, cancellationToken).ConfigureAwait(false);
    }
}

src/libs/Firecrawl/CrawlClient.WaitJob.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public Task<CrawlStatusResponseObj> WaitJobAsync(
3030
{
3131
return PollingHelper.PollUntilAsync(
3232
fetchStatus: ct => GetCrawlStatusAsync(id: jobId, cancellationToken: ct),
33-
isComplete: r => r.Status is "completed" or "failed",
33+
isComplete: r => r.Status is "completed" or "failed" or "cancelled",
3434
jobDescription: $"Crawl job {jobId}",
3535
pollingInterval: pollingInterval,
3636
progress: progress,

src/libs/Firecrawl/Firecrawl.csproj

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,26 @@
33
<PropertyGroup>
44
<TargetFramework>net10.0</TargetFramework>
55
</PropertyGroup>
6-
6+
77
<PropertyGroup Label="Nuget">
88
<Description>Generated C# SDK based on Firecrawl OpenAPI specification.</Description>
99
<PackageTags>api;client;sdk;dotnet;swagger;openapi;specification;generated;nswag</PackageTags>
1010
</PropertyGroup>
1111

12+
<PropertyGroup Label="HandWrittenSuppressions">
13+
<!--
14+
Rules below are relaxed for hand-written DTO/extension code in this project.
15+
Generated files in Generated/ already comply via AutoSDK's templates.
16+
CS1591 — missing XML docs on public members of hand-written DTOs.
17+
CA1032 — extra exception constructors not used by our throw sites.
18+
CA1056 — Url string properties (Firecrawl returns plain strings, not Uri).
19+
CA1308 — ToLowerInvariant used for file-extension matching (intentional).
20+
CA1819 — byte[] property on ParseFile (necessary for multipart upload).
21+
CA1822 — partial-method overrides that don't touch instance state.
22+
CA2000 — StringContent/ByteArrayContent ownership transferred to MultipartFormDataContent.
23+
CA2227 — DTO collection setters required for System.Text.Json source-gen round-trip.
24+
-->
25+
<NoWarn>$(NoWarn);CS1591;CA1032;CA1056;CA1308;CA1819;CA1822;CA2000;CA2227</NoWarn>
26+
</PropertyGroup>
27+
1228
</Project>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
namespace Firecrawl;
2+
3+
public sealed partial class FirecrawlClient
{
    /// <summary>
    /// Environment variable read by <see cref="FromEnvironment"/> for the API key.
    /// </summary>
    public const string ApiKeyEnvironmentVariable = "FIRECRAWL_API_KEY";

    /// <summary>
    /// Environment variable read by <see cref="FromEnvironment"/> for the base URL.
    /// </summary>
    public const string ApiUrlEnvironmentVariable = "FIRECRAWL_API_URL";

    /// <summary>
    /// Creates a <see cref="FirecrawlClient"/> from the
    /// <c>FIRECRAWL_API_KEY</c> environment variable.
    /// When <c>FIRECRAWL_API_URL</c> is set and non-blank it is passed to the
    /// constructor as the base URL; otherwise <see langword="null"/> is passed.
    /// </summary>
    /// <param name="httpClient">Optional HTTP client, forwarded to the constructor.</param>
    /// <param name="disposeHttpClient">Forwarded to the constructor unchanged.</param>
    /// <returns>A client configured from the environment.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <c>FIRECRAWL_API_KEY</c> is not set or empty.
    /// </exception>
    /// <exception cref="UriFormatException">
    /// Thrown when <c>FIRECRAWL_API_URL</c> is set but is not an absolute URL.
    /// </exception>
    public static FirecrawlClient FromEnvironment(
        System.Net.Http.HttpClient? httpClient = null,
        bool disposeHttpClient = true)
    {
        var apiKey = Environment.GetEnvironmentVariable(ApiKeyEnvironmentVariable);
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new InvalidOperationException(
                $"API key not found. Set the {ApiKeyEnvironmentVariable} environment variable " +
                "or use a constructor overload that accepts an apiKey argument.");
        }

        var baseUrlOverride = Environment.GetEnvironmentVariable(ApiUrlEnvironmentVariable);
        Uri? baseUri = null;
        if (!string.IsNullOrWhiteSpace(baseUrlOverride)
            && !Uri.TryCreate(baseUrlOverride.Trim(), UriKind.Absolute, out baseUri))
        {
            // Same exception type the Uri constructor would raise, but the
            // message names the variable. The value is left out on purpose in
            // case it embeds credentials.
            throw new UriFormatException(
                $"The {ApiUrlEnvironmentVariable} environment variable must contain an absolute URL.");
        }

        return new FirecrawlClient(
            // Values injected from secret files often end in a newline;
            // surrounding whitespace is never part of a valid key.
            apiKey: apiKey.Trim(),
            httpClient: httpClient,
            baseUri: baseUri,
            authorizations: null,
            disposeHttpClient: disposeHttpClient);
    }
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
using System.Net.Http;
2+
3+
namespace Firecrawl;
4+
5+
public sealed partial class FirecrawlClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
10+
11+
public partial class BillingClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
16+
17+
public partial class CrawlingClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
22+
23+
public partial class ExtractionClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
28+
29+
public partial class LLMsTxtClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
34+
35+
public partial class MappingClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
40+
41+
public partial class ResearchClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
46+
47+
public partial class ScrapingClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
52+
53+
public partial class SearchClient
{
    // Response hook: hands the response to FirecrawlExceptionMapper, which
    // throws a typed exception for 401/402/429/5xx and otherwise returns.
    partial void ProcessResponse(HttpClient client, HttpResponseMessage response)
    {
        FirecrawlExceptionMapper.ThrowTypedFirecrawlException(response);
    }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
namespace Firecrawl;
2+
3+
public sealed partial class FirecrawlClient
{
    /// <summary>
    /// Gateway to the endpoints that exist only in the Firecrawl v2 API
    /// (parse, monitor, concurrency-check, historical usage).
    /// Firecrawl has not yet published an OpenAPI definition for v2, so that
    /// surface is hand-written; the generated members of this class cover
    /// v1 only.
    /// </summary>
    /// <remarks>
    /// Each read of this property constructs a new <c>V2Client</c> over this
    /// client's <c>HttpClient</c>.
    /// </remarks>
    public V2.V2Client V2
    {
        get
        {
            return new(HttpClient);
        }
    }
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
using System.Net.Http;
2+
3+
namespace Firecrawl;
4+
5+
internal static class FirecrawlExceptionMapper
{
    /// <summary>
    /// Promotes 401/402/429/5xx HTTP responses into typed
    /// <see cref="FirecrawlAuthenticationException"/>,
    /// <see cref="FirecrawlPaymentRequiredException"/>,
    /// <see cref="FirecrawlRateLimitException"/>, and
    /// <see cref="FirecrawlServerException"/> so callers can catch by intent
    /// rather than branching on <see cref="ApiException.StatusCode"/>.
    /// </summary>
    /// <remarks>
    /// Successful responses, and failure statuses other than the ones listed
    /// above (for example 404), are left untouched: the method returns
    /// without throwing.
    /// </remarks>
    /// <param name="response">The HTTP response to inspect; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    internal static void ThrowTypedFirecrawlException(HttpResponseMessage response)
    {
        ArgumentNullException.ThrowIfNull(response);

        if (response.IsSuccessStatusCode)
            return;

        var status = response.StatusCode;

        // A server may send an empty reason phrase; fall back to the status
        // name so the exception never carries a blank message.
        var phrase = response.ReasonPhrase;
        var reason = string.IsNullOrWhiteSpace(phrase) ? status.ToString() : phrase;

        switch ((int)status)
        {
            case 401:
                throw new FirecrawlAuthenticationException(reason);
            case 402:
                throw new FirecrawlPaymentRequiredException(reason);
            case 429:
                throw new FirecrawlRateLimitException(reason)
                {
                    RetryAfter = ParseRetryAfter(response),
                };
            case >= 500 and < 600:
                throw new FirecrawlServerException(reason, status);
        }
    }

    /// <summary>
    /// Converts the <c>Retry-After</c> response header into a wait duration.
    /// </summary>
    /// <returns>
    /// The delta when the header carries one; for a date, the time remaining
    /// until that date measured against <see cref="DateTimeOffset.UtcNow"/>
    /// and never below <see cref="TimeSpan.Zero"/>; otherwise
    /// <see langword="null"/>.
    /// </returns>
    private static TimeSpan? ParseRetryAfter(HttpResponseMessage response)
    {
        var header = response.Headers.RetryAfter;
        if (header is null)
            return null;

        if (header.Delta.HasValue)
            return header.Delta.Value;

        if (header.Date is { } date)
        {
            var remaining = date - DateTimeOffset.UtcNow;
            return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
        }

        return null;
    }
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System.Net;
2+
3+
namespace Firecrawl;
4+
5+
/// <summary>
6+
/// Thrown for HTTP 401 Unauthorized responses (invalid or missing API key).
7+
/// </summary>
8+
[Serializable]
9+
public class FirecrawlAuthenticationException : ApiException
10+
{
11+
public FirecrawlAuthenticationException() { }
12+
13+
public FirecrawlAuthenticationException(string message)
14+
: base(message, HttpStatusCode.Unauthorized) { }
15+
16+
public FirecrawlAuthenticationException(string message, Exception? innerException)
17+
: base(message, innerException, HttpStatusCode.Unauthorized) { }
18+
}
19+
20+
/// <summary>
/// Thrown for HTTP 429 Too Many Requests responses.
/// Every constructor reports <see cref="HttpStatusCode.TooManyRequests"/> as
/// the status code.
/// </summary>
[Serializable]
public class FirecrawlRateLimitException : ApiException
{
    private const string DefaultMessage =
        "Firecrawl rate limit exceeded (HTTP 429).";

    /// <summary>
    /// How long to wait before retrying, taken from the <c>Retry-After</c>
    /// response header, or <see langword="null"/> when none was supplied.
    /// The value is always a duration: whoever sets it is expected to have
    /// converted an HTTP-date form of the header into the time remaining.
    /// </summary>
    public TimeSpan? RetryAfter { get; init; }

    /// <summary>
    /// Creates the exception with a default message. The status code is still
    /// set to 429 so the type and its status code always agree.
    /// </summary>
    public FirecrawlRateLimitException()
        : base(DefaultMessage, HttpStatusCode.TooManyRequests) { }

    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    public FirecrawlRateLimitException(string message)
        : base(message, HttpStatusCode.TooManyRequests) { }

    /// <summary>Creates the exception with the given message and cause.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="innerException">The underlying exception, if any.</param>
    public FirecrawlRateLimitException(string message, Exception? innerException)
        : base(message, innerException, HttpStatusCode.TooManyRequests) { }
}
40+
41+
/// <summary>
/// Thrown for HTTP 402 Payment Required responses (insufficient credits).
/// Every constructor reports <see cref="HttpStatusCode.PaymentRequired"/> as
/// the status code.
/// </summary>
[Serializable]
public class FirecrawlPaymentRequiredException : ApiException
{
    private const string DefaultMessage =
        "Firecrawl requires payment to complete the request (HTTP 402).";

    /// <summary>
    /// Creates the exception with a default message. The status code is still
    /// set to 402 so the type and its status code always agree.
    /// </summary>
    public FirecrawlPaymentRequiredException()
        : base(DefaultMessage, HttpStatusCode.PaymentRequired) { }

    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    public FirecrawlPaymentRequiredException(string message)
        : base(message, HttpStatusCode.PaymentRequired) { }

    /// <summary>Creates the exception with the given message and cause.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="innerException">The underlying exception, if any.</param>
    public FirecrawlPaymentRequiredException(string message, Exception? innerException)
        : base(message, innerException, HttpStatusCode.PaymentRequired) { }
}
55+
56+
/// <summary>
/// Thrown for HTTP 5xx Server Error responses.
/// </summary>
[Serializable]
public class FirecrawlServerException : ApiException
{
    private const string DefaultMessage =
        "Firecrawl reported a server error (HTTP 5xx).";

    /// <summary>
    /// Creates the exception with a default message. With no specific status
    /// available, <see cref="HttpStatusCode.InternalServerError"/> is used so
    /// the reported status code stays in the 5xx range.
    /// </summary>
    public FirecrawlServerException()
        : base(DefaultMessage, HttpStatusCode.InternalServerError) { }

    /// <summary>Creates the exception with the given message and status.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="statusCode">The 5xx status code returned by the server.</param>
    public FirecrawlServerException(string message, HttpStatusCode statusCode)
        : base(message, statusCode) { }

    /// <summary>Creates the exception with the given message, cause, and status.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="innerException">The underlying exception, if any.</param>
    /// <param name="statusCode">The 5xx status code returned by the server.</param>
    public FirecrawlServerException(string message, Exception? innerException, HttpStatusCode statusCode)
        : base(message, innerException, statusCode) { }
}

0 commit comments

Comments
 (0)