hookdeck · leggetter · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/skills/scrapfly-webhooks/SKILL.md b/skills/scrapfly-webhooks/SKILL.md
@@ -23,6 +23,10 @@ metadata:
 - How do I handle Crawler API webhook events (`crawler_started`, `crawler_finished`, ...)?
 - Why is my Scrapfly webhook signature verification failing?
 
+## Prerequisites
+
+- **A paid Scrapfly plan.** Webhooks are not available on the FREE plan — its webhook queue size is 0, so no deliveries are ever dispatched even after configuration. The dashboard hides the webhook UI on the free tier. Any paid tier enables delivery. See [`references/setup.md`](references/setup.md) for the full plan-detection checklist.
+
 ## How Scrapfly Webhooks Work
 
 Scrapfly uses HMAC-SHA256 with **uppercase hex** encoding over the **raw request body**. There is no SDK for webhook verification — implementations follow Scrapfly's documented algorithm.
@@ -35,6 +39,8 @@ Key facts:
 - **No timestamp / replay window**: Scrapfly does not include a timestamp header; treat the signature as authenticity-only.
 - **Secret**: Use the value from the Scrapfly dashboard exactly as shown. Do not trim or base64-decode it.
 - **Routing**: Use `X-Scrapfly-Webhook-Resource-Type` (`scrape`, `extraction`, `screenshot`) to dispatch when one endpoint serves multiple products. Crawler events also carry `X-Scrapfly-Crawl-Event-Name` and an `event` field in the body.
+- **Content-Type is whatever you configured in the dashboard, not what the body actually is.** Scrapfly's webhook config has a Content-Type dropdown (`application/json` or `application/msgpack`) and sends the chosen value on every delivery — but it doesn't change what's in the body for image deliveries. Screenshot API deliveries carry raw image bytes (JPEG/PNG/WebP/GIF) regardless of the configured Content-Type, so the header is unreliable for that resource type. **Dispatch on `X-Scrapfly-Webhook-Resource-Type`, not on `Content-Type`, and parse only after dispatching.** HMAC verification works fine over any body — only the parse step needs to know whether it's a JSON, msgpack, or binary body. This skill's example handlers assume the dashboard is configured to `application/json`; if you pick msgpack, swap `JSON.parse` / `json.loads` for a msgpack decoder.
+- **Hookdeck Event Gateway alternative**: If you're already routing webhooks through Hookdeck (the [hookdeck-event-gateway](https://github.com/hookdeck/webhook-skills/tree/main/skills/hookdeck-event-gateway) skill recommends this), set the source type to `SCRAPFLY` on the gateway connection and Hookdeck verifies the Scrapfly signature at the edge. Your handler then only needs to verify Hookdeck's signature, not Scrapfly's directly.
 
 ## Essential Code (USE THIS)
 
@@ -87,23 +93,33 @@ app.post('/webhooks/scrapfly',
       return res.status(401).send('Invalid signature');
     }
 
-    // Parse only after verifying
-    const payload = JSON.parse(req.body.toString());
-
     console.log(`Scrapfly ${resourceType} webhook (job ${jobId}, id ${webhookId})`);
 
-    // Route by resource type for scrape / extraction / screenshot APIs
+    // CRITICAL: dispatch BEFORE JSON.parse — Screenshot API deliveries carry
+    // raw image bytes (JPEG/PNG/WebP/GIF) regardless of the Content-Type you
+    // configured in the Scrapfly dashboard. Content-Type is whatever you
+    // picked (application/json by default; application/msgpack is also an
+    // option). JSON.parse on a binary body throws after the signature
+    // has already been verified.
+    if (resourceType === 'screenshot') {
+      console.log(`Screenshot received: ${req.body.length} bytes (binary)`);
+      // req.body is the raw image. Persist it to storage and return 200.
+      return res.status(200).send('OK');
+    }
+
+    // Remaining resource types deliver JSON payloads.
+    const payload = JSON.parse(req.body.toString());
+
     switch (resourceType) {
       case 'scrape':
         // Scrape API places the fetched URL at result.url; the webhook overlay's
         // context only carries `webhook` and `job` sub-objects.
         console.log('Scrape result:', payload.result?.status_code, payload.result?.url);
         break;
       case 'extraction':
-        console.log('Extraction result:', payload.result?.data);
-        break;
-      case 'screenshot':
-        console.log('Screenshot result:', payload.result?.screenshot_url);
+        // Extraction body shape: { content_type, data: {...}, context: {...} }.
+        // Extracted fields live at payload.data, NOT payload.result.data.
+        console.log('Extraction result:', payload.content_type, payload.data);
         break;
       default:
         // Crawler API uses event names in the body

diff --git a/skills/scrapfly-webhooks/examples/express/src/index.js b/skills/scrapfly-webhooks/examples/express/src/index.js
@@ -59,6 +59,30 @@ app.post('/webhooks/scrapfly',
       return res.status(401).send('Invalid signature');
     }
 
+    console.log(`Scrapfly webhook (id=${webhookId} resource=${resourceType} job=${jobId})`);
+
+    // Dispatch BEFORE JSON parsing — screenshot deliveries are raw image
+    // bytes (JPEG / PNG / WebP / GIF) regardless of the Content-Type you
+    // configured in the Scrapfly dashboard (json or msgpack — both apply
+    // verbatim to scrape/extraction deliveries, but screenshot bodies are
+    // binary either way). Trying to JSON.parse a binary body throws after
+    // verification has already succeeded.
+    if (resourceType === 'screenshot') {
+      console.log(`Screenshot received: ${req.body.length} bytes (binary, ${jobId})`);
+      // req.body is a Buffer of the rendered image. Persist it to storage
+      // (S3, disk, ImgIX, etc.) keyed on jobId / webhookId. Do not call
+      // JSON.parse here. The synchronous Screenshot API response exposes
+      // metadata in headers like X-Scrapfly-Screenshot-Url, but the
+      // webhook delivery only carries the image bytes — fetch the metadata
+      // from the synchronous response or the Scrapfly dashboard if needed.
+      return res.status(200).send('OK');
+    }
+
+    // Remaining resource types are serialised per your dashboard's
+    // Content-Type setting. This handler assumes `application/json`; if
+    // you configured `application/msgpack`, swap JSON.parse for a msgpack
+    // decoder (e.g. `@msgpack/msgpack`).
+
     let payload;
     try {
       payload = JSON.parse(req.body.toString('utf8'));
@@ -67,9 +91,7 @@ app.post('/webhooks/scrapfly',
       return res.status(400).send('Invalid JSON payload');
     }
 
-    console.log(`Scrapfly webhook (id=${webhookId} resource=${resourceType} job=${jobId})`);
-
-    // Route by resource type for the Scrape / Extraction / Screenshot APIs.
+    // Route by resource type for the Scrape / Extraction APIs.
     switch (resourceType) {
       case 'scrape':
         // Scrape API places the fetched URL at result.url (see scrapfly.io/docs/scrape-api/getting-started).
@@ -82,15 +104,16 @@ app.post('/webhooks/scrapfly',
         break;
 
       case 'extraction':
-        console.log('Extraction result:', payload?.result?.data);
+        // Extraction body shape (from a real capture):
+        // { content_type, data: { ... }, context: { webhook, job } }
+        // The extracted fields live at payload.data, NOT payload.result.data.
+        console.log('Extraction result:', {
+          content_type: payload?.content_type,
+          data: payload?.data,
+        });
         // TODO: Save structured data, trigger downstream enrichment
         break;
 
-      case 'screenshot':
-        console.log('Screenshot result URL:', payload?.result?.screenshot_url);
-        // TODO: Store image, generate thumbnail, notify user
-        break;
-
       default: {
         // Crawler API uses lifecycle events in the body and an
         // X-Scrapfly-Crawl-Event-Name header.

diff --git a/skills/scrapfly-webhooks/examples/express/test/webhook.test.js b/skills/scrapfly-webhooks/examples/express/test/webhook.test.js
@@ -121,8 +121,13 @@ describe('Scrapfly Webhook Endpoint', () => {
       expect(res.text).toBe('OK');
     });
 
-    it('returns 200 for a valid extraction webhook', async () => {
-      const body = JSON.stringify({ result: { data: { title: 'Test' } } });
+    it('returns 200 for a valid extraction webhook (data lives at payload.data, not payload.result.data)', async () => {
+      // Real extraction body shape from a live capture:
+      //   { content_type, data: {...}, context: {...} }
+      const body = JSON.stringify({
+        content_type: 'application/json',
+        data: { price: '$19.99', product_name: 'Widget', stock: 'In stock' },
+      });
       const sig = generateScrapflySignature(Buffer.from(body), secret);
 
       const res = await request(app)
@@ -135,22 +140,50 @@ describe('Scrapfly Webhook Endpoint', () => {
       expect(res.status).toBe(200);
     });
 
-    it('returns 200 for a valid screenshot webhook', async () => {
-      const body = JSON.stringify({
-        result: { screenshot_url: 'https://scrapfly.io/screenshots/abc.png' },
-      });
-      const sig = generateScrapflySignature(Buffer.from(body), secret);
+    it('returns 200 for a screenshot webhook with a BINARY body (not JSON)', async () => {
+      // Scrapfly Screenshot deliveries carry raw image bytes (JPEG/PNG/WebP/GIF),
+      // NOT JSON — even though the Content-Type header still carries whatever
+      // was configured in the Scrapfly dashboard (e.g. `application/json`).
+      // The handler must dispatch on the resource type header BEFORE JSON.parse,
+      // otherwise the parse blows up after signature verification has already
+      // succeeded.
+      //
+      // Test note: we use `application/octet-stream` here so supertest doesn't
+      // auto-JSON-stringify the Buffer. The handler ignores Content-Type and
+      // reads the raw body via express.raw({ type: '*/*' }), so this is
+      // equivalent to the real on-wire scenario as far as the handler's
+      // dispatch logic is concerned.
+      //
+      // First 12 bytes of a JPEG (SOI marker + start of the JFIF APP0 segment).
+      // Not a decodable image — we only verify the handler tolerates binary.
+      const binaryBody = Buffer.from([
+        0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+      ]);
+      const sig = generateScrapflySignature(binaryBody, secret);
 
       const res = await request(app)
         .post('/webhooks/scrapfly')
-        .set('Content-Type', 'application/json')
+        .set('Content-Type', 'application/octet-stream')
         .set('X-Scrapfly-Webhook-Signature', sig)
         .set('X-Scrapfly-Webhook-Resource-Type', 'screenshot')
-        .send(body);
+        .send(binaryBody);
 
       expect(res.status).toBe(200);
     });
 
+    it('rejects a screenshot delivery with an invalid signature', async () => {
+      const binaryBody = Buffer.from([0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10]);
+
+      const res = await request(app)
+        .post('/webhooks/scrapfly')
+        .set('Content-Type', 'application/octet-stream')
+        .set('X-Scrapfly-Webhook-Signature', 'AABBCCDD')
+        .set('X-Scrapfly-Webhook-Resource-Type', 'screenshot')
+        .send(binaryBody);
+
+      expect(res.status).toBe(401);
+    });
+
     const crawlerEvents = [
       'crawler_started',
       'crawler_url_visited',

diff --git a/skills/scrapfly-webhooks/examples/fastapi/main.py b/skills/scrapfly-webhooks/examples/fastapi/main.py
@@ -71,31 +71,57 @@ async def scrapfly_webhook(
         print("ERROR: Scrapfly webhook signature verification failed")
         raise HTTPException(status_code=401, detail="Invalid signature")
 
-    try:
-        payload = json.loads(raw_body.decode("utf-8"))
-    except json.JSONDecodeError as exc:
-        print(f"ERROR: Failed to parse Scrapfly webhook payload: {exc}")
-        raise HTTPException(status_code=400, detail="Invalid JSON payload")
-
     print(
         f"Scrapfly webhook (id={x_scrapfly_webhook_id} "
         f"resource={x_scrapfly_webhook_resource_type} job={x_scrapfly_webhook_job_id})"
     )
 
     resource_type = x_scrapfly_webhook_resource_type
 
+    # Dispatch BEFORE JSON parsing — screenshot deliveries are raw image
+    # bytes (JPEG / PNG / WebP / GIF) regardless of the Content-Type you
+    # configured in the Scrapfly dashboard (json or msgpack — both apply
+    # verbatim to scrape/extraction deliveries, but screenshot bodies are
+    # binary either way). Decoding / json.loads-ing a binary body raises
+    # UnicodeDecodeError or JSONDecodeError after the signature has been verified.
+    if resource_type == "screenshot":
+        print(
+            f"Screenshot received: {len(raw_body)} bytes "
+            f"(binary, {x_scrapfly_webhook_job_id})"
+        )
+        # raw_body is the rendered image bytes. Persist it to storage
+        # (S3, disk, etc.) keyed on job_id / webhook_id. Do not call
+        # json.loads here. The synchronous Screenshot API response
+        # exposes metadata in headers like X-Scrapfly-Screenshot-Url;
+        # the webhook delivery only carries the image bytes.
+        return {"received": True}
+
+    # Remaining resource types are serialised per your dashboard's
+    # Content-Type setting. This handler assumes `application/json`; if
+    # you configured `application/msgpack`, swap json.loads for a msgpack
+    # decoder (e.g. the `msgpack` package).
+
+    try:
+        payload = json.loads(raw_body.decode("utf-8"))
+    except json.JSONDecodeError as exc:
+        print(f"ERROR: Failed to parse Scrapfly webhook payload: {exc}")
+        raise HTTPException(status_code=400, detail="Invalid JSON payload")
+
     if resource_type == "scrape":
         # Scrape API places the fetched URL at result.url. The webhook overlay's
         # payload["context"] only carries `webhook` and `job` sub-objects.
         result = payload.get("result", {})
         print(f"Scrape result: url={result.get('url')} status={result.get('status_code')}")
         # TODO: Persist HTML / extracted fields, enqueue parsing
     elif resource_type == "extraction":
-        print(f"Extraction result: {payload.get('result', {}).get('data')}")
+        # Extraction body shape (from a real capture):
+        #   { content_type, data: { ... }, context: { webhook, job } }
+        # The extracted fields live at payload["data"], NOT payload["result"]["data"].
+        print(
+            f"Extraction result: content_type={payload.get('content_type')} "
+            f"data={payload.get('data')}"
+        )
         # TODO: Save structured data, trigger enrichment
-    elif resource_type == "screenshot":
-        print(f"Screenshot URL: {payload.get('result', {}).get('screenshot_url')}")
-        # TODO: Store image, generate thumbnail
     else:
         # Crawler API uses lifecycle events in the body.
         event = x_scrapfly_crawl_event_name or payload.get("event")

diff --git a/skills/scrapfly-webhooks/examples/fastapi/test_webhook.py b/skills/scrapfly-webhooks/examples/fastapi/test_webhook.py
@@ -105,12 +105,19 @@ def test_valid_scrape_webhook(self, client, secret):
         assert response.status_code == 200
         assert response.json() == {"received": True}
 
-    @pytest.mark.parametrize(
-        "resource_type",
-        ["scrape", "extraction", "screenshot"],
-    )
-    def test_resource_types(self, client, secret, resource_type):
-        body = json.dumps({"result": {"status_code": 200}}).encode("utf-8")
+    def test_extraction_payload_data_at_top_level(self, client, secret):
+        """Real extraction body shape: { content_type, data: {...} } — data lives
+        at payload.data, NOT payload.result.data."""
+        body = json.dumps(
+            {
+                "content_type": "application/json",
+                "data": {
+                    "price": "$19.99",
+                    "product_name": "Widget",
+                    "stock": "In stock",
+                },
+            }
+        ).encode("utf-8")
         sig = generate_scrapfly_signature(body, secret)
 
         response = client.post(
@@ -119,11 +126,48 @@ def test_resource_types(self, client, secret, resource_type):
             headers={
                 "Content-Type": "application/json",
                 "X-Scrapfly-Webhook-Signature": sig,
-                "X-Scrapfly-Webhook-Resource-Type": resource_type,
+                "X-Scrapfly-Webhook-Resource-Type": "extraction",
+            },
+        )
+        assert response.status_code == 200
+
+    def test_screenshot_binary_body(self, client, secret):
+        """Scrapfly Screenshot deliveries carry raw image bytes (JPEG / PNG /
+        WebP / GIF), NOT JSON — even when the dashboard-configured Content-Type
+        says application/json. The handler must dispatch on the resource
+        type header BEFORE json.loads to avoid a decode/parse error after
+        signature verification has already succeeded."""
+        binary_body = bytes(
+            [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46, 0x00, 0x01]
+        )
+        sig = generate_scrapfly_signature(binary_body, secret)
+
+        response = client.post(
+            "/webhooks/scrapfly",
+            content=binary_body,
+            headers={
+                # Header carries the dashboard-configured type (JSON) but the body is binary.
+                "Content-Type": "application/json",
+                "X-Scrapfly-Webhook-Signature": sig,
+                "X-Scrapfly-Webhook-Resource-Type": "screenshot",
             },
         )
         assert response.status_code == 200
 
+    def test_screenshot_rejects_invalid_signature(self, client, secret):
+        binary_body = bytes([0xFF, 0xD8, 0xFF, 0xE0])
+
+        response = client.post(
+            "/webhooks/scrapfly",
+            content=binary_body,
+            headers={
+                "Content-Type": "application/json",
+                "X-Scrapfly-Webhook-Signature": "AABBCCDD",
+                "X-Scrapfly-Webhook-Resource-Type": "screenshot",
+            },
+        )
+        assert response.status_code == 401
+
     @pytest.mark.parametrize(
         "event",
         [