
Commit e999c72

Authored by github-actions[bot], Marfuen, and claude
feat(vendors): refine inherent risk score after research lands posture data (#2760)
The onboarding extraction pass scores vendor inherent risk conservatively from the user's Q&A signals only — it has no posture data, so well-attested vendors get the generic (possible × moderate) default. This adds a follow-up scoring pass that runs once research-vendor has populated GlobalVendors with certifications, subprocessors, type, and trust-page URLs. The per-org Vendor row then gets re-scored with that evidence on hand.

Components:

- New trigger task `score-vendor-risk` (apps/app/src/trigger/tasks/scrape/score-vendor-risk.ts). Idempotent — fetches the org's Vendor row plus the GlobalVendors row by website, calls gpt-4.1-mini with a calibrated prompt that anchors each Likelihood / Impact bucket to attestation criteria (SOC 2 Type II, ISO 27001, ISO 42001, HIPAA, PCI DSS, FedRAMP, etc.), and updates the four inherent + residual fields. Bails early if the vendor has no website OR if GlobalVendors hasn't been populated yet.
- research-vendor accepts an optional `scoreContext: { vendorId, organizationId }` payload field. After saving GlobalVendors (whether new or existing), it enqueues score-vendor-risk for the per-org Vendor row. The existing-vendor short-circuit also kicks off scoring so customers don't get stuck with the extraction default when GlobalVendors was already populated by a previous research run.
- triggerVendorResearch (orchestrator) passes `scoreContext` so the bulk onboarding fan-out chains research → scoring per vendor.

Net effect: vendors come out of onboarding with a posture-grounded score (e.g. GitHub with SOC 2 + ISO 27001 should land at unlikely × moderate ≈ 3/10) instead of the conservative default. No prompt contains a hardcoded vendor name list — calibration runs entirely off the researched attributes.

Co-authored-by: Mariano <marfuen98@gmail.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8a1c46f commit e999c72
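The commit message quotes "unlikely × moderate ≈ 3/10", which implies some collapse of Likelihood × Impact buckets into a single score. As a rough illustration only: the repo's actual mapping is not part of this diff, so the numeric bucket values and the ceil-based rescaling below are assumptions.

```typescript
// Hypothetical 5 × 5 risk matrix collapsed to an N/10 figure. The enum
// orderings mirror the buckets named in the diff; the numbers are assumed.
const LIKELIHOOD = {
  very_unlikely: 1,
  unlikely: 2,
  possible: 3,
  likely: 4,
  very_likely: 5,
} as const;

const IMPACT = {
  insignificant: 1,
  minor: 2,
  moderate: 3,
  major: 4,
  severe: 5,
} as const;

function riskOutOf10(
  likelihood: keyof typeof LIKELIHOOD,
  impact: keyof typeof IMPACT,
): number {
  // Product tops out at 25; rescale to a 1-10 band, rounding up so the
  // floor stays at 1 rather than 0.
  return Math.ceil((LIKELIHOOD[likelihood] * IMPACT[impact] * 10) / 25);
}

// Under these assumptions, unlikely × moderate → 3 while the extraction
// default possible × moderate → 4, matching the drop the commit describes.
```

Treat this as a reading aid for the "≈ 3/10" claim, not as the product's scoring formula.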

3 files changed

Lines changed: 215 additions & 2 deletions


apps/app/src/trigger/tasks/onboarding/onboard-organization-helpers.ts

Lines changed: 8 additions & 0 deletions
@@ -954,8 +954,16 @@ export async function triggerVendorResearch(vendors: any[]): Promise<void> {
   }
 
   try {
+    // `scoreContext` chains the research run into score-vendor-risk
+    // when GlobalVendors finishes saving, so the per-org Vendor row
+    // gets a posture-grounded score instead of the conservative
+    // (possible × moderate) default the extraction pass set.
     const handle = await tasks.trigger<typeof researchVendor>('research-vendor', {
       website,
+      scoreContext:
+        vendor.id && vendor.organizationId
+          ? { vendorId: vendor.id, organizationId: vendor.organizationId }
+          : undefined,
     });
     logger.info(`Triggered research for vendor ${vendor.name} with handle ${handle.id}`);
   } catch (error) {
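The conditional payload in this hunk can be isolated as a tiny pure function. This sketch uses a hypothetical `buildScoreContext` helper (not in the diff) to show why research runs lacking either identifier skip the scoring chain entirely:

```typescript
// Hypothetical standalone version of the scoreContext guard: the field
// is only attached when BOTH ids are present, so a research run with no
// per-org Vendor row never enqueues score-vendor-risk.
type ScoreContext = { vendorId: string; organizationId: string };

function buildScoreContext(vendor: {
  id?: string;
  organizationId?: string;
}): ScoreContext | undefined {
  return vendor.id && vendor.organizationId
    ? { vendorId: vendor.id, organizationId: vendor.organizationId }
    : undefined;
}
```

Passing `undefined` through (rather than a partial object) keeps the downstream zod `.optional()` schema happy without any extra branching.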

apps/app/src/trigger/tasks/scrape/research.ts

Lines changed: 42 additions & 2 deletions
@@ -1,7 +1,8 @@
 import { researchJobCore } from '@/trigger/lib/research';
 import { db } from '@db/server';
-import { queue, schemaTask } from '@trigger.dev/sdk';
+import { logger, queue, schemaTask, tasks } from '@trigger.dev/sdk';
 import { z } from 'zod';
+import type { scoreVendorRisk } from './score-vendor-risk';
 
 // Each research run can hold a slot for minutes (firecrawl scrape + LLM
 // extraction). Without a cap, a 10-vendor onboarding can hog the whole
@@ -49,9 +50,37 @@ export const researchVendor = schemaTask({
   queue: researchVendorQueue,
   schema: z.object({
     website: z.string().url(),
+    /**
+     * Optional: when set, this task triggers `score-vendor-risk` once the
+     * GlobalVendors row is saved (or already exists), so the per-org
+     * Vendor row gets a refined inherent/residual score grounded in the
+     * researched posture data instead of the conservative default that
+     * the onboarding extraction pass set.
+     */
+    scoreContext: z
+      .object({
+        vendorId: z.string(),
+        organizationId: z.string(),
+      })
+      .optional(),
   }),
   maxDuration: 1000 * 60 * 10, // 10 minutes total task duration
   run: async (payload, { ctx }) => {
+    const queueScoring = async () => {
+      if (!payload.scoreContext) return;
+      try {
+        await tasks.trigger<typeof scoreVendorRisk>('score-vendor-risk', {
+          vendorId: payload.scoreContext.vendorId,
+          organizationId: payload.scoreContext.organizationId,
+        });
+      } catch (err) {
+        logger.error('[research-vendor] failed to enqueue score-vendor-risk', {
+          err,
+          ...payload.scoreContext,
+        });
+      }
+    };
+
     // Check if vendor already exists
     const existingVendor = await db.globalVendors.findFirst({
       where: {
@@ -68,13 +97,17 @@ export const researchVendor = schemaTask({
     });
 
     if (existingVendor) {
+      // Even when GlobalVendors is already populated, the per-org Vendor
+      // row hasn't been scored against that data yet — chain into scoring
+      // so the customer doesn't get stuck with the extraction default.
+      await queueScoring();
       return {
         message: 'Vendor already exists in database',
         existingVendor,
       };
     }
 
-    return researchJobCore({
+    const result = await researchJobCore({
       website: payload.website,
       prompt: "You're a cyber security researcher, researching a vendor.",
       schema: {
@@ -137,5 +170,12 @@ export const researchVendor = schemaTask({
         });
       },
     });
+
+    // Now that GlobalVendors is fresh, kick off scoring for the per-org
+    // Vendor row. Fire-and-forget — the scoring task is idempotent and
+    // failures shouldn't fail the research run.
+    await queueScoring();
+
+    return result;
   },
 });
apps/app/src/trigger/tasks/scrape/score-vendor-risk.ts (new file)

Lines changed: 165 additions & 0 deletions

import { openai } from '@ai-sdk/openai';
import { Impact, Likelihood } from '@db';
import { db } from '@db/server';
import { logger, schemaTask } from '@trigger.dev/sdk';
import { generateObject } from 'ai';
import { z } from 'zod';

const ScoreSchema = z.object({
  inherent_probability: z.nativeEnum(Likelihood),
  inherent_impact: z.nativeEnum(Impact),
  residual_probability: z.nativeEnum(Likelihood),
  residual_impact: z.nativeEnum(Impact),
  rationale: z.string(),
});

/**
 * Refine a vendor's inherent + residual risk scores using the data the
 * `research-vendor` task gathered into GlobalVendors. The onboarding
 * extraction pass scores conservatively from the user's Q&A signals only
 * (no posture data), so it lands a lot of well-attested vendors at the
 * generic middle. Once research has collected certifications, type, and
 * description, we can ground the score in actual evidence and refine.
 *
 * Idempotent: safe to re-run for the same (vendorId, organizationId)
 * — overwrites the four risk fields with the latest LLM verdict.
 */
export const scoreVendorRisk = schemaTask({
  id: 'score-vendor-risk',
  schema: z.object({
    vendorId: z.string(),
    organizationId: z.string(),
  }),
  maxDuration: 60,
  retry: { maxAttempts: 2 },
  run: async (payload) => {
    const { vendorId, organizationId } = payload;

    const vendor = await db.vendor.findFirst({
      where: { id: vendorId, organizationId },
      select: {
        id: true,
        name: true,
        description: true,
        category: true,
        website: true,
      },
    });
    if (!vendor) {
      logger.warn('[score-vendor-risk] vendor not found, skipping', payload);
      return { skipped: 'vendor-not-found' as const };
    }
    if (!vendor.website) {
      logger.info('[score-vendor-risk] vendor has no website, skipping', payload);
      return { skipped: 'no-website' as const };
    }

    const globalVendor = await db.globalVendors.findFirst({
      where: { website: vendor.website },
      select: {
        company_description: true,
        security_certifications: true,
        subprocessors: true,
        type_of_company: true,
        security_page_url: true,
        trust_page_url: true,
      },
    });

    // Without GlobalVendors data we have nothing more than the extraction
    // pass already had — re-running the LLM would just burn tokens to
    // produce the same answer. Bail.
    if (!globalVendor) {
      logger.info(
        '[score-vendor-risk] no GlobalVendors row yet — research probably still in flight; skipping',
        payload,
      );
      return { skipped: 'no-research-data' as const };
    }

    const certifications = globalVendor.security_certifications ?? [];
    const subprocessors = globalVendor.subprocessors ?? [];
    const description = globalVendor.company_description ?? vendor.description ?? '';
    const typeOfCompany = globalVendor.type_of_company ?? '';
    const hasTrustOrSecurityPage = Boolean(
      globalVendor.security_page_url ?? globalVendor.trust_page_url,
    );

    const promptBlock = [
      `Vendor: ${vendor.name}`,
      `Category (customer-set): ${vendor.category}`,
      typeOfCompany ? `Type (researched): ${typeOfCompany}` : null,
      description ? `Description: ${description}` : null,
      certifications.length > 0
        ? `Certifications / attestations (researched): ${certifications.join(', ')}`
        : 'Certifications / attestations: none reported',
      subprocessors.length > 0
        ? `Subprocessors (researched): ${subprocessors.join(', ')}`
        : null,
      hasTrustOrSecurityPage
        ? 'Vendor publishes a trust portal or security overview page (transparency signal).'
        : 'No public trust portal / security overview page found.',
    ]
      .filter(Boolean)
      .join('\n');

    const { object } = await generateObject({
      model: openai('gpt-4.1-mini'),
      schema: ScoreSchema,
      system: [
        'You are scoring inherent vendor risk for a customer that has just listed this vendor as part of their compliance program. Your job is to assign Likelihood and Impact buckets based on the researched data below.',
        '',
        'inherent_probability — probability of a meaningful security or availability incident at the vendor over a typical 12-month window:',
        '- very_unlikely: hyperscaler-tier vendor with multiple top-tier attestations (e.g. SOC 2 Type II + ISO 27001, OR FedRAMP, OR multiple of those) AND clear public transparency.',
        '- unlikely: established vendor with at least one strong third-party attestation (SOC 2 Type II, ISO 27001, ISO 42001, HIPAA, PCI DSS Level 1, or equivalent) AND a public security/trust page.',
        '- possible: vendor without independent attestation, OR with minor incidents in the last few years, OR limited public posture data. This is the DEFAULT.',
        '- likely: vendor with public knowledge of significant security incidents in the last 24 months, OR explicitly no transparency despite handling sensitive data.',
        '- very_likely: vendor with chronic / repeated security issues, or essentially unknown posture combined with sensitive-data exposure.',
        '',
        'inherent_impact — business impact if the vendor is compromised, assuming average customer usage given the vendor\'s category:',
        '- insignificant: no PII / no business data / purely cosmetic or public utility.',
        '- minor: anonymous metadata only, non-business utilities.',
        '- moderate: PII or internal business data, but NOT payments / health / source / auth. DEFAULT for typical SaaS.',
        '- major: vendor handles authentication, source code, payments, PHI, or production infrastructure that the customer depends on.',
        '- severe: vendor IS the customer\'s production runtime / cloud / single source of truth — compromise means the customer is offline or fundamentally exposed.',
        '',
        'Scoring rules:',
        '1. Read the certification list. ANY of {SOC 2 Type II, ISO 27001, ISO 42001, HIPAA, PCI DSS, FedRAMP, C5, CSA STAR Level 2+} counts as a strong attestation. Multiple of those, especially combined with FedRAMP / hyperscaler-tier scale, drop probability to very_unlikely. A single strong attestation drops probability to unlikely.',
        '2. If the certification list is empty, default probability is possible (NOT very_likely). "We don\'t know" is not "definitely bad".',
        '3. Use the type and description to set impact. Source-code, payments, auth, infrastructure providers → major. Generic CRM / analytics → moderate. Marketing widgets → minor.',
        '4. Residual: default to inherent. Only LOWER residual when the customer has applied their OWN compensating controls (which we don\'t have visibility into here, so usually leave equal).',
        '',
        'Be specific in the rationale — name a certification, name an attribute. Don\'t recite the rubric.',
      ].join('\n'),
      prompt: promptBlock,
    });

    await db.vendor.update({
      where: { id: vendorId },
      data: {
        inherentProbability: object.inherent_probability,
        inherentImpact: object.inherent_impact,
        residualProbability: object.residual_probability,
        residualImpact: object.residual_impact,
      },
    });

    logger.info('[score-vendor-risk] scored vendor', {
      vendorId,
      organizationId,
      vendorName: vendor.name,
      inherent: `${object.inherent_probability} × ${object.inherent_impact}`,
      residual: `${object.residual_probability} × ${object.residual_impact}`,
      rationale: object.rationale,
      certifications,
    });

    return {
      inherentProbability: object.inherent_probability,
      inherentImpact: object.inherent_impact,
      residualProbability: object.residual_probability,
      residualImpact: object.residual_impact,
      rationale: object.rationale,
    };
  },
});
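The promptBlock assembly in score-vendor-risk.ts expresses optional lines as `string | null` entries and drops the nulls with `.filter(Boolean)`, so the LLM prompt only carries fields research actually populated. A standalone demonstration of the pattern; `buildPromptBlock` and its vendor shape are illustrative, not part of the codebase:

```typescript
// Hypothetical reduced version of the promptBlock pattern: nullable
// entries for optional fields, a fixed fallback line for empty
// certification lists, nulls filtered out before joining.
function buildPromptBlock(vendor: {
  name: string;
  certifications: string[];
  typeOfCompany?: string;
}): string {
  return [
    `Vendor: ${vendor.name}`,
    vendor.typeOfCompany ? `Type (researched): ${vendor.typeOfCompany}` : null,
    vendor.certifications.length > 0
      ? `Certifications / attestations (researched): ${vendor.certifications.join(', ')}`
      : 'Certifications / attestations: none reported',
  ]
    .filter((line): line is string => Boolean(line))
    .join('\n');
}
```

The explicit "none reported" fallback matters: the scoring prompt's rule 2 hinges on the model seeing an affirmative "no attestations found" rather than a silently missing line.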
