|
| 1 | +import { openai } from '@ai-sdk/openai'; |
| 2 | +import { Impact, Likelihood } from '@db'; |
| 3 | +import { db } from '@db/server'; |
| 4 | +import { logger, schemaTask } from '@trigger.dev/sdk'; |
| 5 | +import { generateObject } from 'ai'; |
| 6 | +import { z } from 'zod'; |
| 7 | + |
// Each call builds a new enum schema, so every field below keeps its own
// schema instance exactly as it would with the validators written inline.
const likelihoodBucket = () => z.nativeEnum(Likelihood);
const impactBucket = () => z.nativeEnum(Impact);

/**
 * Structured verdict requested from the model by `scoreVendorRisk`: a
 * Likelihood and an Impact bucket for the inherent score, the same pair for
 * the residual score, and a free-text rationale.
 */
const ScoreSchema = z.object({
  inherent_probability: likelihoodBucket(),
  inherent_impact: impactBucket(),
  residual_probability: likelihoodBucket(),
  residual_impact: impactBucket(),
  rationale: z.string(),
});
| 15 | + |
/**
 * System prompt for the scoring call: the Likelihood / Impact rubric plus the
 * rules for mapping researched evidence onto those buckets. Built once at
 * module load rather than on every task run.
 */
const SCORE_VENDOR_RISK_SYSTEM_PROMPT = [
  'You are scoring inherent vendor risk for a customer that has just listed this vendor as part of their compliance program. Your job is to assign Likelihood and Impact buckets based on the researched data below.',
  '',
  'inherent_probability — probability of a meaningful security or availability incident at the vendor over a typical 12-month window:',
  '- very_unlikely: hyperscaler-tier vendor with multiple top-tier attestations (e.g. SOC 2 Type II + ISO 27001, OR FedRAMP, OR multiple of those) AND clear public transparency.',
  '- unlikely: established vendor with at least one strong third-party attestation (SOC 2 Type II, ISO 27001, ISO 42001, HIPAA, PCI DSS Level 1, or equivalent) AND a public security/trust page.',
  '- possible: vendor without independent attestation, OR with minor incidents in the last few years, OR limited public posture data. This is the DEFAULT.',
  '- likely: vendor with public knowledge of significant security incidents in the last 24 months, OR explicitly no transparency despite handling sensitive data.',
  '- very_likely: vendor with chronic / repeated security issues, or essentially unknown posture combined with sensitive-data exposure.',
  '',
  'inherent_impact — business impact if the vendor is compromised, assuming average customer usage given the vendor\'s category:',
  '- insignificant: no PII / no business data / purely cosmetic or public utility.',
  '- minor: anonymous metadata only, non-business utilities.',
  '- moderate: PII or internal business data, but NOT payments / health / source / auth. DEFAULT for typical SaaS.',
  '- major: vendor handles authentication, source code, payments, PHI, or production infrastructure that the customer depends on.',
  '- severe: vendor IS the customer\'s production runtime / cloud / single source of truth — compromise means the customer is offline or fundamentally exposed.',
  '',
  'Scoring rules:',
  '1. Read the certification list. ANY of {SOC 2 Type II, ISO 27001, ISO 42001, HIPAA, PCI DSS, FedRAMP, C5, CSA STAR Level 2+} counts as a strong attestation. Multiple of those, especially combined with FedRAMP / hyperscaler-tier scale, drop probability to very_unlikely. A single strong attestation drops probability to unlikely.',
  '2. If the certification list is empty, default probability is possible (NOT very_likely). "We don\'t know" is not "definitely bad".',
  '3. Use the type and description to set impact. Source-code, payments, auth, infrastructure providers → major. Generic CRM / analytics → moderate. Marketing widgets → minor.',
  '4. Residual: default to inherent. Only LOWER residual when the customer has applied their OWN compensating controls (which we don\'t have visibility into here, so usually leave equal).',
  '',
  'Be specific in the rationale — name a certification, name an attribute. Don\'t recite the rubric.',
].join('\n');

/**
 * Refine a vendor's inherent + residual risk scores using the data the
 * `research-vendor` task gathered into GlobalVendors. The onboarding
 * extraction pass scores conservatively from the user's Q&A signals only
 * (no posture data), so it lands a lot of well-attested vendors at the
 * generic middle. Once research has collected certifications, type, and
 * description, we can ground the score in actual evidence and refine.
 *
 * Safe to re-run for the same (vendorId, organizationId): each run
 * overwrites the four risk fields with the latest LLM verdict.
 *
 * Payload: `{ vendorId, organizationId }`.
 *
 * Returns `{ skipped: 'vendor-not-found' | 'no-website' | 'no-research-data' }`
 * when there is nothing to score (nothing is written in that case), otherwise
 * the four persisted buckets plus the model's rationale. The rationale is
 * logged and returned but not written to the vendor row.
 */
export const scoreVendorRisk = schemaTask({
  id: 'score-vendor-risk',
  schema: z.object({
    vendorId: z.string(),
    organizationId: z.string(),
  }),
  maxDuration: 60,
  retry: { maxAttempts: 2 },
  run: async (payload) => {
    const { vendorId, organizationId } = payload;

    // Scope the lookup to the organization so a vendor id belonging to a
    // different organization is treated as "not found".
    const vendor = await db.vendor.findFirst({
      where: { id: vendorId, organizationId },
      select: {
        id: true,
        name: true,
        description: true,
        category: true,
        website: true,
      },
    });
    if (!vendor) {
      logger.warn('[score-vendor-risk] vendor not found, skipping', payload);
      return { skipped: 'vendor-not-found' as const };
    }
    if (!vendor.website) {
      logger.info('[score-vendor-risk] vendor has no website, skipping', payload);
      return { skipped: 'no-website' as const };
    }

    // The website is the only key linking a vendor to its researched row.
    const globalVendor = await db.globalVendors.findFirst({
      where: { website: vendor.website },
      select: {
        company_description: true,
        security_certifications: true,
        subprocessors: true,
        type_of_company: true,
        security_page_url: true,
        trust_page_url: true,
      },
    });

    // Without GlobalVendors data we have nothing more than the extraction
    // pass already had — re-running the LLM would just burn tokens to
    // produce the same answer. Bail.
    if (!globalVendor) {
      logger.info(
        '[score-vendor-risk] no GlobalVendors row yet — research probably still in flight; skipping',
        payload,
      );
      return { skipped: 'no-research-data' as const };
    }

    const certifications = globalVendor.security_certifications ?? [];
    const subprocessors = globalVendor.subprocessors ?? [];
    const typeOfCompany = globalVendor.type_of_company ?? '';

    // `||` rather than `??` on purpose: an empty string must fall through to
    // the next candidate. With `??`, an empty researched description would
    // hide the customer-entered one, and an empty security_page_url would
    // hide a populated trust_page_url.
    const description = globalVendor.company_description || vendor.description || '';
    const hasTrustOrSecurityPage = Boolean(
      globalVendor.security_page_url || globalVendor.trust_page_url,
    );

    // Optional lines evaluate to null and are dropped by filter(Boolean), so
    // the model never sees an empty label or the literal text "null".
    const promptBlock = [
      `Vendor: ${vendor.name}`,
      vendor.category ? `Category (customer-set): ${vendor.category}` : null,
      typeOfCompany ? `Type (researched): ${typeOfCompany}` : null,
      description ? `Description: ${description}` : null,
      certifications.length > 0
        ? `Certifications / attestations (researched): ${certifications.join(', ')}`
        : 'Certifications / attestations: none reported',
      subprocessors.length > 0
        ? `Subprocessors (researched): ${subprocessors.join(', ')}`
        : null,
      hasTrustOrSecurityPage
        ? 'Vendor publishes a trust portal or security overview page (transparency signal).'
        : 'No public trust portal / security overview page found.',
    ]
      .filter(Boolean)
      .join('\n');

    const { object } = await generateObject({
      model: openai('gpt-4.1-mini'),
      schema: ScoreSchema,
      system: SCORE_VENDOR_RISK_SYSTEM_PROMPT,
      prompt: promptBlock,
    });

    // Map the snake_case model output onto the vendor's camelCase columns
    // once, then reuse it for both the write and the task result.
    const scores = {
      inherentProbability: object.inherent_probability,
      inherentImpact: object.inherent_impact,
      residualProbability: object.residual_probability,
      residualImpact: object.residual_impact,
    };

    await db.vendor.update({
      where: { id: vendorId },
      data: scores,
    });

    logger.info('[score-vendor-risk] scored vendor', {
      vendorId,
      organizationId,
      vendorName: vendor.name,
      inherent: `${object.inherent_probability} × ${object.inherent_impact}`,
      residual: `${object.residual_probability} × ${object.residual_impact}`,
      rationale: object.rationale,
      certifications,
    });

    return {
      ...scores,
      rationale: object.rationale,
    };
  },
});
0 commit comments