Skip to content

Commit a840682

Browse files
authored
Merge pull request #2574 from trycompai/main
[comp] Production Deploy
2 parents 15c6037 + 08a3786 commit a840682

19 files changed

Lines changed: 2191 additions & 134 deletions

apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,9 @@ export const vendorRiskAssessmentTask: Task<
476476
minTimeoutInMs: 1000,
477477
maxTimeoutInMs: 10000,
478478
},
479-
maxDuration: 1000 * 60 * 10,
479+
// 30 minutes total: Firecrawl Agent can take up to 25 min on slow SPA
480+
// trust centers (Ubiquiti), and deep-scrape + DB writes need room too.
481+
maxDuration: 1000 * 60 * 30,
480482
run: async (payload) => {
481483
await tags.add([`org:${payload.organizationId}`]);
482484

@@ -1061,7 +1063,19 @@ export const vendorRiskAssessmentTask: Task<
10611063
badgeCount: Array.isArray(complianceBadges)
10621064
? complianceBadges.length
10631065
: 0,
1066+
complianceBadgesJson: JSON.stringify(complianceBadges ?? null),
10641067
hasLogo: Boolean(logoUrl),
1068+
certificationsInAssessmentJson: JSON.stringify(
1069+
Array.isArray(
1070+
(coreData as { certifications?: unknown })?.certifications,
1071+
)
1072+
? (
1073+
coreData as {
1074+
certifications?: Array<{ type: string; status: string }>;
1075+
}
1076+
).certifications
1077+
: [],
1078+
),
10651079
});
10661080

10671081
// Update vendor with core data (keep status in_progress — news may still be loading)
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import type { VendorRiskAssessmentCertification } from './agent-types';
2+
import { pickDeepScrapeSourceUrl } from './deep-scrape-source-url';
3+
4+
/**
 * Test fixture factory: builds a certification record with baseline values
 * (a verified "SOC 2 Type II" with no dates and no URL) and lets each test
 * override only the fields it cares about.
 */
const cert = (
  overrides: Partial<VendorRiskAssessmentCertification> = {},
): VendorRiskAssessmentCertification => {
  const defaults: VendorRiskAssessmentCertification = {
    type: 'SOC 2 Type II',
    status: 'verified',
    issuedAt: null,
    expiresAt: null,
    url: null,
  };
  // Later spread wins, so any field present in `overrides` replaces the default.
  return { ...defaults, ...overrides };
};
14+
15+
/**
 * Unit tests for `pickDeepScrapeSourceUrl`.
 *
 * Covers the three-tier fallback ("Trust & Security" link, then
 * "Security Overview" link, then the first verified certification URL) and
 * the on-vendor-domain gate applied at every tier, including look-alike
 * hosts and fall-through when a higher-priority candidate is rejected.
 */
describe('pickDeepScrapeSourceUrl', () => {
  const vendorDomain = 'acme.com';

  it("prefers 'Trust & Security' link over 'Security Overview'", () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [
        { label: 'Security Overview', url: 'https://acme.com/security' },
        { label: 'Trust & Security', url: 'https://acme.com/trust' },
      ],
      certifications: [],
    });
    expect(result).toBe('https://acme.com/trust');
  });

  it("falls back to 'Security Overview' when no 'Trust & Security' link", () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [{ label: 'Security Overview', url: 'https://acme.com/security' }],
      certifications: [],
    });
    expect(result).toBe('https://acme.com/security');
  });

  it("falls back to on-domain 'Security Overview' when the 'Trust & Security' link is off-domain", () => {
    // A rejected higher-priority link must not short-circuit the lower tiers.
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [
        { label: 'Trust & Security', url: 'https://acme.trust.page' },
        { label: 'Security Overview', url: 'https://acme.com/security' },
      ],
      certifications: [],
    });
    expect(result).toBe('https://acme.com/security');
  });

  it('falls back to a verified cert URL on the vendor domain when no labelled links match', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({ url: 'https://acme.com/reports/soc2.pdf', status: 'verified' }),
      ],
    });
    expect(result).toBe('https://acme.com/reports/soc2.pdf');
  });

  it('skips subdomain-matching cert URL when status is not verified', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({ url: 'https://trust.acme.com/iso', status: 'unknown' }),
      ],
    });
    expect(result).toBeNull();
  });

  it('accepts subdomain-matching cert URL (same registrable domain)', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({ url: 'https://trust.acme.com/iso', status: 'verified' }),
      ],
    });
    expect(result).toBe('https://trust.acme.com/iso');
  });

  it('rejects off-domain labelled links', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [
        { label: 'Trust & Security', url: 'https://acme.trust.page' },
      ],
      certifications: [],
    });
    expect(result).toBeNull();
  });

  it('rejects look-alike hosts that only share the vendor domain as a text suffix', () => {
    // "notacme.com" ends with "acme.com" as text but is a different
    // registrable domain; only an exact host or a dot-separated subdomain
    // may pass the gate.
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [{ label: 'Trust & Security', url: 'https://notacme.com/trust' }],
      certifications: [
        cert({ url: 'https://notacme.com/soc2.pdf', status: 'verified' }),
      ],
    });
    expect(result).toBeNull();
  });

  it('rejects off-domain verified cert URL', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({ url: 'https://acme.safebase.io/soc2', status: 'verified' }),
      ],
    });
    expect(result).toBeNull();
  });

  it('rejects unparseable URLs', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [{ label: 'Trust & Security', url: 'not a url' }],
      certifications: [cert({ url: 'also not a url', status: 'verified' })],
    });
    expect(result).toBeNull();
  });

  it('returns null when everything is empty', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [],
    });
    expect(result).toBeNull();
  });

  it('returns first verified cert URL and ignores later verified certs', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({
          type: 'SOC 2',
          status: 'verified',
          url: 'https://acme.com/first.pdf',
        }),
        cert({
          type: 'ISO 27001',
          status: 'verified',
          url: 'https://acme.com/second.pdf',
        }),
      ],
    });
    expect(result).toBe('https://acme.com/first.pdf');
  });

  it('skips verified certs whose URL is null and continues to next cert', () => {
    const result = pickDeepScrapeSourceUrl({
      vendorDomain,
      links: [],
      certifications: [
        cert({ type: 'SOC 2', status: 'verified', url: null }),
        cert({
          type: 'ISO 27001',
          status: 'verified',
          url: 'https://acme.com/iso.pdf',
        }),
      ],
    });
    expect(result).toBe('https://acme.com/iso.pdf');
  });
});
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import type { VendorRiskAssessmentCertification } from './agent-types';
2+
3+
/**
 * Resolve the best "source URL" to feed into `deepScrapeTrustPortal`.
 *
 * Fallback order:
 *   1. The Agent-returned link labelled "Trust & Security" if it's on the vendor's domain.
 *   2. The Agent-returned link labelled "Security Overview" if it's on the vendor's domain.
 *   3. The URL of the first verified certification that's on the vendor's domain.
 *
 * A URL counts as "on the vendor's domain" when its hostname equals
 * `vendorDomain` or is a dot-separated subdomain of it. The comparison is
 * case-insensitive and ignores surrounding whitespace in `vendorDomain`.
 *
 * Off-domain and unparseable URLs are rejected at every tier —
 * `deepScrapeTrustPortal` applies an additional third-party-portal gate, but
 * this helper is the first line of defense against scraping an unrelated host.
 *
 * @param args.vendorDomain - Bare vendor hostname, e.g. `acme.com`.
 * @param args.links - Labelled links returned by the Agent.
 * @param args.certifications - Certifications returned by the Agent; only
 *   entries with `status === 'verified'` and a non-empty `url` are considered.
 * @returns The chosen URL exactly as supplied, or `null` if nothing qualifies
 *   (including when `vendorDomain` is blank).
 */
export function pickDeepScrapeSourceUrl(args: {
  vendorDomain: string;
  links: Array<{ label: string; url: string }>;
  certifications: VendorRiskAssessmentCertification[];
}): string | null {
  const { links, certifications } = args;

  // The parsed hostname is lowercased below, so normalize the vendor domain
  // the same way; otherwise "Acme.com" would never match "acme.com".
  const vendorDomain = args.vendorDomain.trim().toLowerCase();

  // A blank domain would turn the suffix check into `endsWith('.')`, which
  // accepts any trailing-dot host (e.g. "evil.com."). Nothing can qualify.
  if (vendorDomain === '') return null;

  const subdomainSuffix = `.${vendorDomain}`;

  const isOnVendorDomain = (url: string): boolean => {
    try {
      const host = new URL(url).hostname.toLowerCase();
      // The leading dot in the suffix prevents look-alikes such as
      // "notacme.com" from matching "acme.com".
      return host === vendorDomain || host.endsWith(subdomainSuffix);
    } catch {
      // `new URL` throws on unparseable input; treat it as off-domain.
      return false;
    }
  };

  const byLabel = (label: string): string | null =>
    links.find((l) => l.label === label && isOnVendorDomain(l.url))?.url ??
    null;

  const trustUrl = byLabel('Trust & Security');
  if (trustUrl) return trustUrl;

  const securityUrl = byLabel('Security Overview');
  if (securityUrl) return securityUrl;

  for (const cert of certifications) {
    if (cert.status !== 'verified') continue;
    if (cert.url && isOnVendorDomain(cert.url)) return cert.url;
  }

  return null;
}

0 commit comments

Comments
 (0)