diff --git a/packages/bubble-core/package.json b/packages/bubble-core/package.json index c44412fa..2c49cec6 100644 --- a/packages/bubble-core/package.json +++ b/packages/bubble-core/package.json @@ -1,6 +1,6 @@ { "name": "@bubblelab/bubble-core", - "version": "0.1.300", + "version": "0.1.301", "type": "module", "license": "Apache-2.0", "main": "./dist/index.js", diff --git a/packages/bubble-core/src/bubbles/tool-bubble/people-search-tool.ts b/packages/bubble-core/src/bubbles/tool-bubble/people-search-tool.ts index a89b6859..1320bd97 100644 --- a/packages/bubble-core/src/bubbles/tool-bubble/people-search-tool.ts +++ b/packages/bubble-core/src/bubbles/tool-bubble/people-search-tool.ts @@ -41,6 +41,399 @@ function sanitizeLinkedInUrl(url: string | null | undefined): string | null { return sanitized; } +/** + * Translate the tool's Crustdata-flavored seniority vocabulary to the values + * FullEnrich's /people/search actually accepts. FullEnrich's seniority enum is: + * Owner, Founder, C-level, Partner, VP, Head, Director, Manager, Senior. + * + * Unknown inputs are passed through unchanged — FullEnrich accepts the request + * but silently ignores unrecognized seniority values, so there is nothing to + * gain from dropping them client-side. Callers targeting seniority levels FE + * doesn't index (e.g. "Entry Level", "In Training") will get `warnings` + * pointing that out. + */ +function mapSeniorityToFullEnrich(values: string[]): { + mapped: string[]; + unknown: string[]; +} { + const table: Record = { + 'owner / partner': ['Owner', 'Partner'], + owner: ['Owner'], + partner: ['Partner'], + cxo: ['C-level'], + 'c-level': ['C-level'], + 'vice president': ['VP'], + vp: ['VP'], + head: ['Head'], + director: ['Director'], + 'experienced manager': ['Manager'], + 'entry level manager': ['Manager'], + manager: ['Manager'], + strategic: ['Senior'], + senior: ['Senior'], + founder: ['Founder'], + }; + const feEnum = new Set([ + 'Owner', + 'Founder', + 'C-level', + 'Partner', + 'VP', + 'Head', + 'Director', + 'Manager', + 'Senior', + ]); + const mapped = new Set(); + const unknown: string[] = []; + for (const raw of values) { + const key = raw.trim().toLowerCase(); + const hit = table[key]; + if (hit) { + for (const v of hit) mapped.add(v); + continue; + } + if (feEnum.has(raw)) { + mapped.add(raw); + } else { + unknown.push(raw); + } + } + return { mapped: Array.from(mapped), unknown }; +} + +/** + * Translate Crustdata-flavored function categories to FullEnrich's top-level + * function enum (see FE docs: Functions & Subfunctions). Unknown inputs are + * tracked so callers can see in `warnings` which values FE ignored. FE's + * subfunctions (e.g. "Recruiting/Talent Acquisition") are also accepted — if + * the caller passes one directly we forward it unchanged. + */ +function mapFunctionCategoryToFullEnrich(values: string[]): { + mapped: string[]; + unknown: string[]; +} { + // Top-level FE functions — any value here passes through. + const feFunctions = new Set([ + 'Administrative', + 'Agriculture & Environment', + 'Construction & Trades', + 'Consulting & Advisory', + 'Customer Service', + 'Design', + 'Education', + 'Energy & Utilities', + 'Entertainment & Gaming', + 'Executive & Leadership', + 'Finance', + 'Hospitality & Tourism', + 'Human Resources', + 'Legal', + 'Marketing', + 'Media & Communications', + 'Medical & Health', + 'Non-Profit & Government', + 'Not Employed', + 'Operations', + 'Personal & Home Services', + 'Product', + 'Project & Program Management', + 'Public Safety & Security', + 'Research & Science', + 'Retail & Consumer', + 'Sales', + 'Software', + 'Traditional Engineering', + 'Transportation & Logistics', + ]); + // Crustdata → FE translation (lowercased keys). + const table: Record = { + accounting: 'Finance', + administrative: 'Administrative', + 'arts and design': 'Design', + 'business development': 'Sales', + 'community and social services': 'Non-Profit & Government', + consulting: 'Consulting & Advisory', + education: 'Education', + engineering: 'Software', + entrepreneurship: 'Executive & Leadership', + finance: 'Finance', + 'healthcare services': 'Medical & Health', + 'human resources': 'Human Resources', + 'information technology': 'Software', + legal: 'Legal', + marketing: 'Marketing', + 'media and communication': 'Media & Communications', + 'military and protective services': 'Public Safety & Security', + operations: 'Operations', + 'product management': 'Product', + 'program and project management': 'Project & Program Management', + purchasing: 'Operations', + 'quality assurance': 'Software', + 'real estate': 'Finance', + research: 'Research & Science', + sales: 'Sales', + 'customer success and support': 'Customer Service', + }; + // Subfunctions FE indexes under top-level functions — agents sometimes want + // this precision (e.g. "Recruiting/Talent Acquisition" instead of plain + // "Human Resources"). Accept any as-is. Sourced from FE's published + // "Accepted Filter Values → Functions & Subfunctions". + const feSubfunctions = new Set([ + 'Recruiting/Talent Acquisition', + 'HR Business Partner', + 'HR Operations', + 'HR Leadership', + 'Learning & Development', + 'Talent Management', + 'People Analytics', + 'Compensation & Benefits', + 'Employee Relations', + 'Organizational Development', + 'Customer Success', + 'Customer Support', + 'Customer Experience', + 'Customer Operations', + 'Client Services', + 'General Customer Service', + 'Software Engineering', + 'Backend Engineering', + 'Frontend Engineering', + 'Fullstack Engineering', + 'Data Engineering/Analytics', + 'DevOps', + 'Platform Engineering', + 'Site Reliability Engineering', + 'QA/Quality', + 'Cybersecurity', + 'Security Engineering', + 'Cloud Engineering', + 'Cloud Operations', + 'Database Engineering', + 'Database Administration', + 'Mobile Engineering', + 'AI/Machine Learning', + 'Solutions Architecture', + 'Technical Writing', + 'Product Management', + 'Product Marketing', + 'Product Owner', + 'Product Strategy', + 'Product Operations', + 'Product Analytics', + 'UX Research', + 'Technical Product Management', + 'Account Management', + 'Business Development', + 'Channel Sales', + 'Enterprise Sales', + 'Field Sales', + 'General Sales', + 'Inside Sales', + 'Partnership Sales', + 'Revenue Operations', + 'SDR/BDR', + 'Sales Enablement', + 'Sales Engineering', + 'Sales Operations', + 'Sales Leadership', + 'Brand Marketing', + 'Content Marketing', + 'Demand Generation', + 'Digital Marketing', + 'Email Marketing', + 'Event Marketing', + 'Growth Marketing', + 'Market Research', + 'Marketing Operations', + 'Public Relations', + 'SEO/SEM', + 'Social Media Marketing', + 'Accounting', + 'Audit', + 'Banking/Finance', + 'Controllers', + 'Corporate Finance', + 'Credit Analysis', + 'Finance Leadership', + 'Financial Planning & Analysis', + 'Financial Reporting', + 'Investment', + 'Investor Relations', + 'Mergers & Acquisitions', + 'Risk Management', + 'Tax', + 'Treasury', + ]); + const mapped = new Set(); + const unknown: string[] = []; + for (const raw of values) { + const trimmed = raw.trim(); + if (feFunctions.has(trimmed) || feSubfunctions.has(trimmed)) { + mapped.add(trimmed); + continue; + } + const hit = table[trimmed.toLowerCase()]; + if (hit) { + mapped.add(hit); + continue; + } + unknown.push(trimmed); + } + return { mapped: Array.from(mapped), unknown }; +} + +/** + * FullEnrich uses LinkedIn's industry taxonomy (~400 canonical values like + * "Software Development", "IT Services and IT Consulting", "Financial + * Services"). Shorthand like "SaaS" or "AI" silently zeros out the search on + * FE, so we (a) alias the common shorthand to canonical FE values and (b) + * allow any value that matches a canonical industry exactly. + * + * Keep `FE_INDUSTRY_ALIASES` focused on values agents plausibly type. The + * full taxonomy is too long to enumerate here — if a value matches no alias + * and isn't already canonical, we return it as `unknown` so the caller can + * fail the request with a pointer to the FE docs. + */ +const FE_INDUSTRY_ALIASES: Record = { + saas: ['Software Development'], + software: ['Software Development'], + 'software development': ['Software Development'], + ai: ['Technology, Information and Internet'], + 'artificial intelligence': ['Technology, Information and Internet'], + 'machine learning': ['Technology, Information and Internet'], + tech: ['Technology, Information and Internet'], + technology: ['Technology, Information and Internet'], + internet: ['Technology, Information and Internet'], + fintech: ['Financial Services'], + finance: ['Financial Services'], + banking: ['Banking'], + insurance: ['Insurance'], + healthcare: ['Hospitals and Health Care'], + health: ['Hospitals and Health Care'], + biotech: ['Biotechnology Research'], + pharma: ['Pharmaceutical Manufacturing'], + ecommerce: ['Retail'], + 'e-commerce': ['Retail'], + retail: ['Retail'], + consulting: ['Business Consulting and Services'], + 'management consulting': ['Business Consulting and Services'], + 'it services': ['IT Services and IT Consulting'], + it: ['IT Services and IT Consulting'], + manufacturing: ['Manufacturing'], + automotive: ['Motor Vehicle Manufacturing', 'Automotive'], + media: [ + 'Online Audio and Video Media', + 'Broadcast Media Production and Distribution', + ], + entertainment: ['Entertainment Providers'], + education: ['Education'], + government: ['Government Administration'], + nonprofit: ['Non-profit Organizations'], + 'non-profit': ['Non-profit Organizations'], + agency: ['Marketing Services', 'Advertising Services'], + marketing: ['Marketing Services'], + advertising: ['Advertising Services'], + law: ['Law Practice', 'Legal Services'], + legal: ['Legal Services'], + realestate: ['Real Estate'], + 'real estate': ['Real Estate'], + construction: ['Construction'], + energy: ['Oil and Gas', 'Renewable Energy Power Generation', 'Utilities'], + telecom: ['Telecommunications'], + telecommunications: ['Telecommunications'], + logistics: ['Transportation, Logistics, Supply Chain and Storage'], + 'supply chain': ['Transportation, Logistics, Supply Chain and Storage'], + airline: ['Airlines and Aviation'], + aviation: ['Airlines and Aviation'], + hospitality: ['Hospitality'], + staffing: ['Staffing and Recruiting'], + recruiting: ['Staffing and Recruiting'], + vc: ['Venture Capital and Private Equity Principals'], + 'venture capital': ['Venture Capital and Private Equity Principals'], + pe: ['Venture Capital and Private Equity Principals'], + 'private equity': ['Venture Capital and Private Equity Principals'], + b2b: ['Business Consulting and Services', 'Software Development'], + 'b2b software': ['Software Development'], +}; + +/** + * Canonical FE industry values (subset). Used for exact-match passthrough — + * we don't enumerate all ~400; a value not in aliases and not in this list is + * rejected with a pointer at the FE docs. This covers the canonical values + * most common for tech/sales prospecting. + */ +const FE_INDUSTRY_CANONICAL = new Set([ + 'Software Development', + 'Technology, Information and Internet', + 'Technology, Information and Media', + 'Financial Services', + 'Banking', + 'Insurance', + 'Hospitals and Health Care', + 'Health, Wellness & Fitness', + 'Biotechnology Research', + 'Pharmaceutical Manufacturing', + 'Retail', + 'Business Consulting and Services', + 'IT Services and IT Consulting', + 'Manufacturing', + 'Motor Vehicle Manufacturing', + 'Automotive', + 'Online Audio and Video Media', + 'Broadcast Media Production and Distribution', + 'Entertainment Providers', + 'Education', + 'Higher Education', + 'Government Administration', + 'Non-profit Organizations', + 'Marketing Services', + 'Advertising Services', + 'Law Practice', + 'Legal Services', + 'Real Estate', + 'Construction', + 'Oil and Gas', + 'Renewable Energy Power Generation', + 'Utilities', + 'Telecommunications', + 'Transportation, Logistics, Supply Chain and Storage', + 'Airlines and Aviation', + 'Hospitality', + 'Staffing and Recruiting', + 'Venture Capital and Private Equity Principals', + 'Computer Games', + 'Defense and Space Manufacturing', + 'E-Learning Providers', + 'Food and Beverage Services', + 'Research Services', + 'Investment Management', + 'Investment Banking', + 'Capital Markets', +]); + +function mapIndustryToFullEnrich(values: string[]): { + mapped: string[]; + unknown: string[]; +} { + const mapped = new Set(); + const unknown: string[] = []; + for (const raw of values) { + const trimmed = raw.trim(); + if (FE_INDUSTRY_CANONICAL.has(trimmed)) { + mapped.add(trimmed); + continue; + } + const aliased = FE_INDUSTRY_ALIASES[trimmed.toLowerCase()]; + if (aliased) { + for (const v of aliased) mapped.add(v); + continue; + } + unknown.push(trimmed); + } + return { mapped: Array.from(mapped), unknown }; +} + // Simplified person result schema with full profile information const PersonResultSchema = z.object({ // Basic info @@ -56,7 +449,9 @@ const PersonResultSchema = z.object({ websites: z .array(z.string()) .nullable() - .describe('Personal/professional websites'), + .describe( + 'Personal/professional websites. Crustdata-only — always null on FullEnrich results.' + ), // Enriched email data (from FullEnrich if enabled) enrichedWorkEmail: z @@ -98,11 +493,15 @@ const PersonResultSchema = z.object({ yearsOfExperience: z .number() .nullable() - .describe('Total years of professional experience'), + .describe( + 'Total years of professional experience. Crustdata-only — always null on FullEnrich results.' + ), recentlyChangedJobs: z .boolean() .nullable() - .describe('Whether this person recently changed jobs'), + .describe( + 'Whether this person recently changed jobs. Crustdata-only — always null on FullEnrich results.' + ), // Location location: z.string().nullable().describe('Location/region'), @@ -116,7 +515,9 @@ const PersonResultSchema = z.object({ numConnections: z .number() .nullable() - .describe('Number of LinkedIn connections'), + .describe( + 'Number of LinkedIn connections. Crustdata-only — always null on FullEnrich results.' + ), // Work history currentEmployers: z @@ -198,7 +599,7 @@ const PeopleSearchToolParamsSchema = z.object({ .enum(['crustdata', 'fullenrich']) .default('fullenrich') .describe( - "Search provider. Default: 'fullenrich'. Supported filter sets differ between providers — unsupported filters are silently ignored. FullEnrich does not support: locationRadius, minYearsExperience/maxYearsExperience, functionCategories, schoolName, pastJobTitle, minYearsAtCompany, recentlyChangedJobs, minConnections, excludeCompanies, excludeProfiles, languages. Crustdata supports all filters. Email enrichment (enrichEmails) only applies to Crustdata provider." + "Search provider. Default: 'fullenrich'. Leave unset unless you need a filter FullEnrich cannot honor. On the FullEnrich provider every string filter is sent with exact_match=true so results are strictly scoped (e.g. companyName='Stripe' will NOT also return 'Stripes' or ex-Stripe folks). Filters the FullEnrich provider DOES NOT SUPPORT (the tool returns a hard error if any of these are set while provider='fullenrich'): locationRadius, minYearsExperience, maxYearsExperience, minConnections, excludeCompanies, excludeProfiles, companyLinkedinUrl. Enumerated filters (seniorityLevels, functionCategories, companyIndustries) are validated against FullEnrich's taxonomy and return a hard error with the accepted values if an unknown value is passed — no silent soft-match. Everything else — including languages, schoolName, pastJobTitle, minYearsAtCompany, recentlyChangedJobs — is supported on BOTH providers." ), // ===== PRIMARY SEARCH CRITERIA (at least one required) ===== @@ -372,7 +773,7 @@ const PeopleSearchToolParamsSchema = z.object({ .optional() .default(false) .describe( - 'Enrich emails for found people using FullEnrich. Requires FULLENRICH_API_KEY credential. Costs 1 credit per work email, 3 credits per personal email.' + "Enrich emails for found people via FullEnrich bulk enrichment as a post step. Applies to BOTH providers. Requires FULLENRICH_API_KEY credential. Costs 1 credit per work email, 3 credits per personal email. IMPORTANT: this is a SLOW operation — FullEnrich's bulk pipeline typically takes 30–120 seconds per batch (polls every 5s up to a 120s cap). If the cap trips, affected people are returned without emails and the result's `warnings` array explains which batches timed out. Consider surfacing a progress UI on user-facing flows." ), includePersonalEmails: z .boolean() @@ -399,6 +800,12 @@ const PeopleSearchToolResultSchema = z.object({ .describe( 'Pagination cursor for fetching the next page. Pass this as the cursor parameter in the next request. Undefined when no more results are available.' ), + warnings: z + .array(z.string()) + .optional() + .describe( + 'Non-fatal signals from the tool — e.g. filters that were requested but the chosen provider does not support (and were therefore ignored). Useful for surfacing why a search returned fewer results than expected.' + ), success: z.boolean().describe('Whether the operation was successful'), error: z.string().describe('Error message if operation failed'), }); @@ -413,8 +820,12 @@ export type PeopleSearchResult = PeopleSearchToolResult; /** * People Search Tool * - * Agent-friendly tool for searching and discovering professionals. - * Uses the Crustdata PersonDB in-database search API for fast, comprehensive results. + * Agent-friendly tool for searching and discovering professionals. Defaults to + * FullEnrich's /people/search; falls back to Crustdata PersonDB only when the + * caller opts in (provider='crustdata') or relies on a filter FullEnrich + * doesn't support (locationRadius, functionCategories, schoolName, pastJobTitle, + * minYearsExperience/maxYearsExperience, recentlyChangedJobs, minConnections, + * excludeCompanies, excludeProfiles, languages). * * Features: * - Search by company name/URL, job title, location, or skills @@ -443,17 +854,49 @@ export class PeopleSearchTool extends ToolBubble< 'Comprehensive people search by company, title, location, skills, with optional email enrichment'; static readonly longDescription = ` Comprehensive people search tool for finding and discovering professionals. - Uses the Crustdata PersonDB in-database search for fast, comprehensive results. + + **PROVIDERS:** + - \`fullenrich\` (DEFAULT, preferred): FullEnrich /people/search. Returns LinkedIn profiles, + titles, locations, employment history, and work emails when available. + - \`crustdata\`: Crustdata PersonDB. Wider filter set (geo radius, function category, + school name, recent-job-change, connection count, exclusions), but requires + CRUSTDATA_API_KEY and falls back here only when you genuinely need one of the + crustdata-only filters. + + Leave \`provider\` unset (or pass \`'fullenrich'\`) unless you need a filter FullEnrich + doesn't support — the tool will silently drop unsupported filters on FullEnrich, so + check the per-filter notes below before choosing. + + **HOW TO GET HIGH-QUALITY LEADS:** + 1. Prefer an explicit \`jobTitles\` list (e.g. \`["Customer Success Manager", + "CSM", "Senior Customer Success Manager"]\`) for precise role targeting. + FullEnrich's \`seniorityLevels\` and \`functionCategories\` are ML-derived + tags on each person and are imperfect — a person titled "VP of Strategic + Finance" may still surface in a Software/VP search. jobTitles matched with + exact_match=true are much more reliable. + 2. Combine filters. \`jobTitles\` + \`country\` + \`minCompanyHeadcount\` + + \`companyIndustries\` is the canonical high-precision recipe. + 3. Use \`companyName\` for single-company searches — with exact_match=true it + won't leak into sister companies (e.g. "Stripe" won't also match "Stripes"). + 4. On FullEnrich the enumerated filters (seniorityLevels, functionCategories, + companyIndustries) are validated against FE's taxonomy. An unknown value + produces a hard error listing the acceptable values — read the error and + fix your input. **SEARCH CRITERIA (at least one required):** - - Company: companyName, companyLinkedinUrl, pastCompanyName + Everything below is supported on BOTH providers unless tagged + *(crustdata only)*. Invalid filters for the selected provider produce a + hard error (not silent soft-matching). + - Company: companyName, companyLinkedinUrl *(crustdata only)*, pastCompanyName - Job Title: jobTitle (single), jobTitles (multiple with OR logic), pastJobTitle - - Location: location (fuzzy), locationRadius (geo search), country, city - - Skills & Experience: skills, languages, minYearsExperience, maxYearsExperience + - Location: location (fuzzy), country, city, locationRadius *(crustdata only)* + - Skills & Experience: skills, languages, minYearsExperience / maxYearsExperience *(crustdata only)* - Seniority & Function: seniorityLevels, functionCategories - Company Attributes: companyIndustries, minCompanyHeadcount, maxCompanyHeadcount - Education: schoolName - - Status: recentlyChangedJobs, minConnections, minYearsAtCompany + - Status: recentlyChangedJobs, minYearsAtCompany, minConnections *(crustdata only)* + - Exclusions: excludeCompanies *(crustdata only)*, excludeProfiles *(crustdata only)* + - Email enrichment: enrichEmails, includePersonalEmails (both providers) **SENIORITY LEVELS (valid values):** ${(PersonSeniorityLevelEnum._def.values as readonly string[]).map((v) => `- ${v}`).join('\n ')} @@ -894,6 +1337,7 @@ export class PeopleSearchTool extends ToolBubble< // Transform results let people = this.transformProfiles(personSearchData.profiles || []); + const enrichmentWarnings: string[] = []; // Email enrichment if requested and FULLENRICH_API_KEY is available const { enrichEmails, includePersonalEmails } = this.params; @@ -902,17 +1346,20 @@ export class PeopleSearchTool extends ToolBubble< credentials[CredentialType.FULLENRICH_API_KEY] && people.length > 0 ) { - people = await this.enrichPeopleEmails( + const enrichResult = await this.enrichPeopleEmails( people, credentials, includePersonalEmails ?? false ); + people = enrichResult.people; + enrichmentWarnings.push(...enrichResult.warnings); } return { people, totalCount: personSearchData.total_count || people.length, nextCursor: personSearchData.next_cursor, + ...(enrichmentWarnings.length > 0 && { warnings: enrichmentWarnings }), success: true, error: '', }; @@ -925,9 +1372,12 @@ export class PeopleSearchTool extends ToolBubble< /** * Search via FullEnrich v2 /people/search. Maps tool params to FullEnrich - * filters (unsupported filters silently dropped), normalizes results into - * PersonResult[]. Email enrichment post-step is not applied — FullEnrich - * already returns work emails when available. + * filters, collecting warnings for any filter the provider cannot honor so + * callers can see why results may look narrower than expected. + * + * When `enrichEmails=true`, runs the FullEnrich bulk-enrichment pipeline as a + * post step (same path as the Crustdata branch) because /people/search does + * not return emails by itself. */ private async searchFullEnrich(): Promise { const credentials = this.params?.credentials; @@ -939,27 +1389,63 @@ export class PeopleSearchTool extends ToolBubble< const { companyName, + companyLinkedinUrl, jobTitle, jobTitles, location, + locationRadius, skills, + languages, + minYearsExperience, + maxYearsExperience, seniorityLevels, + functionCategories, companyIndustries, minCompanyHeadcount, maxCompanyHeadcount, + minYearsAtCompany, pastCompanyName, + pastJobTitle, + schoolName, country, city, + recentlyChangedJobs, + minConnections, + excludeCompanies, + excludeProfiles, + enrichEmails, + includePersonalEmails, limit = 100, cursor, } = this.params; + // Hard-fail on filters FullEnrich cannot honor. Surfacing a clear error + // is better than silently producing wrong results — agents will see the + // error in their build-run-validate loop and fix the code upstream. + const crustdataOnly: string[] = []; + if (locationRadius) crustdataOnly.push('locationRadius'); + if (companyLinkedinUrl) + crustdataOnly.push('companyLinkedinUrl (exact URL match)'); + if (minConnections !== undefined) crustdataOnly.push('minConnections'); + if (excludeCompanies?.length) crustdataOnly.push('excludeCompanies'); + if (excludeProfiles?.length) crustdataOnly.push('excludeProfiles'); + if (minYearsExperience !== undefined) + crustdataOnly.push('minYearsExperience'); + if (maxYearsExperience !== undefined) + crustdataOnly.push('maxYearsExperience'); + if (crustdataOnly.length > 0) { + return this.createErrorResult( + `The following filters are only supported by the Crustdata provider, but the current request routes to FullEnrich: ${crustdataOnly.join(', ')}. Either (a) remove these filters, or (b) set provider="crustdata".` + ); + } + // Build title list (merge jobTitle + jobTitles) const allJobTitles: string[] = []; if (jobTitle) allJobTitles.push(jobTitle); if (jobTitles?.length) allJobTitles.push(...jobTitles); - // Build location list (concatenate location + city + country) + // Build location list (raw strings — FE accepts any granularity: continent, + // country, state, sub-region, or city). const allLocations: string[] = []; if (location) allLocations.push(location); if (city) allLocations.push(city); @@ -969,22 +1455,60 @@ export class PeopleSearchTool extends ToolBubble< const hasCriteria = allJobTitles.length > 0 || allLocations.length > 0 || - companyName || - pastCompanyName || + !!companyName || + !!pastCompanyName || + !!pastJobTitle || + !!schoolName || (skills && skills.length > 0) || + (languages && languages.length > 0) || (seniorityLevels && seniorityLevels.length > 0) || + (functionCategories && functionCategories.length > 0) || (companyIndustries && companyIndustries.length > 0) || minCompanyHeadcount !== undefined || - maxCompanyHeadcount !== undefined; + maxCompanyHeadcount !== undefined || + minYearsAtCompany !== undefined || + recentlyChangedJobs === true; if (!hasCriteria) { return this.createErrorResult( - 'FullEnrich people search requires at least one supported filter (companyName, jobTitle/jobTitles, location/city/country, skills, seniorityLevels, companyIndustries, min/maxCompanyHeadcount, pastCompanyName).' + 'FullEnrich people search requires at least one supported filter (companyName, jobTitle/jobTitles, location/city/country, skills, languages, seniorityLevels, functionCategories, companyIndustries, min/maxCompanyHeadcount, minYearsAtCompany, pastCompanyName, pastJobTitle, schoolName, recentlyChangedJobs).' + ); + } + + // Strict validate enumerated filters before we call FE. Any unknown value + // produces a hard error with exact guidance on what FE accepts — this + // replaces the old soft-warn-and-soft-match behaviour that let agents + // silently receive wrong-category results. + const seniorityResult = seniorityLevels?.length + ? mapSeniorityToFullEnrich(seniorityLevels) + : { mapped: [] as string[], unknown: [] as string[] }; + if (seniorityResult.unknown.length > 0) { + return this.createErrorResult( + `Unknown seniorityLevels values for FullEnrich: ${seniorityResult.unknown.join(', ')}. FullEnrich accepts: Owner, Founder, C-level, Partner, VP, Head, Director, Manager, Senior. Set provider="crustdata" for the extra Crustdata-only values (Entry Level, In Training, Experienced Manager, Entry Level Manager, Strategic).` + ); + } + + const functionResult = functionCategories?.length + ? mapFunctionCategoryToFullEnrich(functionCategories) + : { mapped: [] as string[], unknown: [] as string[] }; + if (functionResult.unknown.length > 0) { + return this.createErrorResult( + `Unknown functionCategories values for FullEnrich: ${functionResult.unknown.join(', ')}. Use one of FullEnrich's top-level functions (Administrative, Consulting & Advisory, Customer Service, Design, Education, Executive & Leadership, Finance, Human Resources, Legal, Marketing, Media & Communications, Medical & Health, Non-Profit & Government, Operations, Product, Project & Program Management, Public Safety & Security, Research & Science, Retail & Consumer, Sales, Software, Traditional Engineering, Transportation & Logistics) or a documented subfunction (e.g. "Recruiting/Talent Acquisition", "Customer Success", "Software Engineering"). Full list: https://docs.fullenrich.com/ → Accepted Filter Values.` + ); + } + + const industryResult = companyIndustries?.length + ? mapIndustryToFullEnrich(companyIndustries) + : { mapped: [] as string[], unknown: [] as string[] }; + if (industryResult.unknown.length > 0) { + return this.createErrorResult( + `Unknown companyIndustries values for FullEnrich: ${industryResult.unknown.join(', ')}. FullEnrich uses LinkedIn's industry taxonomy — common shorthand like "SaaS" is auto-mapped, but other values must match the canonical taxonomy (e.g. "Software Development", "Financial Services", "IT Services and IT Consulting", "Hospitals and Health Care"). Full list: https://docs.fullenrich.com/ → Accepted Filter Values → Company Industry.` ); } // FullEnrich caps at limit=100 per request const feLimit = Math.min(limit, 100); + const warnings: string[] = []; try { const headcountRange: { min?: number; max?: number } = {}; @@ -993,39 +1517,63 @@ export class PeopleSearchTool extends ToolBubble< if (maxCompanyHeadcount !== undefined) headcountRange.max = maxCompanyHeadcount; + // exact_match: true disables FE's default fuzzy matching ("Stripe" would + // otherwise also match "Stripes" and ex-Stripe folks). We default this + // on for every string filter since the tool's users overwhelmingly want + // strict matches for prospecting lists. + const asExactFilter = (v: string) => ({ value: v, exact_match: true }); + const people_search = await new FullEnrichBubble( { operation: 'people_search', limit: feLimit, ...(allJobTitles.length > 0 && { - current_position_titles: allJobTitles.map((v) => ({ value: v })), + current_position_titles: allJobTitles.map(asExactFilter), }), ...(companyName && { - current_company_names: [{ value: companyName }], + current_company_names: [asExactFilter(companyName)], }), ...(pastCompanyName && { - past_company_names: [{ value: pastCompanyName }], + past_company_names: [asExactFilter(pastCompanyName)], + }), + ...(pastJobTitle && { + past_position_titles: [asExactFilter(pastJobTitle)], + }), + ...(schoolName && { + person_universities: [asExactFilter(schoolName)], }), ...(allLocations.length > 0 && { - person_locations: allLocations.map((v) => ({ value: v })), + person_locations: allLocations.map(asExactFilter), }), ...(skills?.length && { - person_skills: skills.map((v) => ({ value: v })), + person_skills: skills.map(asExactFilter), }), - ...(seniorityLevels?.length && { - current_position_seniority_level: seniorityLevels.map((v) => ({ - value: v, - })), + ...(languages?.length && { + person_languages: languages.map(asExactFilter), }), - ...(companyIndustries?.length && { - current_company_industries: companyIndustries.map((v) => ({ - value: v, - })), + ...(seniorityResult.mapped.length > 0 && { + current_position_seniority_level: + seniorityResult.mapped.map(asExactFilter), + }), + ...(functionResult.mapped.length > 0 && { + current_position_job_functions: + functionResult.mapped.map(asExactFilter), + }), + ...(industryResult.mapped.length > 0 && { + current_company_industries: + industryResult.mapped.map(asExactFilter), }), ...((minCompanyHeadcount !== undefined || maxCompanyHeadcount !== undefined) && { current_company_headcounts: [headcountRange], }), + ...(minYearsAtCompany !== undefined && { + current_company_years_at: [{ min: minYearsAtCompany }], + }), + ...(recentlyChangedJobs === true && { + // "Recently changed jobs" → last 90 days since last job change. + current_company_days_since_last_job_change: [{ max: 90 }], + }), ...(cursor && { search_after: cursor }), credentials, }, @@ -1048,14 +1596,28 @@ export class PeopleSearchTool extends ToolBubble< }; }; - const people: PersonResult[] = (data.people ?? []).map((p) => + let people: PersonResult[] = (data.people ?? []).map((p) => this.transformFullEnrichPerson(p) ); + // Post-enrichment step: /people/search returns profile data but not + // emails, so when the caller asks for enrichEmails we reuse the existing + // bulk-enrichment pipeline that the Crustdata branch uses. + if (enrichEmails && people.length > 0) { + const enrichResult = await this.enrichPeopleEmails( + people, + credentials, + includePersonalEmails ?? false + ); + people = enrichResult.people; + warnings.push(...enrichResult.warnings); + } + return { people, totalCount: data.metadata?.total ?? people.length, nextCursor: data.metadata?.search_after, + ...(warnings.length > 0 && { warnings }), success: true, error: '', }; @@ -1192,19 +1754,50 @@ export class PeopleSearchTool extends ToolBubble< ) as string[]) : null; + // /people/search occasionally returns a contact block with email data on + // results the person already surfaced elsewhere in FullEnrich's pipeline. + // Populate both the canonical `emails` array and the `enriched*` mirrors so + // callers that read either shape see the data. + const contact = (p.contact ?? {}) as Record; + const mostProbableEmail = + (contact.most_probable_email as string | undefined) ?? null; + const mostProbablePersonalEmail = + (contact.most_probable_personal_email as string | undefined) ?? null; + const contactEmails = Array.isArray(contact.emails) + ? (contact.emails as Array>) + .map((e) => ({ + email: (e.email as string | undefined) ?? '', + status: e.status as string | undefined, + })) + .filter((e) => e.email.length > 0) + : null; + const contactPersonalEmails = Array.isArray(contact.personal_emails) + ? (contact.personal_emails as Array>) + .map((e) => ({ + email: (e.email as string | undefined) ?? '', + status: e.status as string | undefined, + })) + .filter((e) => e.email.length > 0) + : null; + const mergedEmails = new Set(); + if (mostProbableEmail) mergedEmails.add(mostProbableEmail); + if (mostProbablePersonalEmail) mergedEmails.add(mostProbablePersonalEmail); + contactEmails?.forEach((e) => mergedEmails.add(e.email)); + contactPersonalEmails?.forEach((e) => mergedEmails.add(e.email)); + return { name: fullName || null, title: (current.title as string | undefined) ?? null, headline: (p.headline as string | undefined) ?? null, linkedinUrl: extractSocialUrl(social, 'linkedin'), profilePictureUrl: (p.profile_picture_url as string | undefined) ?? null, - emails: [], + emails: mergedEmails.size > 0 ? Array.from(mergedEmails) : null, twitterHandle: extractSocialUrl(social, 'twitter'), websites: null, - enrichedWorkEmail: null, - enrichedPersonalEmail: null, - enrichedWorkEmails: null, - enrichedPersonalEmails: null, + enrichedWorkEmail: mostProbableEmail, + enrichedPersonalEmail: mostProbablePersonalEmail, + enrichedWorkEmails: contactEmails, + enrichedPersonalEmails: contactPersonalEmails, seniorityLevel: (current.seniority as string | undefined) ?? null, yearsOfExperience: (p.years_of_experience as number | undefined) ?? null, recentlyChangedJobs: null, @@ -1332,14 +1925,20 @@ export class PeopleSearchTool extends ToolBubble< } /** - * Enrich people with email data using FullEnrich - * Batches requests and polls until completion + * Enrich people with email data using FullEnrich. + * + * Batches up to 100 contacts per request and polls every 5s up to 120s per + * batch. Bulk enrichment is slow by design — expect 30–120s of wall time for + * a full batch. Returns both the enriched people and a list of warning + * strings so callers can surface "emails failed to arrive" to the user + * instead of silently returning un-enriched people. */ private async enrichPeopleEmails( people: PersonResult[], credentials: Partial>, includePersonalEmails: boolean - ): Promise { + ): Promise<{ people: PersonResult[]; warnings: string[] }> { + const warnings: string[] = []; // Build contacts for enrichment (only those with LinkedIn URLs or name+company) const contactsToEnrich: Array<{ index: number; @@ -1378,7 +1977,7 @@ export class PeopleSearchTool extends ToolBubble< } if (contactsToEnrich.length === 0) { - return people; // No contacts to enrich + return { people, warnings }; // No contacts to enrich } // Determine enrich fields @@ -1422,15 +2021,22 @@ export class PeopleSearchTool extends ToolBubble< ).action(); if (!find_email.success || !find_email.data?.enrichment_id) { - continue; // Skip this batch on error + warnings.push( + `Email enrichment batch ${batchStart / batchSize + 1} failed to start: ${find_email.error || 'unknown error'}. People in this batch are returned without enriched emails.` + ); + continue; } const enrichmentId = find_email.data.enrichment_id; - // Poll until done (max 120 seconds) + // Poll until done (max 120 seconds). 5s interval keeps request count + // reasonable (~24 polls max) vs. the old 3s cadence — enrichment takes + // 30–120s typically, so the extra 2s granularity doesn't matter. const maxWaitMs = 120000; - const pollIntervalMs = 3000; + const pollIntervalMs = 5000; const startTime = Date.now(); + let finished = false; + let finalStatus: string | undefined; while (Date.now() - startTime < maxWaitMs) { await this.sleep(pollIntervalMs); @@ -1451,12 +2057,14 @@ export class PeopleSearchTool extends ToolBubble< } const status = poll_email.data?.status; + finalStatus = status; if ( status === 'FINISHED' || status === 'CANCELED' || status === 'CREDITS_INSUFFICIENT' ) { + finished = true; // Extract results const results = poll_email.data?.results as | Array<{ @@ -1527,13 +2135,29 @@ export class PeopleSearchTool extends ToolBubble< break; // Done with this batch } } - } catch { - // Continue with next batch on error + + if (!finished) { + warnings.push( + `Email enrichment batch ${batchStart / batchSize + 1} did not complete within ${maxWaitMs / 1000}s (last status: ${finalStatus ?? 'unknown'}). People in this batch are returned without enriched emails.` + ); + } else if (finalStatus === 'CANCELED') { + warnings.push( + `Email enrichment batch ${batchStart / batchSize + 1} was canceled. People in this batch are returned without enriched emails.` + ); + } else if (finalStatus === 'CREDITS_INSUFFICIENT') { + warnings.push( + `Email enrichment batch ${batchStart / batchSize + 1} stopped due to insufficient FullEnrich credits. Partial results only.` + ); + } + } catch (err) { + warnings.push( + `Email enrichment batch ${batchStart / batchSize + 1} threw: ${err instanceof Error ? err.message : String(err)}. People in this batch are returned without enriched emails.` + ); continue; } } - return enrichedPeople; + return { people: enrichedPeople, warnings }; } /** diff --git a/packages/bubble-runtime/package.json b/packages/bubble-runtime/package.json index 19b5a7a6..dbcbfdb7 100644 --- a/packages/bubble-runtime/package.json +++ b/packages/bubble-runtime/package.json @@ -1,6 +1,6 @@ { "name": "@bubblelab/bubble-runtime", - "version": "0.1.300", + "version": "0.1.301", "type": "module", "license": "Apache-2.0", "main": "./dist/index.js", diff --git a/packages/bubble-scope-manager/package.json b/packages/bubble-scope-manager/package.json index 5818298b..0474ad89 100644 --- a/packages/bubble-scope-manager/package.json +++ b/packages/bubble-scope-manager/package.json @@ -1,6 +1,6 @@ { "name": "@bubblelab/ts-scope-manager", - "version": "0.1.300", + "version": "0.1.301", "private": false, "license": "MIT", "type": "commonjs", diff --git a/packages/bubble-shared-schemas/package.json b/packages/bubble-shared-schemas/package.json index be5b3de7..e46203e5 100644 --- a/packages/bubble-shared-schemas/package.json +++ b/packages/bubble-shared-schemas/package.json @@ -1,6 +1,6 @@ { "name": "@bubblelab/shared-schemas", - "version": "0.1.300", + "version": "0.1.301", "type": "module", "license": "Apache-2.0", "main": "./dist/index.js", diff --git a/packages/create-bubblelab-app/package.json b/packages/create-bubblelab-app/package.json index 98edbbd0..d376e652 100644 --- a/packages/create-bubblelab-app/package.json +++ b/packages/create-bubblelab-app/package.json @@ -1,6 +1,6 @@ { "name": "create-bubblelab-app", - "version": "0.1.300", + "version": "0.1.301", "type": "module", "license": "Apache-2.0", "description": "Create BubbleLab AI agent applications with one command", diff --git a/packages/create-bubblelab-app/templates/basic/package.json b/packages/create-bubblelab-app/templates/basic/package.json index cd680fa3..a63d9bc1 100644 --- a/packages/create-bubblelab-app/templates/basic/package.json +++ b/packages/create-bubblelab-app/templates/basic/package.json @@ -11,9 +11,9 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "@bubblelab/bubble-core": "^0.1.300", - "@bubblelab/bubble-runtime": "^0.1.300", - "@bubblelab/shared-schemas": "^0.1.300", + "@bubblelab/bubble-core": "^0.1.301", + "@bubblelab/bubble-runtime": "^0.1.301", + "@bubblelab/shared-schemas": "^0.1.301", "dotenv": "^16.4.5" }, "devDependencies": { diff --git a/packages/create-bubblelab-app/templates/reddit-scraper/package.json b/packages/create-bubblelab-app/templates/reddit-scraper/package.json index a78cdee3..f977f984 100644 --- a/packages/create-bubblelab-app/templates/reddit-scraper/package.json +++ b/packages/create-bubblelab-app/templates/reddit-scraper/package.json @@ -11,8 +11,8 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "@bubblelab/bubble-core": "^0.1.300", - "@bubblelab/bubble-runtime": "^0.1.300", + "@bubblelab/bubble-core": "^0.1.301", + "@bubblelab/bubble-runtime": "^0.1.301", "dotenv": "^16.4.5" }, "devDependencies": {