Skip to content

Commit e9e3d42

Browse files
committed
refactor: split parseOpportunity to helper functions and add logging
1 parent 0e75abb commit e9e3d42

2 files changed

Lines changed: 385 additions & 228 deletions

File tree

src/common/opportunity/parse.ts

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
import z from 'zod';
2+
import { randomUUID } from 'node:crypto';
3+
import { Storage } from '@google-cloud/storage';
4+
import { FastifyBaseLogger } from 'fastify';
5+
import { DataSource, DeepPartial, IsNull, Not } from 'typeorm';
6+
import { fileTypeFromBuffer } from 'file-type';
7+
import {
8+
LocationType,
9+
OpportunityContent,
10+
OpportunityState,
11+
BrokkrParseRequest,
12+
} from '@dailydotdev/schema';
13+
14+
import { getBufferFromStream } from '../utils';
15+
import { ValidationError } from 'apollo-server-errors';
16+
import { garmScraperService } from '../scraper';
17+
import {
18+
acceptedOpportunityFileTypes,
19+
opportunityMatchBatchSize,
20+
} from '../../types';
21+
import { RESUME_BUCKET_NAME } from '../../config';
22+
import { deleteFileFromBucket, uploadResumeFromBuffer } from '../googleCloud';
23+
import { getBrokkrClient } from '../brokkr';
24+
import { opportunityCreateParseSchema } from '../schema/opportunities';
25+
import { markdown } from '../markdown';
26+
import { OpportunityJob } from '../../entity/opportunities/OpportunityJob';
27+
import { OpportunityLocation } from '../../entity/opportunities/OpportunityLocation';
28+
import { OpportunityKeyword } from '../../entity/OpportunityKeyword';
29+
import { OpportunityUserRecruiter } from '../../entity/opportunities/user/OpportunityUserRecruiter';
30+
import { findDatasetLocation } from '../../entity/dataset/utils';
31+
import { addOpportunityDefaultQuestionFeedback } from './question';
32+
import type { Opportunity } from '../../entity/opportunities/Opportunity';
33+
34+
/**
 * Shape of an uploaded file as consumed by this module: the file name
 * reported with the upload and a factory that opens a stream over its bytes.
 */
interface FileUpload {
  /** File name of the upload; the extension is derived from it. */
  filename: string;
  /** Opens a readable stream over the uploaded file's contents. */
  createReadStream: () => NodeJS.ReadableStream;
}
38+
39+
/**
 * Input for opportunity parsing. Both members are optional at the type
 * level; `getOpportunityFileBuffer` uses `url` when it is set and otherwise
 * requires `file`.
 */
export interface ParseOpportunityPayload {
  /** Address of a job posting to fetch and convert to PDF. */
  url?: string;
  /** Pending file upload carrying the job description document. */
  file?: Promise<FileUpload>;
}
43+
44+
/**
 * Result of `getOpportunityFileBuffer`: the document bytes together with the
 * extension they were received under.
 */
export interface OpportunityFileBufferResult {
  /** Full contents of the document. */
  buffer: Buffer;
  /** Lower-cased file extension without the dot, e.g. `pdf`. */
  extension: string;
}
48+
49+
/**
 * Result of `validateOpportunityFileType`.
 */
export interface OpportunityFileValidationResult {
  /** MIME type detected from the file contents. */
  mime: string;
}
52+
53+
/**
 * Output of `parseOpportunityWithBrokkr`: the schema-validated opportunity
 * fields plus the opportunity content with markdown rendered to HTML.
 */
export interface ParsedOpportunityResult {
  /** Opportunity data as accepted by `opportunityCreateParseSchema`. */
  opportunity: z.infer<typeof opportunityCreateParseSchema>;
  /** Content sections carrying both the markdown source and rendered HTML. */
  content: OpportunityContent;
}
57+
58+
/**
 * Ambient dependencies and caller identity used when persisting a parsed
 * opportunity.
 */
export interface ParseOpportunityContext {
  /** Data source used to open the transaction and to look up locations. */
  con: DataSource;
  /** Authenticated user; when set, the user is attached as recruiter. */
  userId?: string;
  /** Tracking id stored as `flags.anonUserId` when there is no `userId`. */
  trackingId?: string;
  /** Logger instance. */
  log: FastifyBaseLogger;
}
64+
65+
/**
 * Asks the scraper service to render the page at `url` as a PDF and returns
 * the bytes of the response body.
 *
 * The HTTP request is issued inside `garmScraperService.execute`; the request
 * callback throws when the scraper answers with a non-OK status.
 *
 * @param url - Address of the job posting to convert
 * @returns The response body as a Buffer
 */
async function fetchOpportunityFromUrl(url: string): Promise<Buffer> {
  const requestPdf = async () => {
    const endpoint = `${process.env.SCRAPER_URL}/pdf`;
    const scraperResponse = await fetch(endpoint, {
      method: 'POST',
      body: JSON.stringify({ url }),
      headers: { 'content-type': 'application/json' },
    });

    if (scraperResponse.ok) {
      return scraperResponse;
    }

    throw new Error('Failed to fetch job from URL');
  };

  const pdfResponse = await garmScraperService.execute(requestPdf);
  const pdfBytes = await pdfResponse.arrayBuffer();

  return Buffer.from(pdfBytes);
}
85+
86+
/**
 * Resolves the bytes of an opportunity document from either a URL or an
 * uploaded file.
 *
 * When `payload.url` is set it takes precedence: the page is fetched through
 * `fetchOpportunityFromUrl` and reported with the `pdf` extension. Otherwise
 * the upload is awaited and its stream is read into a buffer.
 *
 * The extension is the lower-cased text after the last `.` of the uploaded
 * file name. A name without any dot (e.g. `resume`) or ending in a dot has no
 * extension and falls back to `pdf`, instead of treating the whole name as
 * the extension.
 *
 * @param payload - The parsed opportunity payload containing either url or file
 * @returns The file buffer and its extension
 * @throws ValidationError if neither `url` nor `file` is provided
 */
export async function getOpportunityFileBuffer(
  payload: ParseOpportunityPayload,
): Promise<OpportunityFileBufferResult> {
  if (payload.url) {
    const buffer = await fetchOpportunityFromUrl(payload.url);
    return { buffer, extension: 'pdf' };
  }

  if (!payload.file) {
    throw new ValidationError('Either url or file must be provided');
  }

  const fileUpload = await payload.file;

  // Only text after an actual dot counts as an extension; `split('.').pop()`
  // would return the entire name when there is no dot at all.
  const filename = fileUpload.filename ?? '';
  const dotIndex = filename.lastIndexOf('.');
  const parsedExtension =
    dotIndex === -1 ? '' : filename.slice(dotIndex + 1).toLowerCase();
  const extension = parsedExtension || 'pdf';

  const buffer = await getBufferFromStream(fileUpload.createReadStream());

  return { buffer, extension };
}
111+
112+
/**
 * Checks an opportunity file against the accepted file types.
 *
 * The extension must match an entry of `acceptedOpportunityFileTypes`, and
 * the MIME type detected from the buffer contents must equal the MIME type
 * of that entry.
 *
 * @param buffer - The file contents to inspect
 * @param extension - The file extension to look up
 * @returns The detected MIME type
 * @throws ValidationError if the extension is not accepted or the detected
 *   MIME type does not match the accepted one
 */
export async function validateOpportunityFileType(
  buffer: Buffer,
  extension: string,
): Promise<OpportunityFileValidationResult> {
  const acceptedType = acceptedOpportunityFileTypes.find(
    ({ ext }) => ext === extension,
  );

  if (!acceptedType) {
    throw new ValidationError('File extension not supported');
  }

  const detectedType = await fileTypeFromBuffer(buffer);

  if (detectedType?.mime === acceptedType.mime) {
    return { mime: detectedType.mime };
  }

  throw new ValidationError('File type not supported');
}
140+
141+
/**
 * Builds an `OpportunityContent` from the parsed content sections.
 *
 * Every section whose `content` is a string is kept, paired with the HTML
 * produced by `markdown.render`; sections without string content are dropped.
 */
function renderOpportunityMarkdownContent(
  content: Record<string, { content?: string }> | undefined,
): OpportunityContent {
  const sections: Record<string, { content: string; html: string }> = {};

  for (const [name, section] of Object.entries(content ?? {})) {
    const source = section?.content;

    if (typeof source === 'string') {
      sections[name] = { content: source, html: markdown.render(source) };
    }
  }

  return new OpportunityContent(sections);
}
162+
163+
/**
 * Parses an opportunity file using the Brokkr service
 *
 * Handles:
 * - Uploading the file to the resume bucket under a random blob name
 * - Calling Brokkr to parse the opportunity from that blob
 * - Validating the response with `opportunityCreateParseSchema`
 * - Rendering markdown content
 * - Deleting the uploaded blob afterwards, whether or not parsing succeeded
 *
 * Cleanup is best-effort: a failure while deleting the blob is logged and
 * never replaces the parsed result or the error raised by the main flow.
 *
 * @param buffer - The file buffer to parse
 * @param mime - The MIME type of the file, used as upload content type
 * @param log - Logger instance for the Brokkr response and cleanup failures
 * @returns The parsed opportunity data with rendered content
 */
export async function parseOpportunityWithBrokkr(
  buffer: Buffer,
  mime: string,
  log: FastifyBaseLogger,
): Promise<ParsedOpportunityResult> {
  const filename = `job-opportunity-${randomUUID()}.pdf`;

  try {
    await uploadResumeFromBuffer(filename, buffer, {
      contentType: mime,
    });

    const brokkrClient = getBrokkrClient();

    const result = await brokkrClient.garmr.execute(() => {
      return brokkrClient.instance.parseOpportunity(
        new BrokkrParseRequest({
          bucketName: RESUME_BUCKET_NAME,
          blobName: filename,
        }),
      );
    });

    log.info(result, 'brokkrParseOpportunityResponse');

    const parsedOpportunity = await opportunityCreateParseSchema.parseAsync(
      result.opportunity,
    );

    const content = renderOpportunityMarkdownContent(parsedOpportunity.content);

    return {
      opportunity: parsedOpportunity,
      content,
    };
  } finally {
    // An exception escaping `finally` would override the value returned (or
    // the error thrown) by the `try` block, so cleanup failures are caught
    // and logged instead of propagated.
    try {
      const storage = new Storage();
      const bucket = storage.bucket(RESUME_BUCKET_NAME);
      await deleteFileFromBucket(bucket, filename);
    } catch (err) {
      log.error({ err, filename }, 'brokkrParseOpportunityCleanupFailed');
    }
  }
}
218+
219+
/**
 * Persists a parsed opportunity and its related rows in one transaction.
 *
 * Steps, in order:
 * - When `ctx.userId` is set, look up an opportunity of that user that has an
 *   organization and reuse its `organizationId`
 * - Save the opportunity as a draft with the rendered content and flags
 *   (`batchSize`, plus `anonUserId` taken from `ctx.trackingId` when there is
 *   no `ctx.userId`)
 * - Save one location row per parsed location that resolves to a dataset
 *   location
 * - Add the default feedback questions
 * - Insert the parsed keywords, if any were provided
 * - Insert the recruiter row when `ctx.userId` is set
 *
 * @param ctx - Context with database connection and user info
 * @param parsedData - The parsed opportunity data and rendered content
 * @returns The saved opportunity
 */
export async function createOpportunityFromParsedData(
  ctx: ParseOpportunityContext,
  parsedData: ParsedOpportunityResult,
): Promise<OpportunityJob> {
  const { con, userId, trackingId } = ctx;
  const { opportunity: parsedOpportunity, content } = parsedData;
  // `location` is stored through OpportunityLocation rows, not on the entity.
  const { location, ...opportunityData } = parsedOpportunity;
  const locationData = location || [];

  return con.transaction(async (entityManager) => {
    const jobRepository = entityManager.getRepository(OpportunityJob);

    const flags: Opportunity['flags'] = {
      ...(userId ? {} : { anonUserId: trackingId }),
      batchSize: opportunityMatchBatchSize,
    };

    if (userId) {
      // Any earlier opportunity of this user that already has an organization
      const opportunityWithOrganization = await jobRepository.findOne({
        select: ['id', 'organizationId'],
        where: {
          users: { userId },
          organizationId: Not(IsNull()),
        },
      });

      if (opportunityWithOrganization) {
        opportunityData.organizationId =
          opportunityWithOrganization.organizationId;
      }
    }

    const draft = jobRepository.create({
      ...opportunityData,
      state: OpportunityState.DRAFT,
      content,
      flags,
    } as DeepPartial<OpportunityJob>);
    const opportunity = await jobRepository.save(draft);

    for (const parsedLocation of locationData) {
      const datasetLocation = await findDatasetLocation(con, parsedLocation);

      if (!datasetLocation) {
        continue;
      }

      await entityManager.getRepository(OpportunityLocation).save({
        opportunityId: opportunity.id,
        locationId: datasetLocation.id,
        type: parsedLocation.type || LocationType.REMOTE,
      });
    }

    await addOpportunityDefaultQuestionFeedback({
      entityManager,
      opportunityId: opportunity.id,
    });

    if (parsedOpportunity.keywords) {
      const keywordRows = parsedOpportunity.keywords.map(({ keyword }) => ({
        opportunityId: opportunity.id,
        keyword,
      }));

      await entityManager.getRepository(OpportunityKeyword).insert(keywordRows);
    }

    if (userId) {
      const recruiterRepository = entityManager.getRepository(
        OpportunityUserRecruiter,
      );

      await recruiterRepository.insert(
        recruiterRepository.create({
          opportunityId: opportunity.id,
          userId,
        }),
      );
    }

    return opportunity;
  });
}

0 commit comments

Comments
 (0)