@@ -14,166 +14,18 @@ import type {
1414 FileStats ,
1515 JSONSchema ,
1616 JSONSchemaArray ,
17- Logprob ,
1817 OptionsOrString ,
1918 ParsePDFOptions ,
2019 PromptContext ,
2120 PromptGenerator ,
2221 PromptGeneratorOptions ,
23- RunPromptUsage ,
2422 StringLike ,
2523 WorkspaceFile ,
2624 WorkspaceGrepOptions ,
2725} from "@genaiscript/core" ;
28- import { delay , uniq , uniqBy , chunk } from "es-toolkit" ;
29- import { z } from "zod" ;
3026
/**
 * `globalThis` viewed through the `PromptContext` type, giving this module a
 * typed handle on the ambient prompt context.
 */
const globalPromptContext = globalThis as unknown as PromptContext;
3228
/**
 * Third-party helpers re-exported for convenience: `delay`, `uniq`, `uniqBy`
 * and `chunk` from es-toolkit, and `z` from zod.
 */
36- export { delay , uniq , uniqBy , z , chunk } ;
/**
 * Settings accepted by `classify`.
 *
 * Every prompt generator option except `choices` can be supplied alongside the
 * classification-specific switches declared here.
 */
export type ClassifyOptions = Omit<PromptGeneratorOptions, "choices"> & {
  /** Adds an extra "other" label for inputs that match none of the defined labels. */
  other?: boolean;
  /** Asks for a short explanatory text before the classification result. */
  explanations?: boolean;
  /** Generation context used to run the classification prompt. */
  ctx?: ChatGenerationContext;
};
58-
/**
 * Classifies input text into one of a set of predefined labels using an LLM.
 * Inspired by https://github.com/prefecthq/marvin.
 *
 * Label names are trimmed and lower-cased before use, so the returned `label`
 * is always in that normalized form. The label is read from the model answer by
 * looking for the label occurrence that ends last in the text; when two
 * occurrences end at the same position (e.g. "happy" inside "unhappy") the
 * longer label wins. If the answer contains no label at all, "other" is
 * returned when the `other` option is enabled, otherwise the first label.
 *
 * @param text - Content to classify, or a prompt generator function that writes the content into the prompt itself.
 * @param labels - Object mapping each label name to a short description of when it applies.
 * @param options - Classification options (`other`, `explanations`, `ctx`); any remaining
 *   properties are forwarded to `runPrompt` and override the defaults set here.
 * @returns The chosen label, its entropy / log probability / probability percentage when
 *   log probabilities are available, the lower-cased answer text, the per-label log
 *   probabilities and the usage reported by the run.
 * @throws Error if fewer than two labels are available (including "other").
 */
export async function classify<L extends Record<string, string>>(
  text: StringLike | PromptGenerator,
  labels: L,
  options?: ClassifyOptions,
): Promise<{
  label: keyof typeof labels | "other";
  entropy?: number;
  logprob?: number;
  probPercent?: number;
  answer: string;
  logprobs?: Record<keyof typeof labels | "other", Logprob>;
  usage?: RunPromptUsage;
}> {
  type Label = keyof typeof labels | "other";
  const { other, explanations, ...rest } = options ?? {};

  // Normalize label names; optionally append the catch-all "other" label.
  const entries = Object.entries({
    ...labels,
    ...(other
      ? {
          other: "This label is used when the text does not fit any of the available labels.",
        }
      : {}),
  }).map(([k, v]) => [k.trim().toLowerCase(), v] as const);

  if (entries.length < 2) throw new Error("classify must have at least two label (including other)");

  const choices = entries.map(([k]) => k);
  // Distinct label names, in first-seen order.
  const allChoices = [...new Set(choices)] as Label[];
  const ctx = options?.ctx ?? globalPromptContext.env.generator;

  const res = await ctx.runPrompt(
    async (_) => {
      _.$`## Expert Classifier
You are a specialized text classification system.
Your task is to carefully read and classify any input text or image into one
of the predefined labels below.
For each label, you will find a short description. Use these descriptions to guide your decision.
`.role("system");
      _.$`## Labels
You must classify the data as one of the following labels.
${entries.map(([id, descr]) => `- Label '${id}': ${descr}`).join("\n")}

## Output
${explanations ? "Provide a single short sentence justification for your choice." : ""}
Output the label as a single word on the last line (do not emit "Label").

`;
      _.fence(
        `- Label 'yes': funny
- Label 'no': not funny

DATA:
Why did the chicken cross the road? Because moo.

Output:
${explanations ? "It's a classic joke but the ending does not relate to the start of the joke." : ""}
no

`,
        { language: "example" },
      );
      if (typeof text === "function") await text(_);
      else _.def("DATA", text);
    },
    {
      model: "classify",
      choices: choices,
      label: `classify ${choices.join(", ")}`,
      logprobs: true,
      topLogprobs: Math.min(3, choices.length),
      // A bare label fits in one token; leave room for the justification otherwise.
      maxTokens: explanations ? 100 : 1,
      system: [
        "system.output_plaintext",
        "system.safety_jailbreak",
        "system.safety_harmful_content",
        "system.safety_protected_material",
      ],
      // Caller-supplied options take precedence over the defaults above.
      ...rest,
    },
  );

  // Find the last label mentioned in the answer. Comparing where each match
  // ENDS (and preferring the longer label on ties) keeps a label that is a
  // prefix or suffix of another one from shadowing it.
  const answer = res.text.toLowerCase();
  let labelIndex = -1;
  let bestEnd = -1;
  let bestLength = -1;
  choices.forEach((choice, i) => {
    const start = answer.lastIndexOf(choice);
    if (start < 0) return;
    const end = start + choice.length;
    if (end > bestEnd || (end === bestEnd && choice.length > bestLength)) {
      labelIndex = i;
      bestEnd = end;
      bestLength = choice.length;
    }
  });
  if (labelIndex < 0) {
    // No label found in the answer: use the catch-all label when it was
    // requested, otherwise fall back to the first label.
    const otherIndex = other ? choices.indexOf("other") : -1;
    labelIndex = otherIndex >= 0 ? otherIndex : 0;
  }
  const label = choices[labelIndex] as Label;

  // Pair every returned choice with its label BEFORE dropping unusable entries,
  // so a missing/NaN log probability cannot shift the remaining ones onto the
  // wrong label.
  const logprobs = res.choices
    ? (Object.fromEntries(
        res.choices
          .map((c, i) => [allChoices[i], c] as const)
          .filter(([name, c]) => name !== undefined && !isNaN(c?.logprob)),
      ) as Record<Label, Logprob>)
    : undefined;
  const logprob = logprobs?.[label];
  const usage = res.usage;

  return {
    label,
    entropy: logprob?.entropy,
    logprob: logprob?.logprob,
    probPercent: logprob?.probPercent,
    answer,
    logprobs,
    usage,
  };
}
176-
17729/**
17830 * Enhances content generation by applying iterative improvements.
17931 *
0 commit comments