@@ -14,11 +14,15 @@ import {
1414 type SchedulerSlotSnapshot ,
1515 type WarmupInferenceRequest ,
1616} from "@razroo/ray-core" ;
17+ import { resolvePromptTemplateRequest } from "@ray/prompts" ;
1718import {
18- renderPromptTemplate ,
19- requirePromptTemplate ,
20- resolvePromptTemplateRequest ,
21- } from "@ray/prompts" ;
19+ PromptScaffoldCache ,
20+ buildPromptScaffoldCacheKey ,
21+ createPromptScaffold ,
22+ renderPromptFromScaffold ,
23+ renderPromptScaffoldTemplate ,
24+ type PromptScaffold ,
25+ } from "@ray/prompt-cache" ;
2226import {
2327 BACKEND_RESPONSE_BODY_LIMIT_BYTES ,
2428 MAX_ADAPTER_TIMEOUT_MS ,
@@ -47,6 +51,7 @@ const MAX_SLOT_SNAPSHOTS = 64;
4751const MAX_FAMILY_PREFERRED_SLOT_KEYS = 512 ;
4852const MAX_SLOT_FAMILY_ASSIGNMENTS = 64 ;
4953const MAX_PROMPT_SCAFFOLD_CACHE_ENTRIES = 4_096 ;
54+ const PROMPT_SCAFFOLD_CACHE_TTL_MS = 86_400_000 ;
5055const MAX_LLAMA_CPP_DIAGNOSTIC_NUMBER = 1_000_000_000 ;
5156const MAX_LAUNCH_PROFILE_PATH_CHARS = 4_096 ;
5257const MAX_LAUNCH_PROFILE_HOST_CHARS = 256 ;
@@ -188,11 +193,6 @@ interface LlamaCppSlotResponse {
188193 } ;
189194}
190195
191- interface PromptScaffold {
192- segments : string [ ] ;
193- variableOrder : string [ ] ;
194- }
195-
196196interface PreparedPromptState {
197197 prompt : string ;
198198}
@@ -726,7 +726,7 @@ export class LlamaCppProvider implements ModelProvider {
726726 } as const ;
727727 private readonly preparationCache = new Map < string , ProviderRequestPreparation > ( ) ;
728728 private readonly promptTokenCache = new Map < string , number > ( ) ;
729- private readonly promptScaffolds = new Map < string , PromptScaffold > ( ) ;
729+ private readonly promptScaffolds : PromptScaffoldCache ;
730730 private readonly familyPreferredSlots = new Map < string , number > ( ) ;
731731 private readonly slotFamilyAssignments = new Map < number , string > ( ) ;
732732 private readonly maxPreparationCacheEntries = 256 ;
@@ -743,6 +743,10 @@ export class LlamaCppProvider implements ModelProvider {
743743 this . modelId = model . id ;
744744 this . adapter = snapshotLlamaCppAdapter ( adapter , model . maxOutputTokens ) ;
745745 this . maxPromptScaffoldEntries = this . adapter . promptScaffoldCacheEntries ?? 128 ;
746+ this . promptScaffolds = new PromptScaffoldCache ( {
747+ maxEntries : this . maxPromptScaffoldEntries ,
748+ ttlMs : PROMPT_SCAFFOLD_CACHE_TTL_MS ,
749+ } ) ;
746750 }
747751
748752 async warm ( ) : Promise < void > {
@@ -1516,8 +1520,8 @@ export class LlamaCppProvider implements ModelProvider {
15161520 responseFormatType : "text" | "json_object" ,
15171521 signal ?: AbortSignal ,
15181522 ) : Promise < PromptScaffold > {
1519- const cacheKey = hashValue ( {
1520- model : this . adapter . modelRef ,
1523+ const cacheKey = buildPromptScaffoldCacheKey ( {
1524+ modelRef : this . adapter . modelRef ,
15211525 templateId,
15221526 responseFormatType,
15231527 } ) ;
@@ -1526,14 +1530,8 @@ export class LlamaCppProvider implements ModelProvider {
15261530 return cached ;
15271531 }
15281532
1529- const template = requirePromptTemplate ( templateId ) ;
1530- const sentinelVariables = Object . fromEntries (
1531- template . variables . map ( ( variable : string , index : number ) => [
1532- variable ,
1533- `__RAY_PROMPT_VAR_${ index } __` ,
1534- ] ) ,
1535- ) ;
1536- const rendered = renderPromptTemplate ( template . id , sentinelVariables ) ;
1533+ const scaffoldTemplate = renderPromptScaffoldTemplate ( templateId ) ;
1534+ const rendered = scaffoldTemplate . rendered ;
15371535 const prompt = await this . applyTemplate (
15381536 {
15391537 input : rendered . input ,
@@ -1551,38 +1549,14 @@ export class LlamaCppProvider implements ModelProvider {
15511549 } ,
15521550 signal ,
15531551 ) ;
1554- const segments : string [ ] = [ ] ;
1555- let cursor = 0 ;
1556-
1557- for ( const variable of template . variables ) {
1558- const sentinel = sentinelVariables [ variable ] ;
1559- if ( ! sentinel ) {
1560- throw new RayError ( `Prompt scaffold marker "${ variable } " is missing` , {
1561- code : "provider_invalid_response" ,
1562- status : 500 ,
1563- } ) ;
1564- }
1565- const position = prompt . indexOf ( sentinel , cursor ) ;
1566-
1567- if ( position === - 1 ) {
1568- throw new RayError (
1569- `Prompt scaffold marker "${ variable } " was not found in rendered prompt` ,
1570- {
1571- code : "provider_invalid_response" ,
1572- status : 500 ,
1573- } ,
1574- ) ;
1575- }
1576-
1577- segments . push ( prompt . slice ( cursor , position ) ) ;
1578- cursor = position + sentinel . length ;
1579- }
1580-
1581- segments . push ( prompt . slice ( cursor ) ) ;
1582- const scaffold : PromptScaffold = {
1583- segments,
1584- variableOrder : [ ...template . variables ] ,
1585- } ;
1552+ const scaffold = createPromptScaffold ( {
1553+ prompt,
1554+ variableOrder : scaffoldTemplate . variableOrder ,
1555+ sentinelVariables : scaffoldTemplate . sentinelVariables ,
1556+ templateId : rendered . id ,
1557+ templateVersion : rendered . version ,
1558+ family : rendered . family ,
1559+ } ) ;
15861560 this . setPromptScaffold ( cacheKey , scaffold ) ;
15871561 return scaffold ;
15881562 }
@@ -1591,26 +1565,7 @@ export class LlamaCppProvider implements ModelProvider {
15911565 scaffold : PromptScaffold ,
15921566 templateVariables : Record < string , string > ,
15931567 ) : string {
1594- let prompt = scaffold . segments [ 0 ] ?? "" ;
1595-
1596- for ( let index = 0 ; index < scaffold . variableOrder . length ; index += 1 ) {
1597- const variableName = scaffold . variableOrder [ index ] ;
1598- if ( ! variableName ) {
1599- continue ;
1600- }
1601- const value = templateVariables [ variableName ] ;
1602- if ( value === undefined ) {
1603- throw new RayError ( `Missing template variable "${ variableName } " for prompt scaffold` , {
1604- code : "invalid_request" ,
1605- status : 400 ,
1606- } ) ;
1607- }
1608-
1609- prompt += value ;
1610- prompt += scaffold . segments [ index + 1 ] ?? "" ;
1611- }
1612-
1613- return prompt ;
1568+ return renderPromptFromScaffold ( scaffold , templateVariables ) ;
16141569 }
16151570
16161571 private async getSlotSnapshots ( signal ?: AbortSignal ) : Promise < SchedulerSlotSnapshot [ ] > {
@@ -1809,19 +1764,7 @@ export class LlamaCppProvider implements ModelProvider {
18091764 }
18101765
18111766 private setPromptScaffold ( key : string , scaffold : PromptScaffold ) : void {
1812- if ( this . promptScaffolds . has ( key ) ) {
1813- this . promptScaffolds . delete ( key ) ;
1814- }
1815-
18161767 this . promptScaffolds . set ( key , scaffold ) ;
1817-
1818- while ( this . promptScaffolds . size > this . maxPromptScaffoldEntries ) {
1819- const oldestKey = this . promptScaffolds . keys ( ) . next ( ) . value ;
1820- if ( ! oldestKey ) {
1821- break ;
1822- }
1823- this . promptScaffolds . delete ( oldestKey ) ;
1824- }
18251768 }
18261769
18271770 private async fetchHealthPayload ( ) : Promise < {
0 commit comments