-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprompt-cache.js
More file actions
91 lines (80 loc) · 2.85 KB
/
prompt-cache.js
File metadata and controls
91 lines (80 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env node
/**
* Prompt Cache - Optimize API calls with intelligent caching
* Achieve 90% savings on repeated system prompts and context
*/
/**
 * Decides which parts of a chat request should carry a `cache_control`
 * marker and estimates how much of a request is cacheable.
 *
 * Size thresholds (`minSize`) are compared against CHARACTER counts: the
 * length of the system prompt string, or of a message's JSON serialization.
 * Token figures reported by the analysis methods are a rough estimate of
 * four characters per token.
 *
 * NOTE(review): the provider's minimum cacheable length is documented in
 * tokens; confirm whether `minSize` should be interpreted as tokens instead.
 */
class PromptCache {
  /**
   * @param {object} [options]
   * @param {number} [options.minSize=1024] Content must be strictly longer
   *   than this many characters to be considered for caching.
   * @param {number} [options.maxBreakpoints=4] Upper bound on the number of
   *   `cache_control` markers placed in a single optimized request.
   */
  constructor(options = {}) {
    // `options || {}` keeps `new PromptCache(null)` working as it did before.
    const { minSize = 1024, maxBreakpoints = 4 } = options || {};
    this.minSize = minSize;
    this.maxBreakpoints = maxBreakpoints;
  }

  /**
   * Whether the system prompt is long enough to be worth caching.
   *
   * @param {?string} systemPrompt
   * @returns {boolean}
   */
  isCacheableSystemPrompt(systemPrompt) {
    return Boolean(systemPrompt) && systemPrompt.length > this.minSize;
  }

  /**
   * Whether a message may carry a cache marker: it must not be one of the
   * last two messages and its JSON form must exceed `minSize` characters.
   *
   * @param {object} message
   * @param {number} index Position of `message` in the conversation.
   * @param {number} messageCount Total number of messages.
   * @returns {boolean}
   */
  isCacheableMessage(message, index, messageCount) {
    // Position is checked first so the last two messages are never serialized.
    return index < messageCount - 2 && JSON.stringify(message).length > this.minSize;
  }

  /**
   * Build a request skeleton with `cache_control` markers applied.
   *
   * At most `maxBreakpoints` markers are emitted in total. The system prompt
   * (when long enough) takes one; the remaining budget goes to the LATEST
   * eligible messages, because a later breakpoint also covers the content
   * that precedes it.
   *
   * NOTE(review): the marker is attached to the message object itself, as in
   * the original implementation; confirm the target API does not require it
   * on an individual content block instead.
   *
   * @param {object[]} messages Conversation messages; never mutated.
   * @param {?string} [systemPrompt=null]
   * @returns {{model: null, messages: object[], system: (Array|string|null)}}
   */
  optimizeForCaching(messages, systemPrompt = null) {
    const cacheSystem = this.maxBreakpoints > 0 && this.isCacheableSystemPrompt(systemPrompt);
    const messageBudget = Math.max(0, this.maxBreakpoints - (cacheSystem ? 1 : 0));

    const eligible = [];
    messages.forEach((message, index) => {
      if (this.isCacheableMessage(message, index, messages.length)) {
        eligible.push(index);
      }
    });
    const marked = new Set(messageBudget > 0 ? eligible.slice(-messageBudget) : []);

    return {
      model: null,
      messages: messages.map((message, index) => (
        marked.has(index) ? { ...message, cache_control: { type: 'ephemeral' } } : message
      )),
      system: cacheSystem
        ? [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }]
        : systemPrompt,
    };
  }

  /**
   * Estimate how much of a request is cacheable.
   *
   * Eligibility uses the same character-based rules as `optimizeForCaching`,
   * so every message that method could mark is reported here. The breakpoint
   * cap is intentionally not applied: this reports potential, not placement.
   *
   * @param {object[]} messages
   * @param {?string} [systemPrompt=null]
   * @returns {{totalTokens: number, cacheableTokens: number,
   *   potentialSavings: string, opportunities: object[]}}
   */
  analyzeCachingPotential(messages, systemPrompt = null) {
    let cacheableTokens = 0;
    let totalTokens = 0;
    const opportunities = [];

    if (systemPrompt && typeof systemPrompt.length === 'number') {
      const tokens = Math.ceil(systemPrompt.length / 4);
      // The prompt is part of the request whether or not it is cacheable.
      totalTokens += tokens;
      if (this.isCacheableSystemPrompt(systemPrompt)) {
        cacheableTokens += tokens;
        opportunities.push({ type: 'system_prompt', tokens, savings: '90%' });
      }
    }

    messages.forEach((message, index) => {
      const serializedLength = JSON.stringify(message).length;
      const tokens = Math.ceil(serializedLength / 4);
      totalTokens += tokens;
      // Mirrors isCacheableMessage without serializing the message twice.
      if (index < messages.length - 2 && serializedLength > this.minSize) {
        cacheableTokens += tokens;
        opportunities.push({ type: 'message', index, tokens, savings: '90%' });
      }
    });

    return {
      totalTokens,
      cacheableTokens,
      potentialSavings: totalTokens > 0
        ? `${Math.round((cacheableTokens / totalTokens) * 90)}%`
        : '0%',
      opportunities,
    };
  }

  /**
   * Estimate the cost profile of caching a system prompt.
   *
   * @param {number} systemPromptSize Prompt length in characters.
   * @returns {{writeTokens: number, readTokens: number, breakEvenCalls: number,
   *   savingsPerRead: string, recommendation: string}} Token-equivalent cost
   *   of the cache write (1.25x) and of each cache read (0.1x), plus a fixed
   *   recommendation.
   */
  getBreakEvenAnalysis(systemPromptSize) {
    const tokens = Math.ceil(systemPromptSize / 4);
    const writeTokens = tokens * 1.25; // 1.25x for ephemeral
    const readTokens = tokens * 0.1;
    return {
      writeTokens: Math.round(writeTokens),
      readTokens: Math.round(readTokens),
      breakEvenCalls: 2,
      savingsPerRead: '90%',
      recommendation: 'Cache system prompts used 2+ times',
    };
  }
}
module.exports = PromptCache;

// CLI usage: when this file is executed directly (rather than required),
// print a caching analysis and a break-even estimate for a sample conversation.
if (require.main === module) {
  const demoSystemPrompt = 'You are an expert engineer. Analyze code for bugs and improvements.';
  const demoConversation = [
    ['user', `Review this code: ${'x'.repeat(2000)}`],
    ['assistant', 'I see several issues...'],
    ['user', 'Fix them'],
  ].map(([role, content]) => ({ role, content }));

  const promptCache = new PromptCache();

  const analysisReport = promptCache.analyzeCachingPotential(demoConversation, demoSystemPrompt);
  console.log('Caching Analysis:', JSON.stringify(analysisReport, null, 2));

  const breakEvenReport = promptCache.getBreakEvenAnalysis(demoSystemPrompt.length);
  console.log('Break-even:', breakEvenReport);
}