Skip to content

Commit a41c75c

Browse files
indexzero and claude authored
fix(cache): use readable origin keys instead of truncated base64 (#17)
The previous scheme encoded origins as the first 8 characters of base64(full URL), which caused all HTTPS registries to collide on "aHR0cHM6" (those 8 characters encode only the "https:" prefix).

New scheme produces readable, collision-resistant keys:
- Format: [http~]hostname[~port][~path~segments]
- Segments truncated: <=5 chars kept whole, else first 3 + last 2
- HTTPS implicit, only http~ prefix for HTTP origins
- npm alias preserved for registry.npmjs.com, registry.npmjs.org, and replicate.npmjs.com

Examples:
- https://registry.npmjs.com -> npm
- https://packages.example.com/js -> paces.exale.com~js
- http://localhost:4873 -> http~locst~4873
- https://host.jfrog.io/art/api/npm -> host.jfrog.io~art~api~npm

Backward compatibility:
- Old base64 keys (without . or ~) decode to <legacy:...>
- No migration needed; old cache entries become orphans

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 1e32585 commit a41c75c

2 files changed

Lines changed: 154 additions & 27 deletions

File tree

src/cache/cache-key.js

Lines changed: 82 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,23 @@
55

66
const CACHE_KEY_VERSION = 'v1';
77

8+
// Registry hostnames that encodeOrigin() collapses to the short `npm` origin alias.
const NPM_HOSTS = new Set([
9+
'registry.npmjs.com',
10+
'registry.npmjs.org',
11+
'replicate.npmjs.com'
12+
]);
13+
14+
/**
 * Truncate a segment for compact cache keys.
 *
 * Segments of 5 characters or fewer are kept whole; longer segments are
 * reduced to their first 3 characters followed by their last 2.
 * Characters are counted as Unicode code points, so a surrogate pair is
 * never split in half (for ASCII/BMP input this is identical to counting
 * UTF-16 code units).
 *
 * @param {string} segment - Segment to truncate
 * @returns {string} Truncated segment (at most 5 code points)
 */
function truncateSegment(segment) {
  // Fast path: <= 5 UTF-16 code units implies <= 5 code points.
  if (segment.length <= 5) return segment;

  // Array.from iterates by code point, keeping surrogate pairs together.
  const chars = Array.from(segment);
  if (chars.length <= 5) return segment;

  return chars.slice(0, 3).join('') + chars.slice(-2).join('');
}
24+
825
/**
926
* Create a cache key for a partition
1027
* @param {string} startKey - Start key of the partition
@@ -16,7 +33,7 @@ export function createPartitionKey(startKey, endKey, origin = 'https://replicate
1633
const originKey = encodeOrigin(origin);
1734
const startHex = encodeKeySegment(startKey);
1835
const endHex = encodeKeySegment(endKey);
19-
36+
2037
return `${CACHE_KEY_VERSION}:partition:${originKey}:${startHex}:${endHex}`;
2138
}
2239

@@ -29,30 +46,53 @@ export function createPartitionKey(startKey, endKey, origin = 'https://replicate
2946
export function createPackumentKey(packageName, origin = 'https://registry.npmjs.com') {
  // Key layout: <version>:packument:<encoded origin>:<encoded package name>
  const prefix = `${CACHE_KEY_VERSION}:packument`;
  const originPart = encodeOrigin(origin);
  const namePart = encodeKeySegment(packageName);

  return `${prefix}:${originPart}:${namePart}`;
}
3552

3653
/**
 * Encode an origin URL to a short, readable key.
 *
 * Format: [http~]hostname[~port][~path~segments]
 * - HTTPS is implicit (only HTTP origins get the `http~` prefix)
 * - Hostname labels keep the `.` separator; port and path use `~`
 * - Each hostname label and path segment is passed through truncateSegment()
 * - A host listed in NPM_HOSTS with no explicit port and no path becomes `npm`
 *   (the protocol is not consulted for this alias)
 * - Any `:` in a hostname label or path segment (IPv6 literals, paths such as
 *   `/a:b`) is replaced with `_`, because `:` is the field delimiter of the
 *   surrounding cache key
 * - Query string, fragment and credentials are ignored
 *
 * @param {string} origin - Full origin URL, or a bare hostname (assumed HTTPS)
 * @returns {string} Short origin key
 * @throws {TypeError} If the origin cannot be parsed by the URL constructor
 */
function encodeOrigin(origin) {
  // Handle bare hostnames (no protocol) without reassigning the parameter
  const href = origin.includes('://') ? origin : `https://${origin}`;

  const url = new URL(href);
  const hostname = url.hostname.toLowerCase();
  const pathSegments = url.pathname.split('/').filter(Boolean);

  // URL#port is already '' when the port is absent or is the scheme default
  // (443 for https, 80 for http), so no explicit default-port comparison is needed.
  const port = url.port;

  // Check npm alias
  if (NPM_HOSTS.has(hostname) && port === '' && pathSegments.length === 0) {
    return 'npm';
  }

  // Keep ':' out of the key, then truncate. The explicit arrow also stops
  // Array#map from passing its index/array arguments to truncateSegment.
  const toKeyPart = (segment) => truncateSegment(segment.replace(/:/g, '_'));

  const truncatedHost = hostname.split('.').map(toKeyPart).join('.');

  const parts = [];
  if (url.protocol === 'http:') parts.push('http');
  parts.push(truncatedHost);
  if (port !== '') parts.push(port);
  parts.push(...pathSegments.map(toKeyPart));

  return parts.join('~');
}
5797

5898
/**
@@ -103,16 +143,35 @@ export function decodeCacheKey(cacheKey) {
103143
}
104144

105145
/**
 * Decode origin from short key (best-effort reconstruction).
 *
 * Recognised key shapes:
 * - `npm` -> https://registry.npmjs.com
 * - Readable format `[http~]hostname[~port][~path~segments]`. Truncated
 *   segments cannot be expanded, so the result may not match the original URL.
 *   A key with no `.` or `~` that is at most 5 characters long is treated as a
 *   single truncated hostname label (e.g. `locst`), since old base64 keys were
 *   the first 8 base64url characters of the origin.
 * - Anything else is reported as `<legacy:KEY>` (old base64 format).
 *
 * Heuristic: the first `~` part after the hostname is treated as a port when
 * it is all digits and <= 65535, because that is where a port is placed in the
 * readable format. An all-numeric first path segment is indistinguishable
 * from a port and will therefore be rendered as one.
 *
 * @param {string} originKey - Short origin key
 * @returns {string} Reconstructed origin URL (may not match original exactly)
 */
function decodeOrigin(originKey) {
  if (originKey === 'npm') return 'https://registry.npmjs.com';

  const hasSeparator = originKey.includes('.') || originKey.includes('~');
  const isShortLabel = originKey.length > 0 && originKey.length <= 5;

  // Old base64 format - cannot decode meaningfully
  if (!hasSeparator && !isShortLabel) {
    return `<legacy:${originKey}>`;
  }

  const isHttp = originKey.startsWith('http~');
  const protocol = isHttp ? 'http://' : 'https://';
  const remainder = isHttp ? originKey.slice(5) : originKey;

  // First part is the hostname; the rest is an optional port plus path segments
  const [hostname, ...rest] = remainder.split('~');

  let authority = hostname;
  if (rest.length > 0 && /^\d{1,5}$/.test(rest[0]) && Number(rest[0]) <= 65535) {
    authority = `${hostname}:${rest.shift()}`;
  }

  if (rest.length === 0) {
    return `${protocol}${authority}`;
  }
  return `${protocol}${authority}/${rest.join('/')}`;
}
117176

118177
/**

src/cache/test/cache-key.test.js

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,22 @@ describe('Cache Key Utilities', () => {
101101
assert.equal(decoded.origin, 'https://registry.npmjs.com');
102102
});
103103

104-
it('should handle custom origins as lossy', () => {
105-
const key = createPartitionKey('a', 'b', 'https://very-long-custom-registry.example.com');
104+
it('should handle custom origins with readable format', () => {
105+
const key = createPartitionKey('a', 'b', 'https://packages.example.com/javascript');
106106
const decoded = decodeCacheKey(key);
107-
108-
assert.ok(decoded.origin.startsWith('<custom:'));
107+
108+
// Decoded origin is best-effort reconstruction from truncated segments
109+
assert.ok(decoded.origin.includes('paces.exale.com'));
110+
assert.ok(decoded.origin.includes('javpt'));
111+
});
112+
113+
it('should handle legacy base64 origin keys', () => {
114+
// Simulate decoding an old-format key by calling decodeCacheKey directly
115+
// Old keys had base64 origins like 'aHR0cHM6' that can't contain '.' or '~'
116+
const legacyKey = 'v1:packument:aHR0cHM6:657870726573';
117+
const decoded = decodeCacheKey(legacyKey);
118+
119+
assert.ok(decoded.origin.startsWith('<legacy:'));
109120
});
110121

111122
it('should throw on invalid format', () => {
@@ -121,6 +132,63 @@ describe('Cache Key Utilities', () => {
121132
});
122133
});
123134

135+
describe('origin encoding scheme', () => {
136+
it('should use npm alias for npm registries', () => {
137+
const key1 = createPackumentKey('test', 'https://registry.npmjs.com');
138+
const key2 = createPackumentKey('test', 'https://registry.npmjs.org');
139+
const key3 = createPackumentKey('test', 'https://replicate.npmjs.com');
140+
141+
assert.ok(key1.includes(':npm:'));
142+
assert.ok(key2.includes(':npm:'));
143+
assert.ok(key3.includes(':npm:'));
144+
});
145+
146+
it('should truncate segments: <=5 chars kept whole, else first 3 + last 2', () => {
147+
// 'packages' (8 chars) -> 'pac' + 'es' = 'paces'
148+
// 'example' (7 chars) -> 'exa' + 'le' = 'exale'
149+
// 'com' (3 chars) -> 'com'
150+
// 'javascript' (10 chars) -> 'jav' + 'pt' = 'javpt'
151+
const key = createPackumentKey('test', 'https://packages.example.com/javascript');
152+
assert.ok(key.includes(':paces.exale.com~javpt:'));
153+
});
154+
155+
it('should handle http protocol with prefix', () => {
156+
const key = createPackumentKey('test', 'http://localhost:4873');
157+
assert.ok(key.includes(':http~locst~4873:'));
158+
});
159+
160+
it('should handle multiple path segments', () => {
161+
const key = createPackumentKey('test', 'https://mycompany.jfrog.io/artifactory/api/npm/npm-local');
162+
// Each hostname label and path segment is truncated independently (first 3 + last 2):
163+
// 'mycompany' is 9 chars -> 'myc' + 'ny' = 'mycny'
164+
// 'jfrog' is 5 chars -> 'jfrog'
165+
// 'artifactory' is 11 chars -> 'art' + 'ry' = 'artry'
166+
// 'api' is 3 chars -> 'api'
167+
// 'npm' is 3 chars -> 'npm'
168+
// 'npm-local' is 9 chars -> 'npm' + 'al' = 'npmal'
169+
assert.ok(key.includes('mycny.jfrog.io~artry~api~npm~npmal'));
170+
});
171+
172+
it('should handle bare hostnames by assuming https', () => {
173+
const key = createPackumentKey('test', 'my-registry.com');
174+
assert.ok(key.includes(':my-ry.com:'));
175+
});
176+
177+
it('should omit default ports', () => {
178+
const key1 = createPackumentKey('test', 'https://example.com:443');
179+
const key2 = createPackumentKey('test', 'http://example.com:80');
180+
181+
// Should not contain port numbers for default ports
182+
assert.ok(!key1.includes('443'));
183+
assert.ok(!key2.includes('80'));
184+
});
185+
186+
it('should preserve non-default ports', () => {
187+
const key = createPackumentKey('test', 'https://example.com:8443');
188+
assert.ok(key.includes('~8443'));
189+
});
190+
});
191+
124192
describe('hex encoding edge cases', () => {
125193
it('should handle unicode correctly', () => {
126194
const key = createPackumentKey('😀emoji');

0 commit comments

Comments
 (0)