Skip to content

Commit 5961dec

Browse files
indexzero and claude authored
feat(views) Relax! 🛋️ views are a useful analysis tool for datasets of unstructured metadata (#21)
Add view system for registry data queries and joins.

Introduces a view abstraction over cached packument data enabling SQL-like queries without loading everything into memory:
- View: defines origin + projection over cached data
- ViewStore: persists view definitions as JSON
- Projection: jq-like field selection with transforms
- Query: stream cached packuments through projections
- Join: O(1) joins between views using cache key construction

CLI commands:
- view define: create named views with origin and projection
- view list/show/delete: manage view definitions
- view query: stream records from a view
- view join: join two views on package name

Also exports encodeOrigin from cache module and adds config directory for view storage.

---------

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 18f8ff9 commit 5961dec

24 files changed

Lines changed: 1905 additions & 4 deletions

cli/cli/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"@_all_docs/frame": "workspace:*",
1919
"@_all_docs/packument": "workspace:*",
2020
"@_all_docs/partition": "workspace:*",
21+
"@_all_docs/view": "workspace:*",
2122
"@_all_docs/worker": "workspace:*",
2223
"@vltpkg/error-cause": "0.0.0-9",
2324
"debug": "^4.4.0",

cli/cli/src/cmd/view/define.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import { View, ViewStore } from '@_all_docs/view';
2+
import { encodeOrigin } from '@_all_docs/cache';
3+
4+
/**
 * Help text for `_all_docs view define`, printed when `--help` is passed.
 *
 * Kept as a single template literal so the column alignment of the option
 * tables is preserved exactly as written. `--force` is listed because the
 * command refuses to overwrite an existing view unless it is set.
 */
export const usage = `Usage: _all_docs view define <name> [options]

Define a named view over cached registry data.

A view is a predicate (origin filter) plus a projection (field selection).
Views are stored as JSON files and can be queried or joined.

Options:
  --origin <key>     Origin key (e.g., npm, paces.exale.com~javpt)
  --registry <url>   Registry URL (converted to origin key internally)
  --type <type>      Entity type: packument (default) or partition
  --select <expr>    Field selection expression
  --force            Overwrite an existing view with the same name

Select Expression Syntax:
  Simple fields:     name, version, description
  Nested fields:     time.modified, repository.url
  With transforms:   versions|keys, dependencies|length
  With aliases:      versions|keys as version_list

Available Transforms:
  keys, values       Object to array
  length             Array/string length
  first, last        First/last element
  sort, reverse      Sort/reverse array
  unique, compact    Dedupe/remove nulls
  flatten            Flatten nested arrays

Examples:
  _all_docs view define npm-packages --origin npm
  _all_docs view define npm-versions --origin npm --select 'name, versions|keys as versions, time'
  _all_docs view define private --registry https://npm.company.com --select 'name, versions|keys'
`;
36+
37+
/**
 * Value of `cli.values.origin` when the user did not pass `--origin`.
 * The shared option parser fills this in as a default, so seeing it here
 * is treated the same as "no origin given".
 */
const DEFAULT_ORIGIN = 'https://replicate.npmjs.com';

/** Entity types the usage text documents for `--type`. */
const VIEW_TYPES = ['packument', 'partition'];

/** Names must start with a letter; letters, digits, `_` and `-` may follow. */
const VIEW_NAME_PATTERN = /^[a-zA-Z][a-zA-Z0-9_-]*$/;

/**
 * `view define <name>`: validate the arguments, build a View and persist it
 * through a ViewStore rooted at the CLI's `config` directory.
 *
 * Reads `cli._[0]` (view name) and `cli.values.{help,registry,origin,type,
 * select,force}`. Every validation or persistence failure is reported on
 * stderr and terminates the process with exit code 1.
 *
 * @param {object} cli - Parsed command line (`values`, `_`, `dir(name)`).
 * @returns {Promise<void>}
 */
export const command = async (cli) => {
  if (cli.values.help) {
    console.log(usage);
    return;
  }

  const name = cli._[0];
  if (!name) {
    console.error('Error: View name required');
    console.error('Usage: _all_docs view define <name> --origin <key> [--select <expr>]');
    process.exit(1);
  }

  if (!VIEW_NAME_PATTERN.test(name)) {
    console.error('Error: View name must start with a letter and contain only letters, numbers, underscores, and hyphens');
    process.exit(1);
  }

  // --registry wins over --origin: --origin always carries a parser default,
  // so it can only be trusted when it differs from that default.
  let origin;
  if (cli.values.registry) {
    origin = encodeOrigin(cli.values.registry);
  } else if (cli.values.origin && cli.values.origin !== DEFAULT_ORIGIN) {
    origin = cli.values.origin;
  } else {
    console.error('Error: --origin or --registry required');
    console.error('Example: _all_docs view define my-view --origin npm');
    process.exit(1);
  }

  // Reject unknown entity types up front instead of persisting a view that
  // names a type the help text does not describe.
  const type = cli.values.type || 'packument';
  if (!VIEW_TYPES.includes(type)) {
    console.error(`Error: --type must be one of: ${VIEW_TYPES.join(', ')} (got '${type}')`);
    process.exit(1);
  }

  const store = new ViewStore(cli.dir('config'));

  let view;
  try {
    view = new View({
      name,
      origin,
      registry: cli.values.registry || null,
      type,
      select: cli.values.select || null
    });

    // Overwriting an existing definition must be requested explicitly.
    if ((await store.exists(name)) && !cli.values.force) {
      console.error(`Error: View '${name}' already exists. Use --force to overwrite.`);
      process.exit(1);
    }

    await store.save(view);
  } catch (err) {
    // Report failures from building or persisting the view as a clean CLI
    // error rather than an unhandled rejection.
    console.error(`Error defining view: ${err.message}`);
    process.exit(1);
  }

  console.log(`View '${name}' defined:`);
  console.log(` Origin: ${origin}`);
  if (cli.values.registry) {
    console.log(` Registry: ${cli.values.registry}`);
  }
  console.log(` Type: ${view.type}`);
  if (view.select) {
    console.log(` Select: ${view.select}`);
  }
};

cli/cli/src/cmd/view/delete.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { ViewStore } from '@_all_docs/view';
2+
3+
/**
 * Help text for `_all_docs view delete`, printed when `--help` is passed.
 *
 * NOTE(review): the command in this file never prompts for confirmation, so
 * `--force` currently has no observable effect — confirm whether a prompt is
 * planned or the option should be dropped from the help text.
 */
export const usage = `Usage: _all_docs view delete <name>

Delete a defined view.

Options:
  --force   Skip confirmation

Examples:
  _all_docs view delete old-view
  _all_docs view delete old-view --force
`;
14+
15+
/**
 * `view delete <name>`: remove a stored view definition.
 *
 * Reads `cli._[0]` (view name) and `cli.values.help`. The view is looked up
 * in a ViewStore rooted at the CLI's `config` directory; a missing name, an
 * unknown view, or a store failure is reported on stderr and terminates the
 * process with exit code 1.
 *
 * @param {object} cli - Parsed command line (`values`, `_`, `dir(name)`).
 * @returns {Promise<void>}
 */
export const command = async (cli) => {
  if (cli.values.help) {
    console.log(usage);
    return;
  }

  const name = cli._[0];
  if (!name) {
    console.error('Error: View name required');
    console.error('Usage: _all_docs view delete <name>');
    process.exit(1);
  }

  const store = new ViewStore(cli.dir('config'));

  try {
    if (!(await store.exists(name))) {
      console.error(`Error: View '${name}' does not exist`);
      process.exit(1);
    }

    await store.delete(name);
  } catch (err) {
    // The view can disappear (or the store can fail) between the existence
    // check and the delete; report that as a CLI error, not a stack trace.
    console.error(`Error deleting view: ${err.message}`);
    process.exit(1);
  }

  console.log(`View '${name}' deleted`);
};

cli/cli/src/cmd/view/index.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/**
2+
* View commands - define, query, and join views over cached data
3+
*/
4+
export { command as define, usage as defineUsage } from './define.js';
5+
export { command as list, usage as listUsage } from './list.js';
6+
export { command as show, usage as showUsage } from './show.js';
7+
export { command as deleteView, usage as deleteUsage } from './delete.js';
8+
export { command as query, usage as queryUsage } from './query.js';
9+
export { command as join, usage as joinUsage } from './join.js';
10+
11+
/**
 * Top-level help text for the `_all_docs view` command group. It lists the
 * subcommands re-exported by this module (define, list, show, delete, query,
 * join) with a one-line summary each.
 */
export const usage = `Usage: _all_docs view <command> [options]

Manage named views over cached registry data.

Commands:
  define <name>         Define a new view
  list                  List all defined views
  show <name>           Show view details
  delete <name>         Delete a view
  query <name>          Query a view (output ndjson)
  join <left> <right>   Join two views

A view is a predicate (origin filter) plus a projection (field selection).
Views enable efficient queries and joins across different registry caches.

Examples:
  _all_docs view define npm-pkgs --origin npm
  _all_docs view define npm-vers --origin npm --select 'name, versions|keys'
  _all_docs view list
  _all_docs view query npm-vers --limit 100
  _all_docs view join npm-vers cgr-vers --diff
`;

cli/cli/src/cmd/view/join.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import { ViewStore, joinViews, diffViews } from '@_all_docs/view';
2+
import { Cache, createStorageDriver } from '@_all_docs/cache';
3+
4+
/**
 * Help text for `_all_docs view join`, printed when `--help` is passed.
 * Documents the join-type flags and the `--on` / `--limit` / `--json` options.
 */
export const usage = `Usage: _all_docs view join <left-view> <right-view> [options]

Join two views on their common key (package name).

Join Types:
  --left    Include all from left, matching from right (default)
  --inner   Only include records present in both views
  --right   Include all from right, matching from left
  --full    Include all records from both views
  --diff    Output records in left but not in right

Options:
  --on <field>   Join key field (default: name)
  --limit <n>    Maximum records to return
  --json         Output as ndjson (default)

Examples:
  _all_docs view join npm-packages cgr-packages
  _all_docs view join npm-packages cgr-packages --inner
  _all_docs view join npm-packages cgr-packages --diff
  _all_docs view join npm-versions cgr-versions --limit 1000
`;
26+
27+
/**
 * Convert the raw `--limit` value into a record cap.
 *
 * @param {string|number|undefined} raw - Value of `cli.values.limit`.
 * @returns {number|undefined} The limit, or undefined when none was given.
 * @throws {RangeError} When the value is not a non-negative integer.
 */
const parseLimit = (raw) => {
  // An absent (or otherwise falsy) value means "no limit".
  if (!raw) {
    return undefined;
  }
  const limit = Number.parseInt(raw, 10);
  if (Number.isNaN(limit) || limit < 0) {
    throw new RangeError(`--limit must be a non-negative integer (got '${raw}')`);
  }
  return limit;
};

/**
 * `view join <left-view> <right-view>`: stream the join (or, with `--diff`,
 * the difference) of two stored views to stdout, one JSON record per line.
 *
 * Reads `cli._[0]` / `cli._[1]` (view names) and `cli.values.{help,inner,
 * right,full,diff,on,limit}`. Views come from a ViewStore rooted at the
 * `config` directory; records are read through a Cache over the
 * `packuments` directory. Any failure is reported on stderr and terminates
 * the process with exit code 1.
 *
 * @param {object} cli - Parsed command line (`values`, `_`, `dir(name)`).
 * @returns {Promise<void>}
 */
export const command = async (cli) => {
  if (cli.values.help) {
    console.log(usage);
    return;
  }

  const leftName = cli._[0];
  const rightName = cli._[1];

  if (!leftName || !rightName) {
    console.error('Error: Two view names required');
    console.error('Usage: _all_docs view join <left-view> <right-view>');
    process.exit(1);
  }

  // Validate --limit before touching the store or cache so a typo such as
  // `--limit abc` fails fast instead of reaching the join as NaN.
  let limit;
  try {
    limit = parseLimit(cli.values.limit);
  } catch (err) {
    console.error(`Error: ${err.message}`);
    process.exit(1);
  }

  const store = new ViewStore(cli.dir('config'));

  let leftView;
  let rightView;
  try {
    leftView = await store.load(leftName);
    rightView = await store.load(rightName);
  } catch (err) {
    console.error(`Error: ${err.message}`);
    process.exit(1);
  }

  const driver = await createStorageDriver({ CACHE_DIR: cli.dir('packuments') });
  const cache = new Cache({ path: cli.dir('packuments'), driver });

  // Join type precedence: --inner, then --right, then --full; default left.
  let type = 'left';
  if (cli.values.inner) {
    type = 'inner';
  } else if (cli.values.right) {
    type = 'right';
  } else if (cli.values.full) {
    type = 'full';
  }

  const options = {
    type,
    on: cli.values.on || 'name',
    limit
  };

  try {
    // --diff selects a different record source; it receives the same options.
    const source = cli.values.diff ? diffViews : joinViews;
    for await (const record of source(leftView, rightView, cache, options)) {
      console.log(JSON.stringify(record));
    }
  } catch (err) {
    console.error(`Error joining views: ${err.message}`);
    process.exit(1);
  }
};

cli/cli/src/cmd/view/list.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import { ViewStore } from '@_all_docs/view';
2+
3+
/**
 * Help text for `_all_docs view list`, printed when `--help` is passed.
 */
export const usage = `Usage: _all_docs view list

List all defined views.

Options:
  --json   Output as JSON array

Examples:
  _all_docs view list
  _all_docs view list --json
`;
14+
15+
/**
 * `view list`: print every view stored in the CLI's `config` directory.
 *
 * With `--json` the output on stdout is always a JSON array (empty when no
 * views exist) built from each view's `toJSON()`; a view that fails to load
 * is skipped with a warning on stderr so stdout stays parseable. Without
 * `--json` a human-readable listing is printed and load failures are shown
 * inline next to the view name.
 *
 * @param {object} cli - Parsed command line (`values`, `dir(name)`).
 * @returns {Promise<void>}
 */
export const command = async (cli) => {
  if (cli.values.help) {
    console.log(usage);
    return;
  }

  const store = new ViewStore(cli.dir('config'));
  const names = await store.list();

  if (cli.values.json) {
    const views = [];
    for (const name of names) {
      try {
        const view = await store.load(name);
        views.push(view.toJSON());
      } catch (err) {
        // One unreadable definition must not hide the others.
        console.error(`Warning: skipping view '${name}' (error loading: ${err.message})`);
      }
    }
    console.log(JSON.stringify(views, null, 2));
    return;
  }

  if (names.length === 0) {
    console.log('No views defined.');
    console.log('');
    console.log('Create a view with:');
    console.log(' _all_docs view define <name> --origin <key> [--select <expr>]');
    return;
  }

  console.log('Defined views:');
  console.log('');

  for (const name of names) {
    try {
      const view = await store.load(name);
      console.log(` ${name}`);
      console.log(` Origin: ${view.origin}`);
      console.log(` Type: ${view.type}`);
      if (view.select) {
        console.log(` Select: ${view.select}`);
      }
      console.log('');
    } catch (err) {
      console.log(` ${name} (error loading: ${err.message})`);
    }
  }
};

0 commit comments

Comments
 (0)