Skip to content

Commit 24f8919

Browse files
committed
terminal: glyph protocol parser and response encoder
This adds the core parse/encode for the still in-development and experimental terminal glyph protocol: raphamorim/rio#1542. The only cross-cutting change necessary was to the APC identification logic, which previously looked at only a single byte; it now supports multi-byte identifiers, since the glyph protocol uses `25a1`.
1 parent afdae72 commit 24f8919

6 files changed

Lines changed: 991 additions & 17 deletions

File tree

src/terminal/apc.zig

Lines changed: 144 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ const std = @import("std");
22
const build_options = @import("terminal_options");
33
const Allocator = std.mem.Allocator;
44

5+
const glyph = @import("apc/glyph.zig");
56
const kitty_gfx = @import("kitty/graphics.zig");
67

78
const log = std.log.scoped(.terminal_apc);
@@ -18,6 +19,7 @@ pub const Handler = struct {
1819
/// use `.initFull`.
1920
max_bytes: std.EnumMap(Protocol, usize) = .initFullWith(.{
2021
.kitty = Protocol.defaultMaxBytes(.kitty),
22+
.glyph = Protocol.defaultMaxBytes(.glyph),
2123
}),
2224

2325
pub fn deinit(self: *Handler) void {
@@ -26,7 +28,7 @@ pub const Handler = struct {
2628

2729
pub fn start(self: *Handler) void {
2830
self.state.deinit();
29-
self.state = .{ .identify = {} };
31+
self.state = .{ .identify = .{} };
3032
}
3133

3234
pub fn feed(self: *Handler, alloc: Allocator, byte: u8) void {
@@ -38,21 +40,45 @@ pub const Handler = struct {
3840
.ignore => return,
3941

4042
// We identify the APC command by its leading identifier bytes.
41-
.identify => {
42-
switch (byte) {
43-
// Kitty graphics protocol
44-
'G' => self.state = if (comptime build_options.kitty_graphics)
45-
.{ .kitty = .init(
43+
.identify => |*id| id: {
44+
// Kitty graphics is detected immediately on the `G` byte,
45+
// since commands begin immediately after with no termination
46+
// character after the 'G'.
47+
if (comptime build_options.kitty_graphics) {
48+
if (id.len == 0 and byte == 'G') {
49+
self.state = .{ .kitty = .init(
4650
alloc,
4751
self.max_bytes.get(.kitty) orelse
4852
Protocol.defaultMaxBytes(.kitty),
49-
) }
50-
else
51-
.ignore,
53+
) };
54+
break :id;
55+
}
56+
}
5257

53-
// Unknown
54-
else => self.state = .ignore,
58+
// If we hit `;` then identify...
59+
if (byte == ';') {
60+
const str = id.buf[0..id.len];
61+
if (std.mem.eql(u8, str, "25a1")) {
62+
self.state = .{ .glyph = .init(
63+
alloc,
64+
self.max_bytes.get(.glyph) orelse
65+
Protocol.defaultMaxBytes(.glyph),
66+
) };
67+
} else {
68+
self.state = .ignore;
69+
}
70+
71+
break :id;
72+
}
73+
74+
// If we're out of space to buffer then we're done.
75+
if (id.len >= id.buf.len) {
76+
self.state = .ignore;
77+
break :id;
5578
}
79+
80+
id.buf[id.len] = byte;
81+
id.len += 1;
5682
},
5783

5884
.kitty => |*p| if (comptime build_options.kitty_graphics) {
@@ -62,6 +88,12 @@ pub const Handler = struct {
6288
self.state = .ignore;
6389
};
6490
} else unreachable,
91+
92+
.glyph => |*p| p.feed(byte) catch |err| {
93+
log.warn("glyph protocol error: {}", .{err});
94+
p.deinit();
95+
self.state = .ignore;
96+
},
6597
}
6698
}
6799

@@ -86,31 +118,54 @@ pub const Handler = struct {
86118

87119
break :kitty .{ .kitty = command };
88120
},
121+
122+
.glyph => |*p| glyph_cmd: {
123+
const command = p.complete(p.alloc) catch |err| {
124+
log.warn("glyph protocol error: {}", .{err});
125+
break :glyph_cmd null;
126+
};
127+
128+
break :glyph_cmd .{ .glyph = command };
129+
},
89130
};
90131
}
91132
};
92133

93134
pub const State = union(enum) {
94135
/// We're not in the middle of an APC command yet.
95-
inactive: void,
136+
inactive,
96137

97138
/// We got an unrecognized APC sequence or the APC sequence we
98139
/// recognized became invalid. We're just dropping bytes.
99-
ignore: void,
100-
101-
/// We're waiting to identify the APC sequence. This is done by
102-
/// inspecting the first byte of the sequence.
103-
identify: void,
140+
ignore,
141+
142+
/// We're waiting to identify the APC sequence. The way this is done
143+
/// is pretty fluid depending on supported APC protocols, but for now
144+
/// our rule is:
145+
///
146+
/// * 'G' - immediate transition to Kitty graphics protocol
147+
/// * Buffer up to `;` and the bytes before dictate the protocol.
148+
/// If we overflow then we're immediately invalid because we don't
149+
/// support anything longer than this.
150+
///
151+
identify: struct {
152+
len: u3 = 0,
153+
buf: [4]u8 = undefined,
154+
},
104155

105156
/// Kitty graphics protocol
106157
kitty: if (build_options.kitty_graphics)
107158
kitty_gfx.CommandParser
108159
else
109160
void,
110161

162+
/// Glyph protocol
163+
glyph: glyph.CommandParser,
164+
111165
pub fn deinit(self: *State) void {
112166
switch (self.*) {
113167
.inactive, .ignore, .identify => {},
168+
.glyph => |*v| v.deinit(),
114169
.kitty => |*v| if (comptime build_options.kitty_graphics)
115170
v.deinit()
116171
else
@@ -122,13 +177,18 @@ pub const State = union(enum) {
122177
/// Possible APC command types. This enum is also the tag type of `Command`
/// and the key type of `Handler.max_bytes`.
123178
pub const Protocol = enum {
124179
kitty,
180+
glyph,
125181

126182
/// Returns the default maximum bytes for the given protocol. The handler
/// falls back to this value when `max_bytes` has no entry for the protocol.
127183
pub fn defaultMaxBytes(self: Protocol) usize {
128184
return switch (self) {
129185
// Kitty graphics payloads can be very large (e.g. full images
130186
// encoded as base64), so the default is set to 65 MiB.
131187
.kitty => 65 * 1024 * 1024,
188+
// Glyph protocol messages carry single glyf outlines which
189+
// are small, but base64 encoding inflates them. 1 MiB is
190+
// generous for any single simple-glyph record.
191+
.glyph => 1 * 1024 * 1024,
132192
};
133193
}
134194
};
@@ -140,12 +200,16 @@ pub const Command = union(Protocol) {
140200
else
141201
void,
142202

203+
glyph: glyph.Request,
204+
143205
pub fn deinit(self: *Command, alloc: Allocator) void {
144206
switch (self.*) {
145207
.kitty => |*v| if (comptime build_options.kitty_graphics)
146208
v.deinit(alloc)
147209
else
148210
unreachable,
211+
212+
.glyph => |*v| v.deinit(alloc),
149213
}
150214
}
151215
};
@@ -246,3 +310,66 @@ test "valid Kitty command" {
246310
defer cmd.deinit(alloc);
247311
try testing.expect(cmd == .kitty);
248312
}
313+
314+
// The identifier `abcd` exactly fills the 4-byte identify buffer but does not
// match `25a1`, so the sequence is dropped once `;` is seen and `end()`
// yields no command.
test "identify with unrecognized command" {
315+
const testing = std.testing;
316+
const alloc = testing.allocator;
317+
318+
var h: Handler = .{};
319+
h.start();
320+
for ("abcd;payload") |c| h.feed(alloc, c);
321+
try testing.expect(h.end() == null);
322+
}
323+
324+
// Five bytes arrive before any `;`, which is one more than the 4-byte
// identify buffer can hold; the handler switches to ignoring the sequence
// and `end()` yields no command.
test "identify buffer overflow" {
325+
const testing = std.testing;
326+
const alloc = testing.allocator;
327+
328+
var h: Handler = .{};
329+
h.start();
330+
for ("abcde;payload") |c| h.feed(alloc, c);
331+
try testing.expect(h.end() == null);
332+
}
333+
334+
// Ending a sequence immediately after `start()`, with no bytes fed, is
// expected to yield no command.
test "identify with no input" {
335+
const testing = std.testing;
336+
337+
var h: Handler = .{};
338+
h.start();
339+
try testing.expect(h.end() == null);
340+
}
341+
342+
// Only a prefix of the glyph identifier is fed and no `;` ever arrives, so
// no protocol is selected; `end()` is expected to yield no command.
test "identify with unknown partial input" {
343+
const testing = std.testing;
344+
const alloc = testing.allocator;
345+
346+
var h: Handler = .{};
347+
h.start();
348+
for ("25a") |c| h.feed(alloc, c);
349+
try testing.expect(h.end() == null);
350+
}
351+
352+
// `25a1;` selects the glyph parser, but `X` is not one of the documented
// verbs (`s`, `q`, `r`, `c`). The test expects no command.
// NOTE(review): whether the rejection happens in `feed` or in `complete` is
// not visible from this file; confirm against glyph/request.zig.
test "garbage glyph command" {
353+
const testing = std.testing;
354+
const alloc = testing.allocator;
355+
356+
var h: Handler = .{};
357+
h.start();
358+
for ("25a1;X") |c| h.feed(alloc, c);
359+
360+
try testing.expect(h.end() == null);
361+
}
362+
363+
// A well-formed codepoint query (`q` verb with a `cp` parameter) behind the
// `25a1` identifier must produce a `.glyph` command whose request is `.query`.
test "valid glyph command" {
364+
const testing = std.testing;
365+
const alloc = testing.allocator;
366+
367+
var h: Handler = .{};
368+
h.start();
369+
for ("25a1;q;cp=E0A0") |c| h.feed(alloc, c);
370+
371+
// The command is owned by the test and released with the same allocator.
var cmd = h.end().?;
372+
defer cmd.deinit(alloc);
373+
try testing.expect(cmd == .glyph);
374+
try testing.expect(cmd.glyph == .query);
375+
}

src/terminal/apc/glyph.zig

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//! # Glyph Protocol
2+
//!
3+
//! The Glyph Protocol lets applications register custom glyphs with the
4+
//! terminal at runtime and query whether a given codepoint is already
5+
//! covered by a system font or a prior registration. It eliminates the
6+
//! requirement for users to install patched fonts (e.g. Nerd Fonts) in
7+
//! order to render icons in TUIs.
8+
//!
9+
//! This file documents the wire protocol surface implemented by the parser
10+
//! and response formatter below.
11+
//!
12+
//! ## Transport
13+
//!
14+
//! Messages use APC (Application Program Command) framing.
15+
//! Terminals that do not implement the protocol can safely ignore APC
16+
//! sequences. Every message is prefixed with the identifier `25a1`
17+
//! (U+25A1 WHITE SQUARE — the canonical tofu symbol).
18+
//!
19+
//! ## Framing
20+
//!
21+
//! ```
22+
//! ESC _ 25a1 ; <verb> [ ; key=value ]* [ ; <payload> ] ESC \
23+
//! ```
24+
//!
25+
//! Four verbs are defined:
26+
//!
27+
//! - `s` — support query
28+
//! - `q` — codepoint query
29+
//! - `r` — register a glyph
30+
//! - `c` — clear registrations
31+
//!
32+
//! ## Support (`s`)
33+
//!
34+
//! Detects whether the terminal implements Glyph Protocol and which
35+
//! payload formats it supports.
36+
//!
37+
//! Request: `ESC _ 25a1 ; s ESC \`
38+
//! Response: `ESC _ 25a1 ; s ; fmt=<bitfield> ESC \`
39+
//!
40+
//! `fmt` bits:
41+
//! - bit 0 (`1`): `glyf` — TrueType simple glyphs (required in v1)
42+
//! - bit 1 (`2`): `colrv0` — COLR v0 layered flat-colour glyphs
43+
//! - bit 2 (`4`): `colrv1` — COLR v1 paint-graph glyphs
44+
//!
45+
//! Any reply confirms support; no reply within a timeout means the
46+
//! terminal does not implement the protocol.
47+
//!
48+
//! ## Query (`q`)
49+
//!
50+
//! Asks whether a codepoint is renderable and by whom.
51+
//!
52+
//! Request: `ESC _ 25a1 ; q ; cp=<hex> ESC \`
53+
//! Response: `ESC _ 25a1 ; q ; cp=<hex> ; status=<u8> ESC \`
54+
//!
55+
//! `status` is a two-bit field:
56+
//! - `0` (`free`) — nothing renders this codepoint (tofu)
57+
//! - `1` (`system`) — a system font covers it
58+
//! - `2` (`glossary`) — a session registration covers it
59+
//! - `3` (`both`) — both; the registration shadows the system font
60+
//!
61+
//! ## Register (`r`)
62+
//!
63+
//! Registers a glyph outline at a Private Use Area codepoint.
64+
//!
65+
//! Request:
66+
//! `ESC _ 25a1 ; r ; cp=<hex> [; fmt=glyf] [; upm=<int>]
67+
//! [; reply=<0|1|2>] ; <base64-payload> ESC \`
68+
//!
69+
//! Response:
70+
//! `ESC _ 25a1 ; r ; cp=<hex> ; status=0 ESC \`
71+
//! On error: `status=<nonzero> ; reason=<code>`
72+
//!
73+
//! Parameters:
74+
//! - `cp` — target codepoint (hex). Must be in a PUA range:
75+
//! U+E000–U+F8FF, U+F0000–U+FFFFD, or U+100000–U+10FFFD.
76+
//! Non-PUA values are rejected with `reason=out_of_namespace`.
77+
//! - `fmt` — payload format. Default `glyf`; `colrv0` and `colrv1`
78+
//! are optional and advertised via the `s` reply.
79+
//! - `upm` — units-per-em for the coordinate space. Default 1000.
80+
//! - `reply` — response verbosity:
81+
//! `1` (default) = success + failure replies
82+
//! `2` = failure replies only (silent success)
83+
//! `0` = no replies (fire-and-forget)
84+
//! - payload — base64-encoded `glyf` simple-glyph record.
85+
//!
86+
//! The `glyf` subset accepted:
87+
//! - Simple glyphs only (no composites).
88+
//! - Standard flag encoding (on-curve, off-curve, x/y-short, repeat).
89+
//! - No hinting instructions.
90+
//! - Coordinates are in the `upm` space; the terminal scales to cell size.
91+
//!
92+
//! A second `r` on the same `cp` overwrites the previous registration.
93+
//! `glyf` outlines render in the current foreground colour.
94+
//!
95+
//! ## Clear (`c`)
96+
//!
97+
//! Removes registrations.
98+
//!
99+
//! Single slot: `ESC _ 25a1 ; c ; cp=<hex> ESC \`
100+
//! All slots: `ESC _ 25a1 ; c ESC \`
101+
//!
102+
//! The terminal acks with `status=0` even if the slot was already empty.
103+
//! Clear replies do not echo `cp`. `cp` must be in a PUA range; non-PUA values return
104+
//! `reason=out_of_namespace`.
105+
//!
106+
//! ## Glossary Capacity
107+
//!
108+
//! Each session holds at most 1024 registrations keyed by codepoint.
109+
//! Registrations live for the session duration. A 1025th registration
110+
//! evicts the oldest entry (FIFO). Sessions are isolated: two tabs may
111+
//! independently register the same codepoint.
112+
//!
113+
//! ## Security: PUA-Only Restriction
114+
//!
115+
//! Registration is restricted to the three Unicode Private Use Areas to
116+
//! prevent glyph-spoofing attacks. PUA codepoints never appear in normal
117+
//! text (filenames, URLs, commands), so a registered glyph cannot alter
118+
//! how real text is perceived. The cell buffer always stores the original
119+
//! codepoint — copy/paste, search, and hyperlink detection return the
120+
//! codepoint the application emitted, never the rendered glyph.
121+
//!
122+
//! Reference: <https://rapha.land/introducing-glyph-protocol-for-terminals/>
123+
124+
const std = @import("std");
125+
126+
pub const request = @import("glyph/request.zig");
127+
pub const response = @import("glyph/response.zig");
128+
129+
pub const CommandParser = request.CommandParser;
130+
pub const Request = request.Request;
131+
pub const Response = response.Response;

0 commit comments

Comments
 (0)