Skip to content

Commit ce71c29

Browse files
committed
add fuzz test for zig fmt
Adds a new fuzz test for zig fmt. This fuzz test checks that Ast.render succeeds for parsed inputs. Additionally, for inputs that it knows Ast.render cannot reorder, it checks that they are not rewritten. This fuzz test has been very successful. Using #23416, it has found three bugs (one was a TODO); two of them I have fixed and the other is in #23754. I have run the test for 650,000,000 iterations (about 2 hours) and haven't found any more bugs. Some functions in the fuzz test have instrumentation disabled because their branches are not interesting to the fuzzer; doing this gave a ~40% boost in runs per second. Additionally, the fuzz test handles tokenization itself so that it can determine whether the input can be rewritten.
1 parent 8e79fc6 commit ce71c29

1 file changed

Lines changed: 131 additions & 0 deletions

File tree

lib/std/zig/parser_test.zig

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6528,3 +6528,134 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
65286528
try std.testing.expectEqual(expected, tree.errors[i].tag);
65296529
}
65306530
}
6531+
6532+
// Fuzz test for rendering: hands `fuzzTestOneRender` to the standard fuzzing
// harness with an empty context and default options.
test "fuzz zig fmt" {
    return std.testing.fuzz({}, fuzzTestOneRender, .{});
}
6535+
6536+
/// Tokenizes `source` into a token list allocated from `fba`.
///
/// Returns `error.Invalid` as soon as the tokenizer yields an invalid token, and
/// `error.OutOfMemory` if `fba` cannot hold the token list.
///
/// `maybe_rewriteable` is set when the token stream contains any construct for which
/// the renderer may legitimately emit different non-whitespace text than the input.
fn parseTokens(
    fba: std.mem.Allocator,
    source: [:0]const u8,
) error{ Invalid, OutOfMemory }!struct {
    toks: std.zig.Ast.TokenList,
    maybe_rewriteable: bool,
} {
    @disableInstrumentation();

    var list: std.zig.Ast.TokenList = .{};
    try list.ensureTotalCapacity(fba, source.len / 2);

    // A leading byte-order mark can be stripped.
    var maybe_rewriteable: bool = std.mem.startsWith(u8, source, "\xEF\xBB\xBF");

    var tokenizer = std.zig.Tokenizer.init(source);
    while (true) {
        const token = tokenizer.next();

        // Classify the token before it is appended, so that `list` still holds
        // only the tokens that precede it.
        const allows_rewrite: bool = switch (token.tag) {
            .invalid,
            .invalid_periodasterisks,
            => return error.Invalid,

            // asm: extra colons can be removed.
            .keyword_asm,
            // Qualifiers can be reordered. `keyword_const` is intentionally left out:
            // it is used in other contexts, and a single qualifier never leads to
            // reordering.
            .keyword_addrspace,
            .keyword_align,
            .keyword_allowzero,
            .keyword_callconv,
            .keyword_linksection,
            .keyword_volatile,
            // See #23754.
            .container_doc_comment,
            => true,

            // Labeled statements can sometimes be (questionably) rewritten due to
            // ambiguous grammar,
            // ex: `O: for (x) |T| (break O: T)` -> `O: O: for (x) |T| (break :O T)`
            .keyword_for,
            .keyword_while,
            .l_brace,
            => labeled: {
                const prior = list.items(.tag);
                if (prior.len < 2) break :labeled false;
                break :labeled prior[prior.len - 2] == .identifier and
                    prior[prior.len - 1] == .colon;
            },

            // Quoted identifiers (`@"..."`) can be unquoted.
            .identifier => source[token.loc.start] == '@',

            else => false,
        };
        if (allows_rewrite) maybe_rewriteable = true;

        try list.append(fba, .{
            .tag = token.tag,
            .start = @intCast(token.loc.start),
        });

        // The eof token is recorded too, and ends the stream.
        if (token.tag == .eof) {
            return .{
                .toks = list,
                .maybe_rewriteable = maybe_rewriteable,
            };
        }
    }
}
6595+
6596+
/// Runs the parser from `Parse.zig` over an already tokenized `source` and packages
/// the parser's output as a `std.zig.Ast`.
///
/// All parser allocations are made with `fba`. The returned tree refers directly to
/// the parser's node, extra-data and error storage, as well as to `source` and the
/// token slice of `toks`.
fn parseAstFromTokens(
    fba: std.mem.Allocator,
    source: [:0]const u8,
    toks: std.zig.Ast.TokenList,
) error{OutOfMemory}!std.zig.Ast {
    const Parse = @import("Parse.zig");

    var state: Parse = .{
        .gpa = fba,
        .source = source,
        .tokens = toks.slice(),
        .tok_i = 0,
        .nodes = .{},
        .extra_data = .{},
        .scratch = .{},
        .errors = .{},
    };

    // Reserve node storage up front, sized from the token count.
    const node_capacity = 1 + toks.len / 2;
    try state.nodes.ensureTotalCapacity(fba, node_capacity);

    try state.parseRoot();

    return std.zig.Ast{
        .mode = .zig,
        .source = source,
        .tokens = state.tokens,
        .nodes = state.nodes.slice(),
        .extra_data = state.extra_data.items,
        .errors = state.errors.items,
    };
}
6622+
6623+
/// Checks equivalence of the non-whitespace characters of `bytes` and `rendered`.
///
/// Whitespace in `bytes` (space, `\r`, `\t`, `\n`) is ignored, as are spaces, `\r` and
/// `\n` in `rendered` (a tab in `rendered` is compared like any other character).
/// If there are commas in `bytes`, then it is checked they are also present in
/// `rendered`. Extra commas in `rendered` are considered equivalent.
///
/// Returns `true` when the two inputs are NOT equivalent under these rules, i.e. the
/// text was rewritten, and `false` when they are equivalent.
fn isRewritten(bytes: []const u8, rendered: []const u8) bool {
    @disableInstrumentation();
    // Index of the next unconsumed character of `rendered`.
    var i: usize = 0;
    for (bytes) |c| switch (c) {
        ' ', '\r', '\t', '\n' => {},
        else => while (true) {
            // `rendered` ended before every significant character of `bytes` was matched.
            if (i == rendered.len) return true;
            // Every inspected character of `rendered` is consumed, whether it was
            // skipped or matched.
            defer i += 1;
            switch (rendered[i]) {
                ' ', '\r', '\n' => {},
                // A comma either matches a comma from `bytes` or is an extra one to skip.
                ',' => if (c == ',') break,
                // A differing significant character means the text was changed.
                else => |n| if (c != n) return true else break,
            }
        },
    };
    // Anything left in `rendered` other than spaces, newlines and extra commas is new text.
    for (rendered[i..]) |c| switch (c) {
        ' ', '\n', ',' => {},
        else => return true,
    };
    return false;
}
6647+
6648+
/// Fuzz callback: tokenizes, parses and renders one input.
///
/// The first two bytes of `bytes` are reinterpreted as a `u16` that limits how much of
/// `fixed_buffer_mem` the run may allocate from; the remaining bytes are the source
/// text. Inputs that are too short, fail to tokenize or parse, report parse errors, or
/// exhaust the memory limit are ignored. The only failure is `error.TestFailed`,
/// returned when the input was not flagged as possibly rewriteable and the rendered
/// output is nevertheless reported as rewritten by `isRewritten`.
fn fuzzTestOneRender(_: void, bytes: []const u8) anyerror!void {
    if (bytes.len < 2) return;

    const limit_bytes: [2]u8 = bytes[0..2].*;
    const mem_limit: u16 = @bitCast(limit_bytes);
    const input = bytes[2..];

    var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..mem_limit]);
    const fba = fixed_allocator.allocator();

    const source = fba.dupeZ(u8, input) catch return;
    const tokenized = parseTokens(fba, source) catch return;
    const tree = parseAstFromTokens(fba, source, tokenized.toks) catch return;
    if (tree.errors.len != 0) return;

    const rendered = tree.render(fba) catch return;

    // Inputs that may legitimately be rewritten are only checked for render success.
    if (tokenized.maybe_rewriteable) return;
    if (isRewritten(source, rendered)) return error.TestFailed;
}

0 commit comments

Comments
 (0)