-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_value_tests.py
More file actions
392 lines (346 loc) · 17.8 KB
/
generate_value_tests.py
File metadata and controls
392 lines (346 loc) · 17.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#!/usr/bin/env python3
import os
import subprocess
import glob
import re
def format_zig_string(content):
    """Render *content* as the right-hand side of a Zig string constant.

    Three output shapes are produced:

    * If the text contains a tab, a double quote, DEL (0x7F) or any control
      character other than a newline, the whole text is emitted as one
      regular ``"..."`` literal with every special character escaped.
    * Otherwise, after stripping trailing whitespace, a single line shorter
      than 60 characters is emitted as a regular ``"..."`` literal.
    * Anything else is emitted as a Zig multiline literal: one
      ``\\\\``-prefixed line per input line, starting on a fresh line.

    Non-ASCII characters are passed through unchanged in every shape.

    Args:
        content: The text to embed (the raw YAML document).

    Returns:
        The Zig source fragment to place after ``const input = ``.
    """
    needs_escaping = any(
        ch in '\t"' or (ord(ch) < 0x20 and ch != '\n') or ord(ch) == 0x7F
        for ch in content
    )

    if needs_escaping:
        named_escapes = {
            '\\': '\\\\',
            '"': '\\"',
            '\n': '\\n',
            '\t': '\\t',
            '\r': '\\r',
        }
        pieces = []
        for ch in content:
            if ch in named_escapes:
                pieces.append(named_escapes[ch])
            elif ord(ch) < 0x20 or ord(ch) == 0x7F:
                # Remaining control characters use \xNN notation.
                pieces.append(f'\\x{ord(ch):02x}')
            else:
                # Printable ASCII and non-ASCII text pass through as-is.
                pieces.append(ch)
        return '"' + ''.join(pieces) + '"'

    lines = content.rstrip().split('\n')
    if len(lines) == 1 and len(lines[0]) < 60:
        # Quotes and control characters cannot reach this branch, but a
        # backslash can, and inside a regular literal it would start an
        # escape sequence. Double it so the literal keeps the original text.
        return '"' + lines[0].replace('\\', '\\\\') + '"'

    # Multiline literals take their content verbatim (backslashes included),
    # so each line only needs the \\ prefix.
    body = '\n'.join(' \\\\' + line for line in lines)
    return '\n' + body + '\n '
def run_yaml_rust2_enhanced(yaml_file):
    """Invoke the yaml-rust2 helper binary on *yaml_file* and return its stdout.

    The binary is started from the fixed project directory with a 5 second
    timeout and its output is captured as text.

    Returns:
        The captured standard output, or ``None`` if launching or running
        the process raised any exception (best-effort: callers treat
        ``None`` as "no assertions available").
    """
    command = ['./yaml-rust2-test/target/release/rust-yaml2-test', yaml_file]
    try:
        completed = subprocess.run(
            command,
            cwd='/Users/dylan/yamlz-3',
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        return None
    return completed.stdout
def parse_assertions(output):
    """Extract ``(path, value)`` pairs from ``ZIG_ASSERT`` lines.

    Each recognised line has the form ``ZIG_ASSERT: <path> == <value>``,
    for example ``ZIG_ASSERT: doc0 == hash[1]``.

    Returns:
        ``None`` when *output* is empty/``None`` or contains the text
        ``Error`` anywhere; otherwise a list of ``(path, value)`` tuples in
        the order the lines appear (possibly empty).
    """
    if not output or 'Error' in output:
        return None

    assertion_line = re.compile(r'ZIG_ASSERT: (.*?) == (.+)')
    candidates = (
        assertion_line.match(line)
        for line in output.split('\n')
        if 'ZIG_ASSERT:' in line
    )
    # Lines that mention the marker but do not start with it yield no match
    # and are dropped.
    return [(found.group(1), found.group(2)) for found in candidates if found]
def escape_zig_string(s):
    """Return *s* with the escapes needed inside a Zig ``"..."`` literal.

    Backslash, double quote, newline, tab and carriage return get their
    named escapes; every other character below 0x20, and DEL (0x7F), is
    written as ``\\xNN``. Printable ASCII and all non-ASCII characters are
    returned unchanged.
    """
    # Start with \xNN for every control character, then override the ones
    # that have a dedicated escape sequence.
    replacements = {code: f'\\x{code:02x}' for code in range(0x20)}
    replacements[0x7F] = '\\x7f'
    replacements.update({
        ord('\\'): '\\\\',
        ord('"'): '\\"',
        ord('\n'): '\\n',
        ord('\t'): '\\t',
        ord('\r'): '\\r',
    })
    return s.translate(replacements)
# Assertion paths understood by generate_zig_test_code. Each pattern must
# match the whole path: a deeper path such as ``doc0[0][1]`` describes a
# different node and must not be reported against its parent.
_ROOT_KEY_PATH = re.compile(r'doc0\.keys\[(\d+)\]')
_ROOT_VALUE_PATH = re.compile(r'doc0\.values\[(\d+)\]')
_ROOT_VALUE_ITEM_PATH = re.compile(r'doc0\.values\[(\d+)\]\[(\d+)\]')
_ROOT_ITEM_PATH = re.compile(r'doc0\[(\d+)\]')


def _zig_block(*lines):
    """Join Zig source lines into one snippet that starts with a newline."""
    return '\n'.join(('',) + lines)


def _zig_tag_check(expr, tag):
    """Return a Zig line asserting that the active tag of *expr* is *tag*."""
    return (
        f'try testing.expectEqual(@as(std.meta.Tag(@TypeOf({expr})), .{tag}), '
        f'std.meta.activeTag({expr}));'
    )


def _zig_len_check(count, items):
    """Return a Zig line asserting that ``items.len`` equals *count*."""
    return f'try testing.expectEqual(@as(usize, {count}), {items}.len);'


def _entry_count(value):
    """Return the N of a ``hash[N]`` / ``array[N]`` value descriptor."""
    return re.search(r'\[(\d+)\]', value).group(1)


def _zig_string_check(comment, node, value, input_var):
    """Return checks that *node* is a string scalar equal to *value*'s text."""
    # Drop the leading 'string "' and the closing quote.
    expected = escape_zig_string(value[8:-1])
    return _zig_block(
        f'// {comment}',
        _zig_tag_check(f'{node}.data', 'scalar'),
        _zig_tag_check(f'{node}.data.scalar', 'string'),
        f'try testing.expectEqualStrings("{expected}", '
        f'{node}.data.scalar.string.slice({input_var}));',
    )


def _zig_number_check(comment, node, value):
    """Return checks that *node* is a number scalar equal to *value*'s integer."""
    number = value[8:]  # drop the leading 'integer '
    return _zig_block(
        f'// {comment}',
        _zig_tag_check(f'{node}.data', 'scalar'),
        _zig_tag_check(f'{node}.data.scalar', 'number'),
        f'try testing.expectEqual(@as(f64, {number}), {node}.data.scalar.number);',
    )


def _zig_root_checks(value, input_var):
    """Return the checks for the document root, or None if *value* is not handled."""
    if value.startswith('hash['):
        count = _entry_count(value)
        return _zig_block(
            f'// Root is a mapping with {count} entries',
            _zig_tag_check('doc.root.data', 'mapping'),
            'const map = doc.root.data.mapping;',
            _zig_len_check(count, 'map.keys.items'),
        )
    if value.startswith('array['):
        count = _entry_count(value)
        return _zig_block(
            f'// Root is a sequence with {count} items',
            _zig_tag_check('doc.root.data', 'sequence'),
            'const seq = doc.root.data.sequence;',
            _zig_len_check(count, 'seq.list.items'),
        )
    if value.startswith('string "'):
        return _zig_string_check('Root is a scalar string', 'doc.root', value, input_var)
    if value.startswith('integer '):
        return _zig_number_check('Root is a scalar number', 'doc.root', value)
    if value.startswith('boolean '):
        return _zig_block(
            '// Root is a scalar boolean',
            _zig_tag_check('doc.root.data', 'scalar'),
            _zig_tag_check('doc.root.data.scalar', 'boolean'),
            f'try testing.expectEqual({value[8:]}, doc.root.data.scalar.boolean);',
        )
    if value == 'null':
        return _zig_block(
            '// Root is null',
            _zig_tag_check('doc.root.data', 'scalar'),
            _zig_tag_check('doc.root.data.scalar', 'null'),
        )
    return None


def generate_zig_test_code(assertions, input_var):
    """Translate parsed ``(path, value)`` assertions into Zig test statements.

    Handled paths:

    * ``doc0`` - the root: ``hash[N]``, ``array[N]``, string, integer,
      boolean or ``null``.
    * ``doc0.keys[K]`` - string keys of the root mapping.
    * ``doc0.values[V]`` - string, ``array[N]`` or ``hash[N]`` values of
      the root mapping.
    * ``doc0.values[V][I]`` - string/integer items of a sequence stored as
      a root mapping value.
    * ``doc0[I]`` - string/integer items of the root sequence.

    Any other path or value kind is skipped. A path must match one of
    these shapes completely, so more deeply nested paths are skipped
    rather than being checked against an ancestor node.

    Args:
        assertions: Iterable of ``(path, value)`` string pairs.
        input_var: Name of the Zig variable holding the YAML source; it is
            passed to ``.slice(...)`` when comparing strings.

    Returns:
        The generated statements joined by newlines; an empty string when
        nothing could be translated.

    Raises:
        AttributeError: If a ``hash[``/``array[`` value carries no ``[N]`` count.
    """
    test_code = []
    # Root-mapping value indices whose seq_N / nested_map_N constant has
    # already been emitted, so it is declared at most once.
    declared_values = set()

    for path, value in assertions:
        if path == 'doc0':
            root_snippet = _zig_root_checks(value, input_var)
            if root_snippet is not None:
                test_code.append(root_snippet)
            continue

        key_match = _ROOT_KEY_PATH.fullmatch(path)
        if key_match:
            # Only keys of the root mapping are checked; keys of nested
            # mappings never match the pattern and fall through unhandled.
            if value.startswith('string "'):
                key_idx = key_match.group(1)
                test_code.append(_zig_string_check(
                    f'Check key at index {key_idx}',
                    f'map.keys.items[{key_idx}]',
                    value,
                    input_var,
                ))
            continue

        item_match = _ROOT_VALUE_ITEM_PATH.fullmatch(path)
        if item_match:
            val_idx, arr_idx = item_match.groups()
            comment = f'Check array element [{val_idx}][{arr_idx}]'
            node = f'seq_{val_idx}.list.items[{arr_idx}]'
            if value.startswith('string "'):
                item_check = _zig_string_check(comment, node, value, input_var)
            elif value.startswith('integer '):
                item_check = _zig_number_check(comment, node, value)
            else:
                continue
            # Declare seq_N only when a check that uses it follows, so the
            # generated Zig never contains an unused local constant.
            if val_idx not in declared_values:
                declared_values.add(val_idx)
                test_code.append(_zig_block(
                    f'// Check that value at index {val_idx} is a sequence',
                    _zig_tag_check(f'map.values.items[{val_idx}].data', 'sequence'),
                    f'const seq_{val_idx} = map.values.items[{val_idx}].data.sequence;',
                ))
            test_code.append(item_check)
            continue

        value_match = _ROOT_VALUE_PATH.fullmatch(path)
        if value_match:
            val_idx = value_match.group(1)
            value_node = f'map.values.items[{val_idx}]'
            if value.startswith('string "'):
                test_code.append(_zig_string_check(
                    f'Check value at index {val_idx}', value_node, value, input_var,
                ))
            elif value.startswith('array['):
                if val_idx not in declared_values:
                    declared_values.add(val_idx)
                    test_code.append(_zig_block(
                        f'// Check sequence at value index {val_idx}',
                        _zig_tag_check(f'{value_node}.data', 'sequence'),
                        f'const seq_{val_idx} = {value_node}.data.sequence;',
                        _zig_len_check(_entry_count(value), f'seq_{val_idx}.list.items'),
                    ))
            elif value.startswith('hash['):
                if val_idx not in declared_values:
                    declared_values.add(val_idx)
                    test_code.append(_zig_block(
                        f'// Check nested mapping at index {val_idx}',
                        _zig_tag_check(f'{value_node}.data', 'mapping'),
                        f'const nested_map_{val_idx} = {value_node}.data.mapping;',
                        _zig_len_check(_entry_count(value), f'nested_map_{val_idx}.keys.items'),
                    ))
            continue

        root_item_match = _ROOT_ITEM_PATH.fullmatch(path)
        if root_item_match:
            idx = root_item_match.group(1)
            comment = f'Check array item at index {idx}'
            node = f'seq.list.items[{idx}]'
            if value.startswith('string "'):
                test_code.append(_zig_string_check(comment, node, value, input_var))
            elif value.startswith('integer '):
                test_code.append(_zig_number_check(comment, node, value))

    return '\n'.join(test_code)
def generate_test(yaml_file, test_name):
    """Build the Zig source of one test case for *yaml_file*.

    The YAML file is read as UTF-8. A file that is empty or contains only
    whitespace produces a fixed test expecting a single document whose root
    is a null scalar. Otherwise the content is embedded via
    ``format_zig_string`` and followed by the value assertions obtained
    from ``run_yaml_rust2_enhanced`` / ``parse_assertions`` /
    ``generate_zig_test_code``.

    Args:
        yaml_file: Path of the YAML file to read.
        test_name: Name placed in the Zig ``test "..."`` header.

    Returns:
        The Zig test block as a string.
    """
    # The content is embedded into Zig source as UTF-8, so decode it as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open(yaml_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Handle empty stream as a special case
    if not content.strip():
        return f'''
test "{test_name}" {{
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const allocator = arena.allocator();
const input = "";
const parse_result = yaml.parse(.utf8, allocator, input);
try testing.expectEqual(@as(std.meta.Tag(@TypeOf(parse_result)), .result), std.meta.activeTag(parse_result));
const res = parse_result.result;
// Empty stream creates one document with null root
try testing.expectEqual(@as(usize, 1), res.stream.docs.items.len);
const doc = res.stream.docs.items[0];
// The root should be a null scalar
try testing.expectEqual(@as(std.meta.Tag(@TypeOf(doc.root.data)), .scalar), std.meta.activeTag(doc.root.data));
try testing.expectEqual(@as(std.meta.Tag(@TypeOf(doc.root.data.scalar)), .null), std.meta.activeTag(doc.root.data.scalar));
}}
'''

    # Get assertions from the enhanced rust parser
    rust_output = run_yaml_rust2_enhanced(yaml_file)
    assertions = parse_assertions(rust_output) if rust_output else None

    # Format the YAML content for Zig
    formatted_content = format_zig_string(content)

    test = f'''
test "{test_name}" {{
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const allocator = arena.allocator();
const input = {formatted_content};
const parse_result = yaml.parse(.utf8, allocator, input);
try testing.expectEqual(@as(std.meta.Tag(@TypeOf(parse_result)), .result), std.meta.activeTag(parse_result));
const res = parse_result.result;
try testing.expect(res.stream.docs.items.len > 0);
const doc = res.stream.docs.items[0];
'''

    value_checks = generate_zig_test_code(assertions, 'input') if assertions else ''
    if not value_checks:
        # Either no assertions were available or none could be translated.
        # `doc` must still be referenced, because Zig rejects unused locals.
        value_checks = '''
// Check structure (no detailed assertions available)
_ = doc.root;'''
    test += value_checks

    test += '''
}
'''
    return test
def main():
    """Generate ``emitter_value_tests.zig`` from the libfyaml emitter examples.

    Every ``*.yaml`` file in the examples directory is turned into one Zig
    test via ``generate_test``. A file whose generation raises is reported
    and counted as failed without stopping the run. The collected tests are
    written, after a fixed header, to the output file as UTF-8, and a
    summary is printed.
    """
    # Get all YAML files
    yaml_files = sorted(glob.glob('/Users/dylan/clones/libfyaml/test/emitter-examples/*.yaml'))
    print(f"Found {len(yaml_files)} YAML test files")
    print("Generating tests with value assertions...")

    # Collect the pieces and join once at the end rather than growing one
    # string with += for every file.
    chunks = ['''const std = @import("std");
const testing = std.testing;
const yaml = @import("yaml.zig");
// Tests generated from libfyaml emitter-examples with value assertions
// This file contains all tests from libfyaml test suite with actual value checks
''']
    successful = 0
    failed = 0

    for position, yaml_file in enumerate(yaml_files, start=1):
        file_name = os.path.basename(yaml_file)
        # Derive the test name from the file name: drop ".yaml" and replace
        # the remaining dots and dashes with underscores.
        base_name = file_name.replace('.yaml', '').replace('.', '_').replace('-', '_')
        test_name = f"emitter_{base_name}"
        print(f" Processing {position}/{len(yaml_files)}: {file_name}")
        try:
            test_case = generate_test(yaml_file, test_name)
        except Exception as e:
            # Best effort: one bad input must not abort the whole run.
            print(f" Error: {e}")
            failed += 1
            continue
        if test_case:
            chunks.append(test_case)
            successful += 1
        else:
            print(f" Skipped (empty or invalid)")
            failed += 1

    # Write the output file. Non-ASCII YAML text is passed through into the
    # Zig source unchanged, so write it as UTF-8 regardless of the platform
    # default encoding.
    filename = '/Users/dylan/yamlz-3/src/emitter_value_tests.zig'
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(''.join(chunks))

    print(f"\nGenerated {filename}")
    print(f" Successful: {successful} tests")
    print(f" Failed/Skipped: {failed} tests")
    print(f" Total: {len(yaml_files)} files processed")
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()