Skip to content

Commit cb35df0

Browse files
authored
Add WasmFileKeyGenerator for split Wasm symbol files (#5853)
Adds support for indexing **WebAssembly modules** on symbol servers by introducing a `WasmFileKeyGenerator` that parses Wasm binaries and extracts the build_id custom section. This enables uploading and retrieving both halves of split-debug Wasm files. It follows the same convention as ELF (elf-buildid vs elf-buildid-sym): - Stripped Wasm modules => indexed with wasm-buildid prefix - Symbol files (containing DWARF .debug_* sections) => indexed with wasm-buildid-sym prefix **Changes:** - New: `WasmFileKeyGenerator` class that validates the Wasm header, scans custom sections for build_id, and detects symbol files via .debug_* sections - Modified: `FileKeyGenerator` to include `WasmFileKeyGenerator` in the auto-detection chain - Tests: Added unit tests with synthetic Wasm binaries covering plain modules, symbol files, and files without build IDs
1 parent e106c19 commit cb35df0

8 files changed

Lines changed: 341 additions & 5 deletions

File tree

documentation/symbols/SSQP_Key_Conventions.md

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,26 @@ Example:
195195

196196
### WASM (WebAssembly) Modules
197197

198-
WebAssembly symbols, which can be used by browser developer tools to provide source-level debugging experiences, are based on the DWARF format. These are indexed by their DWARF Build ID
199-
(built with `-Wl,--build-id` arguments) and the name of the module being debugged via the
200-
[buildId property](https://chromedevtools.github.io/devtools-protocol/tot/Debugger/#event-scriptParsed), and the symbol file itself is suffixed with `.s` to disambiguate from the WASM file.
198+
WebAssembly symbols, which can be used by browser developer tools to provide source-level debugging experiences, are based on the DWARF format. These are indexed by the Build ID stored in the `build_id` custom section of the Wasm binary. The Build ID is a byte sequence typically produced by the linker (e.g., with `-Wl,--build-id` arguments). Both the module and its corresponding symbol file contain the same `build_id` section, allowing them to be matched.
199+
200+
The key uses the actual filename of the file being indexed. For split symbol files, toolchains such as Emscripten produce a separate file (e.g., `foo.debug.wasm`) that contains the DWARF debug sections (`.debug_info`, `.debug_line`, etc.) stripped from the original module.
201+
202+
The final key is formatted as follows:
203+
204+
`<file_name>/<build_id_byte_sequence>/<file_name>`
205+
206+
Example (module):
201207

202208
**File name:** `main.wasm`
203209

204-
**Build ID of file:** `e3b0c44298fc1c149afbf4c8996fb92427ae41e4`
210+
**Build ID bytes:** `0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4`
211+
212+
**Lookup key:** `main.wasm/e3b0c44298fc1c149afbf4c8996fb92427ae41e4/main.wasm`
213+
214+
Example (split symbol file):
215+
216+
**File name:** `main.debug.wasm`
217+
218+
**Build ID bytes:** `(same as module)`
205219

206-
**Lookup key:**: `main.wasm.s/e3b0c44298fc1c149afbf4c8996fb92427ae41e4/main.wasm.s`
220+
**Lookup key:** `main.debug.wasm/e3b0c44298fc1c149afbf4c8996fb92427ae41e4/main.debug.wasm`

src/Microsoft.SymbolStore/KeyGenerators/FileKeyGenerator.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ private IEnumerable<KeyGenerator> GetGenerators()
5858
yield return new PDBFileKeyGenerator(Tracer, _file);
5959
yield return new PortablePDBFileKeyGenerator(Tracer, _file);
6060
yield return new PerfMapFileKeyGenerator(Tracer, _file);
61+
yield return new WasmFileKeyGenerator(Tracer, _file);
6162
}
6263
}
6364
}
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics;
7+
using System.IO;
8+
using System.Text;
9+
10+
namespace Microsoft.SymbolStore.KeyGenerators
11+
{
12+
/// <summary>
/// Generates symbol store keys for WebAssembly (Wasm) binaries. The key is built
/// from the file name and the bytes stored in the module's "build_id" custom section.
/// A file is only considered valid when it has a Wasm version 1 header and a
/// non-empty build ID of at most <see cref="MaxBuildIdLength"/> bytes.
/// </summary>
public class WasmFileKeyGenerator : KeyGenerator
{
    /// <summary>
    /// Wasm binary magic number: '\0asm'
    /// </summary>
    private static readonly byte[] s_wasmMagic = new byte[] { 0x00, 0x61, 0x73, 0x6D };

    /// <summary>
    /// Wasm binary format version 1
    /// </summary>
    private static readonly byte[] s_wasmVersion = new byte[] { 0x01, 0x00, 0x00, 0x00 };

    /// <summary>
    /// Custom section ID in Wasm binary format
    /// </summary>
    private const byte CustomSectionId = 0;

    /// <summary>
    /// The name of the custom section containing the build ID
    /// </summary>
    private const string BuildIdSectionName = "build_id";

    /// <summary>
    /// Maximum reasonable build ID length (256 bytes). Protects against
    /// malformed input causing large allocations.
    /// </summary>
    private const int MaxBuildIdLength = 256;

    /// <summary>
    /// Maximum section name length we'll read. Names longer than this are
    /// skipped since they cannot match the sections we're looking for.
    /// </summary>
    private const int MaxSectionNameLength = 64;

    /// <summary>
    /// Number of payload bits carried by the fifth (last possible) byte of a
    /// LEB128-encoded uint32: 32 - 4 * 7 = 4, i.e. only the low nibble may be set.
    /// </summary>
    private const int LastLeb128ByteInvalidBits = 0x70;

    private readonly SymbolStoreFile _file;
    private byte[] _buildId;
    private bool _parsed;
    private bool _isValid;

    /// <summary>
    /// Creates a key generator for the given file.
    /// </summary>
    /// <param name="tracer">tracer passed to the base key generator</param>
    /// <param name="file">file to generate keys for</param>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is null</exception>
    public WasmFileKeyGenerator(ITracer tracer, SymbolStoreFile file)
        : base(tracer)
    {
        _file = file ?? throw new ArgumentNullException(nameof(file));
    }

    /// <summary>
    /// Returns true if the file is a Wasm module with a usable build ID.
    /// </summary>
    public override bool IsValid()
    {
        return HasIndexableWasmBuildId();
    }

    /// <summary>
    /// Parses the file on first use and returns true if a Wasm header and a
    /// build ID of acceptable length were found.
    /// </summary>
    public bool HasIndexableWasmBuildId()
    {
        ParseWasmFile();
        return _isValid;
    }

    /// <summary>
    /// Returns the identity key for the file when it is valid and
    /// <see cref="KeyTypeFlags.IdentityKey"/> is requested; otherwise yields nothing.
    /// </summary>
    /// <param name="flags">the kinds of keys requested</param>
    public override IEnumerable<SymbolStoreKey> GetKeys(KeyTypeFlags flags)
    {
        if (!IsValid())
        {
            yield break;
        }

        if ((flags & KeyTypeFlags.IdentityKey) != 0)
        {
            yield return GetKey(_file.FileName, _buildId);
        }
    }

    /// <summary>
    /// Create a symbol store key for a Wasm file with a build ID.
    /// </summary>
    /// <param name="path">file name and path</param>
    /// <param name="buildId">build ID bytes from the build_id custom section</param>
    /// <returns>symbol store key</returns>
    public static SymbolStoreKey GetKey(string path, byte[] buildId)
    {
        Debug.Assert(path != null);
        Debug.Assert(buildId != null && buildId.Length > 0);
        // The file name component of the key is lower-cased; no key prefix is used.
        string file = GetFileName(path).ToLowerInvariant();
        return BuildKey(path, prefix: null, buildId, file);
    }

    /// <summary>
    /// Parses the Wasm file to validate the header and find the build_id custom section.
    /// The result is cached, so the stream is only scanned once per instance. The
    /// stream position is restored before returning.
    /// </summary>
    private void ParseWasmFile()
    {
        if (_parsed)
        {
            return;
        }
        _parsed = true;
        _isValid = false;

        Stream stream = _file.Stream;
        long prevPosition = stream.Position;
        try
        {
            stream.Position = 0;

            // Validate magic number, then version
            if (!NextBytesEqual(stream, s_wasmMagic) || !NextBytesEqual(stream, s_wasmVersion))
            {
                return;
            }

            // Only publish the build ID once it has been read completely
            byte[] buildId = FindBuildId(stream);
            if (buildId != null)
            {
                _buildId = buildId;
                _isValid = true;
            }
        }
        catch (Exception ex) when (ex is IOException || ex is OverflowException || ex is ArgumentOutOfRangeException)
        {
            Tracer.Verbose("Error parsing Wasm file {0}: {1}", _file.FileName, ex.Message);
        }
        finally
        {
            stream.Position = prevPosition;
        }
    }

    /// <summary>
    /// Scans the sections following the Wasm header for the build_id custom section.
    /// </summary>
    /// <param name="stream">stream positioned at the first section</param>
    /// <returns>the build ID bytes, or null if no usable build_id section was found</returns>
    private static byte[] FindBuildId(Stream stream)
    {
        while (stream.Position < stream.Length)
        {
            int sectionId = stream.ReadByte();
            if (sectionId == -1)
            {
                break;
            }

            uint sectionSize = ReadLEB128Unsigned(stream);
            long sectionEnd = stream.Position + sectionSize;

            // Validate that the section doesn't extend beyond the stream
            if (sectionEnd > stream.Length)
            {
                break;
            }

            if (sectionId == CustomSectionId && ReadWasmString(stream, sectionEnd) == BuildIdSectionName)
            {
                // The remainder of the section payload is the build ID. Range-check
                // the length as a long before narrowing it to an int.
                long buildIdLength = sectionEnd - stream.Position;
                if (buildIdLength > 0 && buildIdLength <= MaxBuildIdLength)
                {
                    byte[] buildId = new byte[(int)buildIdLength];
                    if (TryReadExactly(stream, buildId))
                    {
                        return buildId;
                    }
                }
            }

            stream.Position = sectionEnd;
        }

        return null;
    }

    /// <summary>
    /// Reads expected.Length bytes from the stream and compares them to expected.
    /// </summary>
    /// <returns>true if enough bytes were available and all of them match</returns>
    private static bool NextBytesEqual(Stream stream, byte[] expected)
    {
        byte[] actual = new byte[expected.Length];
        if (!TryReadExactly(stream, actual))
        {
            return false;
        }

        for (int i = 0; i < expected.Length; i++)
        {
            if (actual[i] != expected[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Fills the whole buffer from the stream. Stream.Read is allowed to return
    /// fewer bytes than requested before the end of the stream, so keep reading
    /// until the buffer is full or the stream reports no more data.
    /// </summary>
    /// <returns>true if the buffer was filled; false if the stream ended first</returns>
    private static bool TryReadExactly(Stream stream, byte[] buffer)
    {
        int total = 0;
        while (total < buffer.Length)
        {
            int read = stream.Read(buffer, total, buffer.Length - total);
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }

        return true;
    }

    /// <summary>
    /// Reads an unsigned LEB128-encoded integer from the stream.
    /// </summary>
    /// <exception cref="IOException">the stream ended in the middle of the value</exception>
    /// <exception cref="OverflowException">the encoded value does not fit in a uint32</exception>
    private static uint ReadLEB128Unsigned(Stream stream)
    {
        uint result = 0;
        int shift = 0;

        while (true)
        {
            int b = stream.ReadByte();
            if (b == -1)
            {
                throw new IOException("Unexpected end of stream reading LEB128 value.");
            }

            // On the fifth byte only the low 4 payload bits fit into a uint32;
            // anything above would otherwise be silently shifted out.
            if (shift == 28 && (b & LastLeb128ByteInvalidBits) != 0)
            {
                throw new OverflowException("LEB128 value too large for uint32.");
            }

            result |= (uint)(b & 0x7F) << shift;
            if ((b & 0x80) == 0)
            {
                break;
            }

            shift += 7;
            if (shift >= 35)
            {
                throw new OverflowException("LEB128 value too large for uint32.");
            }
        }

        return result;
    }

    /// <summary>
    /// Reads a Wasm string (LEB128 length prefix followed by UTF-8 bytes).
    /// Returns null if the string is too long or extends past the section boundary.
    /// </summary>
    /// <param name="stream">stream positioned at the string's length prefix</param>
    /// <param name="sectionEnd">stream offset one past the last byte of the enclosing section</param>
    private static string ReadWasmString(Stream stream, long sectionEnd)
    {
        uint length = ReadLEB128Unsigned(stream);
        if (length == 0)
        {
            return string.Empty;
        }
        if (length > MaxSectionNameLength || stream.Position + length > sectionEnd)
        {
            return null;
        }

        byte[] bytes = new byte[(int)length];
        if (!TryReadExactly(stream, bytes))
        {
            return null;
        }

        return Encoding.UTF8.GetString(bytes);
    }
}
248+
}

src/tests/Microsoft.SymbolStore.UnitTests/KeyGeneratorTests.cs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public void FileKeyGenerator()
3333
PEFileKeyGeneratorInternal(fileGenerator: true);
3434
PortablePDBFileKeyGeneratorInternal(fileGenerator: true);
3535
PerfMapFileKeyGeneratorInternal(fileGenerator: true);
36+
WasmFileKeyGeneratorInternal(fileGenerator: true);
3637
}
3738

3839

@@ -531,5 +532,77 @@ public void SourceFileKeyGenerator()
531532
Assert.True(clrKeys.Count() == 0);
532533
}
533534
}
535+
/// <summary>
/// Runs the shared Wasm key generator checks against WasmFileKeyGenerator directly
/// (fileGenerator: false) rather than through FileKeyGenerator.
/// </summary>
[Fact]
public void WasmFileKeyGenerator() => WasmFileKeyGeneratorInternal(fileGenerator: false);
540+
541+
/// <summary>
/// Shared checks for Wasm key generation against the synthetic binaries in TestBinaries.
/// </summary>
/// <param name="fileGenerator">
/// When true, tests 1 and 2 go through FileKeyGenerator; otherwise they use
/// WasmFileKeyGenerator directly. Tests 3 and 4 always use WasmFileKeyGenerator.
/// </param>
private void WasmFileKeyGeneratorInternal(bool fileGenerator)
{
    // Test 1: Plain Wasm module with build_id (not a symbol file)
    const string WasmModulePath = "TestBinaries/test_module.wasm";
    using (Stream stream = File.OpenRead(WasmModulePath))
    {
        var file = new SymbolStoreFile(stream, WasmModulePath);
        KeyGenerator generator = fileGenerator ? (KeyGenerator)new FileKeyGenerator(_tracer, file) : new WasmFileKeyGenerator(_tracer, file);

        Assert.True(generator.IsValid());

        // Assert.Single enumerates the lazily generated keys once and returns the only element
        SymbolStoreKey identityKey = Assert.Single(generator.GetKeys(KeyTypeFlags.IdentityKey));
        Assert.Equal("test_module.wasm/deadbeef0123456789abcdeffedcba98/test_module.wasm", identityKey.Index);

        Assert.Empty(generator.GetKeys(KeyTypeFlags.SymbolKey));
        Assert.Empty(generator.GetKeys(KeyTypeFlags.ClrKeys));
    }

    // Test 2: Wasm symbol file with build_id and .debug_info section
    const string WasmSymbolPath = "TestBinaries/test_module_symbols.wasm";
    using (Stream stream = File.OpenRead(WasmSymbolPath))
    {
        var file = new SymbolStoreFile(stream, WasmSymbolPath);
        KeyGenerator generator = fileGenerator ? (KeyGenerator)new FileKeyGenerator(_tracer, file) : new WasmFileKeyGenerator(_tracer, file);

        Assert.True(generator.IsValid());

        SymbolStoreKey identityKey = Assert.Single(generator.GetKeys(KeyTypeFlags.IdentityKey));
        Assert.Equal("test_module_symbols.wasm/deadbeef0123456789abcdeffedcba98/test_module_symbols.wasm", identityKey.Index);

        Assert.Empty(generator.GetKeys(KeyTypeFlags.SymbolKey));
    }

    // Test 3: Wasm file without build_id should be invalid
    const string WasmNoBuildIdPath = "TestBinaries/test_module_no_buildid.wasm";
    using (Stream stream = File.OpenRead(WasmNoBuildIdPath))
    {
        var file = new SymbolStoreFile(stream, WasmNoBuildIdPath);
        var generator = new WasmFileKeyGenerator(_tracer, file);

        Assert.False(generator.IsValid());
        Assert.Empty(generator.GetKeys(KeyTypeFlags.IdentityKey));
    }

    // Test 4: Wasm file with a custom section name longer than 64 chars before build_id
    const string WasmLongNamePath = "TestBinaries/test_module_long_section_name.wasm";
    using (Stream stream = File.OpenRead(WasmLongNamePath))
    {
        var file = new SymbolStoreFile(stream, WasmLongNamePath);
        var generator = new WasmFileKeyGenerator(_tracer, file);

        Assert.True(generator.IsValid());

        SymbolStoreKey identityKey = Assert.Single(generator.GetKeys(KeyTypeFlags.IdentityKey));
        Assert.Equal("test_module_long_section_name.wasm/deadbeef0123456789abcdeffedcba98/test_module_long_section_name.wasm", identityKey.Index);
    }
}
534607
}
535608
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)