Skip to content

Commit d828de8

Browse files
committed
[GCN] Extract stats from the 'AMDGPU.csdata' section. More tests.
1 parent 5272321 commit d828de8

2 files changed

Lines changed: 74 additions & 10 deletions

File tree

src/stats/gcn.jl

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ struct GCNStats
88
vgpr_spill_count :: Int
99
uses_dyn_stack :: Bool
1010

11+
# Memory stats
12+
code_size :: Int
13+
LDS_size :: Int # Static allocations of shared memory only
14+
1115
# Arguments
1216
arguments_count :: Int
1317
arguments_size :: Int # in bytes
@@ -51,8 +55,13 @@ function extract_stats(::Val{:gcn}, gcn_source, stats_opts)
5155

5256
code = extract_gcn_code_stats(gcn_source; arch, arch_version)
5357

58+
csdata = extract_gcn_csdata(gcn_source)
59+
code_bytes = get(csdata, "codeLenInByte", 0)
60+
LDS_bytes = get(csdata, "LDSByteSize", 0)
61+
5462
return GCNStats(
5563
sgpr_count, vgpr_count, agpr_count, sgpr_spills, vgpr_spills, uses_dyn_stack,
64+
code_bytes, LDS_bytes,
5665
arguments_count, arguments_size,
5766
target_triple, processor,
5867
string(arch), arch_version, wavefront_size,
@@ -62,14 +71,6 @@ function extract_stats(::Val{:gcn}, gcn_source, stats_opts)
6271
end
6372

6473

65-
# Read the capture `group` out of a regex match, falling back to `default`.
#
# - No match at all (`nothing`): `default` is returned untouched.
# - `default` is a `String`: the captured text is returned as-is when the group matched.
# - Any other `default::T`: the captured text is parsed as a `T`; `default` is returned when the
#   group did not match or when the text cannot be parsed.
function parse_match_group(::Nothing, group, default)
    return default
end

function parse_match_group(m::RegexMatch, group, default::String)
    captured = m[group]
    return isnothing(captured) ? default : captured
end

function parse_match_group(m::RegexMatch, group, default::T) where {T}
    captured = m[group]
    if isnothing(captured)
        return default
    end
    parsed = tryparse(T, captured)
    return isnothing(parsed) ? default : parsed
end
71-
72-
7374
function parse_amdgpu_target(target_triple)
7475
# Docs: https://llvm.org/docs/AMDGPUUsage.html#target-triples
7576
# It is made of a "<target triple>-<processor>", with 4 '-' in total.
@@ -191,6 +192,37 @@ function extract_gcn_code_stats(gcn_source; arch=:unknown, arch_version=v"0")
191192
end
192193

193194

195+
"""
    extract_gcn_csdata(gcn_source)

Parse the integer entries of the `.AMDGPU.csdata` section of `gcn_source` and return them as a
`Dict{String, Int}` mapping each entry name to its (decimal) value.

The section is taken to span from the `.section .AMDGPU.csdata` directive to the next `.text`
occurrence following it. An empty `Dict` is returned if either delimiter cannot be found.
"""
function extract_gcn_csdata(gcn_source)
    # Between the GCN kernel source and the YAML metadata, there is an "AMDGPU.csdata" section.
    # I cannot find where it is documented (if it is), therefore it shouldn't be relied on too much.
    # It likely contains important information for the GPU driver, therefore it may be reliable, but
    # it may also depend on the target.
    # Some of the information is also found in the YAML metadata, which should be preferred over
    # this one, as its format and content do not depend on the target.
    csdata_start = findfirst(r"\.section\s+\.AMDGPU\.csdata", gcn_source)
    isnothing(csdata_start) && return Dict{String, Int}()

    csdata_end = findnext(".text", gcn_source, last(csdata_start))
    isnothing(csdata_end) && return Dict{String, Int}()

    # The section body lies between the end of the section header and the start of the
    # following ".text" directive. The slice must start at `csdata_start`: starting at
    # `last(csdata_end)` gives a reversed (hence empty) range and no entry is ever parsed.
    csdata_section = @view gcn_source[last(csdata_start):first(csdata_end)]

    # Captures the key and value (decimal) of all entries within the csdata section.
    # Examples:
    #   "; NumSgprs: 8" => key="NumSgprs", value="8"
    #   "; COMPUTE_PGM_RSRC2:SCRATCH_EN: 0" => key="COMPUTE_PGM_RSRC2:SCRATCH_EN", value="0"
    csdata_regex = r"^;\s(?<key>[\w:]+)\s?[=:]\s(?<value>\d+)\b"m

    csdata = Dict{String, Int}()
    for m in eachmatch(csdata_regex, csdata_section)
        # `tryparse` returns `nothing` if the digits do not fit in an `Int`: fall back to 0.
        value = @something tryparse(Int, m[:value]) 0
        csdata[m[:key]] = value
    end

    return csdata
end
224+
225+
194226
function extract_gcn_yaml_metadata(gcn_source)
195227
# As per the LLVM docs, this section is in YAML.
196228
# https://llvm.org/docs/AMDGPUUsage.html#id112
@@ -231,6 +263,10 @@ function Base.show(io::IO, stats::GCNStats)
231263
println(io, " - Accum. registers (AGPR) ", stats.accu_registers)
232264
println(io, " - Uses dynamic stack ", stats.uses_dyn_stack)
233265
println(io)
266+
println(io, "Kernel memory stats:")
267+
println(io, " - Instructions ", Base.format_bytes(stats.code_size))
268+
println(io, " - Shared mem. (LDS) ", Base.format_bytes(stats.LDS_size), " (static allocations)")
269+
println(io)
234270
println(io, "Kernel target:")
235271
println(io, " - Target ", stats.target_triple)
236272
println(io, " - ISA ", stats.ISA)

test/stats.jl

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ end
6262
@test sass_stats.warp_sync == 0
6363
@test sass_stats.calls == 12
6464

65-
disp_sass_stats = display_str(sass_stats)
66-
6765
cleaned_sass = CDC.cleanup_code(Val(:sass), sass_sample)
6866
cleaned_sass_stats = CDS.extract_stats(Val(:sass), cleaned_sass)
6967
@test sass_stats == cleaned_sass_stats
@@ -102,6 +100,9 @@ end
102100
@test gcn_stats.arguments_count == 12
103101
@test gcn_stats.arguments_size == 304
104102

103+
@test gcn_stats.code_size == 325 * 8 # each instruction is 8 bytes long
104+
@test gcn_stats.LDS_size == 0
105+
105106
@test gcn_stats.target_triple == "amdgcn-amd-amdhsa"
106107
@test gcn_stats.ISA == "gfx90a"
107108
@test gcn_stats.architecture == "CDNA"
@@ -122,6 +123,33 @@ end
122123
@test cleaned_gcn_stats.scalar_registers == 0
123124
@test cleaned_gcn_stats.inst_count == gcn_stats.inst_count
124125
@test gcn_stats != cleaned_gcn_stats
126+
127+
disp_gcn_stats = display_str(gcn_stats)
128+
@test occursin("CDNA 2", disp_gcn_stats)
129+
end
130+
131+
@testset "Architecture" begin
    # Processor name => expected result of `CDS.amdgpu_processor_to_architecture`.
    expected_architectures = [
        # Unrecognized processors
        "unknown" => (:unknown, v"0"),
        "gfx0"    => (:unknown, v"0"),

        "gfx600" => (:GCN, v"1"),
        "gfx700" => (:GCN, v"2"),
        "gfx800" => (:GCN, v"3"),
        "gfx900" => (:GCN, v"5"),

        "gfx908" => (:CDNA, v"1"),
        "gfx90a" => (:CDNA, v"2"),
        "gfx942" => (:CDNA, v"3"),
        "gfx950" => (:CDNA, v"3"),

        "gfx1010" => (:RDNA, v"1"),
        "gfx1100" => (:RDNA, v"3"),
        "gfx1200" => (:RDNA, v"4"),

        # Generic processors
        "gfx9-generic"    => (:GCN, v"5"),
        "gfx10-3-generic" => (:RDNA, v"3"),
        "gfx11-generic"   => (:RDNA, v"3"),
        "gfx12-generic"   => (:RDNA, v"4"),
    ]

    for (processor, expected) in expected_architectures
        @test CDS.amdgpu_processor_to_architecture(processor) == expected
    end
end
126154
end
127155

0 commit comments

Comments
 (0)