@@ -8,6 +8,10 @@ struct GCNStats
88 vgpr_spill_count :: Int
99 uses_dyn_stack :: Bool
1010
11+ # Memory stats
12+ code_size :: Int
13+ LDS_size :: Int # Static allocations of shared memory only
14+
1115 # Arguments
1216 arguments_count :: Int
1317 arguments_size :: Int # in bytes
@@ -51,8 +55,13 @@ function extract_stats(::Val{:gcn}, gcn_source, stats_opts)
5155
5256 code = extract_gcn_code_stats (gcn_source; arch, arch_version)
5357
58+ csdata = extract_gcn_csdata (gcn_source)
59+ code_bytes = get (csdata, " codeLenInByte" , 0 )
60+ LDS_bytes = get (csdata, " LDSByteSize" , 0 )
61+
5462 return GCNStats (
5563 sgpr_count, vgpr_count, agpr_count, sgpr_spills, vgpr_spills, uses_dyn_stack,
64+ code_bytes, LDS_bytes,
5665 arguments_count, arguments_size,
5766 target_triple, processor,
5867 string (arch), arch_version, wavefront_size,
@@ -62,14 +71,6 @@ function extract_stats(::Val{:gcn}, gcn_source, stats_opts)
6271end
6372
6473
65- parse_match_group (:: Nothing , group, default) = default
66- parse_match_group (m:: RegexMatch , group, default:: String ) = @something m[group] default
67- function parse_match_group (m:: RegexMatch , group, default:: T ) where {T}
68- isnothing (m[group]) && return default
69- return @something tryparse (T, m[group]) default
70- end
71-
72-
7374function parse_amdgpu_target (target_triple)
7475 # Docs: https://llvm.org/docs/AMDGPUUsage.html#target-triples
7576 # It is made of a "<target triple>-<processor>", with 4 '-' in total.
@@ -191,6 +192,37 @@ function extract_gcn_code_stats(gcn_source; arch=:unknown, arch_version=v"0")
191192end
192193
193194
"""
    extract_gcn_csdata(gcn_source)

Parse the `.AMDGPU.csdata` section of the GCN assembly text `gcn_source` and return its
integer entries as a `Dict{String, Int}` mapping each entry name to its decimal value.

The section is taken to start at the first `.section .AMDGPU.csdata` directive and to end
at the next `.text` directive. Within it, every comment line of the form `; <key>: <value>`
or `; <key> = <value>` with a decimal `<value>` yields one entry.

An empty dictionary is returned when either directive cannot be found. A value that cannot
be parsed as an `Int` is stored as `0`.
"""
function extract_gcn_csdata(gcn_source)
    # Between the GCN kernel source and the YAML metadata, there is a "AMDGPU.csdata" section.
    # I cannot find where it is documented (if it is), therefore it shouldn't be relied on too
    # much. It likely contains important information for the GPU driver, therefore it may be
    # reliable, but it may also depend on the target.
    # Some of the information is also found in the YAML metadata, which should be preferred
    # over this one, as its format and content does not depend on the target.
    csdata_start = findfirst(r"\.section\s+\.AMDGPU\.csdata", gcn_source)
    isnothing(csdata_start) && return Dict{String, Int}()

    csdata_end = findnext(".text", gcn_source, last(csdata_start))
    isnothing(csdata_end) && return Dict{String, Int}()

    # The section spans from the end of the ".section" directive to the start of the ".text"
    # directive. Both bounds are valid character indices since they come from match ranges.
    csdata_section = SubString(gcn_source, last(csdata_start), first(csdata_end))

    # Captures the key and value (decimal) of all entries within the csdata section.
    # Examples:
    #   "; NumSgprs: 8"                      => key="NumSgprs", value="8"
    #   "; COMPUTE_PGM_RSRC2:SCRATCH_EN: 0"  => key="COMPUTE_PGM_RSRC2:SCRATCH_EN", value="0"
    csdata_regex = r"^;\s(?<key>[\w:]+)\s?[=:]\s(?<value>\d+)\b"m

    csdata = Dict{String, Int}()
    for m in eachmatch(csdata_regex, csdata_section)
        # `tryparse` returns `nothing` for values which do not fit in an `Int`: fall back to 0.
        value = @something tryparse(Int, m[:value]) 0
        csdata[m[:key]] = value
    end

    return csdata
end
224+
225+
194226function extract_gcn_yaml_metadata (gcn_source)
195227 # As per the LLVM docs, this section is in YAML.
196228 # https://llvm.org/docs/AMDGPUUsage.html#id112
@@ -231,6 +263,10 @@ function Base.show(io::IO, stats::GCNStats)
231263 println (io, " - Accum. registers (AGPR) " , stats. accu_registers)
232264 println (io, " - Uses dynamic stack " , stats. uses_dyn_stack)
233265 println (io)
266+ println (io, " Kernel memory stats:" )
267+ println (io, " - Instructions " , Base. format_bytes (stats. code_size))
268+ println (io, " - Shared mem. (LDS) " , Base. format_bytes (stats. LDS_size), " (static allocations)" )
269+ println (io)
234270 println (io, " Kernel target:" )
235271 println (io, " - Target " , stats. target_triple)
236272 println (io, " - ISA " , stats. ISA)
0 commit comments