Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/compiler/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ function emit_tile!(cache::CacheView, mi::Core.MethodInstance,

# Compute bytecode via driver
sci, rettype, kernel_meta = ir_result
key = cache.owner::TileCacheKey
key = tile_cache_key(cache.owner)
opts = CGOpts((sm_arch=unpack_version(key.sm_arch),
opt_level=unpack_hint(key.opt_level),
num_ctas=unpack_hint(key.num_ctas),
Expand Down
19 changes: 16 additions & 3 deletions src/compiler/interpreter.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# Integration with Julia's abstract interpreter

using Base.ScopedValues: ScopedValue

# Method table that `get_method_table_view` hands to `CC.OverlayMethodTable`.
Base.Experimental.@MethodTable cuTileMethodTable

# When assigned, every `cuTileInterpreter(cache)` constructed within
# the dynamic scope reuses this inference cache instead of allocating
# a fresh one. Lets batched inference passes (autotuning over many
# const-seeded variants of the same kernel) share work; without it,
# kernels that hit slow inference paths (e.g. `ct.load(..., order=...)`)
# pay the cost on every config.
#
# Constructed without a default value, so it is unassigned unless a caller
# binds it; `cuTileInterpreter` checks `isassigned` before reading it.
const _SCOPED_INF_CACHE = ScopedValue{Any}()

# Build the method-table view for inference at `world`: an overlay of
# `cuTileMethodTable` wrapped in a `CC.CachedMethodTable`.
function get_method_table_view(world::UInt)
    overlay = CC.OverlayMethodTable(world, cuTileMethodTable)
    return CC.CachedMethodTable(overlay)
end
Expand All @@ -21,10 +30,14 @@ end

function cuTileInterpreter(cache::CacheView; always_inline::Bool=true)
method_table = get_method_table_view(cache.world)
@static if isdefined(CC, :InferenceCache)
inf_cache = CC.InferenceCache()
inf_cache = if isassigned(_SCOPED_INF_CACHE)
_SCOPED_INF_CACHE[]
else
inf_cache = Vector{CC.InferenceResult}()
@static if isdefined(CC, :InferenceCache)
CC.InferenceCache()
else
Vector{CC.InferenceResult}()
end
end
inf_params = CC.InferenceParams()
opt_params = if always_inline
Expand Down
22 changes: 21 additions & 1 deletion src/cuTile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,27 @@ include("cache.jl")
include("launch.jl")

public launch, TileBackend, DefaultBackend, Tiled, ByTarget,
@compiler_options, @fpmode, @.,
@compiler_options, @fpmode, @., @cutile,
bytecode_version

"""
@cutile [kwargs...] kernel(args...)

Shorthand for `CUDACore.@cuda backend=cuTile [kwargs...] kernel(args...)`.

Works from any module — does not require the caller to `using CUDACore`,
since the macro expands to a fully-qualified reference to the actual
`CUDACore` module object.

```julia
@cutile blocks=N kernel(a, b, c)
@cutile blocks=N occupancy=4 kernel(a, b, c)
```
"""
macro cutile(args...)
    # `$CUDACore` and `$cuTile` splice the module objects themselves into the
    # expression, so the expansion does not depend on what names the caller
    # has in scope. The user's arguments are forwarded to `@cuda` unchanged.
    launch_expr = :($CUDACore.@cuda backend=$cuTile $(args...))
    # Escape the whole call so the forwarded arguments resolve in the
    # caller's scope.
    return esc(launch_expr)
end

# World age captured at __init__ time. The compilation pipeline
# (typeinf!, codegen, bytecode emission) is invoked in this world via
# `invoke_frozen` so that precompiled native code stays usable even after
Expand Down Expand Up @@ -123,4 +141,6 @@ end

include("precompile.jl")

include("experimental/Experimental.jl")

end # module cuTile
27 changes: 27 additions & 0 deletions src/experimental/Experimental.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module Experimental

using ..cuTile
using ..cuTile: cuTileconvert, default_sm_arch, temporary_cufunction,
                _SCOPED_INF_CACHE

using CUDACore: CUDACore

using Base.ScopedValues: with
import Core.Compiler as CC
using Random

# Allocate an empty inference cache of whichever type the running Julia
# provides: `CC.InferenceCache` when it is defined, otherwise a plain
# `Vector{CC.InferenceResult}`. Meant to be bound with
# `with(_SCOPED_INF_CACHE => ...)` around an autotune pass so that the
# per-config const-seeded inference calls share results rather than each
# paying for the slow paths (e.g. `ct.load(..., order=...)`).
@inline function _fresh_inf_cache()
    @static if isdefined(CC, :InferenceCache)
        return CC.InferenceCache()
    else
        return Vector{CC.InferenceResult}()
    end
end

include("search_space.jl")
include("autotune.jl")
include("autotune_macro.jl")

end
Loading