Inference regression with external interpreters + recursive functions #52938

Open
maleadt opened this issue Jan 17, 2024 · 2 comments · May be fixed by #54323
Labels: domain:gpu (Affects running Julia on a GPU), kind:regression (Regression in behavior compared to a previous version)
Milestone: 1.11
maleadt (Member) commented Jan 17, 2024

Opening an issue to track a 1.11 regression seen in the GPU stack (JuliaGPU/CUDA.jl#2240, JuliaGPU/GPUCompiler.jl#506), involving recursive functions. Such functions are common; e.g. `typejoin` is recursive, which breaks certain kwarg calls:

; ││┌ @ iterators.jl:279 within `pairs`
; │││┌ @ essentials.jl:388 within `Pairs`
; ││││┌ @ namedtuple.jl:236 within `eltype`
; │││││┌ @ namedtuple.jl:238 within `nteltype`
; ││││││┌ @ tuple.jl:203 within `eltype`
; │││││││┌ @ tuple.jl:223 within `_compute_eltype`
; ││││││││┌ @ promotion.jl:174 within `promote_typejoin`
           %8 = load {}*, {}** bitcast (i8* getelementptr (i8, i8* @jl_small_typeof, i64 256) to {}**), align 8
           %gc_slot_addr_1 = call {}** @julia.get_gc_frame_slot({}** nonnull %gcframe, i32 1)
           store {}* %8, {}** %gc_slot_addr_1, align 8
           %9 = call fastcc nonnull {}* @julia_typejoin_54511({}* readonly %8, {}* readonly inttoptr (i64 130372983896064 to {}*))
; │││││││││ @ promotion.jl:175 within `promote_typejoin`
           %10 = load {}*, {}** bitcast (i8* getelementptr (i8, i8* @jl_small_typeof, i64 64) to {}**), align 8
           %gc_slot_addr_2 = call {}** @julia.get_gc_frame_slot({}** nonnull %gcframe, i32 2)
           store {}* %10, {}** %gc_slot_addr_2, align 8
           %gc_slot_addr_0 = call {}** @julia.get_gc_frame_slot({}** nonnull %gcframe, i32 0)
           store {}* %9, {}** %gc_slot_addr_0, align 8
           store {}* %10, {}** %jlcallframe35.sub, align 8
           %11 = getelementptr inbounds [4 x {}*], [4 x {}*]* %jlcallframe35, i64 0, i64 1
           store {}* %8, {}** %11, align 8
           %12 = getelementptr inbounds [4 x {}*], [4 x {}*]* %jlcallframe35, i64 0, i64 2
           store {}* inttoptr (i64 130372983896064 to {}*), {}** %12, align 8
           %13 = getelementptr inbounds [4 x {}*], [4 x {}*]* %jlcallframe35, i64 0, i64 3
           store {}* %9, {}** %13, align 8
           %14 = call nonnull {}* @jl_f_apply_type({}* null, {}** nonnull %jlcallframe35.sub, i32 4)
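For context, `typejoin`'s recursion is easy to observe: joining two parametric types requires recursively joining their type parameters (a minimal illustration, not taken from the issue):

```julia
# typejoin recurses into type parameters: to join two Tuple types it
# must first join their element types (Int64 and Float64 join to Real).
joined = Base.typejoin(Tuple{Int64}, Tuple{Float64})
println(joined)  # Tuple{Real}
```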

MWE without the GPU stack:

const CC = Core.Compiler
using Core: MethodInstance, CodeInstance, CodeInfo, MethodTable


## code instance cache

struct CodeCache
    dict::IdDict{MethodInstance,Vector{CodeInstance}}

    CodeCache() = new(IdDict{MethodInstance,Vector{CodeInstance}}())
end

function CC.setindex!(cache::CodeCache, ci::CodeInstance, mi::MethodInstance)
    cis = get!(cache.dict, mi, CodeInstance[])
    push!(cis, ci)
end


## world view of the cache

function CC.haskey(wvc::CC.WorldView{CodeCache}, mi::MethodInstance)
    CC.get(wvc, mi, nothing) !== nothing
end

function CC.get(wvc::CC.WorldView{CodeCache}, mi::MethodInstance, default)
    # check the cache
    for ci in get!(wvc.cache.dict, mi, CodeInstance[])
        if ci.min_world <= wvc.worlds.min_world && wvc.worlds.max_world <= ci.max_world
            # TODO: if (code && (code == jl_nothing || jl_ir_flag_inferred((jl_array_t*)code)))
            src = if ci.inferred isa Vector{UInt8}
                ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any),
                       mi.def, C_NULL, ci.inferred)
            else
                ci.inferred
            end
            return ci
        end
    end

    return default
end

function CC.getindex(wvc::CC.WorldView{CodeCache}, mi::MethodInstance)
    r = CC.get(wvc, mi, nothing)
    r === nothing && throw(KeyError(mi))
    return r::CodeInstance
end

function CC.setindex!(wvc::CC.WorldView{CodeCache}, ci::CodeInstance, mi::MethodInstance)
    src = if ci.inferred isa Vector{UInt8}
        ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any),
              mi.def, C_NULL, ci.inferred)
    else
        ci.inferred
    end
    CC.setindex!(wvc.cache, ci, mi)
end


## interpreter

if isdefined(CC, :CachedMethodTable)
    const ExternalMethodTableView = CC.CachedMethodTable{CC.OverlayMethodTable}
    get_method_table_view(world::UInt, mt::MethodTable) =
        CC.CachedMethodTable(CC.OverlayMethodTable(world, mt))
else
    const ExternalMethodTableView = CC.OverlayMethodTable
    get_method_table_view(world::UInt, mt::MethodTable) = CC.OverlayMethodTable(world, mt)
end

struct ExternalInterpreter <: CC.AbstractInterpreter
    world::UInt
    method_table::ExternalMethodTableView

    code_cache
    inf_cache::Vector{CC.InferenceResult}
end

function ExternalInterpreter(world::UInt=Base.get_world_counter(); method_table, code_cache)
    @assert world <= Base.get_world_counter()
    method_table = get_method_table_view(world, method_table)
    inf_cache = Vector{CC.InferenceResult}()

    return ExternalInterpreter(world, method_table, code_cache, inf_cache)
end

CC.InferenceParams(interp::ExternalInterpreter) = CC.InferenceParams()
CC.OptimizationParams(interp::ExternalInterpreter) = CC.OptimizationParams()
CC.get_world_counter(interp::ExternalInterpreter) = interp.world
CC.get_inference_cache(interp::ExternalInterpreter) = interp.inf_cache
CC.code_cache(interp::ExternalInterpreter) = CC.WorldView(interp.code_cache, interp.world)

# No need to do any locking since we're not putting our results into the runtime cache
CC.lock_mi_inference(interp::ExternalInterpreter, mi::MethodInstance) = nothing
CC.unlock_mi_inference(interp::ExternalInterpreter, mi::MethodInstance) = nothing

function CC.add_remark!(interp::ExternalInterpreter, sv::CC.InferenceState, msg)
    @debug "Inference remark during External compilation of $(sv.linfo): $msg"
end

CC.may_optimize(interp::ExternalInterpreter) = true
CC.may_compress(interp::ExternalInterpreter) = true
CC.may_discard_trees(interp::ExternalInterpreter) = true
CC.verbose_stmt_info(interp::ExternalInterpreter) = false
CC.method_table(interp::ExternalInterpreter) = interp.method_table


# main

Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE)

inner(f, types::Type, args...; kwargs...) = nothing
outer(f) = @inline inner(f, Tuple{}; foo=Ref(42), bar=1)

function main()
    println("Native:")
    display(Base.code_ircode(outer, Tuple{Nothing}))

    println()

    println("External:")
    interp = ExternalInterpreter(; method_table=GLOBAL_METHOD_TABLE, code_cache=CodeCache())
    display(Base.code_ircode(outer, Tuple{Nothing}; interp))

    return
end

isinteractive() || main()
Native:
1-element Vector{Any}:
115 1 ─     return nothing                                                  │
     => Nothing

External:
1-element Vector{Any}:
115 1 ─ %1 = invoke Base.typejoin(Int64::Any, Base.RefValue{Int64}::Any)::Any
    │        Core.apply_type(Base.Union, Int64, Base.RefValue{Int64}, %1)::Type
    └──      return nothing                                  │
     => Nothing

Bisected to #51092; cc @vtjnash. There's a solution proposed by @aviatesk in #51092 (comment).

@maleadt maleadt added kind:regression Regression in behavior compared to a previous version domain:gpu Affects running Julia on a GPU labels Jan 17, 2024
@maleadt maleadt added this to the 1.11 milestone Jan 17, 2024
@aviatesk aviatesk self-assigned this Jan 17, 2024
maleadt (Member, Author) commented Jan 20, 2024

I guess JET suffers from this as well; I came across the following (presumably wrong) report_opt note, which looks very similar to the dynamic dispatch reported here:

┌ display_error(io::IOContext{IOBuffer}, stack::Base.ExceptionStack) @ Base ./client.jl:110
│┌ kwcall(::NamedTuple{(:bold, :color), <:Tuple{Bool, Union{Int64, Symbol}}}, ::typeof(printstyled), io::IOContext{IOBuffer}, msg::String) @ Base ./util.jl:141
││┌ pairs(nt::NamedTuple) @ Base.Iterators ./iterators.jl:279
│││┌ (Base.Pairs{Symbol})(data::NamedTuple, itr::Tuple{Vararg{Symbol}}) @ Base ./essentials.jl:343
││││┌ eltype(::Type{A} where A<:NamedTuple) @ Base ./namedtuple.jl:237
│││││┌ nteltype(::Type{NamedTuple{names, T}} where names) where T<:Tuple @ Base ./namedtuple.jl:239
││││││┌ eltype(t::Type{<:Tuple{Vararg{E}}}) where E @ Base ./tuple.jl:208
│││││││┌ _compute_eltype(t::Type{<:Tuple{Vararg{E}}} where E) @ Base ./tuple.jl:231
││││││││┌ afoldl(op::Base.var"#54#55", a::Any, bs::Vararg{Any}) @ Base ./operators.jl:544
│││││││││┌ (::Base.var"#54#55")(a::Any, b::Any) @ Base ./tuple.jl:235
││││││││││┌ promote_typejoin(a::Any, b::Any) @ Base ./promotion.jl:172
│││││││││││┌ typejoin(a::Any, b::Any) @ Base ./promotion.jl:127
││││││││││││ runtime dispatch detected: Base.UnionAll(%403::Any, %405::Any)::Any
│││││││││││└────────────────────
││││││││┌ afoldl(op::Base.var"#54#55", a::Any, bs::Vararg{Any}) @ Base ./operators.jl:545
│││││││││┌ (::Base.var"#54#55")(a::Type, b::Any) @ Base ./tuple.jl:235
││││││││││┌ promote_typejoin(a::Type, b::Any) @ Base ./promotion.jl:172
│││││││││││┌ typejoin(a::Type, b::Any) @ Base ./promotion.jl:127
││││││││││││ runtime dispatch detected: Base.UnionAll(%398::Any, %400::Any)::Any
│││││││││││└────────────────────

maleadt (Member, Author) commented Apr 5, 2024

@aviatesk Any update on this? It blocks CUDA.jl support for Julia 1.11.

aviatesk added a commit that referenced this issue May 1, 2024
The `:terminates` effect bit must be conservatively tainted unless the recursion cycle has been fully resolved. As for the other effects, there is no need to taint them at this point, since they will be tainted as we try to resolve the cycle.

- fixes #52938
- xref #51092
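As background for the fix: for a self-recursive call, inference generally cannot prove termination, so the `:terminates` bit has to stay conservatively unset. A minimal sketch (`countdown` is a hypothetical example, not from the PR, and the exact printed effects depend on the Julia version):

```julia
# A self-recursive function: it terminates for every Int argument,
# but proving that is beyond what inference attempts for a cycle.
countdown(n::Int) = n <= 0 ? 0 : countdown(n - 1)

# Query the effects inference derives for this call signature.
effects = Base.infer_effects(countdown, (Int,))
println(effects)
```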
aviatesk added a commit that referenced this issue May 8, 2024, with the same message.