Files
julia/test/jit.jl
Sam Schweigel c24fc18dda Add function to emit multiple CodeInstances to the JIT atomically (remove jl_typeinf_lock) (#61255)
`jl_typeinf_lock` was introduced because it was easy to observe
performance regressions when running code that triggered type inference
on multiple threads: a thread could invoke a function before its callees
had been added to the JIT, forcing unexpected invoke trampolines. These
changes prevent those trampolines and remove the type inference lock.

The typical situation is this: let f() and g() be functions, where f()
calls g(). Thread 1 triggers inference for f(), which also infers g().
Then, in `add_codeinst_to_jit!`, thread 1 adds the code for f(), which
becomes visible to other threads because the `invoke` field of the
CodeInstance is set to `jl_fptr_wait_for_compiled` [1]. Before thread 1
adds g() to the JIT, thread 2 comes along, sees the invoke field on f()
and attempts to invoke it. The JIT must then compile a tojlinvoke
trampoline to g(), because it does not yet have IR for it.

This PR renames `jl_add_codeinst_to_jit` to `jl_add_codeinsts_to_jit`
and makes it take a vector of CodeInstances and a vector of CodeInfos.
We then emit all of the CodeInstances to a single `jl_codegen_output_t`
and add it to the JIT with `JuliaOJIT::addOutput`. The JIT, while
holding `JuliaOJIT::LinkerMutex`, sets the `invoke` pointer for every
defined CodeInstance to `jl_fptr_wait_for_compiled`. If another thread
has compiled that CodeInstance in the meantime, we skip it. If another
thread observes the invoke pointer we have just set, it's okay because
it will block waiting to acquire `LinkerMutex` if it attempts to invoke
it.

This pull request also changes the condition in
`JuliaOJIT::linkCallTarget` to match `add_codeinst_to_jit!` to avoid a
few other unnecessary trampolines: namely, we know an equivalent
CodeInstance will have been emitted to the JIT only if the target
CodeInstance is not in the global cache (since that's what inference
checks).

[1] In practice you need another function, because of the order
`add_codeinst_to_jit!` collects invokes in, but it would complicate the
presentation.
2026-04-28 10:33:48 -04:00

71 lines
2.3 KiB
Julia

# This file is a part of Julia. License is MIT: https://julialang.org/license
using Core: CodeInstance, MethodInstance
using Test
# Field-less marker type; its single instance `owner` is passed as the owner
# argument of the CodeInstance constructed in `compile_no_deps`.
struct TestOwner
end
const owner = TestOwner()
# Build a CodeInstance for the call `f(argtypes...)` from its typed CodeInfo,
# insert it into the method instance's cache, and hand exactly that one
# CodeInstance (with its CodeInfo) to the JIT. Nothing else is included in the
# vectors passed to the JIT. Returns the new CodeInstance.
function compile_no_deps(f, argtypes)
    @nospecialize
    minst = Base.method_instance(f, argtypes)
    # `code_typed` yields `CodeInfo => rettype` pairs; only the CodeInfo is used.
    src = first(only(code_typed(f, argtypes)))
    codeinst = CodeInstance(
        minst,
        owner,
        src.rettype,
        #=exctype=#Any,
        #=inferred_const=#nothing,
        #=inferred=#nothing,
        #=const_flags=#Int32(0),
        src.min_world,
        #=max_world=#typemax(UInt),
        #=effects=#UInt32(0),
        #=analysis_results=#nothing,
        src.debuginfo,
        src.edges
    )
    # Insert the CI into the global cache (necessary before adding to JIT)
    ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), minst, codeinst)
    codeinsts = Any[codeinst]
    sources = Any[src]
    ccall(:jl_add_codeinsts_to_jit, Cvoid, (Any, Any), codeinsts, sources)
    return codeinst
end
# Return `false` as soon as an edge of `ci` that is a CodeInstance for a method
# whose signature matches `Tuple{typeof(target), Vararg}` has a non-null
# `invoke` or `specptr`; return `true` otherwise (including when no edge
# matches `target` at all).
function check_edges_not_compiled(ci::CodeInstance, target)
    @nospecialize
    null_ptr = Ptr{Nothing}(0)
    target_sig = Tuple{typeof(target), Vararg}
    for edge in ci.edges
        # Only CodeInstance edges backed by a MethodInstance of a Method are inspected.
        edge isa CodeInstance || continue
        minst = edge.def
        minst isa MethodInstance || continue
        meth = minst.def
        meth isa Method || continue
        meth.sig <: target_sig || continue
        if edge.invoke != null_ptr || edge.specptr != null_ptr
            return false
        end
    end
    return true
end
# Test fptr1 -> tojlinvoke trampoline
# `foo` is `@noinline`, so `bar` keeps a real call to it, while `compile_no_deps`
# passes only `bar`'s CodeInstance to the JIT.
# NOTE(review): presumably the varargs signature of `foo` is what selects the
# fptr1 calling convention for this case -- confirm.
module M1
# Varargs callee: returns its second argument.
@noinline foo(xs...) = xs[2]
# Caller under test: doubles whatever `foo` returns.
bar(x) = 2*foo(x, x, x, x, x, x)
end
ci = compile_no_deps(M1.bar, (Int,))
# No edge of `ci` that targets `foo` may have its `invoke` or `specptr` set yet.
@test check_edges_not_compiled(ci, M1.foo)
# foo(100, ...) returns 100, so calling `bar` through `ci` must give 200.
@test invoke(M1.bar, ci, 100) == 200
# Test specsig -> tojlinvoke trampoline
# Same setup as the previous test, but `foo` takes a single positional argument.
# NOTE(review): presumably this signature is what selects the specsig calling
# convention for this case -- confirm.
module M2
# Callee kept out of line by `@noinline`: adds 100 to its argument.
@noinline foo(x) = x+100
# Caller under test: doubles whatever `foo` returns.
bar(x) = 2*foo(x)
end
ci = compile_no_deps(M2.bar, (Int,))
# No edge of `ci` that targets `foo` may have its `invoke` or `specptr` set yet.
@test check_edges_not_compiled(ci, M2.foo)
# foo(5) == 105, so calling `bar` through `ci` must give 210.
@test invoke(M2.bar, ci, 5) == 210
# Each `eval` must compile (because of the ccall) a top-level thunk. The
# CodeInstance for this thunk becomes garbage-collectable after being invoked,
# but before returning, because of wait(). If the invoke must return for the
# CodeInstance address to be unregistered from the JIT, this will crash. Credit
# to @vtjnash for this example.
function test_gc_codeinst()
    for i in 1:10000
        # Each task evaluates a fresh top-level expression containing a ccall
        # and then blocks in `wait()`.
        @async eval(:(ccall(:sqrt, Float64, (Float64,), $i); wait()))
        # Force a full collection every 100 iterations.
        if i % 100 == 0
            GC.gc()
        end
    end
    return true
end
# `test_gc_codeinst` always returns `true`, so this passes as long as the loop
# runs to completion.
@test test_gc_codeinst()
sleep(5) # Avoids problems where we don't respond to Distributed.jl fast enough