diff --git a/benchmarks/GlobalOptimization/CondaPkg.toml b/benchmarks/GlobalOptimization/CondaPkg.toml
new file mode 100644
index 000000000..2b924f81e
--- /dev/null
+++ b/benchmarks/GlobalOptimization/CondaPkg.toml
@@ -0,0 +1,2 @@
+[deps]
+python = ">=3.10,<3.13"
diff --git a/benchmarks/GlobalOptimization/pso_global_optimizers.jmd b/benchmarks/GlobalOptimization/pso_global_optimizers.jmd
new file mode 100644
index 000000000..14204734e
--- /dev/null
+++ b/benchmarks/GlobalOptimization/pso_global_optimizers.jmd
@@ -0,0 +1,377 @@
+---
+title: PSO Global Optimizer Benchmarks
+author: Utkarsh, Chris Rackauckas
+---
+
+This benchmark evaluates Particle Swarm Optimization (PSO) variants from
+[ParallelParticleSwarms.jl](https://github.com/SciML/ParallelParticleSwarms.jl)
+against established global optimizers on the
+[BlackBoxOptimizationBenchmarking.jl](https://github.com/jonathanBieler/BlackBoxOptimizationBenchmarking.jl)
+suite, using the [Optimization.jl](https://github.com/SciML/Optimization.jl) interface.
+This tests both iterations and wall-clock time vs accuracy, i.e. for a given budget
+(in iterations or time), what percentage of problems from the set is a solver able to solve.
+
+## Setup
+```julia
+using Random
+Random.seed!(42)
+
+using BlackBoxOptimizationBenchmarking, Plots, Optimization, Memoize, Statistics
+import BlackBoxOptimizationBenchmarking: Chain, BenchmarkSetup, BenchmarkResults,
+    BBOBFunction, FunctionCallsCounter, solve_problem, pinit, compute_CI
+const BBOB = BlackBoxOptimizationBenchmarking
+
+ENV["GKSwstype"] = "nul"
+gr()
+
+using OptimizationBBO, OptimizationOptimJL, OptimizationNLopt, OptimizationSciPy
+
+using ParallelParticleSwarms
+using KernelAbstractions: CPU
+using StaticArrays, LinearAlgebra
+
+const PSOKernel     = ParallelParticleSwarms.ParallelPSOKernel
+const SyncPSOKernel = ParallelParticleSwarms.ParallelSyncPSOKernel
+const SerPSO        = ParallelParticleSwarms.SerialPSO
+const HPso          = ParallelParticleSwarms.HybridPSO
+
+const BACKEND = CPU()
+```
+```julia
+function make_success_tracker(f_raw, f_opt, Δf)
+    t0 = Ref(time())
+    time_to_success = Ref(Inf)
+    function tracked_f(u)
+        val = f_raw(u)
+        if val < Δf + f_opt && time_to_success[] == Inf
+            time_to_success[] = time() - t0[]
+        end
+        return val
+    end
+    return tracked_f, t0, time_to_success
+end
+
+function solve_problem_timed(optimizer::BenchmarkSetup, tracked_f, D::Int, run_length::Int;
+        u0 = pinit(D))
+    method = optimizer.method
+    optf = OptimizationFunction((u, _) -> tracked_f(u), AutoForwardDiff())
+    if optimizer.isboxed
+        prob = OptimizationProblem(optf, u0, lb = fill(-5.5, D), ub = fill(5.5, D))
+    else
+        prob = OptimizationProblem(optf, u0)
+    end
+    solve(prob, method; maxiters = run_length)
+end
+
+function solve_problem_timed(m::Chain, tracked_f, D::Int, run_length::Int)
+    rl1 = round(Int, m.p * run_length)
+    rl2 = run_length - rl1
+    sol = solve_problem_timed(m.first, tracked_f, D, rl1)
+    solve_problem_timed(m.second, tracked_f, D, rl2; u0 = sol.u)
+end
+
+function benchmark_time_to_success(
+    optimizer::Union{Chain, BenchmarkSetup}, funcs::Vector{BBOBFunction};
+    Ntrials::Int = 20, dimension::Int = 3, Δf::Real = 1e-6, max_run_length::Int = 100_000
+)
+    all_times = Float64[]
+    for f in funcs
+        for _ in 1:Ntrials
+            tracked_f, t0_ref, tts_ref = make_success_tracker(f.f, f.f_opt, Δf)
+            try
+                t0_ref[] = time()
+                solve_problem_timed(optimizer, tracked_f, dimension, max_run_length)
+                push!(all_times, tts_ref[])
+            catch err
+                push!(all_times, Inf)
+                @warn(string(optimizer, " failed: ", err))
+            end
+        end
+    end
+    return all_times
+end
+
+benchmark_time_to_success(optimizer, funcs::Vector{BBOBFunction}; kwargs...) =
+    benchmark_time_to_success(BenchmarkSetup(optimizer), funcs; kwargs...)
+
+function success_rate_cdf(all_times::Vector{Float64}, time_thresholds::AbstractVector{Float64})
+    N = length(all_times)
+    return [count(x -> x <= t, all_times) / N for t in time_thresholds]
+end
+```
+
+PSO variants require an `SVector` interface and a custom solve loop.
+```julia
+function pso_solve(opt, f::BBOBFunction, D::Int, maxiters::Int; local_maxiters::Int = 20)
+    obj  = (x, p) -> f.f(Vector(x))
+    ad   = opt isa HPso ? AutoForwardDiff() : Optimization.SciMLBase.NoAD()
+    optf = OptimizationFunction{false}(obj, ad)
+    lb   = SVector{D}(ntuple(_ -> -5.0, Val(D)))
+    ub   = SVector{D}(ntuple(_ ->  5.0, Val(D)))
+    x0   = SVector{D}(ntuple(_ -> -5.0 + rand() * 10.0, Val(D)))
+    prob = OptimizationProblem{false}(optf, x0, nothing; lb, ub)
+    if opt isa HPso
+        solve(prob, opt; maxiters, local_maxiters, abstol = 1e-8, reltol = 1e-8)
+    else
+        solve(prob, opt; maxiters)
+    end
+end
+
+function _extract_u(sol, D)
+    u = sol.u
+    u isa SVector && return u
+    u[]
+end
+
+function pso_benchmark(opt, funcs, run_length;
+        Ntrials = 20, dimension = 3, local_maxiters = 20, Δf = 1e-6, CI_quantile = 0.25)
+    Nf = length(funcs)
+    Nr = length(run_length)
+    success = zeros(Float64, Nf, Nr)
+    dist    = zeros(Float64, Nf, Nr)
+    fmin    = zeros(Float64, Nf, Nr)
+    # warmup
+    for f in funcs
+        try pso_solve(opt, f, dimension, 3; local_maxiters) catch; end
+    end
+    t0 = time()
+    for (fi, f) in enumerate(funcs)
+        xopt = SVector{dimension}(f.x_opt[1:dimension])
+        for (ri, rl) in enumerate(run_length)
+            hits = 0; dsum = 0.0; fsum = 0.0
+            for _ in 1:Ntrials
+                sol = try pso_solve(opt, f, dimension, rl; local_maxiters) catch; nothing end
+                if sol !== nothing
+                    u    = _extract_u(sol, dimension)
+                    fval = sol.objective isa Real ? Float64(sol.objective) : Float64(sol.objective[])
+                    hits += abs(fval - f.f_opt) < Δf ? 1 : 0
+                    dsum += norm(u .- xopt)
+                    fsum += fval - f.f_opt
+                end
+            end
+            success[fi, ri] = hits / Ntrials
+            dist[fi, ri]    = dsum / Ntrials
+            fmin[fi, ri]    = fsum / Ntrials
+        end
+    end
+    elapsed = time() - t0
+    Neff = Ntrials * Nf
+    sr   = vec(mean(success, dims = 1))
+    sc   = vec(sum(success .* Ntrials, dims = 1)) .|> round .|> Int
+    ci   = BBOB.compute_CI(sr, Neff, CI_quantile)
+    BenchmarkResults(
+        run_length                = collect(run_length),
+        success_count             = sc,
+        success_rate              = sr,
+        success_rate_qlow         = ci.success_rate_qlow,
+        success_rate_qhigh        = ci.success_rate_qhigh,
+        distance_to_minimizer     = vec(mean(dist, dims = 1)),
+        minimum                   = vec(mean(fmin, dims = 1)),
+        runtime                   = elapsed,
+        Neffective                = Neff,
+        callcount                 = Float64.(run_length),
+        success_rate_per_function = [success[fi, end] for fi in 1:Nf],
+    )
+end
+
+function pso_tts(opt, funcs; Ntrials = 20, dimension = 3, Δf = 1e-6,
+        local_maxiters = 20, max_run_length = 100_000)
+    all_times = Float64[]
+    D  = dimension
+    lb = SVector{D}(ntuple(_ -> -5.0, Val(D)))
+    ub = SVector{D}(ntuple(_ ->  5.0, Val(D)))
+    for f in funcs
+        for _ in 1:Ntrials
+            tracked_f, t0_ref, tts_ref = make_success_tracker(f.f, f.f_opt, Δf)
+            obj  = (x, p) -> tracked_f(Vector(x))
+            ad   = opt isa HPso ? AutoForwardDiff() : Optimization.SciMLBase.NoAD()
+            optf = OptimizationFunction{false}(obj, ad)
+            x0   = SVector{D}(ntuple(_ -> -5.0 + rand() * 10.0, Val(D)))
+            prob = OptimizationProblem{false}(optf, x0, nothing; lb, ub)
+            try
+                t0_ref[] = time()
+                if opt isa HPso
+                    solve(prob, opt; maxiters = max_run_length, local_maxiters,
+                        abstol = 1e-8, reltol = 1e-8)
+                else
+                    solve(prob, opt; maxiters = max_run_length)
+                end
+                push!(all_times, tts_ref[])
+            catch err
+                push!(all_times, Inf)
+                @warn(string(opt, " failed: ", err))
+            end
+        end
+    end
+    all_times
+end
+```
+```julia
+chain = (t; isboxed = false) -> Chain(
+    BenchmarkSetup(t, isboxed = isboxed),
+    BenchmarkSetup(NelderMead(), isboxed = false),
+    0.9)
+
+test_functions = BBOB.list_functions()
+dimension      = 3
+run_length     = round.(Int, 10 .^ LinRange(1, 5, 30))
+num_particles  = 1024
+
+PSO_KEYS = Set(["SerialPSO", "PSOKernel", "SyncPSOKernel", "HybridPSO_LBFGS"])
+
+setup = Dict(
+    # Baseline global optimizers 
+    "NelderMead"                               => NelderMead(),
+    "NLopt_GN_CRS2_LM"                        => chain(NLopt.GN_CRS2_LM(), isboxed = true),
+    "BBO_adaptive_de_rand_1_bin"               => chain(BBO_adaptive_de_rand_1_bin(), isboxed = true),
+    "BBO_adaptive_de_rand_1_bin_radiuslimited" => chain(BBO_adaptive_de_rand_1_bin_radiuslimited(), isboxed = true),
+    "BBO_de_rand_2_bin"                        => chain(BBO_de_rand_2_bin(), isboxed = true),
+    "ScipyDifferentialEvolution"               => chain(ScipyDifferentialEvolution(), isboxed = true),
+    # PSO variants 
+    "SerialPSO"        => SerPSO(num_particles),
+    "PSOKernel"        => PSOKernel(num_particles; backend = BACKEND, global_update = true),
+    "SyncPSOKernel"    => SyncPSOKernel(num_particles; backend = BACKEND),
+    "HybridPSO_LBFGS"  => HPso(pso = PSOKernel(num_particles; backend = BACKEND); backend = BACKEND),
+)
+
+@memoize run_bench(algo) = algo in PSO_KEYS ?
+    pso_benchmark(setup[algo], test_functions, run_length; Ntrials = 20, dimension) :
+    BBOB.benchmark(setup[algo], test_functions, run_length; Ntrials = 40, dimension)
+
+@memoize run_tts(algo) = algo in PSO_KEYS ?
+    pso_tts(setup[algo], test_functions; Ntrials = 20, dimension) :
+    benchmark_time_to_success(setup[algo], test_functions; Ntrials = 40, dimension)
+```
+
+## Test all (iterations)
+```julia
+const MARKERS = [:circle, :diamond, :utriangle, :square, :star5, :dtriangle, :pentagon,
+    :hexagon, :cross, :xcross, :rtriangle, :ltriangle, :star4, :star8, :heptagon, :octagon,
+    :vline, :hline, :+, :x]
+const LINESTYLES = [:solid, :dash, :dot, :dashdot, :dashdotdot]
+
+labels  = collect(keys(setup))
+results = Array{BBOB.BenchmarkResults}(undef, length(setup))
+
+# PSO variants first
+for (i, algo) in enumerate(labels)
+    algo in PSO_KEYS || continue
+    @info "PSO: $algo ..."
+    try
+        results[i] = run_bench(algo)
+        @info "  done" success = round(results[i].success_rate[end], digits = 3)
+    catch err
+        @warn "$algo failed" exception = (err, catch_backtrace())
+    end
+end
+
+# Baseline optimizers (threaded)
+Threads.@threads for (i, algo) in collect(enumerate(labels))
+    algo in PSO_KEYS && continue
+    results[i] = run_bench(algo)
+end
+
+results
+```
+
+## Success Rate vs. Iterations
+```julia
+idx = sortperm([b.success_rate[end] for b in results], rev = true)
+
+p = plot(xscale = :log10, legend = :outerright,
+    size = (700, 350), margin = 10Plots.px, dpi = 200)
+for (j, i) in enumerate(idx)
+    plot!(results[i], label = labels[i], showribbon = false,
+        lw = 2.5, xlim = (1, 1e5), x = :run_length,
+        markershape = MARKERS[mod1(j, length(MARKERS))],
+        linestyle = LINESTYLES[mod1(j, length(LINESTYLES))],
+        markersize = 4, markerstrokewidth = 0)
+end
+p
+```
+
+## Test all (wall-clock time to success)
+
+For the time-based benchmark, each optimizer is run once with a large iteration budget
+(100,000 iterations) per (function, trial) pair. The objective function is wrapped to
+detect the first evaluation that achieves the success criterion (objective < Δf + f_opt)
+and record the wall-clock time at that moment. This gives a true "time to success" for
+each trial, from which we build a CDF.
+```julia
+tts_results = Dict{String, Vector{Float64}}()
+
+# PSO variants first
+for algo in labels
+    algo in PSO_KEYS || continue
+    tts_results[algo] = run_tts(algo)
+end
+
+# Baseline optimizers
+for algo in labels
+    algo in PSO_KEYS && continue
+    tts_results[algo] = run_tts(algo)
+end
+```
+
+## Success Rate vs. Wall-Clock Time
+```julia
+all_finite      = filter(isfinite, vcat(values(tts_results)...))
+t_lo            = minimum(all_finite) / 2
+t_hi            = maximum(all_finite) * 2
+time_thresholds = 10 .^ range(log10(t_lo), log10(t_hi), length = 50)
+
+cdfs = Dict(algo => success_rate_cdf(tts_results[algo], time_thresholds) for algo in labels)
+idx  = sortperm([cdfs[l][end] for l in labels], rev = true)
+
+p = plot(xscale = :log10, legend = :outerright,
+    size = (700, 350), margin = 10Plots.px, dpi = 200,
+    xlabel = "Wall time (s)", ylabel = "Success rate", ylim = (0, 1))
+for (j, i) in enumerate(idx)
+    plot!(time_thresholds, cdfs[labels[i]], label = labels[i], lw = 2.5,
+        markershape = MARKERS[mod1(j, length(MARKERS))],
+        linestyle = LINESTYLES[mod1(j, length(LINESTYLES))],
+        markersize = 4, markerstrokewidth = 0)
+end
+p
+```
+
+## Success Rate per Function Heatmap
+```julia
+success_rate_per_function = reduce(hcat, b.success_rate_per_function for b in results)
+
+idx     = sortperm(mean(success_rate_per_function, dims = 1)[:], rev = false)
+idxfunc = 1:length(test_functions)
+
+p = heatmap(
+    string.(test_functions)[idxfunc], labels[idx], success_rate_per_function[idxfunc, idx]',
+    cmap = :RdYlGn, xticks = :all, yticks = :all, xrotation = 45, dpi = 200)
+```
+
+## Distance to Minimizer vs. Iterations
+```julia
+idx = sortperm([b.distance_to_minimizer[end] for b in results], rev = false)
+
+p = plot(xscale = :log10, legend = :outerright,
+    size = (900, 500), margin = 10Plots.px, ylim = (0, 5))
+for (j, i) in enumerate(idx)
+    plot!(results[i].run_length, results[i].distance_to_minimizer, label = labels[i],
+        showribbon = false, lw = 2, xlim = (1, 1e5),
+        xlabel = "Iterations", ylabel = "Mean distance to minimum",
+        markershape = MARKERS[mod1(j, length(MARKERS))],
+        linestyle = LINESTYLES[mod1(j, length(LINESTYLES))],
+        markersize = 4, markerstrokewidth = 0)
+end
+p
+```
+
+## Relative Runtime
+```julia
+ref      = findfirst("NelderMead" .== labels)
+runtimes = getfield.(results, :runtime)
+runtimes = runtimes ./ runtimes[ref]
+
+bar(
+    labels, runtimes, xrotation = 45, xticks = :all, ylabel = "Run time relative to NM",
+    yscale = :log10, yticks = [0.1, 1, 10, 100],
+    legend = false, margin = 25Plots.px)
+```
\ No newline at end of file