diff --git a/ext/plots/svs_plots.jl b/ext/plots/svs_plots.jl
index f660ff23..ccd6bcdd 100644
--- a/ext/plots/svs_plots.jl
+++ b/ext/plots/svs_plots.jl
@@ -1,7 +1,12 @@
 import DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling:
-    Solution, compute_path_list
+    Solution,
+    compute_path_list,
+    evaluate_scenario,
+    get_nb_scenarios,
+    build_stochastic_instance
 
 has_visualization(::StochasticVehicleSchedulingBenchmark) = true
+has_visualization(::ContextualStochasticVehicleSchedulingBenchmark) = true
 
 # ── helpers ────────────────────────────────────────────────────────────────────
 
@@ -70,32 +75,108 @@ end
 
 function _plot_routes(fig, city, path_list; route_linewidth=2, route_alpha=0.7)
     (; tasks) = city
-    for path in path_list
-        X = Float64[]
-        Y = Float64[]
-        (; end_point) = tasks[path[1]]
-        push!(X, end_point.x)
-        push!(Y, end_point.y)
+    for (route_idx, path) in enumerate(path_list)
+        prev_pt = tasks[path[1]].end_point
         for task_idx in path[2:end]
             (; start_point, end_point) = tasks[task_idx]
-            push!(X, start_point.x)
-            push!(Y, start_point.y)
-            push!(X, end_point.x)
-            push!(Y, end_point.y)
+            for (a, b) in ((prev_pt, start_point), (start_point, end_point))
+                mx = (a.x + b.x) / 2
+                my = (a.y + b.y) / 2
+                Plots.plot!(
+                    fig,
+                    [a.x, mx],
+                    [a.y, my];
+                    linewidth=route_linewidth,
+                    alpha=route_alpha,
+                    label=false,
+                    z_order=:back,
+                    arrow=true,
+                    color=route_idx,
+                )
+                Plots.plot!(
+                    fig,
+                    [mx, b.x],
+                    [my, b.y];
+                    linewidth=route_linewidth,
+                    alpha=route_alpha,
+                    label=false,
+                    z_order=:back,
+                    color=route_idx,
+                )
+            end
+            prev_pt = end_point
         end
-        Plots.plot!(
-            fig,
-            X,
-            Y;
-            linewidth=route_linewidth,
-            alpha=route_alpha,
-            label=false,
-            z_order=:back,
-        )
     end
     return fig
 end
 
+function _annotate_districts(fig, city, district_μ, district_σ; fontsize=6)
+    (; districts, district_width) = city
+    lin = LinearIndices(districts)
+    nx, ny = size(districts)
+    for ix in 1:nx, iy in 1:ny
+        cx = (ix - 0.5) * district_width
+        cy = (iy - 0.5) * district_width
+        i = lin[ix, iy]
+        label = "μ=$(round(district_μ[i]; digits=2))\nσ=$(round(district_σ[i]; digits=2))"
+        Plots.annotate!(fig, cx, cy, Plots.text(label, fontsize, :black, :center))
+    end
+    return fig
+end
+
+function _highlight_district(fig, city, district_idx; color=:red, alpha=0.25)
+    (; district_width, width) = city
+    nx = width ÷ district_width
+    ix = (district_idx - 1) % nx + 1
+    iy = (district_idx - 1) ÷ nx + 1
+    x0 = (ix - 1) * district_width
+    y0 = (iy - 1) * district_width
+    Plots.plot!(
+        fig,
+        Plots.Shape(
+            [x0, x0 + district_width, x0 + district_width, x0],
+            [y0, y0, y0 + district_width, y0 + district_width],
+        );
+        fillcolor=color,
+        fillalpha=alpha,
+        linewidth=0,
+        label=nothing,
+    )
+    return fig
+end
+
+function _plot_stats(solution, instance, nb_vehicles; fontsize=7)
+    nb_scenarios = get_nb_scenarios(instance)
+    avg_delay = if nb_scenarios == 0
+        NaN
+    else
+        sum(evaluate_scenario(solution, instance, s) for s in 1:nb_scenarios) / nb_scenarios
+    end
+    vehicle_cost_total = instance.vehicle_cost * nb_vehicles
+    delay_cost_total = instance.delay_cost * avg_delay
+    total_cost = vehicle_cost_total + delay_cost_total
+
+    text = string(
+        "# vehicles: $nb_vehicles    avg delay: $(round(avg_delay; digits=2))\n",
+        "vehicle cost: $(round(vehicle_cost_total; digits=1))    ",
+        "delay cost: $(round(delay_cost_total; digits=1))\n",
+        "total cost: $(round(total_cost; digits=1))",
+    )
+
+    p = Plots.plot(;
+        framestyle=:none,
+        ticks=nothing,
+        legend=false,
+        grid=false,
+        xlims=(0, 1),
+        ylims=(0, 1),
+        top_margin=(-6)Plots.mm,
+        bottom_margin=(-2)Plots.mm,
+    )
+    Plots.annotate!(p, 0.5, 0.5, Plots.text(text, fontsize, :center, :center))
+    return p
+end
+
 # ── interface methods ──────────────────────────────────────────────────────────
 
 function plot_context(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...)
@@ -106,9 +187,63 @@ end
 function plot_sample(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...)
     @assert hasproperty(sample.instance, :city) "Sample does not contain city information."
     city = sample.instance.city
-    fig = _plot_city(city; kwargs...)
+    fig = _plot_city(city; kwargs..., bottom_margin=(-20)Plots.mm)
     solution = Solution(sample.y, sample.instance)
-    path_list = compute_path_list(solution)
+    path_list = filter(p -> length(p) > 2, compute_path_list(solution))
     _plot_routes(fig, city, path_list)
+    stats = _plot_stats(solution, sample.instance, length(path_list))
+    return Plots.plot(fig, stats; layout=Plots.@layout([a; b{0.12h}]), size=(500, 560))
+end
+
+function plot_context(
+    ::ContextualStochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...
+)
+    @assert hasproperty(sample.instance, :city) "Sample does not contain city information."
+    city = sample.instance.city
+    fig = _plot_city(city; kwargs...)
+    _annotate_districts(fig, city, sample.district_μ, sample.district_σ)
+    if hasproperty(sample.context, :storm_district)
+        _highlight_district(fig, city, sample.storm_district; color=:blue)
+    end
+    return fig
+end
+
+function plot_sample(
+    ::ContextualStochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...
+)
+    @assert hasproperty(sample.instance, :city) "Sample does not contain city information."
+    city = sample.instance.city
+    fig = _plot_city(
+        city; kwargs..., bottom_margin=(isnothing(sample.y) ? 0 : -20) * Plots.mm
+    )
+    _annotate_districts(fig, city, sample.district_μ, sample.district_σ)
+    if hasproperty(sample.context, :storm_district)
+        storm_active = if hasproperty(sample.extra, :scenario)
+            sample.extra.scenario.storm_active
+        elseif hasproperty(sample.extra, :scenarios)
+            any(s -> s.storm_active, sample.extra.scenarios)
+        else
+            false
+        end
+        if storm_active
+            _highlight_district(fig, city, sample.storm_district)
+        else
+            _highlight_district(fig, city, sample.storm_district; color=:green)
+        end
+    end
+    if !isnothing(sample.y)
+        solution = Solution(sample.y, sample.instance)
+        path_list = filter(p -> length(p) > 2, compute_path_list(solution))
+        _plot_routes(fig, city, path_list)
+        eval_instance = if hasproperty(sample.extra, :scenario)
+            build_stochastic_instance(sample.instance, [sample.extra.scenario])
+        elseif hasproperty(sample.extra, :scenarios)
+            build_stochastic_instance(sample.instance, sample.extra.scenarios)
+        else
+            sample.instance
+        end
+        stats = _plot_stats(solution, eval_instance, length(path_list))
+        return Plots.plot(fig, stats; layout=Plots.@layout([a; b{0.12h}]), size=(500, 560))
+    end
     return fig
 end
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 7380544a..05768807 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -84,6 +84,7 @@ export is_minimization_problem
 export objective_value
 export has_visualization, plot_context, plot_sample, plot_trajectory, animate_trajectory
 export compute_gap
+export compute_feature_std
 
 # Export all benchmarks
 using .Argmax
@@ -107,6 +108,7 @@ export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
 export RankingBenchmark
 export StochasticVehicleSchedulingBenchmark
+export ContextualStochasticVehicleSchedulingBenchmark
 export SubsetSelectionBenchmark
 export WarcraftBenchmark
 export MaintenanceBenchmark
diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
index 3e0ba9f7..e4488df1 100644
--- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
+++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
@@ -1,18 +1,20 @@
 module StochasticVehicleScheduling
 
 export StochasticVehicleSchedulingBenchmark
+export ContextualStochasticVehicleSchedulingBenchmark
 export generate_dataset, generate_maximizer, generate_statistical_model
 export compact_linearized_mip,
     compact_mip, column_generation_algorithm, local_search, deterministic_mip
 export evaluate_solution, is_feasible
 export VSPScenario, build_stochastic_instance
+export compute_feature_std
 
 using ..Utils
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using ConstrainedShortestPaths:
     stochastic_routing_shortest_path, stochastic_routing_shortest_path_with_threshold
 using Distributions: Distribution, LogNormal, Uniform, DiscreteUniform
-using Flux: Chain, Dense
+using Flux: Chain, Dense, relu, softplus
 using Graphs:
     AbstractGraph,
     SimpleDiGraph,
@@ -30,7 +32,7 @@ using JuMP:
 using Printf: @printf
 using Random: Random, AbstractRNG, MersenneTwister
 using SparseArrays: sparse, SparseMatrixCSC
-using Statistics: quantile, mean
+using Statistics: quantile, mean, std
 
 include("utils.jl")
 include("instance/constants.jl")
@@ -50,6 +52,7 @@ include("solution/algorithms/deterministic_mip.jl")
 include("solution/algorithms/anticipative_solver.jl")
 
 include("maximizer.jl")
+include("contextual.jl")
 
 """
 $TYPEDEF
diff --git a/src/StochasticVehicleScheduling/contextual.jl b/src/StochasticVehicleScheduling/contextual.jl
new file mode 100644
index 00000000..c9a09b78
--- /dev/null
+++ b/src/StochasticVehicleScheduling/contextual.jl
@@ -0,0 +1,417 @@
+"""
+$TYPEDEF
+
+Contextual variant of the stochastic vehicle scheduling benchmark.
+
+The instance fixes the city layout, tasks, and inter-area distribution; only the
+per-district `(μ_d, σ_d)` parameters are re-drawn per context (from
+`default_district_μ` and `default_district_σ`) and exposed to the predictive model as
+features. Scenarios are then sampled fresh per context using its `(μ_d, σ_d)`.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct ContextualStochasticVehicleSchedulingBenchmark <:
+              AbstractStochasticBenchmark{true}
+    "number of tasks in each instance"
+    nb_tasks::Int = 25
+    "number of scenarios drawn per context (used for objective evaluation)"
+    nb_scenarios::Int = 10
+    "cost of one vehicle (overrides `default_vehicle_cost` for this benchmark only)"
+    vehicle_cost::Float64 = 200.0
+    "cost of one minute of delay (overrides `default_delay_cost` for this benchmark only)"
+    delay_cost::Float64 = 20.0
+    "probability that the storm is active in each scenario"
+    p_storm::Float64 = 0.15
+    "delay multiplier for the storm hotspot district when the storm is active"
+    storm_multiplier::Float64 = 50.0
+    "number of Monte Carlo draws used to compute per-arc slack quantile features"
+    nb_feat_scenarios::Int = 50
+end
+
+"""
+$TYPEDSIGNATURES
+
+Build a `City` for the contextual stochastic VSP variant.
+
+Differs from [`create_random_city`](@ref) in three ways:
+- `nb_scenarios=0`: no internal scenario storage (per-context scenarios are drawn on demand
+   by [`generate_scenario`](@ref) for the contextual benchmark).
+- Per-district `random_delay` is a placeholder (`LogNormal(0.0, 1.0)`): the contextual
+   scenario generator builds `LogNormal`s from the sample's `(district_μ, district_σ)`
+   instead of reading the city's stored ones.
+- No `generate_scenarios!` / `compute_perturbed_end_times!` pass (nothing to roll into).
+"""
+function create_contextual_city(;
+    nb_tasks::Int=default_nb_tasks,
+    rng::AbstractRNG=Random.default_rng(),
+    seed=nothing,
+    αᵥ_low=default_αᵥ_low,
+    αᵥ_high=default_αᵥ_high,
+    first_begin_time=default_first_begin_time,
+    last_begin_time=default_last_begin_time,
+    task_μ=default_task_μ,
+    task_σ=default_task_σ,
+    city_kwargs...,
+)
+    isnothing(seed) || Random.seed!(rng, seed)
+    city = City(; nb_scenarios=0, nb_tasks, city_kwargs...)
+    placeholder = LogNormal(0.0, 1.0)
+    nb_per_edge = city.width ÷ city.district_width
+    for x in 1:nb_per_edge, y in 1:nb_per_edge
+        city.districts[x, y] = District(; random_delay=placeholder, nb_scenarios=0)
+    end
+    init_tasks!(
+        city, αᵥ_low, αᵥ_high, first_begin_time, last_begin_time, task_μ, task_σ; rng=rng
+    )
+    return city
+end
+
+"""
+$TYPEDSIGNATURES
+
+Generate an instance for the contextual stochastic VSP benchmark.
+
+The returned `DataSample` carries the city + graph in `context.instance` and leaves
+`x=nothing`; per-district `(μ_d, σ_d)` and the feature matrix are added later by
+[`generate_context`](@ref).
+"""
+function Utils.generate_instance(
+    benchmark::ContextualStochasticVehicleSchedulingBenchmark, rng::AbstractRNG; kwargs...
+)
+    (; nb_tasks, vehicle_cost, delay_cost) = benchmark
+    city = create_contextual_city(; nb_tasks, rng, vehicle_cost, delay_cost, kwargs...)
+    graph = create_VSP_graph(city)
+    instance = Instance(
+        graph,
+        zeros(0, ne(graph)),
+        zeros(0, 0),
+        zeros(0, 0),
+        city.vehicle_cost,
+        city.delay_cost,
+        city,
+    )
+    return DataSample(; instance)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Build the `20 × ne(graph)` per-edge feature matrix for the contextual stochastic VSP
+using Monte Carlo draws from the context-defined distributions.
+
+Mirrors [`compute_features`](@ref) from the non-contextual benchmark exactly:
+- Feature 1: nominal travel time (Euclidean distance between task endpoints).
+- Feature 2: vehicle cost if the arc leaves the depot source node, else 0.
+- Features 3–11: deciles (0.1 … 0.9) of the empirical slack distribution over
+  `nb_feat_scenarios` draws.
+- Features 12–20: empirical CDF of the slack at thresholds
+  `[-100, -50, -20, -10, 0, 10, 50, 200, 500]`.
+
+The slack for arc (u→v) in one draw uses **Formula A** (identical to `compute_slacks`
+in the non-contextual pipeline):
+```
+slack = scenario_start_time[v] - (scenario_end_time[u] + nominal_travel_time)
+```
+where `scenario_end_time[u]` and `scenario_start_time[v]` are drawn by replicating the
+`generate_scenarios!` + `compute_perturbed_end_times!` logic with the contextual
+`(district_μ, district_σ)`.  The storm is activated once per scenario draw (Bernoulli
+with probability `p_storm`) and shifts `district_μ[storm_district]` by
+`log(storm_multiplier)`, exactly as in [`generate_scenario`](@ref).
+
+A forked copy of `rng` is used so the main RNG state (used afterwards for scenario
+generation) is not consumed.
+"""
+function compute_contextual_slack_features(
+    city::City,
+    graph::AbstractGraph,
+    district_μ::AbstractVector,
+    district_σ::AbstractVector,
+    storm_district::Int,
+    p_storm::Float64,
+    storm_multiplier::Float64,
+    rng::AbstractRNG;
+    nb_feat_scenarios::Int=50,
+)
+    lin = LinearIndices(city.districts)
+    tasks = city.tasks
+    N = length(tasks)
+    nb_per_edge = size(city.districts, 1)
+    E = ne(graph)
+
+    cumul = [-100, -50, -20, -10, 0, 10, 50, 200, 500]
+    nb_features = 2 + 9 + length(cumul)
+    features = zeros(Float32, nb_features, E)
+
+    # Use a forked RNG so the caller's state is untouched.
+    rng_feat = copy(rng)
+
+    # Accumulate per-arc slack samples across nb_feat_scenarios draws.
+    slack_samples = [Vector{Float64}(undef, nb_feat_scenarios) for _ in 1:E]
+
+    for s in 1:nb_feat_scenarios
+        # --- Storm activation (once per scenario, same as generate_scenario) ---
+        storm_active = rand(rng_feat) < p_storm
+
+        # --- Inter-area factor for 24 hours (mirrors generate_scenarios!) ---
+        inter_area = zeros(24)
+        prev_ia = 0.0
+        for h in 1:24
+            prev_ia = (prev_ia + 0.1) * rand(rng_feat, city.random_inter_area_factor)
+            inter_area[h] = prev_ia
+        end
+
+        # --- District delays for 24 hours (mirrors roll(district, rng)) ---
+        # Uses contextual LogNormal(effective_μ, σ) with storm correction.
+        district_delays = [zeros(24) for _ in 1:nb_per_edge, _ in 1:nb_per_edge]
+        for x in 1:nb_per_edge
+            for y in 1:nb_per_edge
+                i = lin[x, y]
+                effective_μ = if (storm_active && i == storm_district)
+                    district_μ[i] + log(storm_multiplier)
+                else
+                    district_μ[i]
+                end
+                d = LogNormal(effective_μ, district_σ[i])
+                prev_d = 0.0
+                for h in 1:24
+                    prev_d = prev_d / 2.0 + rand(rng_feat, d)
+                    district_delays[x, y][h] = prev_d
+                end
+            end
+        end
+
+        # --- Task start times (mirrors roll(task, rng)) ---
+        scenario_start_time = [t.start_time + rand(rng_feat, t.random_delay) for t in tasks]
+
+        # --- Task end times (mirrors compute_perturbed_end_times!) ---
+        scenario_end_time = [t.end_time for t in tasks]
+        for i in 2:(N - 1)
+            task = tasks[i]
+            origin_x, origin_y = get_district(task.start_point, city)
+            dest_x, dest_y = get_district(task.end_point, city)
+            ξ₁ = scenario_start_time[i]
+            ξ₂ = ξ₁ + district_delays[origin_x, origin_y][hour_of(ξ₁)]
+            ξ₃ = ξ₂ + (task.end_time - task.start_time) + inter_area[hour_of(ξ₂)]
+            scenario_end_time[i] = ξ₃ + district_delays[dest_x, dest_y][hour_of(ξ₃)]
+        end
+
+        # --- Slack per arc: Formula A (mirrors compute_slacks(city, u, v)) ---
+        for (j, e) in enumerate(edges(graph))
+            u, v = src(e), dst(e)
+            travel_time = distance(tasks[u].end_point, tasks[v].start_point)
+            slack_samples[j][s] =
+                scenario_start_time[v] - (scenario_end_time[u] + travel_time)
+        end
+    end
+
+    # --- Aggregate into feature matrix (mirrors compute_features loop) ---
+    for (i, e) in enumerate(edges(graph))
+        u, v = src(e), dst(e)
+        features[1, i] = distance(tasks[u].end_point, tasks[v].start_point)
+        features[2, i] = u == 1 ? Float32(city.vehicle_cost) : 0.0f0
+        slacks = slack_samples[i]
+        features[3:11, i] = quantile(slacks, [0.1 * k for k in 1:9])
+        features[12:nb_features, i] = [mean(slacks .<= x) for x in cumul]
+    end
+
+    return features
+end
+
+"""
+$TYPEDSIGNATURES
+
+Compute the per-feature standard deviation across all edges in `dataset`.
+
+Concatenates the `7 × ne` feature matrices of all samples horizontally and returns a
+length-7 vector of row-wise standard deviations. Any zero standard deviation (constant
+feature) is replaced by `1f0` to avoid division by zero when normalizing.
+Intended to be called on the training split only, to prevent data leakage.
+"""
+function compute_feature_std(dataset)
+    all_features = hcat([sample.x for sample in dataset]...)  # 20 × (total edges)
+    stds = vec(std(all_features; dims=2))                      # length-20
+    stds[stds .== 0.0f0] .= 1.0f0                                  # avoid division by zero
+    return Float32.(stds)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Draw `(district_μ, district_σ)` for every district from `default_district_μ` and
+`default_district_σ`, add them to `instance_sample.context`, and compute the per-edge
+feature matrix `x`.
+"""
+function Utils.generate_context(
+    bench::ContextualStochasticVehicleSchedulingBenchmark,
+    rng::AbstractRNG,
+    instance_sample::DataSample,
+)
+    instance = instance_sample.context.instance
+    city = instance.city
+    nb_districts = length(city.districts)
+    district_μ = rand(rng, default_district_μ, nb_districts)
+    district_σ = rand(rng, default_district_σ, nb_districts)
+    # Draw storm hotspot from occupied districts only (districts containing ≥1 task
+    # start_point), guaranteeing the storm always affects at least one arc.
+    tasks_jobs = city.tasks[2:(end - 1)]
+    lin = LinearIndices(city.districts)
+    occupied = unique([lin[get_district(t.start_point, city)...] for t in tasks_jobs])
+    storm_district = rand(rng, occupied)
+    x = compute_contextual_slack_features(
+        city,
+        instance.graph,
+        district_μ,
+        district_σ,
+        storm_district,
+        bench.p_storm,
+        bench.storm_multiplier,
+        rng;
+        nb_feat_scenarios=bench.nb_feat_scenarios,
+    )
+    return DataSample(;
+        x,
+        instance_sample.context...,
+        district_μ=district_μ,
+        district_σ=district_σ,
+        storm_district=storm_district,
+        p_storm=bench.p_storm,
+        storm_multiplier=bench.storm_multiplier,
+    )
+end
+
+"""
+$TYPEDSIGNATURES
+
+Draw a fresh [`VSPScenario`](@ref) using the context's `(district_μ, district_σ)`
+to build per-district `LogNormal` distributions on the fly. Solver kwargs `instance`,
+`district_μ`, `district_σ` are spread from `sample.context`.
+"""
+function Utils.generate_scenario(
+    ::ContextualStochasticVehicleSchedulingBenchmark,
+    rng::AbstractRNG;
+    instance::Instance,
+    district_μ::AbstractVector,
+    district_σ::AbstractVector,
+    storm_district::Int,
+    p_storm::Float64,
+    storm_multiplier::Float64,
+    kwargs...,
+)
+    city = instance.city
+    @assert !isnothing(city) "contextual SVS `generate_scenario` requires `store_city=true`"
+    lin = LinearIndices(city.districts)
+    storm_active = rand(rng) < p_storm
+    district_delay_fn =
+        (x, y) -> begin
+            i = lin[x, y]
+            effective_μ = if (storm_active && i == storm_district)
+                district_μ[i] + log(storm_multiplier)
+            else
+                district_μ[i]
+            end
+            return LogNormal(effective_μ, district_σ[i])
+        end
+    return draw_scenario(city, instance.graph, rng; district_delay_fn, storm_active)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Linear model mapping the `20×ne(graph)` per-edge feature matrix to a length-`ne(graph)`
+positive score vector. Architecture: `Chain(Dense(20 => 1; bias=false), vec)`.
+"""
+function Utils.generate_statistical_model(
+    ::ContextualStochasticVehicleSchedulingBenchmark; seed=nothing
+)
+    Random.seed!(seed)
+    return Chain(Dense(20 => 1; bias=false), vec)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the same deterministic VSP maximizer as the non-contextual benchmark.
+"""
+function Utils.generate_maximizer(
+    ::ContextualStochasticVehicleSchedulingBenchmark; model_builder=highs_model
+)
+    return StochasticVechicleSchedulingMaximizer(model_builder)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the anticipative solver: a callable `(scenario::VSPScenario; instance, kwargs...) -> y`
+that solves the 1-scenario stochastic VSP. Identical to the non-contextual variant;
+per-context `(district_μ, district_σ)` only affect scenario sampling, not the oracle.
+
+# Keyword Arguments
+- `model_builder`: a function returning an empty `JuMP.Model` with a solver attached (defaults to `scip_model`).
+"""
+function Utils.generate_anticipative_solver(
+    ::ContextualStochasticVehicleSchedulingBenchmark; model_builder=scip_model
+)
+    return AnticipativeSolver(; model_builder=model_builder)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the parametric anticipative solver: a callable
+`(θ, scenario::VSPScenario; instance, kwargs...) -> y` that solves the parametric
+anticipative subproblem `argmin_y c(y, scenario) + θᵀy`.
+
+# Keyword Arguments
+- `model_builder`: a function returning an empty `JuMP.Model` with a solver attached (defaults to `scip_model`).
+"""
+function Utils.generate_parametric_anticipative_solver(
+    ::ContextualStochasticVehicleSchedulingBenchmark; model_builder=scip_model
+)
+    return AnticipativeSolver(; model_builder=model_builder)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Forward to the SVS baseline policies. These operate on `sample.instance` and scenarios,
+so they work unchanged for the contextual variant.
+"""
+function Utils.generate_baseline_policies(::ContextualStochasticVehicleSchedulingBenchmark)
+    return (;
+        deterministic=Policy(
+            "Deterministic MIP", "Ignores delays", svs_deterministic_policy
+        ),
+        saa=Policy("SAA (col gen)", "Stochastic MIP over K scenarios", svs_saa_policy),
+        saa_mip=Policy(
+            "SAA (exact MIP)",
+            "Exact stochastic MIP over K scenarios via compact linearized formulation",
+            svs_saa_mip_policy,
+        ),
+        local_search=Policy(
+            "Local search", "Heuristic with K scenarios", svs_local_search_policy
+        ),
+    )
+end
+
+function Utils.objective_value(
+    ::ContextualStochasticVehicleSchedulingBenchmark,
+    sample::DataSample,
+    y::BitVector,
+    scenario::VSPScenario,
+)
+    stoch = build_stochastic_instance(sample.instance, [scenario])
+    return evaluate_solution(y, stoch)
+end
+
+function Utils.objective_value(
+    bench::ContextualStochasticVehicleSchedulingBenchmark, sample::DataSample, y::BitVector
+)
+    if hasproperty(sample.extra, :scenario)
+        return Utils.objective_value(bench, sample, y, sample.extra.scenario)
+    elseif hasproperty(sample.extra, :scenarios)
+        stoch = build_stochastic_instance(sample.instance, sample.extra.scenarios)
+        return evaluate_solution(y, stoch)
+    end
+    return error("Sample must have scenario or scenarios")
+end
diff --git a/src/StochasticVehicleScheduling/maximizer.jl b/src/StochasticVehicleScheduling/maximizer.jl
index 66e9eb32..dc392f31 100644
--- a/src/StochasticVehicleScheduling/maximizer.jl
+++ b/src/StochasticVehicleScheduling/maximizer.jl
@@ -4,7 +4,7 @@ $TYPEDSIGNATURES
 Given arcs weights θ, solve the deterministic VSP problem associated to `instance`.
 """
 function vsp_maximizer(
-    θ::AbstractVector; instance::Instance, model_builder=highs_model, silent=true
+    θ::AbstractVector; instance::Instance, model_builder=highs_model, silent=true, kwargs...
 )
     (; graph) = instance
 
diff --git a/src/StochasticVehicleScheduling/scenario.jl b/src/StochasticVehicleScheduling/scenario.jl
index 142ede72..d656a2da 100644
--- a/src/StochasticVehicleScheduling/scenario.jl
+++ b/src/StochasticVehicleScheduling/scenario.jl
@@ -6,11 +6,23 @@ Represents a single scenario for the stochastic vehicle scheduling problem.
 # Fields
 $TYPEDFIELDS
 """
-struct VSPScenario
+struct VSPScenario{T<:Union{Nothing,Bool}}
     "delays per task (length = nb_tasks + 2): scenario_end_time - nominal_end_time"
     delays::Vector{Float64}
     "scalar slack per arc for this scenario"
     slacks::SparseMatrixCSC{Float64,Int}
+    "whether the storm is active in this scenario; `nothing` when the storm concept does not apply (non-contextual SVS)"
+    storm_active::T
+end
+
+"""
+$TYPEDSIGNATURES
+
+Construct a [`VSPScenario`](@ref) with no storm information (`storm_active = nothing`),
+as used by the non-contextual stochastic VSP. Yields a `VSPScenario{Nothing}`.
+"""
+function VSPScenario(delays::Vector{Float64}, slacks::SparseMatrixCSC{Float64,Int})
+    return VSPScenario(delays, slacks, nothing)
 end
 
 """
@@ -19,7 +31,8 @@ $TYPEDSIGNATURES
 Display a compact summary of a [`VSPScenario`](@ref): number of tasks and edges.
 """
 function Base.show(io::IO, s::VSPScenario)
-    return print(io, "VSPScenario($(length(s.delays) - 2) tasks)")
+    storm = isnothing(s.storm_active) ? "" : ", storm=$(s.storm_active)"
+    return print(io, "VSPScenario($(length(s.delays) - 2) tasks$storm)")
 end
 
 """
@@ -27,8 +40,22 @@ $TYPEDSIGNATURES
 
 Draw a single fresh scenario from the city's random distributions,
 independently of the stored scenario draws in the `City` struct.
+
+The `district_delay_fn(x, y) -> LogNormal{Float64}` keyword controls the per-district
+delay distribution used at grid position `(x, y)`. By default it reads from
+`city.districts[x, y].random_delay`; pass a custom callable to override the lookup
+(used by the contextual variant to build distributions from per-sample `(μ_d, σ_d)`).
+
+The `storm_active` keyword (default `nothing`) is stored verbatim on the returned
+scenario; pass a `Bool` from the contextual variant to record whether the storm fired.
 """
-function draw_scenario(city::City, graph::AbstractGraph, rng::AbstractRNG)
+function draw_scenario(
+    city::City,
+    graph::AbstractGraph,
+    rng::AbstractRNG;
+    district_delay_fn=(x, y) -> city.districts[x, y].random_delay,
+    storm_active=nothing,
+)
     tasks = city.tasks
     N = length(tasks)
 
@@ -46,8 +73,9 @@ function draw_scenario(city::City, graph::AbstractGraph, rng::AbstractRNG)
     for x in 1:nb_per_edge
         for y in 1:nb_per_edge
             prev = 0.0
+            d = district_delay_fn(x, y)
             for h in 1:24
-                prev = scenario_next_delay(prev, city.districts[x, y].random_delay, rng)
+                prev = scenario_next_delay(prev, d, rng)
                 district_delays[x, y][h] = prev
             end
         end
@@ -93,7 +121,7 @@ function draw_scenario(city::City, graph::AbstractGraph, rng::AbstractRNG)
     end
     slacks = sparse(I_idx, J_idx, slack_vals, N, N)
 
-    return VSPScenario(delays, slacks)
+    return VSPScenario(delays, slacks, storm_active)
 end
 
 """
@@ -103,7 +131,7 @@ Build a stochastic [`Instance`](@ref) from a base instance and a vector of fresh
 [`VSPScenario`](@ref)s. Each scenario contributes one column to the `intrinsic_delays`
 matrix and one entry per edge to the `slacks` sparse matrix.
 """
-function build_stochastic_instance(instance::Instance, scenarios::Vector{VSPScenario})
+function build_stochastic_instance(instance::Instance, scenarios::Vector{<:VSPScenario})
     K = length(scenarios)
     nb_nodes = length(first(scenarios).delays)
     intrinsic_delays = Matrix{Float64}(undef, nb_nodes, K)
diff --git a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
index 91f36aac..ceec41b1 100644
--- a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
+++ b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
@@ -96,7 +96,7 @@ SAA variant: build stochastic instance from `scenarios` then solve via
 [`compact_linearized_mip`](@ref).
 """
 function compact_linearized_mip(
-    instance::Instance, scenarios::Vector{VSPScenario}; kwargs...
+    instance::Instance, scenarios::Vector{<:VSPScenario}; kwargs...
 )
     return compact_linearized_mip(build_stochastic_instance(instance, scenarios); kwargs...)
 end
@@ -116,6 +116,7 @@ function compact_mip(
     scenario_range=nothing,
     model_builder=scip_model,
     silent=true,
+    kwargs...,
 )
     (; graph, slacks, intrinsic_delays, vehicle_cost, delay_cost) = instance
     nb_nodes = nv(graph)
@@ -187,7 +188,7 @@ SAA variant: build stochastic instance from `scenarios` then solve via
 [`compact_mip`](@ref).
 """
 function compact_mip(
-    instance::Instance, scenarios::Vector{VSPScenario}, θ=nothing; kwargs...
+    instance::Instance, scenarios::Vector{<:VSPScenario}, θ=nothing; kwargs...
 )
     return compact_mip(build_stochastic_instance(instance, scenarios), θ; kwargs...)
 end
diff --git a/test/contextual_vsp.jl b/test/contextual_vsp.jl
new file mode 100644
index 00000000..e8ae2c24
--- /dev/null
+++ b/test/contextual_vsp.jl
@@ -0,0 +1,137 @@
+@testset "Contextual Stochastic VSP" begin
+    using DecisionFocusedLearningBenchmarks
+    using DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling
+    using Graphs
+    using Plots
+    using StableRNGs: StableRNG
+
+    b = ContextualStochasticVehicleSchedulingBenchmark(; nb_tasks=15, nb_scenarios=4)
+
+    @test is_exogenous(b)
+
+    # Custom cost fields thread through to the instance
+    b_custom = ContextualStochasticVehicleSchedulingBenchmark(;
+        nb_tasks=10, vehicle_cost=100.0, delay_cost=5.0, p_storm=0.3, storm_multiplier=20.0
+    )
+    sample_custom = generate_dataset(
+        b_custom, 1; contexts_per_instance=1, nb_scenarios=1, seed=99
+    )[1]
+    @test sample_custom.instance.vehicle_cost == 100.0
+    @test sample_custom.instance.delay_cost == 5.0
+
+    N = 2
+    M = 3
+    K = 2
+
+    # N instances × M contexts × K scenarios = N*M*K unlabeled samples
+    unlabeled = generate_dataset(b, N; contexts_per_instance=M, nb_scenarios=K, seed=1)
+    @test length(unlabeled) == N * M * K
+    @test hasproperty(unlabeled[1].extra, :scenario)
+    @test unlabeled[1].extra.scenario isa VSPScenario
+    @test unlabeled[1].extra.scenario.storm_active isa Bool
+
+    # Each sample carries μ/σ and storm fields in context
+    @test hasproperty(unlabeled[1].context, :district_μ)
+    @test hasproperty(unlabeled[1].context, :district_σ)
+    @test length(unlabeled[1].district_μ) == length(unlabeled[1].instance.city.districts)
+    @test length(unlabeled[1].district_σ) == length(unlabeled[1].instance.city.districts)
+    @test hasproperty(unlabeled[1].context, :storm_district)
+    @test hasproperty(unlabeled[1].context, :p_storm)
+    @test hasproperty(unlabeled[1].context, :storm_multiplier)
+    @test unlabeled[1].storm_district isa Int
+    @test 0 < unlabeled[1].p_storm < 1
+    @test unlabeled[1].storm_multiplier > 1
+
+    # storm_district is always drawn from occupied districts (≥1 task start_point inside)
+    let city = unlabeled[1].instance.city
+        lin = LinearIndices(city.districts)
+        occupied = unique([
+            lin[StochasticVehicleScheduling.get_district(t.start_point, city)...] for
+            t in city.tasks[2:(end - 1)]
+        ])
+        @test unlabeled[1].storm_district in occupied
+    end
+
+    # Samples sharing an instance have identical city, but different μ/σ across contexts
+    same_instance_block = unlabeled[1:(M * K)]
+    cities = unique(s.instance.city for s in same_instance_block)
+    @test length(cities) == 1
+    μs = [s.district_μ for s in same_instance_block[1:K:end]]  # one per context
+    @test length(unique(μs)) == M
+    # storm_district is re-drawn per context; coincidences are possible but unlikely
+    storm_districts = [s.storm_district for s in same_instance_block[1:K:end]]
+    @test all(d -> d isa Int, storm_districts)
+
+    # Features are 20 × ne(graph), Float32
+    # Layout matches compute_features (non-contextual):
+    #   1: travel time, 2: vehicle cost (source arcs), 3-11: slack deciles, 12-20: slack CDF
+    sample = unlabeled[1]
+    E = ne(sample.instance.graph)
+    @test size(sample.x) == (20, E)
+    @test eltype(sample.x) === Float32
+    # Travel-time feature (row 1) must be non-negative
+    @test all(sample.x[1, :] .>= 0)
+    # Deciles (rows 3–11) must be non-decreasing per arc
+    @test all(all(sample.x[k, :] .<= sample.x[k + 1, :]) for k in 3:10)
+    # CDF features (rows 12–20) must be in [0, 1]
+    @test all(0 .<= sample.x[12:20, :] .<= 1)
+
+    # Statistical model + maximizer pipeline
+    model = generate_statistical_model(b; seed=0)
+    @test size(model[1].weight) == (1, 20)
+    maximizer = generate_maximizer(b)
+    θ = model(sample.x)
+    @test length(θ) == E
+    y = maximizer(θ; sample.context...)
+    @test y isa BitVector
+    @test length(y) == E
+
+    # Baseline policies
+    policies = generate_baseline_policies(b)
+    @test hasproperty(policies, :saa)
+    @test hasproperty(policies, :deterministic)
+    @test hasproperty(policies, :local_search)
+
+    # Labeled dataset via SAA policy: N*M labeled samples
+    saa_dataset = generate_dataset(
+        b,
+        N;
+        contexts_per_instance=M,
+        nb_scenarios=K,
+        seed=0,
+        rng=StableRNG(0),
+        target_policy=policies.saa,
+    )
+    @test length(saa_dataset) == N * M
+    @test hasproperty(saa_dataset[1].extra, :scenarios)
+    @test length(saa_dataset[1].extra.scenarios) == K
+    @test saa_dataset[1].y isa BitVector
+
+    # Plot extension
+    @test has_visualization(b)
+    figure_1 = plot_context(b, saa_dataset[1])
+    @test figure_1 isa Plots.Plot
+    figure_2 = plot_sample(b, saa_dataset[1])
+    @test figure_2 isa Plots.Plot
+
+    # compute_gap returns a finite number
+    gap = compute_gap(b, saa_dataset, model, maximizer)
+    @test isfinite(gap)
+
+    # Objective value: ensure regenerating scenario from same context reproduces consistent value
+    rng = StableRNG(7)
+    fresh_scenario = generate_scenario(b, rng; sample.context...)
+    @test fresh_scenario isa VSPScenario
+    obj = objective_value(b, sample, y, fresh_scenario)
+    @test isfinite(obj)
+
+    # Anticipative solvers (1-scenario)
+    anticipative_solver = generate_anticipative_solver(b)
+    y_anti = anticipative_solver(sample.scenario; sample.context...)
+    @test y_anti isa BitVector
+
+    parametric_solver = generate_parametric_anticipative_solver(b)
+    θ_zero = zeros(E)
+    y_zero = parametric_solver(θ_zero, sample.scenario; sample.context...)
+    @test y_zero == y_anti
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 575c875f..8718bda3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,6 +18,7 @@ using Random
     include("maintenance.jl")
     include("warcraft.jl")
     include("vsp.jl")
+    include("contextual_vsp.jl")
     include("contextual_stochastic_argmax.jl")
     include("portfolio_optimization.jl")
 
diff --git a/test/vsp.jl b/test/vsp.jl
index 0d778d26..b585c4c6 100644
--- a/test/vsp.jl
+++ b/test/vsp.jl
@@ -31,21 +31,21 @@
     )
     @test length(saa_dataset) == N
     @test hasproperty(saa_dataset[1].extra, :scenarios)
-    @test saa_dataset[1].extra.scenarios isa Vector{VSPScenario}
+    @test saa_dataset[1].extra.scenarios isa Vector{VSPScenario{Nothing}}
     @test length(saa_dataset[1].extra.scenarios) == K
     det_dataset = generate_dataset(
         b, N; nb_scenarios=K, seed=0, rng=StableRNG(0), target_policy=policies.deterministic
     )
     @test length(det_dataset) == N
     @test hasproperty(det_dataset[1].extra, :scenarios)
-    @test det_dataset[1].extra.scenarios isa Vector{VSPScenario}
+    @test det_dataset[1].extra.scenarios isa Vector{VSPScenario{Nothing}}
     @test length(det_dataset[1].extra.scenarios) == K
     ls_dataset = generate_dataset(
         b, N; nb_scenarios=K, seed=0, rng=StableRNG(0), target_policy=policies.local_search
     )
     @test length(ls_dataset) == N
     @test hasproperty(ls_dataset[1].extra, :scenarios)
-    @test ls_dataset[1].extra.scenarios isa Vector{VSPScenario}
+    @test ls_dataset[1].extra.scenarios isa Vector{VSPScenario{Nothing}}
     @test length(ls_dataset[1].extra.scenarios) == K
 
     # Plots work unchanged