Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
name = "DecisionFocusedLearningAlgorithms"
uuid = "46d52364-bc3b-4fac-a992-eb1d3ef2de15"
version = "0.2.0"
authors = ["Members of JuliaDecisionFocusedLearning and contributors"]

[workspace]
projects = ["docs", "test"]
version = "0.2.0"

[deps]
DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -21,12 +21,18 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
[compat]
DecisionFocusedLearningBenchmarks = "0.5.0, 0.6"
DocStringExtensions = "0.9.5"
Documenter = "1.17.0"
Flux = "0.16.9"
InferOpt = "0.7.1"
Literate = "2.21.0"
MLUtils = "0.4.8"
Plots = "1.41.6"
ProgressMeter = "1.11.0"
Random = "1.11.0"
Statistics = "1.11.1"
UnicodePlots = "3.8.2"
ValueHistories = "0.5.6"
julia = "1.11"

[workspace]
projects = ["docs", "test"]
3 changes: 2 additions & 1 deletion src/DecisionFocusedLearningAlgorithms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ include("algorithms/abstract_algorithm.jl")
include("algorithms/supervised/fyl.jl")
include("algorithms/supervised/anticipative_imitation.jl")
include("algorithms/supervised/dagger.jl")
# Mirror Descent implementation lives in the snake_case `mirror_descent/` directory.
include("algorithms/mirror_descent/mirror_descent.jl")
Comment thread
sdelannoypavy marked this conversation as resolved.

export TrainingContext

Expand All @@ -41,7 +42,7 @@ export AbstractMetric,

export AbstractAlgorithm, AbstractImitationAlgorithm
export PerturbedFenchelYoungLossImitation,
DAgger, AnticipativeImitation, train_policy!, train_policy
DAgger, AnticipativeImitation, train_policy!, train_policy, MirrorDescent
export AbstractPolicy, DFLPolicy

end
138 changes: 138 additions & 0 deletions src/algorithms/MirrorDescent/mirror_descent.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
$TYPEDEF

Mirror Descent algorithm for learning coordinated solutions.

This algorithm is designed for stochastic benchmarks.

Reference: <https://arxiv.org/abs/2505.04757>

# Fields
$TYPEDFIELDS
"""
@kwdef struct MirrorDescent{A} <: AbstractImitationAlgorithm
Comment thread
sdelannoypavy marked this conversation as resolved.
"inner imitation algorithm for supervised learning"
inner_algorithm::A = PerturbedFenchelYoungLossImitation()
end

"""
$TYPEDSIGNATURES
Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm.

# Core training method
Comment thread
sdelannoypavy marked this conversation as resolved.
"""


function train_policy(
Comment thread
sdelannoypavy marked this conversation as resolved.
algorithm::MirrorDescent,
benchmark::ExogenousStochasticBenchmark;
dataset_size=30,
epochs=10,
iterations=10,
κ = 1.0,
metrics::Tuple=(),
seed=nothing,
)

train_dataset = generate_dataset(benchmark, dataset_size; seed=seed)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default, generate_dataset applied to an ExogenousStochasticBenchmark generates a single context and scenario per instance. Do we want to be able to choose different nb_scenarios and contexts_per_instance values?

@sdelannoypavy sdelannoypavy Jun 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added nb_scenarios and contexts_per_instance as keyword arguments in train_policy. I am not sure of what it means to generate multiple contexts for an instance. The code for dataset generation is quite heavy now, we may want to do something else


# Initialize model and create policy
model = generate_statistical_model(benchmark; seed=seed)
maximizer = generate_maximizer(benchmark)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depending on the considered benchmark, generate_statistical_model and generate_maximizer may have optional kwargs. Should we optionally take them as input of the train_policy method?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe maximizer_kwargs, model_kwargs and solver_kwargs could be passed as an argument to train_policy? (assuming that anticipative_solver and parametric_anticipative_solver use the same optional kwargs)

policy = DFLPolicy(model, maximizer)

# vector because we store one history per iteration
histories_per_iteration = MVHistory[]
Comment thread
sdelannoypavy marked this conversation as resolved.

anticipative_solver = generate_anticipative_solver(benchmark;)
parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;)
Comment thread
sdelannoypavy marked this conversation as resolved.

# perturb = true corresponds to "real" iterations of mirror descent:
# we compute solutions with the penalized anticipative solver + perturbation

# perturb = false corresponds to imitation learning:
# we use the anticipative solver without perturbation
# useful to start with one iteration of pure imitation learning
perturb = false

# Train policy
for n_it in 1:iterations
println("Iteration $n_it / $iterations")
Comment thread
sdelannoypavy marked this conversation as resolved.

if n_it > 1
perturb = true
end


# Generate anticipative solutions as training data
augmented_dataset = augment_dataset(
algorithm.inner_algorithm, benchmark, train_dataset, model, anticipative_solver, parametric_anticipative_solver;
κ = κ, perturb = perturb
)


# Train policy on augmented dataset
history = train_policy!(
algorithm.inner_algorithm,
policy,
augmented_dataset;
epochs = epochs,
metrics = metrics,
maximizer_kwargs=sample -> sample.context,
)

push!(histories_per_iteration, history)
end

return histories_per_iteration, policy
end


function augment_dataset(
algorithm::PerturbedFenchelYoungLossImitation,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only works for PerturbedFenchelYoungLossImitation?
If so, A can be restricted even further.

@sdelannoypavy sdelannoypavy Jun 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, yes. We could write a more general version that works with other losses, but I’m not exactly sure how to do it yet. I think for now we can restrict the type of A.

bench::AbstractStochasticBenchmark,
train_dataset::AbstractArray,
model,
anticipative_solver,
parametric_anticipative_solver;
κ = 1.0,
perturb = false
)

(; nb_samples, ε, threaded, training_optimizer, seed) = algorithm

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

threaded, training_optimizer, and seed are unused in this method. Should they be forwarded to the perturbed_maximizer construction?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they are all used except threaded, so yes training_optimizer and seed should be forwarded !


augmented_dataset = Vector{DataSample}()

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a concrete type (because DataSample isn't one) and may cause type instabilities and affect performance.

@sdelannoypavy sdelannoypavy Jun 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I replaced push! by an in-place update as suggested in your comments.


if perturb
perturbed_maximizer = PerturbedAdditive(
Comment thread
sdelannoypavy marked this conversation as resolved.
parametric_anticipative_solver; ε=κ*ε, nb_samples=nb_samples
)
end


for sample in train_dataset

θ = model(sample.x)

if perturb
if is_minimization_problem(bench)
y = perturbed_maximizer(-κ*θ; scenario = sample.scenario, context = sample)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this line is bugged:

  • scenario is silently not passed to the parametric_anticipative_solver. It is defined as a positional argument and not a keyword one, which means this line either fails or applies the non-parametric anticipative solver with scenario=-kappa*theta
  • context is also silently not passed to the solver, which will fail for benchmarks with non-empty contexts. It should be passed as sample.context... instead.

@sdelannoypavy sdelannoypavy Jun 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure that we can use PerturbedAdditive if the solver has positional arguments in addition to θ (the parameter that should be perturbed). So when I wrote my own benchmark I put scenario as a keyword argument. But looking at the signature of the solvers implemented in other benchmarks, I agree that we have a problem here.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a better workaround than the one I told you yesterday.
You can just use a closure to reorder the arguments, like this:

perturbed_maximizer = PerturbedAdditive((θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); ε=κ*ε, nb_samples=nb_samples)

else
y = perturbed_maximizer(κ*θ; scenario = sample.scenario, context = sample)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same bug here

end
else
y = anticipative_solver(sample.scenario; context = sample)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to apply regular anticipative imitation as the first iteration of the algorithm? Does it not work if we directly start with a regular iteration?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this behavior can be useful when comparing imitation learning and mirror descent (you just need to check whether there is an improvement after the first iteration). Maybe we could make this behavior optional and control it with a parameter.

end

augmented_datasample = DataSample(;
x = sample.x,
y,
instance = sample.context,
extra = sample.extra
)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might cause a bug for some benchmarks. Using the copy constructor instead may avoid errors:

DataSample(sample; y=y)

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this but it gives the following error:
ERROR: LoadError: MethodError: no method matching DataSample(::DataSample{@NamedTuple{…}, @NamedTuple{…}, Vector{…}, Nothing, Nothing}; y::Vector{Float64})
The type DataSample exists, but no method is defined for this combination of argument types when trying to construct it.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a line of code to check the type of y and convert it if necessary. This was needed to avoid bugs on StochasticVehicleScheduling, but it is a bit heavy.


push!(augmented_dataset, augmented_datasample)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may cause a lot of allocations in combination with the type instability mentioned above. One workaround would be to modify the dataset in place by directly doing:

dataset[i] = augmented_datasample

This would solve both issues at once.

@sdelannoypavy sdelannoypavy Jun 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Problem: train_dataset is initialised with samples where y = Nothing. I replaced push! by a map. I am not sure that it is the right thing to do.

end

return augmented_dataset
end
196 changes: 196 additions & 0 deletions src/algorithms/mirror_descent/mirror_descent.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""
$TYPEDEF

Mirror Descent training scheme for learning coordinated solutions on stochastic
benchmarks.

The scheme wraps an inner supervised imitation algorithm, which is run again on a
relabelled dataset at every mirror descent iteration.

Reference: <https://arxiv.org/abs/2505.04757>

# Fields
$TYPEDFIELDS
"""
@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm
    "supervised imitation algorithm trained at each mirror descent iteration"
    inner_algorithm::A = PerturbedFenchelYoungLossImitation()
end

"""
$TYPEDSIGNATURES

Train `policy` in place with the Mirror Descent algorithm on a provided training dataset.

# Core training method

Every iteration first relabels the dataset with `augment_dataset` (using the current
`policy.statistical_model`), then trains `policy` on the relabelled samples with
`algorithm.inner_algorithm`.

# Arguments
- `benchmark`: benchmark forwarded to `augment_dataset`
- `algorithm`: Mirror Descent algorithm; its `inner_algorithm` performs the supervised
  training step of each iteration
- `policy`: policy to train (modified in place)
- `train_dataset`: training samples. If the samples already carry targets `y`,
  `augment_dataset` overwrites them in place.
- `anticipative_solver`: solver called as `anticipative_solver(scenario; context...)`
- `perturbed_anticipative_solver`: solver called as
  `perturbed_anticipative_solver(θ; scenario, context...)`

# Keywords
- `epochs`: number of training epochs per iteration
- `iterations`: number of mirror descent iterations
- `κ`: factor applied to the model output `θ` before calling the perturbed solver
  (NOTE: when the perturbed solver was built with `ε = κ * ε`, as `train_policy` does,
  the same `κ` should presumably be used here)
- `metrics`: tuple of metrics to track during training
- `verbose`: if true, prints progress at each iteration
- `imitation_start`: if true, the first iteration uses pure imitation learning
  (no perturbation)

# Returns
A vector holding, for each iteration, the value returned by the inner `train_policy!`.
"""
function train_policy!(
    benchmark::ExogenousStochasticBenchmark,
    algorithm::MirrorDescent,
    policy::DFLPolicy,
    train_dataset,
    anticipative_solver,
    perturbed_anticipative_solver;
    epochs=10,
    iterations=10,
    κ=1.0,
    metrics::Tuple=(),
    verbose::Bool=false,
    imitation_start::Bool=true,
)
    # The dataset produced by one iteration is the input of the next one.
    augmented_dataset = train_dataset
    return map(1:iterations) do n_it
        if verbose
            println("Iteration $n_it / $iterations")
        end

        # Only the very first iteration may be a pure imitation step.
        perturb = n_it > 1 || !imitation_start

        augmented_dataset = augment_dataset(
            benchmark,
            augmented_dataset,
            policy.statistical_model,
            anticipative_solver,
            perturbed_anticipative_solver;
            κ=κ,
            perturb=perturb,
        )

        train_policy!(
            algorithm.inner_algorithm,
            policy,
            augmented_dataset;
            epochs=epochs,
            metrics=metrics,
            maximizer_kwargs=sample -> sample.context,
        )
    end
end

"""
$TYPEDSIGNATURES

Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror
Descent algorithm.

# Benchmark convenience wrapper

This high-level function builds the dataset, the statistical model, the maximizer and
the anticipative solvers from `benchmark`, then calls the core `train_policy!` method.

# Keywords
- `dataset_size`: number of samples in the training dataset
- `epochs`: number of training epochs per iteration
- `iterations`: number of mirror descent iterations
- `κ`: scaling factor; multiplies both the inner algorithm's `ε` when building the
  perturbed solver and the model output during relabelling
- `metrics`: tuple of metrics to track during training
- `seed`: random seed passed to `generate_dataset` and `generate_statistical_model`
  (the perturbed solver uses the seed stored in `algorithm.inner_algorithm` instead)
- `verbose`: if true, prints progress at each iteration
- `imitation_start`: if true, the first iteration uses pure imitation learning
  (no perturbation)
- `model_kwargs`: additional keyword arguments passed to `generate_statistical_model`
- `maximizer_kwargs`: additional keyword arguments passed to `generate_maximizer`
- `solver_kwargs`: additional keyword arguments passed to `generate_anticipative_solver`
  and `generate_parametric_anticipative_solver`
- `nb_scenarios`: number of scenarios per instance (forwarded to `generate_dataset`)
- `context_per_instance`: number of contexts per instance (forwarded to
  `generate_dataset` as `contexts_per_instance`)

# Returns
The tuple `(histories_per_iteration, policy)`.
"""
function train_policy(
    algorithm::MirrorDescent,
    benchmark::ExogenousStochasticBenchmark;
    dataset_size=30,
    epochs=10,
    iterations=10,
    κ=1.0,
    metrics::Tuple=(),
    seed=nothing,
    verbose::Bool=false,
    imitation_start::Bool=true,
    model_kwargs=(;),
    maximizer_kwargs=(;),
    solver_kwargs=(;),
    nb_scenarios=1,
    context_per_instance=1,
)
    train_dataset = generate_dataset(
        benchmark,
        dataset_size;
        nb_scenarios=nb_scenarios,
        contexts_per_instance=context_per_instance,
        seed=seed,
    )

    model = generate_statistical_model(benchmark; seed=seed, model_kwargs...)
    maximizer = generate_maximizer(benchmark; maximizer_kwargs...)
    policy = DFLPolicy(model, maximizer)

    anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...)
    parametric_anticipative_solver = generate_parametric_anticipative_solver(
        benchmark; solver_kwargs...
    )

    # The parametric solver takes the scenario positionally, whereas the perturbed
    # solver is later called with `scenario` as a keyword: adapt the call here.
    solve_with_scenario(θ; scenario, kwargs...) =
        parametric_anticipative_solver(θ, scenario; kwargs...)

    # Read the perturbation settings through `inner` rather than destructuring them, so
    # that the `seed` keyword argument of this function is not silently rebound.
    inner = algorithm.inner_algorithm
    perturbed_anticipative_solver = PerturbedAdditive(
        solve_with_scenario;
        ε=κ * inner.ε,
        nb_samples=inner.nb_samples,
        seed=inner.seed,
        threaded=inner.threaded,
    )

    histories_per_iteration = train_policy!(
        benchmark,
        algorithm,
        policy,
        train_dataset,
        anticipative_solver,
        perturbed_anticipative_solver;
        epochs=epochs,
        iterations=iterations,
        κ=κ,
        metrics=metrics,
        verbose=verbose,
        imitation_start=imitation_start,
    )

    return histories_per_iteration, policy
end

# Relabel `train_dataset` with targets computed by the anticipative solvers.
#
# The work is delegated to `_augment_dataset`, selected according to whether the sample
# type of `train_dataset` declares a target field `y` of type `Nothing` (raw dataset) or
# not (already labelled dataset).
function augment_dataset(
    bench::ExogenousStochasticBenchmark,
    train_dataset::AbstractArray,
    model,
    anticipative_solver,
    perturbed_anticipative_solver;
    κ=1.0,
    perturb=false,
)
    sample_type = eltype(train_dataset)
    has_targets = fieldtype(sample_type, :y) !== Nothing
    return _augment_dataset(
        Val(has_targets),
        bench,
        train_dataset,
        model,
        anticipative_solver,
        perturbed_anticipative_solver;
        κ,
        perturb,
    )
end

# Compute the training target of a single `sample`.
#
# Without perturbation the anticipative solver is applied to the sample's scenario and
# the model is not evaluated at all. With perturbation the model output θ is scaled by
# `κ` (negated for minimization problems) and passed to the perturbed solver.
function _anticipative_target(
    bench, sample, model, anticipative_solver, perturbed_anticipative_solver, κ, perturb
)
    if !perturb
        return anticipative_solver(sample.scenario; sample.context...)
    end
    θ = model(sample.x)
    scale = is_minimization_problem(bench) ? -κ : κ
    return perturbed_anticipative_solver(
        scale * θ; scenario=sample.scenario, sample.context...
    )
end

# Raw dataset (samples have no y) → create new DataSamples
function _augment_dataset(
    ::Val{false},
    bench,
    train_dataset,
    model,
    anticipative_solver,
    perturbed_anticipative_solver;
    κ=1.0,
    perturb=false,
)
    return map(train_dataset) do sample
        y = _anticipative_target(
            bench,
            sample,
            model,
            anticipative_solver,
            perturbed_anticipative_solver,
            κ,
            perturb,
        )
        DataSample(sample; y=y)
    end
end

# Augmented dataset (samples already have y) → update y in place
function _augment_dataset(
    ::Val{true},
    bench,
    train_dataset,
    model,
    anticipative_solver,
    perturbed_anticipative_solver;
    κ=1.0,
    perturb=false,
)
    # `eachindex` keeps the write-back valid for arrays that are not 1-based.
    for i in eachindex(train_dataset)
        sample = train_dataset[i]
        y = _anticipative_target(
            bench,
            sample,
            model,
            anticipative_solver,
            perturbed_anticipative_solver,
            κ,
            perturb,
        )
        # Bring the new target back to the type of the stored one, so that it fits in
        # the existing container; integer-valued targets are rounded first.
        ET = eltype(sample.y)
        y_converted = convert(typeof(sample.y), ET <: Integer ? round.(ET, y) : y)
        train_dataset[i] = DataSample(sample; y=y_converted)
    end
    return train_dataset
end
Loading
Loading