-
Notifications
You must be signed in to change notification settings - Fork 1
Solene #12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Solene #12
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| """ | ||
| $TYPEDEF | ||
|
|
||
| Mirror Descent algorithm for learning coordinated solutions. | ||
|
|
||
| This algorithm is designed for stochastic benchmarks. | ||
|
|
||
| Reference: <https://arxiv.org/abs/2505.04757> | ||
|
|
||
| # Fields | ||
| $TYPEDFIELDS | ||
| """ | ||
| @kwdef struct MirrorDescent{A} <: AbstractImitationAlgorithm | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
| "inner imitation algorithm for supervised learning" | ||
| inner_algorithm::A = PerturbedFenchelYoungLossImitation() | ||
| end | ||
|
|
||
| """ | ||
| $TYPEDSIGNATURES | ||
| Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. | ||
|
|
||
| # Core training method | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
| """ | ||
|
|
||
|
|
||
| function train_policy( | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
| algorithm::MirrorDescent, | ||
| benchmark::ExogenousStochasticBenchmark; | ||
| dataset_size=30, | ||
| epochs=10, | ||
| iterations=10, | ||
| κ = 1.0, | ||
| metrics::Tuple=(), | ||
| seed=nothing, | ||
| ) | ||
|
|
||
| train_dataset = generate_dataset(benchmark, dataset_size; seed=seed) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By default,
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added |
||
|
|
||
| # Initialize model and create policy | ||
| model = generate_statistical_model(benchmark; seed=seed) | ||
| maximizer = generate_maximizer(benchmark) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Depending on the considered benchmark,
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
| policy = DFLPolicy(model, maximizer) | ||
|
|
||
| # vector because we store one history per iteration | ||
| histories_per_iteration = MVHistory[] | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
|
|
||
| anticipative_solver = generate_anticipative_solver(benchmark;) | ||
| parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;) | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
|
|
||
| # perturb = true corresponds to "real" iterations of mirror descent | ||
| # we compute solutions with the penalized anticipative solver + perturbation | ||
|
|
||
| # perturb = false corresponds to imitation learning | ||
| # we use the anticipative solver without perturbation | ||
| # useful to start with one iteration of pure imitation learning | ||
| perturb = false | ||
|
|
||
| # Train policy | ||
| for n_it in 1:iterations | ||
| println("Iteration $n_it / $iterations") | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
|
|
||
| if n_it > 1 | ||
| perturb = true | ||
| end | ||
|
|
||
|
|
||
| # Generate anticipative solutions as training data | ||
| augmented_dataset = augment_dataset( | ||
| algorithm.inner_algorithm, benchmark, train_dataset, model, anticipative_solver, parametric_anticipative_solver; | ||
| κ = κ, perturb = perturb | ||
| ) | ||
|
|
||
|
|
||
| # Train policy on augmented dataset | ||
| history = train_policy!( | ||
| algorithm.inner_algorithm, | ||
| policy, | ||
| augmented_dataset; | ||
| epochs = epochs, | ||
| metrics = metrics, | ||
| maximizer_kwargs=sample -> sample.context, | ||
| ) | ||
|
|
||
| push!(histories_per_iteration, history) | ||
| end | ||
|
|
||
| return histories_per_iteration, policy | ||
| end | ||
|
|
||
|
|
||
| function augment_dataset( | ||
| algorithm::PerturbedFenchelYoungLossImitation, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only works for
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now, yes. We could write a more general version that works with other losses, but I’m not exactly sure how to do it yet. I think for now we can restrict the type of |
||
| bench::AbstractStochasticBenchmark, | ||
| train_dataset::AbstractArray, | ||
| model, | ||
| anticipative_solver, | ||
| parametric_anticipative_solver; | ||
| κ = 1.0, | ||
| perturb = false | ||
| ) | ||
|
|
||
| (; nb_samples, ε, threaded, training_optimizer, seed) = algorithm | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think they are all used except |
||
|
|
||
| augmented_dataset = Vector{DataSample}() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a concrete type (because
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I replaced push! by an in-place update as suggested in your comments. |
||
|
|
||
| if perturb | ||
| perturbed_maximizer = PerturbedAdditive( | ||
|
sdelannoypavy marked this conversation as resolved.
|
||
| parametric_anticipative_solver; ε=κ*ε, nb_samples=nb_samples | ||
| ) | ||
| end | ||
|
|
||
|
|
||
| for sample in train_dataset | ||
|
|
||
| θ = model(sample.x) | ||
|
|
||
| if perturb | ||
| if is_minimization_problem(bench) | ||
| y = perturbed_maximizer(-κ*θ; scenario = sample.scenario, context = sample) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this line is bugged:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure that we can use
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a better workaround than the one I told you yesterday: perturbed_maximizer = PerturbedAdditive((θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); ε=κ*ε, nb_samples=nb_samples) |
||
| else | ||
| y = perturbed_maximizer(κ*θ; scenario = sample.scenario, context = sample) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same bug here |
||
| end | ||
| else | ||
| y = anticipative_solver(sample.scenario; context = sample) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to apply regular anticipative imitation as the first iteration of the algorithm? Does it not work if we directly start with a regular iteration?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this behavior can be useful when comparing imitation learning and mirror descent (you just need to check whether there is an improvement after the first iteration). Maybe we could make this behavior optional and control it with a parameter. |
||
| end | ||
|
|
||
| augmented_datasample = DataSample(; | ||
| x = sample.x, | ||
| y, | ||
| instance = sample.context, | ||
| extra = sample.extra | ||
| ) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might cause a bug for some benchmarks. Using the copy constructor instead may avoid errors: DataSample(sample; y=y)
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried this but it gives the following error:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a line of code to check the type of y and convert it if necessary. This was needed to avoid bugs on StochasticVehicleScheduling, but it is a bit heavy. |
||
|
|
||
| push!(augmented_dataset, augmented_datasample) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may cause a lot of allocations in combination with the type instability mentioned above. One workaround would be to modify the dataset in place by directly doing: dataset[i] = augmented_datasampleThis would solve both issues at once.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Problem: |
||
| end | ||
|
|
||
| return augmented_dataset | ||
| end | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,196 @@ | ||
"""
$TYPEDEF

Mirror Descent algorithm for learning coordinated solutions on stochastic benchmarks.

The algorithm wraps an inner imitation algorithm, which is restricted by the type
parameter to `PerturbedFenchelYoungLossImitation`.

Reference: <https://arxiv.org/abs/2505.04757>

# Fields
$TYPEDFIELDS
"""
Base.@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm
    "inner imitation algorithm for supervised learning"
    inner_algorithm::A = PerturbedFenchelYoungLossImitation()
end
|
|
||
"""
$TYPEDSIGNATURES

Train a DFLPolicy using the Mirror Descent algorithm on a provided training dataset.

# Core training method

At each iteration the dataset is relabelled with `augment_dataset`, using the current
`policy.statistical_model`, and the policy is then trained on the relabelled dataset with
`algorithm.inner_algorithm`.

# Arguments
- `benchmark`: benchmark the dataset comes from; forwarded to `augment_dataset`
- `algorithm`: Mirror Descent algorithm whose `inner_algorithm` performs the supervised training
- `policy`: policy to train; its `statistical_model` is used to relabel the dataset
- `train_dataset`: training samples (see the note below about in-place updates)
- `anticipative_solver`: solver used for the unperturbed (pure imitation) relabelling
- `perturbed_anticipative_solver`: solver used for the perturbed relabelling

# Keyword arguments
- `epochs`: number of training epochs per iteration
- `iterations`: number of mirror descent iterations
- `κ`: scaling factor for the perturbation magnitude, forwarded to `augment_dataset`
- `metrics`: tuple of metrics to track during training
- `verbose`: if true, prints progress at each iteration
- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation)

# Returns
A vector with one entry per iteration, each entry being the value returned by the inner
`train_policy!` call of that iteration.

!!! note
    When the samples of `train_dataset` already carry a `y`, `augment_dataset` overwrites
    the entries of `train_dataset` in place.
"""
function train_policy!(
    benchmark::ExogenousStochasticBenchmark,
    algorithm::MirrorDescent,
    policy::DFLPolicy,
    train_dataset,
    anticipative_solver,
    perturbed_anticipative_solver;
    epochs=10,
    iterations=10,
    κ=1.0,
    metrics::Tuple=(),
    verbose::Bool=false,
    imitation_start::Bool=true,
)
    # The dataset produced by one iteration is the input of the next one.
    augmented_dataset = train_dataset
    return map(1:iterations) do n_it
        if verbose
            println("Iteration $n_it / $iterations")
        end

        # Only the very first iteration can be unperturbed, and only when
        # `imitation_start` is requested.
        perturb = n_it > 1 || !imitation_start

        augmented_dataset = augment_dataset(
            benchmark,
            augmented_dataset,
            policy.statistical_model,
            anticipative_solver,
            perturbed_anticipative_solver;
            κ=κ,
            perturb=perturb,
        )

        # The value of this call is the history stored for iteration `n_it`.
        train_policy!(
            algorithm.inner_algorithm,
            policy,
            augmented_dataset;
            epochs=epochs,
            metrics=metrics,
            maximizer_kwargs=sample -> sample.context,
        )
    end
end
|
|
||
"""
$TYPEDSIGNATURES

Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm.

# Benchmark convenience wrapper

This high-level function handles all setup from the benchmark (dataset, statistical model,
maximizer, anticipative solvers) and returns a trained policy.

# Arguments
- `dataset_size`: number of samples in the training dataset
- `epochs`: number of training epochs per iteration
- `iterations`: number of mirror descent iterations
- `κ`: scaling factor for the perturbation magnitude
- `metrics`: tuple of metrics to track during training
- `seed`: random seed passed to `generate_dataset` and `generate_statistical_model`.
  The perturbation itself is seeded with the `seed` field of `algorithm.inner_algorithm`.
- `verbose`: if true, prints progress at each iteration
- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation)
- `model_kwargs`: additional keyword arguments passed to `generate_statistical_model`
- `maximizer_kwargs`: additional keyword arguments passed to `generate_maximizer`
- `solver_kwargs`: additional keyword arguments passed to `generate_anticipative_solver` and `generate_parametric_anticipative_solver`
- `nb_scenarios`: number of scenarios per instance.
- `context_per_instance`: number of contexts per instance (forwarded to `generate_dataset`
  as `contexts_per_instance`).

# Returns
A tuple `(histories_per_iteration, policy)`: the value returned by the core `train_policy!`
method and the trained policy.
"""
function train_policy(
    algorithm::MirrorDescent,
    benchmark::ExogenousStochasticBenchmark;
    dataset_size=30,
    epochs=10,
    iterations=10,
    κ=1.0,
    metrics::Tuple=(),
    seed=nothing,
    verbose::Bool=false,
    imitation_start::Bool=true,
    model_kwargs=(;),
    maximizer_kwargs=(;),
    solver_kwargs=(;),
    nb_scenarios = 1,
    context_per_instance = 1,
)
    train_dataset = generate_dataset(
        benchmark,
        dataset_size;
        nb_scenarios=nb_scenarios,
        contexts_per_instance=context_per_instance,
        seed=seed,
    )

    model = generate_statistical_model(benchmark; seed=seed, model_kwargs...)
    maximizer = generate_maximizer(benchmark; maximizer_kwargs...)
    policy = DFLPolicy(model, maximizer)

    anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...)
    parametric_anticipative_solver = generate_parametric_anticipative_solver(
        benchmark; solver_kwargs...
    )

    # Read the perturbation settings through a named local instead of destructuring
    # them into the function scope: destructuring `seed` would silently rebind the
    # `seed` keyword argument of this function.
    inner = algorithm.inner_algorithm
    perturbed_anticipative_solver = PerturbedAdditive(
        # Adapter: the scenario arrives as a keyword argument and is handed to the
        # parametric solver as its second positional argument.
        (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...);
        ε=κ * inner.ε,
        nb_samples=inner.nb_samples,
        seed=inner.seed,
        threaded=inner.threaded,
    )

    histories_per_iteration = train_policy!(
        benchmark,
        algorithm,
        policy,
        train_dataset,
        anticipative_solver,
        perturbed_anticipative_solver;
        epochs=epochs,
        iterations=iterations,
        κ=κ,
        metrics=metrics,
        verbose=verbose,
        imitation_start=imitation_start,
    )

    return histories_per_iteration, policy
end
|
|
||
# Relabel `train_dataset` with targets computed from `model` and the anticipative solvers.
#
# Dispatches to `_augment_dataset` on whether the element type of `train_dataset`
# declares a `y` field of type `Nothing` (raw dataset) or not (already labelled dataset).
function augment_dataset(
    bench::ExogenousStochasticBenchmark,
    train_dataset::AbstractArray,
    model,
    anticipative_solver,
    perturbed_anticipative_solver;
    κ=1.0,
    perturb=false
)
    sample_type = eltype(train_dataset)
    already_labelled = fieldtype(sample_type, :y) !== Nothing
    return _augment_dataset(
        Val(already_labelled),
        bench,
        train_dataset,
        model,
        anticipative_solver,
        perturbed_anticipative_solver;
        κ=κ,
        perturb=perturb,
    )
end
|
|
||
# Compute the training target `y` of a single sample.
#
# The model is always evaluated on `sample.x`. Without perturbation the target is the
# output of `anticipative_solver` on the sample's scenario; with perturbation it is the
# output of `perturbed_anticipative_solver` on `κ * θ`, negated for minimization problems.
function _mirror_descent_target(
    bench, sample, model, anticipative_solver, perturbed_anticipative_solver;
    κ=1.0, perturb=false
)
    θ = model(sample.x)
    if !perturb
        return anticipative_solver(sample.scenario; sample.context...)
    end
    # The solver is called on `-κ * θ` for minimization problems and `κ * θ` otherwise.
    scale = is_minimization_problem(bench) ? -κ : κ
    return perturbed_anticipative_solver(scale * θ; scenario=sample.scenario, sample.context...)
end

# Raw dataset (samples have no y) → create new DataSamples
function _augment_dataset(
    ::Val{false},
    bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver;
    κ=1.0, perturb=false
)
    return map(train_dataset) do sample
        y = _mirror_descent_target(
            bench, sample, model, anticipative_solver, perturbed_anticipative_solver;
            κ=κ, perturb=perturb
        )
        DataSample(sample; y=y)
    end
end

# Augmented dataset (samples already have y) → update y in place
function _augment_dataset(
    ::Val{true},
    bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver;
    κ=1.0, perturb=false
)
    # `eachindex` keeps the write-back valid for arrays whose indices do not start at 1.
    for i in eachindex(train_dataset)
        sample = train_dataset[i]
        y = _mirror_descent_target(
            bench, sample, model, anticipative_solver, perturbed_anticipative_solver;
            κ=κ, perturb=perturb
        )
        # The new target is converted to the type of the existing `y` so the sample
        # type (and therefore the element type of the dataset) does not change.
        # NOTE(review): when the existing `y` has an integer element type, the new
        # target is rounded; if the perturbed solver returns fractional values this
        # discards information — confirm this is intended.
        ET = eltype(sample.y)
        y_converted = convert(typeof(sample.y), ET <: Integer ? round.(ET, y) : y)
        train_dataset[i] = DataSample(sample; y=y_converted)
    end
    return train_dataset
end
Uh oh!
There was an error while loading. Please reload this page.