diff --git a/Project.toml b/Project.toml
index a4b06c5..2418b61 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,17 +1,17 @@
 name = "DecisionFocusedLearningAlgorithms"
 uuid = "46d52364-bc3b-4fac-a992-eb1d3ef2de15"
-version = "0.2.0"
 authors = ["Members of JuliaDecisionFocusedLearning and contributors"]
-
-[workspace]
-projects = ["docs", "test"]
+version = "0.2.0"
 
 [deps]
 DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -21,12 +21,18 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
 [compat]
 DecisionFocusedLearningBenchmarks = "0.5.0, 0.6"
 DocStringExtensions = "0.9.5"
+Documenter = "1.17.0"
 Flux = "0.16.9"
 InferOpt = "0.7.1"
+Literate = "2.21.0"
 MLUtils = "0.4.8"
+Plots = "1.41.6"
 ProgressMeter = "1.11.0"
 Random = "1.11.0"
 Statistics = "1.11.1"
 UnicodePlots = "3.8.2"
 ValueHistories = "0.5.6"
 julia = "1.11"
+
+[workspace]
+projects = ["docs", "test"]
diff --git a/src/DecisionFocusedLearningAlgorithms.jl b/src/DecisionFocusedLearningAlgorithms.jl
index d7a6250..19fdf70 100644
--- a/src/DecisionFocusedLearningAlgorithms.jl
+++ b/src/DecisionFocusedLearningAlgorithms.jl
@@ -25,6 +25,7 @@ include("algorithms/abstract_algorithm.jl")
 include("algorithms/supervised/fyl.jl")
 include("algorithms/supervised/anticipative_imitation.jl")
 include("algorithms/supervised/dagger.jl")
+include("algorithms/mirror_descent/mirror_descent.jl")
 
 export TrainingContext
 
@@ -41,7 +42,7 @@ export AbstractMetric,
 export AbstractAlgorithm, AbstractImitationAlgorithm
 
 export PerturbedFenchelYoungLossImitation,
-    DAgger, AnticipativeImitation, train_policy!, train_policy
+    DAgger, AnticipativeImitation, train_policy!, train_policy, MirrorDescent
 
 export AbstractPolicy, DFLPolicy
 end
diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl
new file mode 100644
index 0000000..b0847cb
--- /dev/null
+++
b/src/algorithms/mirror_descent/mirror_descent.jl
@@ -0,0 +1,243 @@
+"""
+$TYPEDEF
+
+Mirror Descent algorithm for learning coordinated solutions.
+
+This algorithm is designed for stochastic benchmarks.
+
+Reference:
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm
+    "inner imitation algorithm for supervised learning"
+    inner_algorithm::A = PerturbedFenchelYoungLossImitation()
+end
+
+"""
+$TYPEDSIGNATURES
+
+Train a DFLPolicy using the Mirror Descent algorithm on a provided training dataset.
+
+# Core training method
+
+Each iteration recomputes the target solutions with `augment_dataset`, then trains
+`policy` on them with `algorithm.inner_algorithm`. Returns a vector holding the result
+of the inner `train_policy!` call of every iteration.
+
+If the element type of `train_dataset` declares a non-`Nothing` `y` field, the
+samples of `train_dataset` are overwritten in place.
+
+# Arguments
+- `epochs`: number of training epochs per iteration
+- `iterations`: number of mirror descent iterations
+- `κ`: scaling factor for the perturbation magnitude
+- `metrics`: tuple of metrics to track during training
+- `verbose`: if true, prints progress at each iteration
+- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation)
+"""
+function train_policy!(
+    benchmark::ExogenousStochasticBenchmark,
+    algorithm::MirrorDescent,
+    policy::DFLPolicy,
+    train_dataset,
+    anticipative_solver,
+    perturbed_anticipative_solver;
+    epochs=10,
+    iterations=10,
+    κ=1.0,
+    metrics::Tuple=(),
+    verbose::Bool=false,
+    imitation_start::Bool=true,
+)
+    augmented_dataset = train_dataset
+    return map(1:iterations) do n_it
+        if verbose
+            println("Iteration $n_it / $iterations")
+        end
+
+        # Only the first iteration may skip the perturbed solver (pure imitation).
+        perturb = n_it > 1 || !imitation_start
+
+        augmented_dataset = augment_dataset(
+            benchmark, augmented_dataset, policy.statistical_model,
+            anticipative_solver, perturbed_anticipative_solver;
+            κ=κ, perturb=perturb,
+        )
+
+        train_policy!(
+            algorithm.inner_algorithm,
+            policy,
+            augmented_dataset;
+            epochs=epochs,
+            metrics=metrics,
+            maximizer_kwargs=sample -> sample.context,
+        )
+    end
+end
+
+"""
+$TYPEDSIGNATURES
+
+Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm.
+
+# Benchmark convenience wrapper
+
+This high-level function handles all setup from the benchmark and returns the tuple
+`(histories_per_iteration, policy)`, with one history per mirror descent iteration.
+
+# Arguments
+- `dataset_size`: number of samples in the training dataset
+- `epochs`: number of training epochs per iteration
+- `iterations`: number of mirror descent iterations
+- `κ`: scaling factor for the perturbation magnitude
+- `metrics`: tuple of metrics to track during training
+- `seed`: random seed for reproducibility
+- `verbose`: if true, prints progress at each iteration
+- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation)
+- `model_kwargs`: additional keyword arguments passed to `generate_statistical_model`
+- `maximizer_kwargs`: additional keyword arguments passed to `generate_maximizer`
+- `solver_kwargs`: additional keyword arguments passed to `generate_anticipative_solver` and `generate_parametric_anticipative_solver`
+- `nb_scenarios`: number of scenarios per instance.
+- `context_per_instance`: number of contexts per instance.
+"""
+function train_policy(
+    algorithm::MirrorDescent,
+    benchmark::ExogenousStochasticBenchmark;
+    dataset_size=30,
+    epochs=10,
+    iterations=10,
+    κ=1.0,
+    metrics::Tuple=(),
+    seed=nothing,
+    verbose::Bool=false,
+    imitation_start::Bool=true,
+    model_kwargs=(;),
+    maximizer_kwargs=(;),
+    solver_kwargs=(;),
+    nb_scenarios=1,
+    context_per_instance=1,
+)
+    train_dataset = generate_dataset(
+        benchmark,
+        dataset_size;
+        nb_scenarios=nb_scenarios,
+        contexts_per_instance=context_per_instance,
+        seed=seed,
+    )
+
+    model = generate_statistical_model(benchmark; seed=seed, model_kwargs...)
+    maximizer = generate_maximizer(benchmark; maximizer_kwargs...)
+    policy = DFLPolicy(model, maximizer)
+
+    anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...)
+    parametric_anticipative_solver = generate_parametric_anticipative_solver(
+        benchmark; solver_kwargs...
+    )
+
+    # Read the perturbation settings through a local handle so that the `seed`
+    # keyword (used above for the dataset and the model) is not rebound.
+    inner = algorithm.inner_algorithm
+    perturbed_anticipative_solver = PerturbedAdditive(
+        (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...);
+        ε=κ * inner.ε,
+        nb_samples=inner.nb_samples,
+        seed=inner.seed,
+        threaded=inner.threaded,
+    )
+
+    histories_per_iteration = train_policy!(
+        benchmark,
+        algorithm,
+        policy,
+        train_dataset,
+        anticipative_solver,
+        perturbed_anticipative_solver;
+        epochs=epochs,
+        iterations=iterations,
+        κ=κ,
+        metrics=metrics,
+        verbose=verbose,
+        imitation_start=imitation_start,
+    )
+
+    return histories_per_iteration, policy
+end
+
+# Recompute the target solution `y` of every sample in `train_dataset`.
+#
+# With `perturb=false` the target is `anticipative_solver(sample.scenario; sample.context...)`.
+# With `perturb=true` it is the output of `perturbed_anticipative_solver` applied to
+# `κ * model(sample.x)` (negated for minimization problems).
+#
+# When the sample type has no `y` (field type `Nothing`) a new collection is returned;
+# otherwise the samples of `train_dataset` are overwritten in place.
+function augment_dataset(
+    bench::ExogenousStochasticBenchmark,
+    train_dataset::AbstractArray,
+    model,
+    anticipative_solver,
+    perturbed_anticipative_solver;
+    κ=1.0,
+    perturb=false,
+)
+    # Dispatch on whether the sample type declares a non-`Nothing` `y` field.
+    has_target = fieldtype(eltype(train_dataset), :y) !== Nothing
+    return _augment_dataset(
+        Val(has_target),
+        bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver;
+        κ=κ, perturb=perturb,
+    )
+end
+
+# Target solution for a single sample: plain anticipative solution when `perturb` is
+# false, output of the perturbed solver otherwise.
+function _mirror_descent_target(
+    bench, sample, model, anticipative_solver, perturbed_anticipative_solver; κ, perturb
+)
+    # The model output is only consumed by the perturbed solver.
+    perturb || return anticipative_solver(sample.scenario; sample.context...)
+    θ = model(sample.x)
+    # Minimization problems feed the negated, scaled prediction to the solver.
+    scaled_θ = is_minimization_problem(bench) ? -κ * θ : κ * θ
+    return perturbed_anticipative_solver(
+        scaled_θ; scenario=sample.scenario, sample.context...
+    )
+end
+
+# Raw dataset (samples have no y) → create new DataSamples
+function _augment_dataset(
+    ::Val{false},
+    bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver;
+    κ=1.0, perturb=false,
+)
+    return map(train_dataset) do sample
+        y = _mirror_descent_target(
+            bench, sample, model, anticipative_solver, perturbed_anticipative_solver;
+            κ=κ, perturb=perturb,
+        )
+        DataSample(sample; y=y)
+    end
+end
+
+# Augmented dataset (samples already have y) → update y in place
+function _augment_dataset(
+    ::Val{true},
+    bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver;
+    κ=1.0, perturb=false,
+)
+    for i in eachindex(train_dataset)
+        sample = train_dataset[i]
+        y = _mirror_descent_target(
+            bench, sample, model, anticipative_solver, perturbed_anticipative_solver;
+            κ=κ, perturb=perturb,
+        )
+        # Convert the new target to the type of the stored one (rounding when its
+        # eltype is an integer), presumably so the rebuilt sample keeps the dataset eltype.
+        ET = eltype(sample.y)
+        y_converted = convert(typeof(sample.y), ET <: Integer ? round.(ET, y) : y)
+        train_dataset[i] = DataSample(sample; y=y_converted)
+    end
+    return train_dataset
+end
diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl
new file mode 100644
index 0000000..0a42cc3
--- /dev/null
+++ b/test/mirror_descent.jl
@@ -0,0 +1,95 @@
+using DecisionFocusedLearningAlgorithms
+using DecisionFocusedLearningBenchmarks
+using Test
+using ValueHistories
+using Statistics: mean
+
+@testset "MirrorDescent Training" begin
+
+    @testset "MirrorDescent - ContextualStochasticArgmax basic" begin
+        benchmark = ContextualStochasticArgmaxBenchmark()
+        algorithm = MirrorDescent()
+
+        histories, policy = train_policy(
+            algorithm, benchmark;
+            dataset_size=5, epochs=2, iterations=2, seed=0
+        )
+
+        @test histories isa Vector
+        @test length(histories) == 2
+        @test all(h isa MVHistory for h in histories)
+        @test all(haskey(h, :training_loss) for h in histories)
+        @test policy isa DFLPolicy
+    end
+
+    @testset "MirrorDescent - StochasticVehicleScheduling basic" begin
+        benchmark = StochasticVehicleSchedulingBenchmark()
+        algorithm = MirrorDescent()
+
+        histories, policy = train_policy(
+            algorithm, benchmark;
+            dataset_size=1, epochs=2, iterations=2, seed=0
+        )
+
+        @test histories isa Vector
+        @test length(histories) == 2
+        @test all(h isa MVHistory for h in histories)
+        @test all(haskey(h, :training_loss) for h in histories)
+        @test policy isa DFLPolicy
+    end
+
+    @testset "MirrorDescent - imitation_start=false" begin
+        benchmark = ContextualStochasticArgmaxBenchmark()
+        algorithm = MirrorDescent()
+
+        histories, policy = train_policy(
+            algorithm, benchmark;
+            dataset_size=5, epochs=2, iterations=2, seed=0, imitation_start=false
+        )
+
+        @test histories isa Vector
+        @test length(histories) == 2
+        @test policy isa DFLPolicy
+    end
+
+    @testset "MirrorDescent - performance improves over iterations" begin
+        benchmark = ContextualStochasticArgmaxBenchmark()
+        algorithm = MirrorDescent()
+
+        val_dataset = generate_dataset(benchmark, 100; seed=99)
+
+        val_metric = FunctionMetric(:val_obj, val_dataset) do ctx, data
+            vals = map(data) do s
+                θ = ctx.policy.statistical_model(s.x)
+                y = ctx.policy.maximizer(θ; s.context...)
+                Float64(DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y))
+            end
+            (val_obj = mean(vals),)
+        end
+
+        histories, policy = train_policy(
+            algorithm, benchmark;
+            dataset_size=20, epochs=3, iterations=5, seed=0, metrics=(val_metric,)
+        )
+
+        val_objs = [get(histories[i], :val_obj)[2][end] for i in 1:5]
+
+        # Validation objective after iteration 4 should exceed the one after iteration 1
+        @test (val_objs[4] > val_objs[1])
+    end
+
+    @testset "MirrorDescent - with metrics" begin
+        benchmark = ContextualStochasticArgmaxBenchmark()
+        algorithm = MirrorDescent()
+
+        metrics = (FunctionMetric(ctx -> ctx.epoch, :epoch),)
+
+        histories, policy = train_policy(
+            algorithm, benchmark;
+            dataset_size=5, epochs=2, iterations=2, seed=0, metrics=metrics
+        )
+
+        @test all(haskey(h, :epoch) for h in histories)
+    end
+
+end