VirtualPlantLab · Samuel-amap · Apr 28, 2025 · Mar 26, 2025 · Apr 3, 2025 · Apr 3, 2025
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -2,7 +2,7 @@ name: CI
 on:
   push:
     branches:
-      - main
+      - dev
     tags: "*"
   pull_request:
   workflow_dispatch:

diff --git a/.github/workflows/Integration.yml b/.github/workflows/Integration.yml
@@ -2,7 +2,7 @@ name: Integration
 on:
   push:
     branches:
-      - main
+      - dev
     tags: "*"
   pull_request:
   workflow_dispatch:
@@ -32,8 +32,8 @@ jobs:
         arch:
           - x64
         package:
-        - {user: PalmStudio, repo: XPalm.jl, branch: PSE-API-changes}
-        - {user: VEZY, repo: PlantBioPhysics.jl, branch: ModelList-outputs-filtering-changes}
+        - {user: PalmStudio, repo: XPalm.jl, branch: dev}
+        - {user: VEZY, repo: PlantBioPhysics.jl, branch: dev}
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2

diff --git a/.github/workflows/benchmarks_and_downstream.yml b/.github/workflows/benchmarks_and_downstream.yml
@@ -1,11 +1,12 @@
-name: BenchmarksAndDownstream
+name: Benchmarks
 on:
   push:
     branches:
       - dev
       - benchmarks-github-action
     tags: "*"
-    workflow-dispatch:
+  pull_request:
+  workflow_dispatch:
 permissions:
   # deployments permission to deploy GitHub pages website
     deployments: write
@@ -16,8 +17,6 @@ jobs:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
     runs-on: ${{ matrix.os }}
     timeout-minutes: 60
-    env:
-      GROUP: ${{ matrix.package.group }}
     strategy:
       fail-fast: false
       matrix:
@@ -30,14 +29,21 @@ jobs:
         arch:
           - x64
         package:
-        # the group setting is unused atm
+       # the group setting is unused atm
         - {user: VEZY, repo: PlantSimEngine.jl, group: Downstream}
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
+      - uses: julia-actions/julia-buildpkg@v1
+      - name: Clone Downstream
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
+          ref: ${{matrix.package.branch}}
+          path: downstream
       # TODO handle breaking changes the way downstream tests do ?
       # NOTE : manifest toml file is removed otherwise git whines about untracked changes when switching branches for the gh-pages commit
       - name: Run benchmarks

diff --git a/.gitignore b/.gitignore
@@ -5,4 +5,5 @@
 .DS_Store
 docs/Manifest.toml
 test/Manifest.toml
-docs/build/
+docs/build/
+test/downstream/Manifest.toml
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -291,12 +291,12 @@ out = run!(mtg, mapping, meteo, tracked_outputs=out_vars, executor=SequentialEx(
 nothing # hide
 ```
 
-We can then extract the outputs in a `DataFrame` and sort them:
+We can then extract the outputs and convert them to a `DataFrame` for each scale and sort them:
 
 ```@example readme
 using DataFrames
-df_out = convert_outputs(out, DataFrame)
-sort!(df_out, [:timestep, :node])
+df_dict = convert_outputs(out, DataFrame)
+sort!(df_dict["Leaf"], [:timestep, :node])
 ```
 
 An example output of a multiscale simulation is shown in the documentation of PlantBiophysics.jl:

diff --git a/docs/src/multiscale/multiscale.md b/docs/src/multiscale/multiscale.md
@@ -245,11 +245,11 @@ outputs_sim = run!(mtg, mapping, meteo, tracked_outputs = outs);
 nothing # hide
 ```
 
-And that's it! We can now access the outputs for each scale as a dictionary of vectors of values per variable and scale.
+And that's it! We can now access the outputs for each scale as a dictionary of vectors of NamedTuple objects.
 
-Or as a `DataFrame` using the [`DataFrames`](https://dataframes.juliadata.org) package:
+Or as a `DataFrame` dictionary using the [`DataFrames`](https://dataframes.juliadata.org) package:
 
 ```@example usepkg
 using DataFrames
-convert_outputs(outputs_sim, DataFrame)
+df_dict = convert_outputs(outputs_sim, DataFrame)
 ```
diff --git a/docs/src/multiscale/multiscale_considerations.md b/docs/src/multiscale/multiscale_considerations.md
@@ -83,39 +83,65 @@ Instead of a [`ModelList`](@ref), it takes an MTG and a mapping. The optional `m
 
 ## Multi-scale output data structure
 
-The output structure, like the mapping, is a Julia `Dict` structure indexed by scale. In each scale, another `Dict` maps variables to their values per timestep, per node. This makes the structure a little bulkier and a little more verbose to inspect than in single-scale, but the general usage is similar. Multiscale Tree Graph nodes are also added to the output data, as a `:node` entry.
+
+The output structure, like the mapping, is a Julia `Dict` structure indexed by the scale name. Values are a per-scale `Vector{NamedTuple}` which lists the requested variables for every node at that scale, for every timestep in the simulation. The timestep and the Multiscale Tree Graph node are also added to the output data, as `:timestep` and `:node` entries.
+
+This dictionary structure makes the raw outputs a little more verbose to inspect than in a single-scale simulation, but the general usage is similar. It is also compact and fast to convert to a `Dict{String, DataFrame}`, which can make queries easier.
+
+!!! note
+    Some of the mapped variables (those that map from scalar to vector) will not be added to the outputs, to save some memory and space, since they are redundant.
+
 
 To illustrate, here's an example output from part 3 of the Toy plant tutorial, zeroing in on a variable at the "Root" scale: [Fixing bugs in the plant simulation](@ref):
 
 ```julia
 julia> outs
 
-Dict{String, Dict{Symbol, Vector}} with 5 entries:
-  "Internode" => Dict(:carbon_root_creation_consumed=>[[50.0, 50.0], [50.0, 50.0], [50.0, 50.0], [50.0, 50.0], [50.0, …
-  "Root"      => Dict(:carbon_root_creation_consumed=>[[50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50.0], [50…
-  "Scene"     => Dict(:TT_cu=>[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]  …  [2099.61], [20…
-  "Plant"     => Dict(:carbon_root_creation_consumed=>[[50.0], [50.0], [50.0], [50.0], [50.0], [50.0], [50.0], [50.0],…
-  "Leaf"      => Dict(:node=>Vector{Node{NodeMTG, Dict{Symbol, Any}}}[[+ 4: Leaf…
+Dict{String, Vector} with 5 entries:
+  "Internode" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, TT_cu::Float64, carbon_…
+  "Root"      => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, water_absorbed::Float64…
+  "Scene"     => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, TT_cu::Float64, TT::Float64}[(timestep = 1, node = / 1: Scene…
+  "Plant"     => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, carbon_stock::Float64, …
+  "Leaf"      => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_captured::Float64}[(timestep = 1, node = + 4: Leaf…
 
 julia> outs["Root"]
-Dict{Symbol, Vector} with 4 entries:
-  :carbon_root_creation_consumed => [[50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50…
-  :node                          => Vector{Node{NodeMTG, Dict{Symbol, Any}}}[[+ 9: Root…
-  :water_absorbed                => [[0.5, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1.1, 1.1, 1.1, 1.1, 0.0], [0.…
-  :root_water_assimilation       => [[1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0], [1.…
-
-julia> outs["Root"][:carbon_root_creation_consumed]
-365-element Vector{Vector{Float64}}:
- [50.0, 50.0] # timestep 1: two root nodes
- [50.0, 50.0, 50.0]
- [50.0, 50.0, 50.0, 50.0]
- [50.0, 50.0, 50.0, 50.0, 50.0]
- [50.0, 50.0, 50.0, 50.0, 50.0, 50.0]
- [50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0] # timestep 6: 7 root nodes
+3257-element Vector{@NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, water_absorbed::Float64, root_water_assimilation::Float64}}:
+ (timestep = 1, node = + 9: Root
+└─ < 10: Root
+   └─ < 11: Root
+      └─ < 12: Root
+         └─ < 13: Root
+            └─ < 14: Root
+               └─ < 15: Root
+                  └─ < 16: Root
+                     └─ < 17: Root
+, carbon_root_creation_consumed = 50.0, water_absorbed = 0.5, root_water_assimilation = 1.0)
  ⋮
 ```
 
-As more roots get added in this simulation, the vectors expand to list the values of all the nodes for every variable for every timestep.
+Values are more complex to query than in a single-scale simulation since the indexing isn't straightforward to map to a timestep:
+
+```julia
+julia> [Pair(outs["Root"][i][:timestep], outs["Root"][i][:carbon_root_creation_consumed]) for i in 1:length(outs["Root"])]
+3257-element Vector{Pair{Int64, Float64}}:
+   1 => 50.0
+   1 => 50.0
+   2 => 50.0
+   2 => 50.0
+   2 => 50.0
+     ⋮
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+ 365 => 50.0
+```
+
+Converting to a dictionary of DataFrame objects can make such queries easier to write.
 
 !!! warning
     Currently, the `:node` entry only shallow copies nodes. The `:node` values at each scale for every timestep actually reflect the final state of the node, meaning attribute values may not correspond to the value at that timestep. You may need to output these values via a dedicated model to keep track of them properly.

diff --git a/docs/src/multiscale/multiscale_example_3.md b/docs/src/multiscale/multiscale_example_3.md
@@ -244,13 +244,19 @@ Depth = 3
 
 There is one quirk you may have noticed when inspecting the data : when a root expands, the new root is immediately active, and some models may act on it immediately... including the root growth model. Meaning this new root may also sprout another root in the same timestep, and so on.
 
-You can notice this by looking at the simulation's state after the first timestep:
+You can notice this by looking at the simulation's state during the first two timesteps:
 
 ```@example usepkg
 outs = run!(mtg, mapping, first(meteo_day, 2))
-nodes_per_timestep = outs["Root"][:node]
-root_lengths_per_timestep = [length(nodes_per_timestep[i]) for i in 1:length(nodes_per_timestep)]
 
+root_nodes_per_timestep = [0, 0]
+for i in 1:length(outs["Root"])
+    if outs["Root"][i].timestep < 3
+        root_nodes_per_timestep[outs["Root"][i].timestep] += 1    
+    end
+end
+
+root_nodes_per_timestep
 ```
 
 Our root grew to full length within one timestep. Oops.

diff --git a/docs/src/multiscale/single_to_multiscale.md b/docs/src/multiscale/single_to_multiscale.md
@@ -203,20 +203,18 @@ We can access the output variables at the "Scene" scale by indexing our outputs:
 ```@example usepkg
 outputs_multiscale["Scene"]
 ```
-and then the computed `:TT_cu`:
+We have a `Vector{NamedTuple}` structure. Our single-scale output is a `Vector{T}`:
 ```@example usepkg
-outputs_multiscale["Scene"][:TT_cu]
+outputs_singlescale.TT_cu
 ```
 
-As you can see, it is a `Vector{Vector{T}}`, whereas our single-scale output is a `Vector{T}`:
+Let's extract the multi-scale `:TT_cu`:
 ```@example usepkg
-outputs_singlescale.TT_cu
+computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])]
 ```
 
-To compare them value-by-value, we can flatten the multiscale Vector and then do a piecewise approximate equality test :
+We can now compare them value-by-value with an element-wise approximate equality test:
 ```@example usepkg
-computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu]))
-
 for i in 1:length(computed_TT_cu_multiscale)
     if !(computed_TT_cu_multiscale[i] ≈ outputs_singlescale.TT_cu[i])
         println(i)

diff --git a/docs/src/working_with_data/floating_point_accumulation_error.md b/docs/src/working_with_data/floating_point_accumulation_error.md
@@ -95,13 +95,11 @@ mtg_multiscale = MultiScaleTreeGraph.Node(MultiScaleTreeGraph.NodeMTG("/", "Scen
     plant = MultiScaleTreeGraph.Node(mtg_multiscale, MultiScaleTreeGraph.NodeMTG("+", "Plant", 1, 1))
 
 outputs_multiscale = run!(mtg_multiscale, mapping_multiscale, meteo_day)
-computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu]))
 ```
 
 ```@example usepkg
 
-computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu]))
-
+computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])]
 is_approx_equal = length(unique(computed_TT_cu_multiscale .≈ outputs_singlescale.TT_cu)) == 1
 ```
 
@@ -110,8 +108,7 @@ Why was the comparison only approximate ? Why `≈` instead of `==`?
 Let's try it out. What if write instead:
 
 ```@example usepkg
-computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu]))
-
+computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])]
 is_perfectly_equal = length(unique(computed_TT_cu_multiscale .== outputs_singlescale.TT_cu)) == 1
 ```
 

diff --git a/src/mtg/GraphSimulation.jl b/src/mtg/GraphSimulation.jl
@@ -25,6 +25,7 @@ struct GraphSimulation{T,S,U,O,V}
     dependency_graph::DependencyGraph
     models::Dict{String,U}
     outputs::Dict{String,O}
+    outputs_index::Dict{String, Int}
 end
 
 function GraphSimulation(graph, mapping; nsteps=1, outputs=nothing, type_promotion=nothing, check=true, verbose=false)
@@ -89,73 +90,45 @@ out = run!(mtg, mapping, meteo, tracked_outputs = Dict(
 convert_outputs(out, DataFrames)
 ```
 """
+# Another, possibly better way would be to just create the DataFrame directly from the outputs 
+# and then remove the RefVector columns and replace the node one, hmm
 function convert_outputs(outs::Dict{String,O} where O, sink; refvectors=false, no_value=nothing)
-    @assert Tables.istable(sink) "The sink argument must be compatible with the Tables.jl interface (`Tables.istable(sink)` must return `true`, *e.g.* `DataFrame`)"
-
-
-    variables_names_types = Iterators.flatten(collect(i.first => eltype(i.second[1]) for i in filter(x -> x.first != :node, vars)) for (organs, vars) in outs) |> collect
-    variables_names_types_dict = Dict{Symbol,Any}()
-
-    for (k, v) in variables_names_types
-        if !haskey(variables_names_types_dict, k)
-            variables_names_types_dict[k] = Union{typeof(no_value),v}
-        else
-            if !refvectors && v <: RefVector && !(variables_names_types_dict[k] <: Union{typeof(no_value),RefVector})
-                continue
+    ret = Dict{String, sink}()
+    for (organ, status_vector) in outs
+        # remove RefVector variables
+        refv = ()
+        if length(status_vector) > 0
+            for (var, val) in pairs(status_vector[1])
+                if !refvectors && isa(val, RefVector)
+                    refv = (refv..., var)
+                end
+                if var == :node
+                    refv = (refv..., var)
+                end
             end
-            variables_names_types_dict[k] = Union{variables_names_types_dict[k],v}
         end
-    end
+
+        # Get the new NamedTuple type
+        refv_nt = NamedTuple{refv}
 
-    # If we have a variable that is only RefVector, we remove it from the variables_names_types:    
-    !refvectors && filter!(x -> !(last(x) <: Union{typeof(no_value),RefVector}), variables_names_types_dict)
-
-    variables_names_types = (timestep=Int, organ=String, node=Int, NamedTuple(variables_names_types_dict)...)
-    var_names_all = keys(variables_names_types)
-    t = NamedTuple{var_names_all,Tuple{values(variables_names_types)...}}[]
-    #=size_hint = 0
-    for (organ, vars) in outs # organ = "Leaf"; vars = outs[organ]
-        var_names = setdiff(collect(keys(vars)), [:node])
-        if length(var_names) == 0
-            continue
-        end
-        steps_iterable = axes(vars[var_names[1]], 1)
-        for timestep in steps_iterable # timestep = 1
-            node_iterable = axes(vars[var_names[1]][timestep], 1)
-            size_hint+=length(node_iterable)
-        end
-    end
+        # Piddle around with the first element to get the final type to be able to allocate the exact vector size with a definite element type
+        vector_named_tuple_1 = NamedTuple(status_vector[1])
 
-    sizehint!(t, size_hint)=#
+        # replace the MTG node var with the id (MTG nodes aren't CSV-friendly)
+        filtered_named_tuple = (;node=MultiScaleTreeGraph.node_id(vector_named_tuple_1.node),Base.structdiff(vector_named_tuple_1, refv_nt)...)
+        filtered_vector_named_tuple = Vector{typeof(filtered_named_tuple)}(undef, length(status_vector))
 
-    for (organ, vars) in outs # organ = "Leaf"; vars = outs[organ]
-        var_names = setdiff(collect(keys(vars)), [:node])
-        if length(var_names) == 0
-            continue
+        for i in 1:length(status_vector)
+            vector_named_tuple_i = NamedTuple(status_vector[i])
+            filtered_vector_named_tuple[i] = (;node=MultiScaleTreeGraph.node_id(vector_named_tuple_i.node), Base.structdiff(vector_named_tuple_i, refv_nt)...)
         end
-        steps_iterable = axes(vars[var_names[1]], 1)
-        for timestep in steps_iterable # timestep = 1
-            node_iterable = axes(vars[var_names[1]][timestep], 1)
-            for node in node_iterable # node = 1
-                vals = Dict(zip(var_names, [something(vars[v][timestep][node], no_value) for v in var_names]))
-                # Remove RefVector values:
-                !refvectors && filter!(x -> !isa(x.second, RefVector), vals)
-                vars_values = (; timestep=timestep, organ=organ, node=MultiScaleTreeGraph.node_id(vars[:node][timestep][node]), vals...)
-                vars_no_values = setdiff(var_names_all, keys(vars_values))
-                if length(vars_no_values) > 0
-                    vars_values = (; vars_values..., zip(vars_no_values, [no_value for v in vars_no_values])...)
-                end
-                push!(
-                    t,
-                    NamedTuple{var_names_all}(vars_values)
-                )
-            end
-        end
-    end
 
-    return sink(t)
+        ret[organ] = sink(filtered_vector_named_tuple)
+    end
+    return ret
 end
 
+# TODO adapt these to new output structure or remove them
 function outputs(outs::Dict{String, O} where O, key::Symbol)
     Tables.columns(convert_outputs(outs, Vector{NamedTuple}))[key]
 end

diff --git a/src/mtg/initialisation.jl b/src/mtg/initialisation.jl
@@ -328,5 +328,6 @@ function init_simulation(mtg, mapping; nsteps=1, outputs=nothing, type_promotion
 
     outputs = pre_allocate_outputs(statuses, status_templates, reverse_multiscale_mapping, vars_need_init, outputs, nsteps, type_promotion=type_promotion, check=check)
 
-    return (; mtg, statuses, status_templates, reverse_multiscale_mapping, vars_need_init, dependency_graph=dep(mapping, verbose=verbose), models, outputs)
+    outputs_index = Dict{String, Int}(s => 1 for s in keys(outputs))
+    return (; mtg, statuses, status_templates, reverse_multiscale_mapping, vars_need_init, dependency_graph=dep(mapping, verbose=verbose), models, outputs, outputs_index)
 end