diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bc5bc8891..020bb6920 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -2,7 +2,7 @@ name: CI on: push: branches: - - main + - dev tags: "*" pull_request: workflow_dispatch: diff --git a/.github/workflows/Integration.yml b/.github/workflows/Integration.yml index 4d0671497..9e965ce92 100644 --- a/.github/workflows/Integration.yml +++ b/.github/workflows/Integration.yml @@ -2,7 +2,7 @@ name: Integration on: push: branches: - - main + - dev tags: "*" pull_request: workflow_dispatch: @@ -32,8 +32,8 @@ jobs: arch: - x64 package: - - {user: PalmStudio, repo: XPalm.jl, branch: PSE-API-changes} - - {user: VEZY, repo: PlantBioPhysics.jl, branch: ModelList-outputs-filtering-changes} + - {user: PalmStudio, repo: XPalm.jl, branch: dev} + - {user: VEZY, repo: PlantBioPhysics.jl, branch: dev} steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 diff --git a/.github/workflows/benchmarks_and_downstream.yml b/.github/workflows/benchmarks_and_downstream.yml index f211d0fa8..0dbb01ada 100644 --- a/.github/workflows/benchmarks_and_downstream.yml +++ b/.github/workflows/benchmarks_and_downstream.yml @@ -1,11 +1,12 @@ -name: BenchmarksAndDownstream +name: Benchmarks on: push: branches: - dev - benchmarks-github-action tags: "*" - workflow-dispatch: + pull_request: + workflow_dispatch: permissions: # deployments permission to deploy GitHub pages website deployments: write @@ -16,8 +17,6 @@ jobs: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} timeout-minutes: 60 - env: - GROUP: ${{ matrix.package.group }} strategy: fail-fast: false matrix: @@ -30,7 +29,7 @@ jobs: arch: - x64 package: - # the group setting is unused atm + # the group setting is unused atm - {user: VEZY, repo: PlantSimEngine.jl, group: Downstream} steps: - uses: actions/checkout@v4 @@ -38,6 +37,13 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@v1 + - name: Clone Downstream + uses: actions/checkout@v4 + with: + repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} + ref: ${{matrix.package.branch}} + path: downstream # TODO handle breaking changes the way downstream tests do ? # NOTE : manifest toml file is removed otherwise git whines about untracked changes when switching branches for the gh-pages commit - name: Run benchmarks diff --git a/.gitignore b/.gitignore index 0bd0f101a..4fead6a18 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ .DS_Store docs/Manifest.toml test/Manifest.toml -docs/build/ \ No newline at end of file +docs/build/ +test/downstream/Manifest.toml diff --git a/docs/src/index.md b/docs/src/index.md index 697124245..d946933c6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -291,12 +291,12 @@ out = run!(mtg, mapping, meteo, tracked_outputs=out_vars, executor=SequentialEx( nothing # hide ``` -We can then extract the outputs in a `DataFrame` and sort them: +We can then extract the outputs and convert them to a `DataFrame` for each scale and sort them: ```@example readme using DataFrames -df_out = convert_outputs(out, DataFrame) -sort!(df_out, [:timestep, :node]) +df_dict = convert_outputs(out, DataFrame) +sort!(df_dict["Leaf"], [:timestep, :node]) ``` An example output of a multiscale simulation is shown in the documentation of PlantBiophysics.jl: diff --git a/docs/src/multiscale/multiscale.md b/docs/src/multiscale/multiscale.md index 3c7948954..42973bb6d 100644 --- a/docs/src/multiscale/multiscale.md +++ b/docs/src/multiscale/multiscale.md @@ -245,11 +245,11 @@ outputs_sim = run!(mtg, mapping, meteo, tracked_outputs = outs); nothing # hide ``` -And that's it! We can now access the outputs for each scale as a dictionary of vectors of values per variable and scale. +And that's it! We can now access the outputs for each scale as a dictionary of vectors of NamedTuple objects. -Or as a `DataFrame` using the [`DataFrames`](https://dataframes.juliadata.org) package: +Or as a `DataFrame` dictionary using the [`DataFrames`](https://dataframes.juliadata.org) package: ```@example usepkg using DataFrames -convert_outputs(outputs_sim, DataFrame) +df_dict = convert_outputs(outputs_sim, DataFrame) ``` \ No newline at end of file diff --git a/docs/src/multiscale/multiscale_considerations.md b/docs/src/multiscale/multiscale_considerations.md index b2ddf2b14..7f2b1fe08 100644 --- a/docs/src/multiscale/multiscale_considerations.md +++ b/docs/src/multiscale/multiscale_considerations.md @@ -83,39 +83,65 @@ Instead of a [`ModelList`](@ref), it takes an MTG and a mapping. The optional `m ## Multi-scale output data structure -The output structure, like the mapping, is a Julia `Dict` structure indexed by scale. In each scale, another `Dict` maps variables to their values per timestep, per node. This makes the structure a little bulkier and a little more verbose to inspect than in single-scale, but the general usage is similar. Multiscale Tree Graph nodes are also added to the output data, as a `:node` entry. + +The output structure, like the mapping, is a Julia `Dict` structure indexed by the scale name. Values are a per-scale `Vector{NamedTuple}` which lists the requested variables for every node at that scale, for every timestep in the simulation. Timestep and Multiscale Tree Graph nodes are also added to the output data, as a `:timestep`and a `:node` entry. + +This dictionary structure makes the outputs as-is a little more verbose to inspect than in single-scale, but the general usage is similar, and it is both compact, and fast to convert to a `Dict{String, DataFrame}` which can make queries easier. + +!!! note + Some of the mapped variables -those that map from scalar to vector- will not be added to the outputs to save some memory and space since they are redundant. + To illustrate, here's an example output from part 3 of the Toy plant tutorial, zeroing in on a variable at the "Root" scale: [Fixing bugs in the plant simulation](@ref): ```julia julia> outs -Dict{String, Dict{Symbol, Vector}} with 5 entries: - "Internode" => Dict(:carbon_root_creation_consumed=>[[50.0, 50.0], [50.0, 50.0], [50.0, 50.0], [50.0, 50.0], [50.0, … - "Root" => Dict(:carbon_root_creation_consumed=>[[50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50.0], [50… - "Scene" => Dict(:TT_cu=>[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0] … [2099.61], [20… - "Plant" => Dict(:carbon_root_creation_consumed=>[[50.0], [50.0], [50.0], [50.0], [50.0], [50.0], [50.0], [50.0],… - "Leaf" => Dict(:node=>Vector{Node{NodeMTG, Dict{Symbol, Any}}}[[+ 4: Leaf… +Dict{String, Vector} with 5 entries: + "Internode" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, TT_cu::Float64, carbon_… + "Root" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, water_absorbed::Float64… + "Scene" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, TT_cu::Float64, TT::Float64}[(timestep = 1, node = / 1: Scene… + "Plant" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, carbon_stock::Float64, … + "Leaf" => @NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_captured::Float64}[(timestep = 1, node = + 4: Leaf… julia> outs["Root"] -Dict{Symbol, Vector} with 4 entries: - :carbon_root_creation_consumed => [[50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50.0], [50.0, 50.0, 50.0, 50… - :node => Vector{Node{NodeMTG, Dict{Symbol, Any}}}[[+ 9: Root… - :water_absorbed => [[0.5, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1.1, 1.1, 1.1, 1.1, 0.0], [0.… - :root_water_assimilation => [[1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0], [1.… - -julia> outs["Root"][:carbon_root_creation_consumed] -365-element Vector{Vector{Float64}}: - [50.0, 50.0] # timestep 1: two root nodes - [50.0, 50.0, 50.0] - [50.0, 50.0, 50.0, 50.0] - [50.0, 50.0, 50.0, 50.0, 50.0] - [50.0, 50.0, 50.0, 50.0, 50.0, 50.0] - [50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0] # timestep 6: 7 root nodes +3257-element Vector{@NamedTuple{timestep::Int64, node::Node{NodeMTG, Dict{Symbol, Any}}, carbon_root_creation_consumed::Float64, water_absorbed::Float64, root_water_assimilation::Float64}}: + (timestep = 1, node = + 9: Root +└─ < 10: Root + └─ < 11: Root + └─ < 12: Root + └─ < 13: Root + └─ < 14: Root + └─ < 15: Root + └─ < 16: Root + └─ < 17: Root +, carbon_root_creation_consumed = 50.0, water_absorbed = 0.5, root_water_assimilation = 1.0) ⋮ ``` -As more roots get added in this simulation, the vectors expand to list the values of all the nodes for every variable for every timestep. +Values are more complex to query than in a single-scale simulation since the indexing isn't straightforward to map to a timestep: + +```julia +julia> [Pair(outs["Root"][i][:timestep], outs["Root"][i][:carbon_root_creation_consumed]) for i in 1:length(outs["Root"])] +3257-element Vector{Pair{Int64, Float64}}: + 1 => 50.0 + 1 => 50.0 + 2 => 50.0 + 2 => 50.0 + 2 => 50.0 + ⋮ + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 + 365 => 50.0 +``` + +Converting to a dictionary of DataFrame objects can make such queries easier to write. !!! warning Currently, the `:node` entry only shallow copies nodes. The `:node` values at each scale for every timestep actually reflect the final state of the node, meaning attribute values may not correspond to the value at that timestep. You may need to output these values via a dedicated model to keep track of them properly. diff --git a/docs/src/multiscale/multiscale_example_3.md b/docs/src/multiscale/multiscale_example_3.md index 749914071..5ab116d50 100644 --- a/docs/src/multiscale/multiscale_example_3.md +++ b/docs/src/multiscale/multiscale_example_3.md @@ -244,13 +244,19 @@ Depth = 3 There is one quirk you may have noticed when inspecting the data : when a root expands, the new root is immediately active, and some models may act on it immediately... including the root growth model. Meaning this new root may also sprout another root in the same timestep, and so on. -You can notice this by looking at the simulation's state after the first timestep: +You can notice this by looking at the simulation's state during the first two timesteps: ```@example usepkg outs = run!(mtg, mapping, first(meteo_day, 2)) -nodes_per_timestep = outs["Root"][:node] -root_lengths_per_timestep = [length(nodes_per_timestep[i]) for i in 1:length(nodes_per_timestep)] +root_nodes_per_timestep = [0, 0] +for i in 1:length(outs["Root"]) + if outs["Root"][i].timestep < 3 + root_nodes_per_timestep[outs["Root"][i].timestep] += 1 + end +end + +root_nodes_per_timestep ``` Our root grew to full length within one timestep. Oops. diff --git a/docs/src/multiscale/single_to_multiscale.md b/docs/src/multiscale/single_to_multiscale.md index 169e8260c..5f5ccb240 100644 --- a/docs/src/multiscale/single_to_multiscale.md +++ b/docs/src/multiscale/single_to_multiscale.md @@ -203,20 +203,18 @@ We can access the output variables at the "Scene" scale by indexing our outputs: ```@example usepkg outputs_multiscale["Scene"] ``` -and then the computed `:TT_cu`: +We have a `Vector{NamedTuple}`structure. Our single-scale output is a `Vector{T}`: ```@example usepkg -outputs_multiscale["Scene"][:TT_cu] +outputs_singlescale.TT_cu ``` -As you can see, it is a `Vector{Vector{T}}`, whereas our single-scale output is a `Vector{T}`: + Let's extract the multi-scale `:TT_cu`: ```@example usepkg -outputs_singlescale.TT_cu +computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])] ``` -To compare them value-by-value, we can flatten the multiscale Vector and then do a piecewise approximate equality test : +We can now compare them value-by-value and do a piecewise approximate equality test : ```@example usepkg -computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu])) - for i in 1:length(computed_TT_cu_multiscale) if !(computed_TT_cu_multiscale[i] ≈ outputs_singlescale.TT_cu[i]) println(i) diff --git a/docs/src/working_with_data/floating_point_accumulation_error.md b/docs/src/working_with_data/floating_point_accumulation_error.md index 12e09b20a..f17a482f9 100644 --- a/docs/src/working_with_data/floating_point_accumulation_error.md +++ b/docs/src/working_with_data/floating_point_accumulation_error.md @@ -95,13 +95,11 @@ mtg_multiscale = MultiScaleTreeGraph.Node(MultiScaleTreeGraph.NodeMTG("/", "Scen plant = MultiScaleTreeGraph.Node(mtg_multiscale, MultiScaleTreeGraph.NodeMTG("+", "Plant", 1, 1)) outputs_multiscale = run!(mtg_multiscale, mapping_multiscale, meteo_day) -computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu])) ``` ```@example usepkg -computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu])) - +computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])] is_approx_equal = length(unique(computed_TT_cu_multiscale .≈ outputs_singlescale.TT_cu)) == 1 ``` @@ -110,8 +108,7 @@ Why was the comparison only approximate ? Why `≈` instead of `==`? Let's try it out. What if write instead: ```@example usepkg -computed_TT_cu_multiscale = collect(Base.Iterators.flatten(outputs_multiscale["Scene"][:TT_cu])) - +computed_TT_cu_multiscale = [outputs_multiscale["Scene"][i].TT_cu for i in 1:length(outputs_multiscale["Scene"])] is_perfectly_equal = length(unique(computed_TT_cu_multiscale .== outputs_singlescale.TT_cu)) == 1 ``` diff --git a/src/mtg/GraphSimulation.jl b/src/mtg/GraphSimulation.jl index 9f243aeb3..37874e12e 100644 --- a/src/mtg/GraphSimulation.jl +++ b/src/mtg/GraphSimulation.jl @@ -25,6 +25,7 @@ struct GraphSimulation{T,S,U,O,V} dependency_graph::DependencyGraph models::Dict{String,U} outputs::Dict{String,O} + outputs_index::Dict{String, Int} end function GraphSimulation(graph, mapping; nsteps=1, outputs=nothing, type_promotion=nothing, check=true, verbose=false) @@ -89,73 +90,45 @@ out = run!(mtg, mapping, meteo, tracked_outputs = Dict( convert_outputs(out, DataFrames) ``` """ +# Another, possibly better way would be to just create the DataFrame directly from the outputs +# and then remove the RefVector columns and replace the node one, hmm function convert_outputs(outs::Dict{String,O} where O, sink; refvectors=false, no_value=nothing) - @assert Tables.istable(sink) "The sink argument must be compatible with the Tables.jl interface (`Tables.istable(sink)` must return `true`, *e.g.* `DataFrame`)" - - - variables_names_types = Iterators.flatten(collect(i.first => eltype(i.second[1]) for i in filter(x -> x.first != :node, vars)) for (organs, vars) in outs) |> collect - variables_names_types_dict = Dict{Symbol,Any}() - - for (k, v) in variables_names_types - if !haskey(variables_names_types_dict, k) - variables_names_types_dict[k] = Union{typeof(no_value),v} - else - if !refvectors && v <: RefVector && !(variables_names_types_dict[k] <: Union{typeof(no_value),RefVector}) - continue + ret = Dict{String, sink}() + for (organ, status_vector) in outs + # remove RefVector variables + refv = () + if length(status_vector) > 0 + for (var, val) in pairs(status_vector[1]) + if !refvectors && isa(val, RefVector) + refv = (refv..., var) + end + if var == :node + refv = (refv..., var) + end end - variables_names_types_dict[k] = Union{variables_names_types_dict[k],v} end - end + + # Get the new NamedTuple type + refv_nt = NamedTuple{refv} - # If we have a variable that is only RefVector, we remove it from the variables_names_types: - !refvectors && filter!(x -> !(last(x) <: Union{typeof(no_value),RefVector}), variables_names_types_dict) - - variables_names_types = (timestep=Int, organ=String, node=Int, NamedTuple(variables_names_types_dict)...) - var_names_all = keys(variables_names_types) - t = NamedTuple{var_names_all,Tuple{values(variables_names_types)...}}[] - #=size_hint = 0 - for (organ, vars) in outs # organ = "Leaf"; vars = outs[organ] - var_names = setdiff(collect(keys(vars)), [:node]) - if length(var_names) == 0 - continue - end - steps_iterable = axes(vars[var_names[1]], 1) - for timestep in steps_iterable # timestep = 1 - node_iterable = axes(vars[var_names[1]][timestep], 1) - size_hint+=length(node_iterable) - end - end + # Piddle around with the first element to get the final type to be able to allocate the exact vector size with a definite element type + vector_named_tuple_1 = NamedTuple(status_vector[1]) - sizehint!(t, size_hint)=# + # replace the MTG node var with the id (MTG nodes aren't CSV-friendly) + filtered_named_tuple = (;node=MultiScaleTreeGraph.node_id(vector_named_tuple_1.node),Base.structdiff(vector_named_tuple_1, refv_nt)...) + filtered_vector_named_tuple = Vector{typeof(filtered_named_tuple)}(undef, length(status_vector)) - for (organ, vars) in outs # organ = "Leaf"; vars = outs[organ] - var_names = setdiff(collect(keys(vars)), [:node]) - if length(var_names) == 0 - continue + for i in 1:length(status_vector) + vector_named_tuple_i = NamedTuple(status_vector[i]) + filtered_vector_named_tuple[i] = (;node=MultiScaleTreeGraph.node_id(vector_named_tuple_i.node), Base.structdiff(vector_named_tuple_i, refv_nt)...) end - steps_iterable = axes(vars[var_names[1]], 1) - for timestep in steps_iterable # timestep = 1 - node_iterable = axes(vars[var_names[1]][timestep], 1) - for node in node_iterable # node = 1 - vals = Dict(zip(var_names, [something(vars[v][timestep][node], no_value) for v in var_names])) - # Remove RefVector values: - !refvectors && filter!(x -> !isa(x.second, RefVector), vals) - vars_values = (; timestep=timestep, organ=organ, node=MultiScaleTreeGraph.node_id(vars[:node][timestep][node]), vals...) - vars_no_values = setdiff(var_names_all, keys(vars_values)) - if length(vars_no_values) > 0 - vars_values = (; vars_values..., zip(vars_no_values, [no_value for v in vars_no_values])...) - end - push!( - t, - NamedTuple{var_names_all}(vars_values) - ) - end - end - end - return sink(t) + ret[organ] = sink(filtered_vector_named_tuple) + end + return ret end +# TODO adapt these to new output structure or remove them function outputs(outs::Dict{String, O} where O, key::Symbol) Tables.columns(convert_outputs(outs, Vector{NamedTuple}))[key] end diff --git a/src/mtg/initialisation.jl b/src/mtg/initialisation.jl index 16be962fb..57aef81cf 100644 --- a/src/mtg/initialisation.jl +++ b/src/mtg/initialisation.jl @@ -328,5 +328,6 @@ function init_simulation(mtg, mapping; nsteps=1, outputs=nothing, type_promotion outputs = pre_allocate_outputs(statuses, status_templates, reverse_multiscale_mapping, vars_need_init, outputs, nsteps, type_promotion=type_promotion, check=check) - return (; mtg, statuses, status_templates, reverse_multiscale_mapping, vars_need_init, dependency_graph=dep(mapping, verbose=verbose), models, outputs) + outputs_index = Dict{String, Int}(s => 1 for s in keys(outputs)) + return (; mtg, statuses, status_templates, reverse_multiscale_mapping, vars_need_init, dependency_graph=dep(mapping, verbose=verbose), models, outputs, outputs_index) end \ No newline at end of file diff --git a/src/mtg/save_results.jl b/src/mtg/save_results.jl index a024e1468..67b569148 100644 --- a/src/mtg/save_results.jl +++ b/src/mtg/save_results.jl @@ -109,6 +109,7 @@ julia> collect(keys(preallocated_vars["Leaf"])) :carbon_demand ``` """ + function pre_allocate_outputs(statuses, statuses_template, reverse_multiscale_mapping, vars_need_init, outs, nsteps; type_promotion=nothing, check=true) outs_ = Dict{String,Vector{Symbol}}() @@ -128,6 +129,20 @@ function pre_allocate_outputs(statuses, statuses_template, reverse_multiscale_ma outs_[i] = [outs[i]...] end end + + len = Dict{String, Int}() + for (organ, vals) in outs_ + len[organ] = length(outs_[organ]) + unique!(outs_[organ]) + end + + + for (organ, vals) in outs_ + if length(outs_[organ]) != len[organ] + @info "One or more requested output variable duplicated at scale $organ, removed it" + end + end + statuses_ = copy(statuses_template) # Checking that organs in outputs exist in the mtg (in the statuses): if !all(i in keys(statuses) for i in keys(outs_)) @@ -183,8 +198,6 @@ function pre_allocate_outputs(statuses, statuses_template, reverse_multiscale_ma end end - outs_tuple = Dict(i => Tuple(x for x in outs_[i]) for i in keys(outs_)) - node_types = [] for o in keys(statuses) if length(statuses[o]) > 0 @@ -196,10 +209,39 @@ function pre_allocate_outputs(statuses, statuses_template, reverse_multiscale_ma @assert length(node_type) == 1 "All plant graph nodes should have the same type, found $(unique(node_type))." node_type = only(node_type) - # Making the pre-allocated outputs: - return Dict(organ => Dict(var => [var == :node ? node_type[] : typeof(status_from_template(statuses_template[organ])[var])[] for n in 1:nsteps] for var in vars) for (organ, vars) in outs_tuple) - # Note: we use the type of the variable from the status template for each organ to pre-allocate the outputs. We transform the status template into a status to get the types of the variables - # without the reference types, e.g. RefVector{Float64} becomes Vector{Float64}. + # I don't know if this function barrier is necessary + preallocated_outputs = Dict{String, Vector}() + complete_preallocation_from_types!(preallocated_outputs, nsteps, outs_, node_type, statuses_template) + return preallocated_outputs +end + +function complete_preallocation_from_types!(preallocated_outputs, nsteps, outs_, node_type, statuses_template) + types = Vector{DataType}() + for organ in keys(outs_) + + outs_no_node = filter(x -> x != :node, outs_[organ]) + + #types = [typeof(status_from_template(statuses_template[organ])[var]) for var in outs[organ]] + values = [status_from_template(statuses_template[organ])[var] for var in outs_no_node] + + #push!(types, node_type) + + # contains :node + symbols_tuple = (:timestep, :node, outs_no_node...,) + # using node_type.parameters[1] is clunky, but covers both NodeMTG and AbstractNodeMTG types + values_tuple = (1, MultiScaleTreeGraph.Node((node_type.parameters[1])("/", "Uninitialized", 0, 0),), values...,) + + # Dummy value to make accessing the type easier + # (empty arrays don't have references to an instance, so their types can't be inspected and manipulated as easily) + dummy_status = (;zip(symbols_tuple, values_tuple)...) + data = typeof(Status(dummy_status))[] + resize!(data, nsteps) + + for ii in 1:nsteps + data[ii] = Status(dummy_status) + end + preallocated_outputs[organ] = data + end end @@ -218,14 +260,63 @@ function save_results!(object::GraphSimulation, i) end statuses = status(object) + indexes = object.outputs_index + for organ in keys(outs) + + if length(outs[organ]) == 0 + continue + end + + index = indexes[organ] + + # Samuel : Simple resizing heuristic + # This can be made much more conservative with the right heuristic, or with user hints + # The array filling bit of code is clunky, but building NamedTuples on the fly was tanking performance + # And it wasn't straightforward to avoid Status Ref reallocations causing performance issues + # I then tried various approaches with Status, resizing, using fill, deepcopying, ... + # I may have gotten a little confused while fighting the type system + # So there may be possible simplifications (maybe no need for a function barrier, perhaps the resizing could be made a one-liner...) + # But this should work without causing visible performance regressions on XPalm + len = length(outs[organ]) + if length(statuses[organ]) + index - 1 > len + min_required = max(length(statuses[organ]) + index - len, index) + + extra_length = 2*min_required - len + data = eltype(outs[organ])[] + resize!(data, extra_length) + dummy_value = NamedTuple(outs[organ][1]) + # TODO set timestep to 0 for clarity ? + + # Using fill! caused Ref issues, so call a Status constructor here instead of passing a prebuilt value + # This will avoid having all array entries point to the same ref but keep construction cost at a minimum + for new_entry in 1:extra_length + data[new_entry] = Status(dummy_value) + end - for (organ, vars) in outs - for (var, values) in vars - values[i] = [status[var] for status in statuses[organ]] + outs[organ] = cat(outs[organ], data, dims=1) + #println("len : ", len, " statuses #", length(statuses[organ]), " index ", index) + #println("min_required : ", min_required, " extra_length ", extra_length, " new len ", length(outs[organ])) end + + tracked_outputs = filter(i -> i != :timestep, keys(outs[organ][1])) + + indexes[organ] = copy_tracked_outputs_into_vector!(outs[organ], i, statuses[organ], tracked_outputs, indexes[organ]) end end +function copy_tracked_outputs_into_vector!(outs_organ, i, statuses_organ, tracked_outputs, index) + j = index + for status in statuses_organ + outs_organ[j].timestep = i + for var in tracked_outputs + outs_organ[j][var] = status[var] + end + j += 1 + end + return j +end + + function pre_allocate_outputs(m::ModelList, outs, nsteps; type_promotion=nothing, check=true) # NOTE : init_variables recreates a DependencyGraph, it's not great diff --git a/src/run.jl b/src/run.jl index 98362eeeb..8958127a3 100644 --- a/src/run.jl +++ b/src/run.jl @@ -422,6 +422,12 @@ function run!( end end + # save_results! resizes the outputs melodramatically because the total # of nodes at a given scale can't always be known + # if models create organs, so shrink it down to the final size here + for (organ, index) in object.outputs_index + resize!(outputs(object)[organ], index - 1) + end + return outputs(object) end diff --git a/test/downstream/test-all-benchmarks.jl b/test/downstream/test-all-benchmarks.jl index 4000ea609..13d790f2c 100644 --- a/test/downstream/test-all-benchmarks.jl +++ b/test/downstream/test-all-benchmarks.jl @@ -44,9 +44,9 @@ include("test-xpalm.jl") suite[suite_name]["XPalm_setup"] = @benchmarkable xpalm_default_param_create() seconds = 120 palm, models, out_vars, meteo = xpalm_default_param_create() -sim_outputs = xpalm_default_param_run(palm, models, out_vars, meteo) +sim_outputs = xpalm_default_param_run(palm, models, meteo, out_vars) -suite[suite_name]["XPalm_run"] = @benchmarkable xpalm_default_param_run($palm, $models, $out_vars, $meteo) seconds = 120 +suite[suite_name]["XPalm_run"] = @benchmarkable xpalm_default_param_run($palm, $models, $meteo, $out_vars) seconds = 120 suite[suite_name]["XPalm_convert_outputs"] = @benchmarkable xpalm_default_param_convert_outputs($sim_outputs) seconds = 120 tune!(suite) diff --git a/test/downstream/test-xpalm.jl b/test/downstream/test-xpalm.jl index e11ac6717..d6e1b7ee1 100644 --- a/test/downstream/test-xpalm.jl +++ b/test/downstream/test-xpalm.jl @@ -2,7 +2,6 @@ #Pkg.develop("PlantSimEngine") #using PlantSimEngine -# no release of XPalm yet, so can't just add it to the .toml using Pkg #Pkg.add(url="https://github.com/PalmStudio/XPalm.jl#dev") #Pkg.instantiate() @@ -21,7 +20,7 @@ function xpalm_default_param_create() out_vars = Dict{String,Any}( "Scene" => (:lai,), - # "Scene" => (:lai, :scene_leaf_area, :aPPFD, :TEff), + # "Scene" => (:LAI, :scene_leaf_area, :aPPFD, :TEff), # "Plant" => (:plant_age, :ftsw, :newPhytomerEmergence, :aPPFD, :plant_leaf_area, :carbon_assimilation, :carbon_offer_after_rm, :Rm, :TT_since_init, :TEff, :phytomer_count, :newPhytomerEmergence), "Leaf" => (:Rm, :potential_area, :TT_since_init, :TEff, :A, :carbon_demand, :carbon_allocation,), # "Leaf" => (:Rm, :potential_area), diff --git a/test/helper-functions.jl b/test/helper-functions.jl index 9cf3f8477..e4db3532d 100644 --- a/test/helper-functions.jl +++ b/test/helper-functions.jl @@ -1,9 +1,11 @@ # Simple helper functions that can be used in various tests here and there -function compare_outputs_modellist_mapping(filtered_outputs, graphsim) - outputs_df = convert_outputs(graphsim.outputs, DataFrame) +function compare_outputs_modellist_mapping(filtered_outputs, graphsim) + outputs_df_dict = convert_outputs(graphsim.outputs, DataFrame) + @assert length(outputs_df_dict) == 1 - outputs_df_outputs_only = select(outputs_df, Not([:timestep, :organ, :node])) + outputs_df = last(first(outputs_df_dict)) + outputs_df_outputs_only = select(outputs_df, Not([:timestep#=, :organ=#, :node])) models_df = DataFrame(filtered_outputs) models_df_sorted = models_df[:, sortperm(names(models_df))] @@ -12,7 +14,7 @@ function compare_outputs_modellist_mapping(filtered_outputs, graphsim) end # doesn't check for mtg equality -function compare_outputs_graphsim(graphsim, graphsim2) +function compare_outputs_graphsim_old(graphsim, graphsim2) outputs_df = convert_outputs(graphsim.outputs, DataFrame) outputs_df_sorted = outputs_df[:, sortperm(names(outputs_df))] @@ -21,6 +23,27 @@ function compare_outputs_graphsim(graphsim, graphsim2) return outputs_df_sorted == outputs2_df_sorted end +# doesn't check for mtg equality +function compare_outputs_graphsim(graphsim, graphsim2) + outputs_df_dict = convert_outputs(graphsim.outputs, DataFrame) + outputs2_df_dict = convert_outputs(graphsim2.outputs, DataFrame) + + if length(outputs_df_dict) != length(outputs2_df_dict) + return false + end + + for (organ, vals) in outputs2_df_dict + outputs_df_sorted = outputs_df_dict[organ][:, sortperm(names(outputs_df_dict[organ]))] + outputs2_df_sorted = outputs2_df_dict[organ][:, sortperm(names(outputs2_df_dict[organ]))] + + if outputs_df_sorted != outputs2_df_sorted + return false + end + end + + return true +end + function compare_outputs_modellists(filtered_outputs_1, filtered_outputs_2) models_df_1 = DataFrame(filtered_outputs_1) models_df_sorted_1 = models_df_1[:, sortperm(names(models_df_1))] diff --git a/test/test-corner-cases.jl b/test/test-corner-cases.jl index cc881a23d..e1c9ac48e 100644 --- a/test/test-corner-cases.jl +++ b/test/test-corner-cases.jl @@ -529,8 +529,9 @@ end vars = Dict{String,Any}("Leaf" => (:var1,)) out = run!(mtg, m, Atmosphere(T=20.0, Wind=1.0, Rh=0.65), tracked_outputs=vars, executor=SequentialEx()) - df = convert_outputs(out, DataFrame) - @test DataFrames.nrow(df) == 2 + df_dict = convert_outputs(out, DataFrame) + @test DataFrames.nrow(df_dict["Leaf"]) == 2 + @test DataFrames.ncol(df_dict["Leaf"]) == 3 end ########################## @@ -563,9 +564,8 @@ end sim = run!(mtg, mapping, meteo; tracked_outputs=outs) using DataFrames - df = convert_outputs(sim, DataFrame) - @test DataFrames.nrow(df) == PlantSimEngine.get_nsteps(meteo) - + df_dict = convert_outputs(sim, DataFrame) + @test DataFrames.nrow(df_dict["Default"]) == PlantSimEngine.get_nsteps(meteo) end diff --git a/test/test-mtg-dynamic.jl b/test/test-mtg-dynamic.jl index b767c391e..498f1d146 100644 --- a/test/test-mtg-dynamic.jl +++ b/test/test-mtg-dynamic.jl @@ -82,8 +82,8 @@ out = run!(sim,meteo) @test st["Internode"][1].TT_cu_emergence == 0.0 @test st["Internode"][end].TT_cu_emergence == 25.0 - out_df = convert_outputs(out, DataFrame) - @test unique(out_df[:, :organ]) |> sort == ["Internode", "Leaf", "Plant", "Soil"] - @test filter(row -> row.organ == "Internode", out_df)[:, :TT_cu_emergence] == [0.0, 0.0, 0.0, 0.0, 25.0] - @test filter(row -> row.organ == "Leaf", out_df)[:, :carbon_demand] == [0.5, 0.5, 0.75, 0.75, 0.75] + out_df_dict = convert_outputs(out, DataFrame) + @test collect(keys(out_df_dict)) |> sort == ["Internode", "Leaf", "Plant", "Soil"] + @test out_df_dict["Internode"][:, :TT_cu_emergence] == [0.0, 0.0, 0.0, 0.0, 25.0] + @test out_df_dict["Leaf"][:, :carbon_demand] == [0.5, 0.5, 0.75, 0.75, 0.75] end \ No newline at end of file diff --git a/test/test-mtg-multiscale-cyclic-dep.jl b/test/test-mtg-multiscale-cyclic-dep.jl index 134941e86..ca3258ea2 100644 --- a/test/test-mtg-multiscale-cyclic-dep.jl +++ b/test/test-mtg-multiscale-cyclic-dep.jl @@ -226,5 +226,16 @@ end ref_path = joinpath(pkgdir(PlantSimEngine), "test/references/ref_output_simulation.csv") # CSV.write(ref_path, sort(convert_outputs(out, DataFrame, no_value=missing), [:timestep, :node]), transform=(col, val) -> something(val, missing)) ref_df = CSV.read(ref_path, DataFrame) - @test isequal(sort(convert_outputs(out, DataFrame, no_value=missing), [:timestep, :node]), ref_df) + + @test sort(unique(ref_df.organ)) == sort(collect(keys(out))) + + out_df_dict = convert_outputs(out, DataFrame, no_value=missing) + + for organ in keys(out) + reduced_ref_df = ref_df[(ref_df.organ .== organ), Not(:organ)] + reduced_ref_df_no_missing = reduced_ref_df[:, any.(!ismissing, eachcol(reduced_ref_df))] + sorted_reduced_ref_df_no_missing = select(reduced_ref_df_no_missing, sort(propertynames(reduced_ref_df_no_missing))) + sorted_out_df_dict_organ = select(out_df_dict[organ], sort(propertynames(out_df_dict[organ]))) + @test sorted_reduced_ref_df_no_missing == sorted_out_df_dict_organ + end end \ No newline at end of file diff --git a/test/test-mtg-multiscale.jl b/test/test-mtg-multiscale.jl index d1113f46b..ecb34fd38 100644 --- a/test/test-mtg-multiscale.jl +++ b/test/test-mtg-multiscale.jl @@ -213,8 +213,10 @@ end outs_ = @test_logs (:info, "You requested outputs for organs Soil, Flowers, Leaf, but organs Flowers have no models.") (:info, "You requested outputs for variable non_existing_variable in organ Leaf, but it has no model.") PlantSimEngine.pre_allocate_outputs(statuses, status_templates, reverse_multiscale_mapping, vars_need_init, outs, nsteps, check=false) @test outs_ == Dict( - "Soil" => Dict(:node => [[], []], :soil_water_content => [[], []]), - "Leaf" => Dict(:carbon_assimilation => [[], []], :node => [[], []], :carbon_demand => [[], []]) + "Soil" => [Status(timestep = 1, node = MultiScaleTreeGraph.Node(NodeMTG("/", "Uninitialized", 0, 0)), soil_water_content = -Inf), + Status(timestep = 1, node = MultiScaleTreeGraph.Node(NodeMTG("/", "Uninitialized", 0, 0),), soil_water_content = -Inf)], + "Leaf" => [Status(timestep = 1, node = MultiScaleTreeGraph.Node(NodeMTG("/", "Uninitialized", 0, 0),), carbon_assimilation = -Inf, carbon_demand = -Inf), + Status(timestep = 1, node = MultiScaleTreeGraph.Node(NodeMTG("/", "Uninitialized", 0, 0),), carbon_assimilation = -Inf, carbon_demand = -Inf)] ) end @@ -689,26 +691,33 @@ end @test sim.statuses["Leaf"][1].var5 == 32.4806 @test sim.statuses["Leaf"][1].var8 ≈ 1321.0700490800002 atol = 1e-6 - @test sim.outputs["Leaf"][:carbon_demand] == [[0.5, 0.5], [0.5, 0.5]] - @test sim.outputs["Leaf"][:soil_water_content][1] == fill(sim.outputs["Soil"][:soil_water_content][1][1], 2) - @test sim.outputs["Leaf"][:soil_water_content][2] == fill(sim.outputs["Soil"][:soil_water_content][2][1], 2) + @test unique([sim.outputs["Leaf"][i][:carbon_demand] == 0.5 for i in 1:4]) == [1] + @test sim.outputs["Leaf"][1][:soil_water_content] == sim.outputs["Soil"][1][:soil_water_content] + @test sim.outputs["Leaf"][2][:soil_water_content] == sim.outputs["Soil"][2][:soil_water_content] - @test sim.outputs["Leaf"][:carbon_allocation] == sim.outputs["Internode"][:carbon_allocation] - @test sim.outputs["Plant"][:carbon_allocation][1][1][1] === sim.outputs["Internode"][:carbon_allocation][1][1] + @test all([sim.outputs["Leaf"][i][:carbon_allocation] == sim.outputs["Internode"][i][:carbon_allocation] for i in 1:4])==1 + @test sim.outputs["Plant"][1][:carbon_allocation][1] === sim.outputs["Internode"][1][:carbon_allocation] # Testing the outputs if transformed into a DataFrame: - outs = convert_outputs(out, DataFrame) + outs_df_dict = convert_outputs(out, DataFrame) - @test isa(outs, DataFrame) - @test size(outs) == (12, 7) + @test isa(outs_df_dict, Dict{String, DataFrame}) + @test size(outs_df_dict["Leaf"]) == (4, 6) + @test size(outs_df_dict["Internode"]) == (4, 3) + @test size(outs_df_dict["Soil"]) == (2, 3) + @test size(outs_df_dict["Plant"]) == (2, 2) - @test unique(outs.timestep) == [1, 2] - @test sort(unique(outs.organ)) == sort(collect(keys(out_vars))) - @test length(filter(x -> x !== nothing, outs.carbon_assimilation)) == length(filter(x -> x, traverse(mtg, node -> MultiScaleTreeGraph.scale(node) == 2))) - # a = status(out, TimeStepTable{Status}) - A = outputs(out, :carbon_assimilation) - @test A == outs.carbon_assimilation + @test sort(collect(keys(outs_df_dict))) == sort(collect(keys(out_vars))) + for organ in keys(outs_df_dict) + @test unique(outs_df_dict[organ][!, :timestep]) == [1, 2] + end + # output structure change makes this test obsolete without any trivial equivalent + #@test length(filter(x -> x !== nothing, outs.carbon_assimilation)) == length(filter(x -> x, traverse(mtg, node -> MultiScaleTreeGraph.scale(node) == 2))) + + # TODO, find some equivalent with new output structure + #A = outputs(out, :carbon_assimilation) + #@test A == outs.carbon_assimilation - A2 = outputs(out, 5) - @test A == A2 + #A2 = outputs(out, 5) + #@test A == A2 end \ No newline at end of file