diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 65fc060..4243585 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -24,13 +24,13 @@ jobs:
matrix:
version:
- '1.12'
- - 'pre'
+ #- 'pre'
os:
- ubuntu-latest
arch:
- x64
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
@@ -39,7 +39,7 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- - uses: codecov/codecov-action@v4
+ - uses: codecov/codecov-action@v5
with:
files: lcov.info
token: ${{ secrets.CODECOV_TOKEN }}
@@ -52,7 +52,7 @@ jobs:
contents: write
statuses: write
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- uses: julia-actions/setup-julia@v2
with:
version: '1'
diff --git a/.gitignore b/.gitignore
index e43b0f9..a28df6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
.DS_Store
+Manifest.toml
+docs/build/
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 4cd00ca..9f77054 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,9 +22,18 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[compat]
+BenchmarkTools = "1"
+CSV = "0.10"
+BSON = "0.3"
CUDA = "5.9.2"
+CairoMakie = "0.15"
+DecisionTree = "0.12"
+DataFrames = "1"
LinearAlgebra = "1.12.0"
+MKL = "0.9"
MatrixDepot = "1.0.13"
OpenAI = "0.12.0"
+OrderedCollections = "1"
+ScikitLearn = "0.7"
Test = "1.11.0"
julia = "1.11"
diff --git a/README.md b/README.md
index 3bd7369..118a610 100644
--- a/README.md
+++ b/README.md
@@ -1,56 +1,14 @@
-
-
SmartSolve.jl is a Julia-based toolbox for AI-guided algorithmic discovery, designed to accelerate computations by generating enhanced algorithmic and architectural selection strategies. Envisioned as a general-purpose tool for scientific computing, current efforts focus on challenges in computational linear algebra. The toolbox addresses the growing complexity of selecting efficient solvers, data formats, precision strategies, and hardware resources for structurally diverse matrices—where conventional approaches offer substantial opportunities for improvement. SmartSolve.jl constructs a performance database through systematic benchmarking and applies automated Pareto analysis to identify optimal trade-offs between accuracy and speed. This database underpins a data-driven model that synthesizes dispatch strategies tailored to high-performance linear algebra software.
-## How to start
-
-In the following example SmartSolve is used to automatically generate SmartLU, an optimized version of the LU decomposition.
-
-```bash
-cd SmartSolve/examples/smartlu
-julia --project=.
-```
-
-```julia
-pkg> dev ../..
-pkg> instantiate
-julia> include("generate_smartlu.jl")
-```
-
-## Publications
-
-- Emmanuel Lujan and Alan Edelman. "When Structure is Silent: Opportunities for Algorithmic Dispatch in Linear Algebra," 2025 IEEE High Performance Extreme Computing Conference (HPEC). Submitted.
-- Rushil Shah, Emmanuel Lujan, and Rabab Alomairy, and Alan Edelman. "Data-Driven Dynamic Algorithm Dispatch with Large Language Models," 2025 IEEE High Performance Extreme Computing Conference (HPEC). Submitted.
-- Emmanuel Lujan, Rushil Shah, Rabab Alomairy, and Alan Edelman. "SmartSolve.jl: AI for Algorithmic Discovery," 0.1.0-alpha. Zenodo [(link)](https://doi.org/10.5281/zenodo.15784217).
-- Rabab Alomairy, Felipe Tome, Julian Samaroo, Alan Edelman. _"Dynamic Task Scheduling with Data Dependency Awareness Using Julia"_, 2024 IEEE High Performance Extreme Computing Conference (HPEC) [(link)](https://ieeexplore.ieee.org/document/10938467).
-
-## Talks
-
-- Rushil Shah, Emmanuel Lujan, and Rabab Alomairy. _"Automated Algorithm Selection Discovery via LLMs,"_ JuliaCon 2025, Lightning Talk. [(Link)](https://pretalx.com/juliacon-2025/talk/review/FXWAYZEZ9XEPYPHL3JJNAS7NBACU3GXE).
-- Alan Edelman et al. _"Julia, Portable Numerical Linear Algebra and Beyond."_ Presentation at Householder Symposium, 2025. [(Link)](https://householder-symposium.github.io/presenters.html). Accessed June 20, 2025.
-- Alan Edelman, _"Improving the HPC Experience: Did Julia Get It Right, or Will AI Hide the Problem (or Both)?"_ Keynote at the Workshop on Asynchronous Many-Task Systems and Applications (WAMTA), 2025. [(Link)](https://wamta25.github.io/keynote). Accessed June 20, 2025.
-
-## How to Cite
-
-```bibtex
-@software{SmartSolve2025,
- author = {Lujan, Emmanuel and Shah, Rushil N. and Alomairy, Rabab and Edelman, Alan},
- title = {SmartSolve.jl: AI for Algorithmic Discovery},
- month = jul,
- year = 2025,
- publisher = {Zenodo},
- version = {0.1.0-alpha},
- doi = {10.5281/zenodo.15784217},
- url = {https://doi.org/10.5281/zenodo.15784217},
-}
-```
-
-## Acknowledgements
-
-We thank [DARPA](https://www.darpa.mil/research/programs/mathematics-for-the-discovery-of-algorithms-and-architectures) for supporting this work at MIT.
-
-
-
-
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/Project.toml b/docs/Project.toml
index 112dd8c..dfb4f00 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,3 +1,6 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
diff --git a/docs/citation.bib b/docs/citation.bib
new file mode 100644
index 0000000..e69de29
diff --git a/docs/make.jl b/docs/make.jl
index a5ea0b3..0aded8d 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,23 +1,78 @@
using SmartSolve
using Documenter
+using DocumenterCitations
+using Literate
-DocMeta.setdocmeta!(SmartSolve, :DocTestSetup, :(using SmartSolve); recursive=true)
-
-makedocs(;
- modules=[SmartSolve],
- authors="JuliaLabs",
- sitename="SmartSolve.jl",
- format=Documenter.HTML(;
- canonical="https://JuliaLabs.github.io/SmartSolve.jl",
- edit_link="main",
- assets=String[],
- ),
- pages=[
- "Home" => "index.md",
- ],
+DocMeta.setdocmeta!(
+ SmartSolve,
+ :DocTestSetup,
+ :(using SmartSolve);
+ recursive = true,
+)
+
+ENV["BASE_PATH"] = joinpath(@__DIR__, "../") # parent directory of docs/
+
+# Citations ####################################################################
+bib = CitationBibliography(joinpath(@__DIR__, "citation.bib")) # handed to makedocs through `plugins`
+
+
+# Generate examples ############################################################
+const examples_path = joinpath(@__DIR__, "..", "examples") # where the example scripts are read from
+const output_path = joinpath(@__DIR__, "src/generated") # where Literate writes the rendered markdown
+# Render every `title => relative/path.jl` example with Literate into `output_path`
+# and return the matching `title => "generated/relative/path.md"` page entries.
+function create_examples(examples, examples_path, output_path)
+    for (_, example_path) in examples
+        # dirname() keeps nested sub-directories intact (joining the split parts
+        # with string() glued "a/b" into "ab").
+        Literate.markdown(joinpath(examples_path, example_path),
+                          joinpath(output_path, dirname(example_path));
+                          documenter = true)
+    end
+    return [title => joinpath("generated", replace(example_path, r"\.jl$" => ".md"))
+            for (title, example_path) in examples]
+end
+
+# Basic examples: `title => path` pairs, with paths relative to `examples_path` (none enabled yet)
+examples = [
+# "Simple example" => "simple-example/simple-example.jl",
+]
+basic_examples = create_examples(examples, examples_path, output_path)
+
+# Make and deploy docs #########################################################
+
+makedocs(
+    root = joinpath(dirname(pathof(SmartSolve)), "..", "docs"),
+    source = "src",
+    build = "build",
+    clean = true,
+    doctest = true,
+    modules = [SmartSolve],
+    repo = "https://github.com/JuliaLabs/SmartSolve.jl/blob/{commit}{path}#{line}",
+    highlightsig = true,
+    sitename = "SmartSolve.jl",
+    expandfirst = [],
+    draft = false,
+    pages = ["Home" => "index.md",
+             "Install" => "install.md",
+             #"Examples" => basic_examples,
+             "Simple examples" => ["Generate linear solver" =>
+                                       "gen_lin_solver.md",
+                                   "Benchmark & generate linear solver" =>
+                                       "benchmark_and_gen_lin_solver.md"],
+             "Awards, Publications & Talks" => "publications.md",
+             "API" => "api.md"],
+    format = Documenter.HTML(;
+        prettyurls = get(ENV, "CI", "false") == "true", # directory-style URLs only on CI builds
+        canonical = "https://JuliaLabs.github.io/SmartSolve.jl", # address of the hosted docs, not of the repository
+        assets = String[],
+    ),
+    plugins=[bib] # DocumenterCitations bibliography
)
deploydocs(;
- repo="github.com/JuliaLabs/SmartSolve.jl",
- devbranch="main",
+    repo = "github.com/JuliaLabs/SmartSolve.jl", # host/path only: Documenter expects no protocol prefix here
+    devbranch = "main",
+    push_preview = true, # also deploy preview builds for pull requests
)
+
diff --git a/docs/src/api.md b/docs/src/api.md
new file mode 100644
index 0000000..8d0f813
--- /dev/null
+++ b/docs/src/api.md
@@ -0,0 +1,9 @@
+# API Reference
+
+This page provides a list of all documented types and functions in SmartSolve.jl.
+
+
+```@autodocs
+Modules = [SmartSolve]
+Order = [:type, :function, :constant]
+```
diff --git a/docs/src/assets/smartsolve-2.png b/docs/src/assets/smartsolve-2.png
new file mode 100644
index 0000000..fecc123
Binary files /dev/null and b/docs/src/assets/smartsolve-2.png differ
diff --git a/docs/src/assets/smartsolve.png b/docs/src/assets/smartsolve.png
new file mode 100644
index 0000000..21e5a40
Binary files /dev/null and b/docs/src/assets/smartsolve.png differ
diff --git a/docs/src/benchmark_and_gen_lin_solver.md b/docs/src/benchmark_and_gen_lin_solver.md
new file mode 100644
index 0000000..544def9
--- /dev/null
+++ b/docs/src/benchmark_and_gen_lin_solver.md
@@ -0,0 +1,39 @@
+# Benchmark and generate linear solver
+
+Automatically generate an optimized LU decomposition by selecting a specialized solver tailored to the input matrix structure.
+
+Define candidate algorithms
+```julia
+dgetrf(A::Matrix) = lu(A)
+dgetrf(A::SparseMatrixCSC) = lu(Matrix(A))
+dgetrf(A::SparseMatrixCSC{Bool, Int64}) = lu(Matrix(A))
+dgetrf(A::Symmetric) = lu(A.data)
+bandedlu(A::Matrix) = BandedMatrices.lu(BandedMatrix(sparse(A)))
+bandedlu(A::SparseMatrixCSC{Float64, Int64}) = BandedMatrices.lu(BandedMatrix(A))
+bandedlu(A::SparseMatrixCSC{Int64, Int64}) = BandedMatrices.lu(BandedMatrix(Float64.(A)))
+bandedlu(A::SparseMatrixCSC{Bool, Int64}) = BandedMatrices.lu(BandedMatrix(Float64.(A)))
+bandedlu(A::Symmetric) = BandedMatrices.lu(Float64.(BandedMatrix(sparse(A.data))))
+algs = [dgetrf, bandedlu]
+```
+
+Define your custom matrices to be included in training
+```julia
+n = 2^12;
+A = ...
+B = ...
+mats = [A, B]
+```
+
+Generate a smart version of the algorithm
+```julia
+alg_name = "lu"
+alg_path = "smartlu/"
+smartsolve(alg_path, alg_name, algs; n_experiments = 1,
+           mats = mats, ns = [2^8, 2^10, 2^12], features = [:isbandedpattern])
+```
+
+Include and run the newly generated algorithm
+```julia
+include("$alg_path/smart$alg_name.jl")
+smartlu(A)\b
+```
diff --git a/docs/src/gen_lin_solver.md b/docs/src/gen_lin_solver.md
new file mode 100644
index 0000000..7f32980
--- /dev/null
+++ b/docs/src/gen_lin_solver.md
@@ -0,0 +1,15 @@
+# Generate LU with iterative refinement
+```julia
+using SmartSolve
+
+prompt = """
+Generate a high performance Julia implementation of LU
+with iterative refinement using the following reference:
+https://nhigham.com/2023/03/13/what-is-iterative-refinement
+"""
+
+secret_key = ENV["OPENAI_API_KEY"]
+
+code, hist, conv = gen_linear_solver(prompt, secret_key)
+
+```
diff --git a/docs/src/index.md b/docs/src/index.md
index 62bcc99..1b6cbad 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,13 +2,28 @@
CurrentModule = SmartSolve
```
-# SmartSolve
+
-Documentation for [SmartSolve](https://github.com/JuliaLabs/SmartSolve.jl).
+SmartSolve.jl is a Julia-based toolbox for AI-guided algorithmic discovery, designed to accelerate computations by generating enhanced algorithmic and architectural selection strategies. Envisioned as a general-purpose tool for scientific computing, current efforts focus on challenges in computational linear algebra. The toolbox addresses the growing complexity of selecting efficient solvers, data formats, precision strategies, and hardware resources for structurally diverse matrices—where conventional approaches offer substantial opportunities for improvement. SmartSolve.jl constructs a performance database through systematic benchmarking and applies automated Pareto analysis to identify optimal trade-offs between accuracy and speed. This database underpins a data-driven model that synthesizes dispatch strategies tailored to high-performance linear algebra software.
-```@index
-```
+
+## How to Cite
-```@autodocs
-Modules = [SmartSolve]
+```bibtex
+@software{SmartSolve2025,
+ author = {Lujan, Emmanuel and Shah, Rushil N. and Alomairy, Rabab and Edelman, Alan},
+ title = {SmartSolve.jl: AI for Algorithmic Discovery},
+ month = jul,
+ year = 2025,
+ publisher = {Zenodo},
+ version = {0.1.0-alpha},
+ doi = {10.5281/zenodo.15784217},
+ url = {https://doi.org/10.5281/zenodo.15784217},
+}
```
+
+## Acknowledgements
+
+We thank [DARPA](https://www.darpa.mil/research/programs/mathematics-for-the-discovery-of-algorithms-and-architectures) for supporting this work at MIT.
+
+
diff --git a/docs/src/install.md b/docs/src/install.md
new file mode 100644
index 0000000..158e780
--- /dev/null
+++ b/docs/src/install.md
@@ -0,0 +1,5 @@
+# Install
+```julia
+~$ julia
+pkg> add https://github.com/JuliaLabs/SmartSolve.jl
+```
diff --git a/docs/src/publications.md b/docs/src/publications.md
new file mode 100644
index 0000000..af904ce
--- /dev/null
+++ b/docs/src/publications.md
@@ -0,0 +1,13 @@
+## Awards
+- Outstanding Short Paper Award. Rushil Shah, Emmanuel Lujan, Rabab Alomairy, and Alan Edelman. "Data-Driven Dynamic Algorithm Dispatch with Large Language Models," 2025 IEEE High Performance Extreme Computing Conference (HPEC).
+
+## Publications
+- Emmanuel Lujan and Alan Edelman. "When Structure is Silent: Opportunities for Algorithmic Dispatch in Linear Algebra," 2025 IEEE High Performance Extreme Computing Conference (HPEC).
+- Outstanding Short Paper Award. Rushil Shah, Emmanuel Lujan, Rabab Alomairy, and Alan Edelman. "Data-Driven Dynamic Algorithm Dispatch with Large Language Models," 2025 IEEE High Performance Extreme Computing Conference (HPEC).
+- Emmanuel Lujan, Rushil Shah, Rabab Alomairy, and Alan Edelman. "SmartSolve.jl: AI for Algorithmic Discovery," 0.1.0-alpha. Zenodo [(link)](https://doi.org/10.5281/zenodo.15784217).
+- Rabab Alomairy, Felipe Tome, Julian Samaroo, Alan Edelman. _"Dynamic Task Scheduling with Data Dependency Awareness Using Julia"_, 2024 IEEE High Performance Extreme Computing Conference (HPEC) [(link)](https://ieeexplore.ieee.org/document/10938467).
+
+## Talks
+- Rushil Shah, Emmanuel Lujan, and Rabab Alomairy. _"Automated Algorithm Selection Discovery via LLMs,"_ JuliaCon 2025, Lightning Talk. [(Link)](https://pretalx.com/juliacon-2025/talk/review/FXWAYZEZ9XEPYPHL3JJNAS7NBACU3GXE).
+- Alan Edelman et al. _"Julia, Portable Numerical Linear Algebra and Beyond."_ Presentation at Householder Symposium, 2025. [(Link)](https://householder-symposium.github.io/presenters.html). Accessed June 20, 2025.
+- Alan Edelman, _"Improving the HPC Experience: Did Julia Get It Right, or Will AI Hide the Problem (or Both)?"_ Keynote at the Workshop on Asynchronous Many-Task Systems and Applications (WAMTA), 2025. [(Link)](https://wamta25.github.io/keynote). Accessed June 20, 2025.
diff --git a/examples/agentic/example.jl b/examples/agentic/deprecated/example.jl
similarity index 100%
rename from examples/agentic/example.jl
rename to examples/agentic/deprecated/example.jl
diff --git a/examples/agentic/test_sorted.jl b/examples/agentic/deprecated/test_sorted.jl
similarity index 100%
rename from examples/agentic/test_sorted.jl
rename to examples/agentic/deprecated/test_sorted.jl
diff --git a/examples/agentic/generate-cpu-linear-solver/Project.toml b/examples/agentic/generate-cpu-linear-solver/Project.toml
new file mode 100644
index 0000000..1793f86
--- /dev/null
+++ b/examples/agentic/generate-cpu-linear-solver/Project.toml
@@ -0,0 +1,9 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[sources]
+SmartSolve = {path = "../../.."}
diff --git a/examples/agentic/generate-cpu-linear-solver/benchmark.jl b/examples/agentic/generate-cpu-linear-solver/benchmark.jl
new file mode 100644
index 0000000..db7d835
--- /dev/null
+++ b/examples/agentic/generate-cpu-linear-solver/benchmark.jl
@@ -0,0 +1,132 @@
+using LinearAlgebra
+using SparseArrays
+# CUDA is deliberately not loaded: nothing in this CPU benchmark uses it and this example's Project.toml does not declare it.
+using BenchmarkTools
+using OrderedCollections
+using Plots
+
+println("CPU benchmark with error-vs-time plot:\n")
+
+include("solver.jl") # defines proposed_fn, the generated solver under test
+
+# Configuration
+N = 15_000
+sparsity_levels = [0.1, 0.5, 0.9] # handed to sprand as the fraction of stored nonzeros
+
+umfpack_control = SparseArrays.UMFPACK.get_umfpack_control(Float64, Int64) # read Julia default configuration for a Float64 sparse matrix
+#SparseArrays.UMFPACK.show_umf_ctrl(umfpack_control) # optional - display values
+umfpack_control[SparseArrays.UMFPACK.JL_UMFPACK_IRSTEP] = 2.0 # reenable iterative refinement (2 is UMFPACK default max iterative refinement steps)
+# Solvers under comparison, keyed by the label used in the plot legend
+solvers = OrderedDict(
+    "Default" => (A, b) -> (A \ b),
+    "UMFPACK" => (A, b) -> lu(A; control = umfpack_control) \ b,
+    "Generated" => (A, b) -> proposed_fn(A, b)
+)
+
+# Per-sparsity, per-solver measurements collected for the plot below
+results = Dict()
+
+for sparsity in sparsity_levels
+    println("\n=== Sparsity: $sparsity ===")
+
+    # Random test system for this level
+    A = sprand(N, N, sparsity)
+    b = rand(N)
+
+    results[sparsity] = Dict()
+
+    for (solver_name, solver_fn) in solvers
+        println(" $solver_name...")
+
+        # Warm-up call on a fresh right-hand side; a failing solver is skipped
+        rhs_warmup = copy(b)
+        try
+            solver_fn(A, rhs_warmup)
+        catch e
+            println(" Warning: solver failed during warm-up: $e")
+            continue
+        end
+
+        # Timed runs
+        rhs_timed = copy(b)
+        try
+            trial = @benchmark begin
+                x = $(solver_fn)($A, $rhs_timed)
+            end seconds = 5 samples = 10
+
+            elapsed_s = median(trial.times) / 1e9 # Convert to s
+
+            # Relative residual of one extra, untimed solve
+            rhs_check = copy(b)
+            solution = solver_fn(A, rhs_check)
+            rel_residual = norm(A*solution - rhs_check) / norm(rhs_check)
+
+            results[sparsity][solver_name] = (time=elapsed_s, error=rel_residual)
+            println(" Time: $(round(elapsed_s, digits=3)) s, Error: $(round(rel_residual, sigdigits=3))")
+        catch e
+            println(" Error during benchmark: $e")
+        end
+    end
+end
+
+# Empty error-vs-time canvas (log-scale residual axis); the loops below add the points
+p = plot(
+ size=(800, 800),
+ #legend=:topright,
+ legend=:bottomleft,
+ xlabel="Time (s)",
+ ylabel="Relative residual: ||Ax - b||₂ / ||b||₂",
+ # xscale=:log10,
+ yscale=:log10,
+ guidefontsize=22,#18,
+ tickfontsize=20, #16,
+ legendfontsize=18, #14,
+ margin=5*Plots.mm,
+ framestyle=:box,
+ title="Random Matrices of Size $(N)x$(N),\n Varying Sparsity Levels (ρ) and\n CPU Solvers",
+ titlefontsize=22,
+)
+
+# Symbols encode sparsity levels; colors encode solvers.
+# Define marker for each sparsity and a color for each solver.
+## Sparsity shapes (keys must match the entries of sparsity_levels)
+marker_map_sparsity = OrderedDict(0.1=>:circle, 0.5=>:square, 0.9=>:utriangle)
+## Solver color shades (keys must match the keys of solvers)
+color_map_solver = OrderedDict("Default"=>:red, "UMFPACK"=>:blue, "Generated"=>:green)
+
+# Plot each point individually so marker shape shows sparsity and color shows solver.
+for solver_name in keys(solvers)
+ for sparsity in sparsity_levels
+ if sparsity in keys(results) && solver_name in keys(results[sparsity]) # skip runs that stored no result
+ t = results[sparsity][solver_name].time
+ e = results[sparsity][solver_name].error
+ scatter!(p, [t], [e];
+ label="",
+ marker=marker_map_sparsity[sparsity],
+ markersize=15,
+ color=color_map_solver[solver_name],
+ markerstrokecolor=:black,
+ markerstrokewidth=0.0,#0.8,
+ alpha=0.45)
+ end
+ end
+end
+
+# Create a combined legend: NaN-only series carry one label per solver/sparsity pair
+for solver_name in keys(solvers)
+ for s in sparsity_levels
+ lbl = "$(solver_name), ρ:$(s)"
+ scatter!(p, [NaN], [NaN]; label=lbl,
+ marker=marker_map_sparsity[s],
+ markersize=15,
+ color=color_map_solver[solver_name],
+ markerstrokecolor=:black,
+ markerstrokewidth=0.0,
+ alpha=0.45)
+ end
+end
+
+savefig(p, "error_vs_time.pdf")
+println("\n✓ Plot saved as error_vs_time.pdf")
+
+display(p)
\ No newline at end of file
diff --git a/examples/agentic/generate-cpu-linear-solver/generate.jl b/examples/agentic/generate-cpu-linear-solver/generate.jl
new file mode 100644
index 0000000..80d1ccf
--- /dev/null
+++ b/examples/agentic/generate-cpu-linear-solver/generate.jl
@@ -0,0 +1,17 @@
+using SmartSolve
+using LinearAlgebra
+using SparseArrays
+using BenchmarkTools
+
+prompt = """
+Generate a high-performance CPU implementation in Julia of a linear solver for sparse matrices
+based on LU with iterative refinement (at least 5 refinement iterations), using the following
+reference: https://nhigham.com/2023/03/13/what-is-iterative-refinement
+"""
+
+secret_key = ENV["OPENAI_API_KEY"] # throws a KeyError when the variable is not set
+solver, hist, conv = gen_linear_solver(prompt, secret_key, max_iters = 5)
+
+println("Generated Code:\n")
+println(solver)
+write(joinpath(@__DIR__, "solver.jl"), solver) # next to this script, where benchmark.jl's include("solver.jl") looks for it
\ No newline at end of file
diff --git a/examples/agentic/generate-cpu-linear-solver/readme b/examples/agentic/generate-cpu-linear-solver/readme
new file mode 100644
index 0000000..a319c73
--- /dev/null
+++ b/examples/agentic/generate-cpu-linear-solver/readme
@@ -0,0 +1 @@
+This example generates a high-performance CPU implementation in Julia for solving sparse linear systems using an LU-based method with iterative refinement.
diff --git a/examples/agentic/generate-cpu-linear-solver/solver.jl b/examples/agentic/generate-cpu-linear-solver/solver.jl
new file mode 100644
index 0000000..f9b8a54
--- /dev/null
+++ b/examples/agentic/generate-cpu-linear-solver/solver.jl
@@ -0,0 +1,56 @@
+"""
+    proposed_fn(A::SparseMatrixCSC, b::AbstractVector; niters = 4) -> Vector{Float64}
+
+Solve the square system `A * x = b` by mixed-precision iterative refinement.
+
+`A` is converted to dense storage and LU-factorized once in `Float32`. Each
+refinement step forms the residual `b - A*x` in `Float64`, solves for a correction
+with the `Float32` factors, and adds it to the `Float64` iterate.
+
+# Arguments
+- `A`: square sparse matrix whose entries are convertible to `Float64`.
+- `b`: right-hand side with `length(b) == size(A, 1)`; it is not modified.
+- `niters`: number of refinement steps after the initial solve (default `4`).
+
+# Returns
+A newly allocated `Vector{Float64}` with the refined solution.
+
+# Throws
+- `AssertionError` if `A` is not square or `b` has the wrong length.
+- Any error raised by `lu` when the matrix cannot be factorized.
+"""
+function proposed_fn(A::SparseMatrixCSC, b::AbstractVector; niters::Integer = 4)
+    @assert size(A,1) == size(A,2) "A must be square"
+    n = length(b)
+    @assert size(A,2) == n "Dimensions of A and b must agree"
+
+    # Dense copies: Float64 for an accurate residual, Float32 for the fast
+    # LAPACK factorization and triangular solves.
+    Ad64 = Matrix{Float64}(A)
+    Ad32 = Matrix{Float32}(Ad64)
+
+    # Promote the right-hand side explicitly so that an integer or Float32 `b`
+    # cannot narrow the element type of the residual buffer below.
+    b64 = Vector{Float64}(b)
+
+    F32 = lu(Ad32)
+
+    # Initial solve in single precision; ldiv! overwrites its second argument.
+    x32 = Vector{Float32}(b64)
+    LinearAlgebra.ldiv!(F32, x32)
+    x = Vector{Float64}(x32)
+
+    # Work buffers reused by every refinement step.
+    r = Vector{Float64}(undef, n)
+    r32 = Vector{Float32}(undef, n)
+
+    for _ in 1:niters
+        mul!(r, Ad64, x)                # r = A * x
+        r .= b64 .- r                   # r = b - A * x, in Float64
+        r32 .= r                        # round the residual to Float32
+        LinearAlgebra.ldiv!(F32, r32)   # r32 <- correction from the Float32 factors
+        x .+= r32                       # accumulate in Float64
+    end
+
+    return x
+end
\ No newline at end of file
diff --git a/examples/agentic/generate-cuda-linear-solver/Project.toml b/examples/agentic/generate-cuda-linear-solver/Project.toml
new file mode 100644
index 0000000..9cab641
--- /dev/null
+++ b/examples/agentic/generate-cuda-linear-solver/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[sources]
+SmartSolve = {path = "../../.."}
diff --git a/examples/agentic/generate-cuda-linear-solver/benchmark.jl b/examples/agentic/generate-cuda-linear-solver/benchmark.jl
new file mode 100644
index 0000000..9ed3a2d
--- /dev/null
+++ b/examples/agentic/generate-cuda-linear-solver/benchmark.jl
@@ -0,0 +1,136 @@
+using LinearAlgebra
+using SparseArrays
+using CUDA
+using BenchmarkTools
+using OrderedCollections
+using Plots
+
+println("GPU benchmark with error-vs-time plot:\n")
+
+include("solver.jl")
+
+# Configuration
+N = 15_000
+sparsity_levels = [0.1, 0.5, 0.9] # handed to sprand as the fraction of stored nonzeros
+solvers = OrderedDict(
+    "Default" => (Ad, bd) -> Ad \ bd,
+    "gesv!" => function (Ad, bd)
+        sol = CuArray(zeros(size(Ad, 1))) # zero-initialised vector passed as gesv!'s first argument
+        CUDA.CUSOLVER.gesv!(sol, Ad, bd)
+        return sol
+    end,
+    "Generated" => (Ad, bd) -> proposed_fn(Ad, bd)
+)
+
+# Per-sparsity, per-solver measurements collected for the plot below
+results = Dict()
+
+for sparsity in sparsity_levels
+    println("\n=== Sparsity: $sparsity ===")
+
+    # Random test system; the matrix is densified before it is copied to the GPU
+    A = sprand(N, N, sparsity)
+    b = rand(N)
+    Ad = CuArray(Matrix(A))
+    bd = CuArray(b)
+
+    results[sparsity] = Dict()
+
+    for (solver_name, solver_fn) in solvers
+        println(" $solver_name...")
+
+        # Warm-up call on a fresh right-hand side; a failing solver is skipped
+        rhs_warmup = CuArray(copy(b))
+        try
+            solver_fn(Ad, rhs_warmup)
+            CUDA.synchronize()
+        catch e
+            println(" Warning: solver failed during warm-up: $e")
+            continue
+        end
+
+        # Timed runs (every sample ends with a device synchronisation)
+        rhs_timed = CuArray(copy(b))
+        try
+            trial = @benchmark begin
+                x = $(solver_fn)($Ad, $rhs_timed)
+                CUDA.synchronize()
+            end seconds = 5 samples = 10
+
+            elapsed_s = median(trial.times) / 1e9 # Convert to s
+
+            # Relative residual of one extra, untimed solve
+            rhs_check = CuArray(copy(b))
+            solution = solver_fn(Ad, rhs_check)
+            CUDA.synchronize()
+            rel_residual = norm(Ad*solution - rhs_check) / norm(rhs_check)
+
+            results[sparsity][solver_name] = (time=elapsed_s, error=rel_residual)
+            println(" Time: $(round(elapsed_s, digits=3)) s, Error: $(round(rel_residual, sigdigits=3))")
+        catch e
+            println(" Error during benchmark: $e")
+        end
+    end
+end
+
+# Empty error-vs-time canvas (log-scale residual axis); the loops below add the points
+p = plot(
+ size=(800, 800),
+ #legend=:topright,
+ legend=:bottomright,
+ xlabel="Time (s)",
+ ylabel="Relative residual: ||Ax - b||₂ / ||b||₂",
+ # xscale=:log10,
+ yscale=:log10,
+ guidefontsize=22,#18,
+ tickfontsize=20, #16,
+ legendfontsize=18, #14,
+ margin=5*Plots.mm,
+ framestyle=:box,
+ title="Random Matrices of Size $(N)x$(N),\n Varying Sparsity Levels (ρ) and\n GPU Solvers",
+ titlefontsize=22,
+)
+
+# Symbols encode sparsity levels; colors encode solvers.
+# Define marker for each sparsity and a color for each solver.
+## Sparsity shapes (keys must match the entries of sparsity_levels)
+marker_map_sparsity = OrderedDict(0.1=>:circle, 0.5=>:square, 0.9=>:utriangle)
+## Solver color shades (keys must match the keys of solvers)
+color_map_solver = OrderedDict("Default"=>:red, "gesv!"=>:blue, "Generated"=>:green)
+
+# Plot each point individually so marker shape shows sparsity and color shows solver.
+for solver_name in keys(solvers)
+ for sparsity in sparsity_levels
+ if sparsity in keys(results) && solver_name in keys(results[sparsity]) # skip runs that stored no result
+ t = results[sparsity][solver_name].time
+ e = results[sparsity][solver_name].error
+ scatter!(p, [t], [e];
+ label="",
+ marker=marker_map_sparsity[sparsity],
+ markersize=15,
+ color=color_map_solver[solver_name],
+ markerstrokecolor=:black,
+ markerstrokewidth=0.0,#0.8,
+ alpha=0.45)
+ end
+ end
+end
+
+# Create a combined legend: NaN-only series carry one label per solver/sparsity pair
+for solver_name in keys(solvers)
+ for s in sparsity_levels
+ lbl = "$(solver_name), ρ:$(s)"
+ scatter!(p, [NaN], [NaN]; label=lbl,
+ marker=marker_map_sparsity[s],
+ markersize=15,
+ color=color_map_solver[solver_name],
+ markerstrokecolor=:black,
+ markerstrokewidth=0.0,
+ alpha=0.45)
+ end
+end
+
+savefig(p, "error_vs_time.pdf")
+println("\n✓ Plot saved as error_vs_time.pdf")
+
+display(p)
\ No newline at end of file
diff --git a/examples/agentic/generate-cuda-linear-solver/error_vs_time.pdf b/examples/agentic/generate-cuda-linear-solver/error_vs_time.pdf
new file mode 100644
index 0000000..170ca06
Binary files /dev/null and b/examples/agentic/generate-cuda-linear-solver/error_vs_time.pdf differ
diff --git a/examples/agentic/generate-cuda-linear-solver/generate.jl b/examples/agentic/generate-cuda-linear-solver/generate.jl
new file mode 100644
index 0000000..179005a
--- /dev/null
+++ b/examples/agentic/generate-cuda-linear-solver/generate.jl
@@ -0,0 +1,18 @@
+using SmartSolve
+using LinearAlgebra
+using SparseArrays
+using CUDA
+using BenchmarkTools
+
+prompt = """
+Generate a high-performance CUDA implementation in Julia of a linear solver for sparse matrices
+based on LU with iterative refinement (at least 5 refinement iterations), using the following
+reference: https://nhigham.com/2023/03/13/what-is-iterative-refinement
+"""
+
+secret_key = ENV["OPENAI_API_KEY"] # throws a KeyError when the variable is not set
+solver, hist, conv = gen_linear_solver_cuda(prompt, secret_key, max_iters = 5)
+
+println("Generated Code:\n")
+println(solver)
+write(joinpath(@__DIR__, "solver.jl"), solver) # next to this script, where benchmark.jl's include("solver.jl") looks for it
\ No newline at end of file
diff --git a/examples/agentic/generate-cuda-linear-solver/readme b/examples/agentic/generate-cuda-linear-solver/readme
new file mode 100644
index 0000000..9db40d0
--- /dev/null
+++ b/examples/agentic/generate-cuda-linear-solver/readme
@@ -0,0 +1 @@
+This example generates a high-performance CUDA implementation in Julia for solving sparse linear systems using an LU-based method with iterative refinement.
diff --git a/examples/agentic/generate-cuda-linear-solver/solver.jl b/examples/agentic/generate-cuda-linear-solver/solver.jl
new file mode 100644
index 0000000..d480453
--- /dev/null
+++ b/examples/agentic/generate-cuda-linear-solver/solver.jl
@@ -0,0 +1,101 @@
+"""
+    proposed_fn(A_d, b_d)
+
+Solve the linear system `A_d * x = b_d` on the GPU using an LU factorization
+followed by a fixed number of iterative-refinement steps.
+
+Inputs that are not already `CuArray`s are densified and copied to the device with
+their element type preserved. The strategy is chosen from `eltype` of the matrix:
+
+- `Float64`: LU of a `Float32` copy, residuals formed with the original matrix, 5 steps.
+- `Float32`: LU of the matrix itself, 5 steps.
+- any other type: both inputs are converted to `Float64` first, 10 steps.
+
+Returns the refined solution as a GPU array. Throws an `AssertionError` when CUDA
+is not available or when `length(b_d)` differs from the number of columns of `A_d`.
+"""
+function proposed_fn(A_d, b_d)
+    @assert CUDA.has_cuda() "CUDA not available"
+
+    # Ensure A and b are dense GPU arrays. `CuArray(...)` keeps the element type;
+    # `cu(...)` would demote Float64 data to Float32 and silently send a
+    # double-precision problem down the all-single-precision branch below.
+    A_gpu = isa(A_d, CuArray) && ndims(A_d) == 2 ? A_d : CuArray(Matrix(A_d))
+    b_gpu = isa(b_d, CuArray) ? b_d : CuArray(Vector(b_d))
+
+    n = size(A_gpu, 2)
+    @assert length(b_gpu) == n
+
+    T = eltype(A_gpu)
+
+    if T === Float64
+        # Mixed precision: factorize in Float32 for speed, form residuals with the original matrix.
+        As = Float32.(A_gpu)   # single-precision matrix on GPU
+        bs = Float32.(b_gpu)   # single-precision rhs on GPU
+
+        F = lu(As)             # single-precision LU (GPU)
+        CUDA.synchronize()
+
+        # initial solution in single precision, then promote
+        x_s = F \ bs
+        x = Float64.(x_s)
+
+        # preallocate temporaries on GPU
+        tmp = similar(b_gpu)   # holds A * x
+        r = similar(b_gpu)     # residual b - A * x
+        r_s = similar(bs)      # Float32 copy of the residual
+
+        for i in 1:5
+            mul!(tmp, A_gpu, x)        # tmp = A * x (in-place)
+            @. r = b_gpu - tmp         # residual
+            @. r_s = Float32(r)        # convert residual to Float32
+            delta_s = F \ r_s          # correction from the single-precision factors
+            @. x += Float64(delta_s)   # promote and update solution
+        end
+
+        CUDA.synchronize()
+        return x
+
+    elseif T === Float32
+        # Pure single-precision factorization and refinement
+        F = lu(A_gpu)
+        CUDA.synchronize()
+
+        x = F \ b_gpu
+
+        tmp = similar(b_gpu)
+        r = similar(b_gpu)
+
+        for i in 1:5
+            mul!(tmp, A_gpu, x)
+            @. r = b_gpu - tmp
+            delta = F \ r
+            @. x += delta
+        end
+
+        CUDA.synchronize()
+        return x
+
+    else
+        # Fallback: operate in Float64 on GPU
+        Ad = Float64.(A_gpu)
+        bd = Float64.(b_gpu)
+
+        F = lu(Ad)
+        CUDA.synchronize()
+
+        x = F \ bd
+        tmp = similar(bd)
+        r = similar(bd)
+
+        for i in 1:10
+            mul!(tmp, Ad, x)
+            @. r = bd - tmp
+            delta = F \ r
+            @. x += delta
+        end
+
+        CUDA.synchronize()
+        return x
+    end
+end
\ No newline at end of file
diff --git a/examples/agentic/linear_solver_example_1.jl b/examples/agentic/linear_solver_example_1.jl
deleted file mode 100644
index fb93c55..0000000
--- a/examples/agentic/linear_solver_example_1.jl
+++ /dev/null
@@ -1,9 +0,0 @@
-using SmartSolve
-
-prompt = "https://nhigham.com/2023/03/13/what-is-iterative-refinement/ \n" *
- "Using this blog, give me a high performance Julia implementation of LU + iterative refinement."
-
-secret_key = ENV["OPENAI_API_KEY"]
-# checker_filename = "test_performance.jl"
-
-code, hist, timedout = generate_linear_solver_code(prompt, secret_key)
\ No newline at end of file
diff --git a/smartsolve.png b/smartsolve.png
index fecc123..21e5a40 100644
Binary files a/smartsolve.png and b/smartsolve.png differ
diff --git a/src/Agentic.jl b/src/Agentic.jl
index 20b85a1..a3a9842 100644
--- a/src/Agentic.jl
+++ b/src/Agentic.jl
@@ -83,10 +83,16 @@ end
src_dir = @__DIR__
-function generate_linear_solver_code(prompt, secret_key, checker_filename = src_dir * "/test_performance.jl", model = "gpt-5-mini"; max_iters = 3)
+function gen_linear_solver(prompt, secret_key, checker_filename = src_dir * "/test_performance.jl", model = "gpt-5-mini"; max_iters = 10) # Thin wrapper: forwards every argument to generate_default_code together with ls_dev_prompt_maker; the checker defaults to test_performance.jl in this file's directory (src_dir).
return generate_default_code(prompt, secret_key, checker_filename, model, ls_dev_prompt_maker; max_iters = max_iters)
end
-function generate_linear_solver_cuda_code(prompt, secret_key,checker_filename = src_dir *"/test_performance_cuda.jl", model = "gpt-5-mini"; max_iters = 3)
+function gen_linear_solver_cuda(prompt, secret_key,checker_filename = src_dir *"/test_performance_cuda.jl", model = "gpt-5-mini"; max_iters = 10) # Thin wrapper: forwards every argument to generate_default_code together with ls_cuda_dev_prompt_maker; the checker defaults to test_performance_cuda.jl in this file's directory (src_dir).
return generate_default_code(prompt, secret_key, checker_filename, model, ls_cuda_dev_prompt_maker; max_iters = max_iters)
+end
+
+function printhist(hist) # Print every entry of hist to stdout, numbered from 1.
+ for (i, (role, message)) in enumerate(hist) # NOTE(review): each entry is destructured into its first two elements, presumably role/content pairs; this relies on their iteration order, so confirm against how hist is built
+ println("Message $i $(role[2]):\n$(message[2])\n") # shows element [2] of each of the two destructured items
+ end
end
\ No newline at end of file
diff --git a/src/SmartSolve.jl b/src/SmartSolve.jl
index 7432fa9..9d3023e 100644
--- a/src/SmartSolve.jl
+++ b/src/SmartSolve.jl
@@ -22,6 +22,6 @@ include("Agentic.jl")
include("test_performance.jl")
# include("test_performance_cuda.jl")
-export generate_default_code, generate_linear_solver_code
+export generate_default_code, gen_linear_solver, gen_linear_solver_cuda, printhist
end # module SmartSolve
diff --git a/src/test_performance.jl b/src/test_performance.jl
index 99e2978..85cb8b9 100644
--- a/src/test_performance.jl
+++ b/src/test_performance.jl
@@ -1,35 +1,50 @@
-# test_matrix_names = ["Bai/af23560", "Engwirda/airfoil_2d", "vanHeukelum/cage10"]
-# test_matrices = matrixdepot.(test_matrix_names)
test_matrices = []
-push!(test_matrices, sprand(10000, 10000, 0.1))
-push!(test_matrices, sprand(10000, 10000, 0.1))
-push!(test_matrices, sprand(10000, 10000, 0.1))
+N = 5000 # dimension of every square test matrix
+push!(test_matrices, sprand(N, N, 0.1)) # random sparse N x N matrix, density argument 0.1
+push!(test_matrices, sprand(N, N, 0.2)) # same size, density argument 0.2
+push!(test_matrices, sprand(N, N, 0.3)) # same size, density argument 0.3
-base_prompt(rel_errs, speedups, alloc_ratios) = "Here are the errors compared to built-in linear solver:
-$(rel_errs)
-and here are the speed-up ratio compared to built-in solver:
-$(speedups).
-the ratio of allocations (base_alloc/proposed_alloc) is:
-$(alloc_ratios)."
+function get_report(m_err, m_runtime, m_alloc, # Build the plain-text summary of the three median ratios, each followed by its desired minimum.
+ err_threshold, runtime_threshold, alloc_threshold) # thresholds are only interpolated into the text; no comparison happens here
+ report = """
+ Median error ratio (error_default / error_gen): $(m_err)
+ Desired median error ratio: >= $err_threshold
+ Median Runtime ratio or speedup (runtime_default / runtime_gen): $(m_runtime)
+ Desired median runtime ratio: >= $runtime_threshold
+ Allocation median ratio (alloc_default / alloc_gen): $(m_alloc)
+ Desired median allocation ratio: >= $alloc_threshold
+ """
+ return report # the multi-line string assembled above
+end
-function evaluator(proposed_fn, tol = 1e-6)
- rel_errors = Float64[]
- speedups = Float64[]
+function evaluator(proposed_fn, err_threshold=1.0, # Benchmark proposed_fn(A, b) against the built-in A \ b on every matrix in test_matrices.
+ runtime_threshold=1.1, # Every ratio is default / generated, so a larger value favours the generated solver.
+ alloc_threshold=0.0) # Returns a tuple (passed, report): the pass/fail flag and the text from get_report.
+ error_ratios = Float64[]
+ runtime_ratios = Float64[]
alloc_ratios = Float64[]
for A in test_matrices
b = randn(size(A,2))
- x_exact = A \ b
- x_alg = Base.invokelatest(proposed_fn, A, b)
+ x_default = A \ b
+ x_gen = Base.invokelatest(proposed_fn, A, b)
- push!(rel_errors, norm(x_alg - x_exact)/norm(x_exact))
- base_runtime = @btimed $A \ $b
- alg_runtime = @btimed $Base.invokelatest($proposed_fn, $A, $b)
- push!(speedups, base_runtime.time/alg_runtime.time)
-
- base_alloc = @ballocated $A \ $b
- alg_alloc = @ballocated $Base.invokelatest($proposed_fn, $A, $b)
- push!(alloc_ratios, base_alloc/ alg_alloc)
- # println("done")
+ err_default = norm(A * x_default - b) # residual norm of the built-in solution
+ err_gen = norm(A * x_gen - b) # residual norm of the generated solution
+ push!(error_ratios, err_default/err_gen)
+
+ b_default = @benchmark $A \ $b
+ b_gen = @benchmark $Base.invokelatest($proposed_fn, $A, $b)
+ push!(runtime_ratios, median(b_default.times)/median(b_gen.times))
+ push!(alloc_ratios, median(b_default.allocs)/median(b_gen.allocs)) # NOTE(review): Trial.allocs looks like an allocation count rather than bytes; confirm this is the intended metric
end
- return mean(rel_errors) < tol && mean(speedups) > 1.1, base_prompt(rel_errors, speedups, alloc_ratios)
-end
\ No newline at end of file
+ m_err = median(error_ratios)
+ m_runtime = median(runtime_ratios)
+ m_alloc = median(alloc_ratios)
+ report = get_report(m_err, m_runtime, m_alloc, err_threshold,
+ runtime_threshold, alloc_threshold)
+ println(report)
+ return m_err >= err_threshold && # 1.0 means no worse error
+ m_runtime >= runtime_threshold && # 1.1 means at least 10% faster
+ m_alloc >= alloc_threshold, # 0.0 means no allocation requirement (compare against the threshold, not against itself)
+ report
+end
\ No newline at end of file
diff --git a/src/test_performance_cuda.jl b/src/test_performance_cuda.jl
index 5ceb7e0..44921f3 100644
--- a/src/test_performance_cuda.jl
+++ b/src/test_performance_cuda.jl
@@ -1,64 +1,76 @@
-function benchmark_ms( myfunc, args...;kwargs...)
- elapsed=0.0
- best=100000
- i=0
- numruns = 20
- while(elapsed<200.0 || i<2)
- CUDA.synchronize()
- start = time_ns()
- for i=1:numruns
- myfunc(args...;kwargs...)
- CUDA.synchronize()
- end
- endtime = time_ns()
- thisduration=(endtime-start)/1e6
- elapsed += thisduration
- best = min(thisduration/numruns,best)
- i+=1
- end
- return best
+test_matrices = [] # CPU-side inputs; evaluator_cuda copies each one to the GPU as a dense matrix
+N = 10_000 # dimension of every square test matrix
+push!(test_matrices, sprand(N, N, 0.1)) # random sparse N x N matrix, density argument 0.1
+push!(test_matrices, sprand(N, N, 0.2)) # same size, density argument 0.2
+push!(test_matrices, sprand(N, N, 0.3)) # same size, density argument 0.3
+
+function get_report(m_err, m_runtime, m_alloc, # Build the plain-text summary of the three median ratios, each followed by its desired minimum.
+ err_threshold, runtime_threshold, alloc_threshold) # thresholds are only interpolated into the text; no comparison happens here
+ report = """
+ Median error ratio (error_default / error_gen): $(m_err)
+ Desired median error ratio: >= $err_threshold
+ Median runtime ratio or speedup (runtime_default / runtime_gen): $(m_runtime)
+ Desired median runtime ratio: >= $runtime_threshold
+ Allocation median ratio (alloc_default / alloc_gen): $(m_alloc)
+ Desired median allocation ratio: >= $alloc_threshold
+ """
+ return report # the multi-line string assembled above
end
-# test_matrix_names = ["Bai/af23560", "Engwirda/airfoil_2d", "vanHeukelum/cage10"]
-# test_matrices = matrixdepot.(test_matrix_names)
-test_matrices = []
-push!(test_matrices, sprand(20000, 20000, 0.1))
-push!(test_matrices, sprand(20000, 20000, 0.1))
-push!(test_matrices, sprand(20000, 20000, 0.1))
+function evaluator_cuda(proposed_fn; # Compare proposed_fn(A_d, b_d) with the built-in A_d \ b_d on GPU copies of each matrix in test_matrices; returns (ok, report).
+ err_threshold::Float64 = 1.0, # minimum accepted median of err_default / err_gen
+ runtime_threshold::Float64 = 1.1, # minimum accepted median of runtime_default / runtime_gen
+ alloc_threshold::Float64 = 0.0) # minimum accepted median of alloc_default / alloc_gen
-cuda_test_matrices = CuArray.(test_matrices)
+ error_ratios = Float64[]
+ runtime_ratios = Float64[]
+ alloc_ratios = Float64[]
+ for A_cpu in test_matrices
+ # right-hand side on CPU
+ b_cpu = randn(size(A_cpu, 2))
-base_prompt(rel_errs, speedups) = "Here are the errors compared to built-in linear solver:
-$(rel_errs)
-and here are the speed-up ratio compared to built-in solver:
-$(speedups)."
-function evaluator(proposed_fn, tol = 1e-6)
- rel_errors = Float64[]
- speedups = Float64[]
- for A in cuda_test_matrices
- b = CUDA.randn(Float64, size(A,2))
-
- x_exact = A \ b
- alg_solver(A, b) = Base.invokelatest(proposed_fn, A, b)
+ # move to GPU; here we use a dense GPU matrix
+ # If you have a sparse GPU solver, you can switch to CuSparseMatrixCSR(A_cpu)
+ A_d = cu(Matrix(A_cpu)) # NOTE(review): CUDA.jl's cu() converts Float64 data to Float32 by default; confirm single precision is intended here
+ b_d = cu(b_cpu)
- x_alg = alg_solver(A, b)
-
+ # --- Solve once to ensure kernels are compiled (warm-up) ---
+ x_default = A_d \ b_d
+ x_gen = Base.invokelatest(proposed_fn, A_d, b_d)
+ CUDA.synchronize()
- push!(rel_errors, norm(x_alg - x_exact)/norm(x_exact))
+ # --- Error ratios (all on GPU, scalars on CPU) ---
+ err_default = norm(A_d * x_default - b_d)
+ err_gen = norm(A_d * x_gen - b_d)
+ push!(error_ratios, err_default / err_gen)
- benchmark_ms(\, A, b)
- base_runtime = benchmark_ms(\, A, b)
+ # --- Runtime ratios (GPU) ---
+ b_default = @benchmark begin
+ x = $A_d \ $b_d
+ CUDA.synchronize() # synchronize inside the timed body
+ end
+ b_gen = @benchmark begin
+ x = Base.invokelatest($proposed_fn, $A_d, $b_d)
+ CUDA.synchronize() # synchronize inside the timed body
+ end
- benchmark_ms(alg_solver, A, b)
+ push!(runtime_ratios, median(b_default.times) / median(b_gen.times))
+ push!(alloc_ratios, median(b_default.allocs) / median(b_gen.allocs)) # NOTE(review): Trial.allocs looks like a host-side allocation count rather than bytes; confirm this is the intended metric
+ end
+ m_err = median(error_ratios)
+ m_runtime = median(runtime_ratios)
+ m_alloc = median(alloc_ratios)
- alg_runtime = benchmark_ms(alg_solver, A, b)
- push!(speedups, base_runtime/alg_runtime)
+ report = get_report(m_err, m_runtime, m_alloc,
+ err_threshold, runtime_threshold, alloc_threshold)
+ println(report)
+ ok = (m_err >= err_threshold) && # 1.0 => no worse error
+ (m_runtime >= runtime_threshold) && # 1.1 => at least 10% faster
+ (m_alloc >= alloc_threshold) # 0.0 => no alloc requirement
- # println("done")
- end
- return mean(rel_errors) < tol && mean(speedups) > 1.1, base_prompt(rel_errors, speedups)
-end
\ No newline at end of file
+ return ok, report # NOTE(review): this file previously defined `evaluator`; confirm the code that loads this checker now looks up `evaluator_cuda`
+end