Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
e307e09
Top-down type inference support
abadams Nov 11, 2025
8463f42
Inductive functions rebase
stevenraphael Nov 21, 2025
795d580
fixes
stevenraphael Nov 21, 2025
99a1f08
rebase additional fixes
stevenraphael Nov 22, 2025
5124b2e
size_t fix
stevenraphael Nov 22, 2025
8b2e239
get default types back to normal; add new test
stevenraphael Nov 23, 2025
2ca58c1
clang-format
stevenraphael Nov 23, 2025
2bdd6ff
add test and documentation
stevenraphael Nov 23, 2025
657b90b
add user error and additional support for function declarations
stevenraphael Nov 24, 2025
ddd2bb7
clang-format
stevenraphael Nov 24, 2025
18aadb8
clang-tidy and additional safety check. All tests pass
stevenraphael Dec 4, 2025
7477158
patched makefile
stevenraphael Dec 5, 2025
e6c7738
Serialization fix
stevenraphael Dec 5, 2025
a77274d
more dimtype fixes
stevenraphael Dec 5, 2025
7fbd8f3
fix memory leaks
stevenraphael Dec 11, 2025
5701a78
remove commented-out code
stevenraphael Dec 13, 2025
a3bcb7a
clang-format
stevenraphael Dec 13, 2025
1ba55ff
Fix explicit storage folding check for inductive functions
stevenraphael Jan 12, 2026
6f91c5a
Fix function call
stevenraphael Jan 12, 2026
6b0859f
Apply pre-commit auto-fixes
halide-ci[bot] May 28, 2026
af31a21
rebase fixes
stevenraphael May 28, 2026
b6867b1
codespell
stevenraphael May 28, 2026
0573a07
additional bugfixes and tests
stevenraphael May 29, 2026
8cea75d
Starting to add new benchmark
stevenraphael Jun 13, 2026
c75c9f7
add two apps: StereoBM and IIR cascade. Made modifications to Sliding…
stevenraphael Jun 18, 2026
821b771
more robust inductive functions
stevenraphael Jun 24, 2026
0781a7d
remove test
stevenraphael Jun 24, 2026
16648c3
fix another bug
stevenraphael Jun 24, 2026
a0021bf
Fix issues for tuples and add test; fix opencv dependency (latter par…
stevenraphael Jun 24, 2026
592983b
fix makefile for sterebm again, co-authored with Claude Opus 4.7 <nor…
stevenraphael Jun 24, 2026
e19a6bc
add opencv dependency and fix makefile for sterebm again, co-authored…
stevenraphael Jun 24, 2026
b2cd1e3
fix makefile for stereobm again, co-authored with Claude Opus 4.7 <no…
stevenraphael Jun 25, 2026
824ff46
fix macos stereobm dependency, co-authored with Claude Opus 4.7 <nore…
stevenraphael Jun 25, 2026
4b31d31
clang tidy
stevenraphael Jun 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/testing-make.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,14 @@ jobs:
pkg-config \
libpng-dev \
libjpeg-turbo8-dev \
libopencv-dev \
"llvm-${LLVM_VERSION}-dev" \
"clang-${LLVM_VERSION}" \
"lld-${LLVM_VERSION}" \
"liblld-${LLVM_VERSION}-dev"
echo "LLVM_CONFIG=llvm-config-${LLVM_VERSION}" | tee -a "$GITHUB_ENV"
elif [ "$RUNNER_OS" = "macOS" ]; then
brew install libjpeg-turbo libpng pkgconf protobuf "llvm@${LLVM_VERSION}" "lld@${LLVM_VERSION}"
brew install libjpeg-turbo libpng pkgconf protobuf opencv "llvm@${LLVM_VERSION}" "lld@${LLVM_VERSION}"
echo "LLVM_CONFIG=$(brew --prefix "llvm@${LLVM_VERSION}")/bin/llvm-config" | tee -a "$GITHUB_ENV"
fi

Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ SOURCE_FILES = \
HexagonOffload.cpp \
HexagonOptimize.cpp \
ImageParam.cpp \
Inductive.cpp \
InferArguments.cpp \
InjectHostDevBufferCopies.cpp \
Inline.cpp \
Expand Down Expand Up @@ -724,6 +725,7 @@ HEADER_FILES = \
HexagonOffload.h \
HexagonOptimize.h \
ImageParam.h \
Inductive.h \
InferArguments.h \
InjectHostDevBufferCopies.h \
Inline.h \
Expand Down Expand Up @@ -2210,6 +2212,7 @@ TEST_APPS=\
resize \
resnet_50 \
stencil_chain \
stereobm \
wavelet

TEST_APPS_DEPS=$(TEST_APPS:%=%_test_app)
Expand Down
2 changes: 2 additions & 0 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ add_app(hexagon_benchmarks)
# add_app(hexagon_dma) # TODO(#5374): missing CMake build
add_app(hist)
add_app(iir_blur)
add_app(iir_cascade)
add_app(interpolate)
add_app(lens_blur)
add_app(linear_algebra)
Expand All @@ -67,6 +68,7 @@ add_app(resize)
# add_app(resnet_50) # TODO(#5374): missing CMake build
# add_app(simd_op_check) # TODO(#5374): missing CMake build
add_app(stencil_chain)
add_app(stereobm)
add_app(unsharp)
add_app(wavelet)
# keep-sorted end
48 changes: 48 additions & 0 deletions apps/iir_cascade/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Standalone build script for the iir_cascade app. It builds one generator,
# compiles it into two libraries that differ only in the `inductive`
# generator parameter, links both into a single test executable, and
# registers that executable as a CTest test.
cmake_minimum_required(VERSION 3.28)
project(iir_cascade)

enable_testing()

# Set up language settings: require C++17 with compiler extensions disabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Find Halide (REQUIRED: configuration fails if the package is not found).
find_package(Halide REQUIRED)

# Generator executable, built from iir_cascade_generator.cpp.
add_halide_generator(
iir_cascade.generator
SOURCES iir_cascade_generator.cpp
LINK_LIBRARIES Halide::Tools
)

# Filters: the same generator is compiled twice, once with inductive=true and
# once with inductive=false. The second library has a different name from the
# generator, so it names the generator explicitly with GENERATOR.
# NOTE(review): IIR_CASCADE_FEATURES is not set anywhere in this file; it
# presumably expands to nothing unless the caller defines it -- confirm.
add_halide_library(
iir_cascade
FROM iir_cascade.generator
${IIR_CASCADE_FEATURES}
PARAMS inductive=true
)
add_halide_library(
iir_cascade_noninductive
FROM iir_cascade.generator
GENERATOR iir_cascade
${IIR_CASCADE_FEATURES}
PARAMS inductive=false
)

# Main executable: links both filter variants so it can run them side by side.
add_executable(iir_cascade_test test.cpp)
target_link_libraries(iir_cascade_test PRIVATE Halide::Tools iir_cascade iir_cascade_noninductive)

# Test that the app actually works! The test passes only when its output
# matches "Success!", and is reported as skipped when the output contains the
# literal text "[SKIP]".
add_test(NAME iir_cascade_test COMMAND iir_cascade_test)
set_tests_properties(
iir_cascade_test
PROPERTIES
LABELS iir_cascade
PASS_REGULAR_EXPRESSION "Success!"
SKIP_REGULAR_EXPRESSION "\\[SKIP\\]"
)
31 changes: 31 additions & 0 deletions apps/iir_cascade/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Makefile for the iir_cascade app. Builds the generator, runs it to produce
# the inductive and non-inductive filter libraries plus a runtime archive, and
# links them all into a single test binary.
# NOTE(review): BIN, HL_TARGET, GENERATOR_BIN, GENERATOR_DEPS,
# GENERATOR_OUTPUTS, LIBHALIDE_LDFLAGS and the per-target CXX-/CXXFLAGS-/
# LDFLAGS- variables are not defined here; presumably they come from the
# include below -- confirm.
include ../support/Makefile.inc

.PHONY: build clean test

# Default goal: the test binary for the selected target.
build: $(BIN)/$(HL_TARGET)/test

# Generator executable. Only the .cpp prerequisites are passed to the compiler.
$(GENERATOR_BIN)/iir_cascade.generator: iir_cascade_generator.cpp $(GENERATOR_DEPS)
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS)

# Inductive variant of the filter. The pattern stem ($*) is used as the target
# string, with -no_runtime appended.
$(BIN)/%/iir_cascade.a: $(GENERATOR_BIN)/iir_cascade.generator
@mkdir -p $(@D)
$^ -g iir_cascade -f iir_cascade -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-no_runtime inductive=true

# Non-inductive variant: same generator (-g iir_cascade), emitted under a
# different function name (-f) with inductive=false.
$(BIN)/%/iir_cascade_noninductive.a: $(GENERATOR_BIN)/iir_cascade.generator
@mkdir -p $(@D)
$^ -g iir_cascade -f iir_cascade_noninductive -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-no_runtime inductive=false

# Runtime archive, produced once per target by the same generator binary.
# NOTE(review): presumably this supplies the runtime that the two
# -no_runtime filter libraries above leave out -- confirm.
$(BIN)/%/runtime.a: $(GENERATOR_BIN)/iir_cascade.generator
@mkdir -p $(@D)
$^ -r runtime -o $(@D) target=$*

# Test binary: test.cpp linked against both filter variants and the runtime.
# -I$(BIN)/$* puts the generator output directory on the include path.
$(BIN)/%/test: test.cpp $(BIN)/%/iir_cascade.a $(BIN)/%/iir_cascade_noninductive.a $(BIN)/%/runtime.a
@mkdir -p $(@D)
$(CXX-$*) $(CXXFLAGS-$*) -Wall -O2 -I$(BIN)/$* $^ -o $@ $(LDFLAGS-$*)

# Remove everything under $(BIN).
clean:
rm -rf $(BIN)

# Build (if needed) and run the test binary.
test: $(BIN)/$(HL_TARGET)/test
$<
95 changes: 95 additions & 0 deletions apps/iir_cascade/iir_cascade_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Creates a cascade of 1D forward-only IIR filters with tanh nonlinearity.
// Contains a version that uses inductive functions, and a version that does not.

#include "Halide.h"

using namespace Halide;
using namespace Halide::BoundaryConditions;

// Generator for a chain of N forward-only 1D IIR filters running along the
// first dimension (t) of a 2D float input, with a tanh nonlinearity between
// stages. The `inductive` parameter picks between two formulations of the same
// recurrence: a single self-referencing pure definition, or a pure definition
// followed by update definitions over a reduction domain.
class IIRCascade : public Generator<IIRCascade> {
public:
    Input<Buffer<float, 2>> input{"input"};
    GeneratorParam<int> N{"N", 4};                      // how many filter stages are chained
    GeneratorParam<float> weight{"weight", 0.3f};       // IIR coefficient
    GeneratorParam<float> gain{"gain", 3.f};            // scale applied to a stage's input before tanh
    GeneratorParam<bool> inductive{"inductive", true};  // true: self-referencing definition; false: RDom updates
    Output<Buffer<float, 2>> output{"output"};

    // Defines the filter stages and the output, then schedules them for
    // either CUDA or CPU depending on the generator's target.
    void generate() {
        Var t("t"), s("s");

        // Edge-clamped view of the input.
        Func clamped("in");
        clamped(t, s) = BoundaryConditions::repeat_edge(input)(t, s);

        std::vector<Func> stages(N);
        // Reduction over every time step after the first; only the
        // non-inductive formulation uses it.
        RDom rt(1, input.width() - 1, "rt");

        for (int k = 0; k < N; ++k) {
            stages[k] = Func(Float(32), "filt" + std::to_string(k));
            Func &cur = stages[k];

            // Stage 0 reads the clamped input; each later stage reads the one before it.
            Func prev;
            prev = (k == 0) ? clamped : stages[k - 1];

            if (inductive) {
                // One pure definition: t <= 0 is the base case, every other t
                // refers back to this same stage at t - 1.
                Expr seed = weight * prev(t, s);
                Expr step = (1.f - weight) * cur(t - 1, s) + weight * tanh(gain * prev(t, s));
                cur(t, s) = select(t <= 0, seed, likely(step));
            } else {
                // Same recurrence written as updates: update 0 seeds t == 0,
                // update 1 walks rt over the remaining time steps.
                cur(t, s) = undef<float>();
                cur(0, s) = weight * prev(0, s);
                cur(rt, s) = (1.f - weight) * cur(rt - 1, s) + weight * tanh(gain * prev(rt, s));
            }
        }

        // The output applies the gain and tanh to the last stage, written
        // through an update that covers the full input width.
        output(t, s) = undef<float>();
        RDom ro(0, input.width(), "ro");
        output(ro, s) = tanh(gain * stages[N - 1](ro, s));

        // Vector width used by the CPU schedule.
        const int vec_width = inductive ? 32 : 4;

        Var so("so"), si("si");
        if (get_target().has_feature(Target::CUDA)) {
            // As in iir_blur, the recursive dimension offers no parallelism.
            // The inductive variant does less i/o because it never has to
            // write whole intermediate filter outputs to global memory.
            const int warp_size = 32;
            output.update()
                .split(s, so, si, warp_size)
                .gpu_blocks(so)
                .gpu_lanes(si)
                .reorder(ro, si, so);

            for (Func &f : stages) {
                if (inductive) {
                    f.fold_storage(t, 2)
                        .store_at(output, si)
                        .compute_at(output, ro);
                } else {
                    f.compute_at(output, si)
                        .reorder_storage(s, t)
                        .store_in(MemoryType::Heap);
                    f.update(1).unroll(rt, 8);
                }
            }
        } else {
            // On CPU the inductive variant is generally not faster unless the
            // nonlinearity is swapped for something cheaper and the input is
            // large enough to saturate the last-level cache.
            output.split(s, so, si, vec_width).vectorize(si);
            output.update()
                .split(s, so, si, vec_width)
                .reorder(si, ro, so)
                .vectorize(si);

            for (Func &f : stages) {
                if (inductive) {
                    f.reorder_storage(s, t)
                        .fold_storage(t, 2)
                        .store_at(output, so)
                        .compute_at(output, ro)
                        .vectorize(s, vec_width);
                } else {
                    f.compute_at(output, so)
                        .reorder_storage(s, t)
                        .vectorize(s, vec_width);
                    f.update().vectorize(s, vec_width);
                    f.update(1).vectorize(s, vec_width);
                }
            }

            output.dim(0).set_bounds(0, input.width());
        }
    }
};

HALIDE_REGISTER_GENERATOR(IIRCascade, iir_cascade)
63 changes: 63 additions & 0 deletions apps/iir_cascade/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>

#include "HalideBuffer.h"
#include "HalideRuntime.h"

#include "iir_cascade.h"
#include "iir_cascade_noninductive.h"

#include "halide_benchmark.h"

using namespace Halide::Tools;

// Benchmarks the inductive and non-inductive iir_cascade pipelines on the same
// synthetic input and checks that their outputs agree.
//
// Returns 0 and prints "Success!" when every output element is finite and the
// largest absolute difference between the two outputs is at most 1e-4.
// Otherwise prints a diagnostic and returns 1.
int main(int argc, char **argv) {

    const int T = 1024;  // number of time steps
    const int S = 1024;  // number of strips

    Halide::Runtime::Buffer<float> input(T, S);
    Halide::Runtime::Buffer<float> out_inductive(T, S);
    Halide::Runtime::Buffer<float> out_noninductive(T, S);

    // Set an element to something non-zero, to make sure the generators actually write to the output buffers.
    // The two sentinels differ, so the comparison below fails if either pipeline leaves its buffer untouched.
    out_inductive(5, 5) = 1.f;
    out_noninductive(5, 5) = 2.f;

    input.for_each_element([&](int x, int y) {
        input(x, y) = 0.5f * x + 10.0f * sinf(0.01f * x + 0.02f * y);  // some arbitrary input signal
    });

    double t_inductive = benchmark([&]() {
        iir_cascade(input, out_inductive);
        out_inductive.device_sync();
    });
    printf("inductive time: %gms\n", t_inductive * 1e3);

    double t_noninductive = benchmark([&]() {
        iir_cascade_noninductive(input, out_noninductive);
        out_noninductive.device_sync();
    });
    printf("non-inductive time: %gms\n", t_noninductive * 1e3);

    // When the pipelines are built for a GPU target the results live in device
    // memory; bring them back before reading on the host. Without this the
    // loop below would compare stale host data.
    out_inductive.copy_to_host();
    out_noninductive.copy_to_host();

    float max_err = 0.f;
    bool all_finite = true;
    for (int y = 0; y < S; y++) {
        for (int x = 0; x < T; x++) {
            const float diff = std::abs(out_inductive(x, y) - out_noninductive(x, y));
            if (std::isfinite(diff)) {
                max_err = std::max(max_err, diff);
            } else {
                // std::max would silently discard a NaN difference, so
                // non-finite results are tracked separately.
                all_finite = false;
            }
        }
    }
    printf("max abs difference: %g\n", max_err);
    if (!all_finite) {
        printf("Inductive or non-inductive output contains non-finite values!\n");
        return 1;
    }
    if (max_err > 1e-4f) {
        printf("Inductive and non-inductive outputs differ!\n");
        return 1;
    }

    printf("Success!\n");
    return 0;
}
5 changes: 5 additions & 0 deletions apps/images/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Attribution

aloeL.png and aloeR.png are derived from: D. Scharstein and C. Pal. Learning
conditional random fields for stereo. In IEEE Computer Society Conference on
Computer Vision and Pattern Recognition (CVPR 2007), Minneapolis, MN, June 2007.
Binary file added apps/images/aloeL.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added apps/images/aloeR.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading