ROCm · mapatel-amd · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 15, 2026
@@ -9,3 +9,4 @@ CMakeUserPresets.json
 .cline_storage
 config.mk
 .claude/
+.venv/
@@ -42,6 +42,7 @@ add_subdirectory(convolution)
 add_subdirectory(floyd_warshall)
 add_subdirectory(histogram)
 add_subdirectory(prefix_sum)
+add_subdirectory(optical_flow)
 
 if(NOT CXX_FS_HEADER_FOUND)
     message(WARNING "filesystem not found, not building fdtd example")

@@ -0,0 +1,2 @@
+applications_optical_flow
+*.flo
@@ -0,0 +1,53 @@
+# MIT License
+#
+# Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name applications_optical_flow)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+include("${CMAKE_CURRENT_LIST_DIR}/../../Common/HipPlatform.cmake")
+select_gpu_language()
+
+enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE})
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17)
+set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF)
+set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON)
+select_hip_platform()
+
+include("${CMAKE_CURRENT_LIST_DIR}/../../Common/ROCmPath.cmake")
+
+add_executable(${example_name} main.hip flowHIP.hip flowGold.cpp)
+add_test(NAME ${example_name} COMMAND ${example_name})
+
+set(include_dirs "../../Common")
+if(ROCM_EXAMPLES_HIP_PLATFORM STREQUAL "nvidia")
+    list(APPEND include_dirs "${ROCM_PATH}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip flowHIP.hip PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE})
+
+install(TARGETS ${example_name})
@@ -0,0 +1,61 @@
+# MIT License
+#
+# Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := applications_optical_flow
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME ?= HIP
+
+ROCM_PATH ?= /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_PATH)/include
+
+HIPCXX ?= $(ROCM_PATH)/bin/hipcc
+
+CXX_STD   := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS  :=
+ILDLIBS   :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ICXXFLAGS += -x cu
+ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+CXXFLAGS ?= -Wall -Wextra
+else
+$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS  += $(LDFLAGS)
+ILDLIBS   += $(LDLIBS)
+
+$(EXAMPLE): main.hip flowHIP.hip flowGold.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ main.hip flowHIP.hip flowGold.cpp $(ILDLIBS)
+
+test: $(EXAMPLE)
+	./$(EXAMPLE) $(TEST_ARGS)
+
+clean:
+	$(RM) $(EXAMPLE)
+
+.PHONY: clean test
@@ -0,0 +1,103 @@
+# Optical Flow
+
+## Description
+
+This example implements the **Horn-Schunck variational optical flow** algorithm using HIP. It estimates the motion field (displacement vectors) between two consecutive image frames by minimizing a global energy functional that balances data fidelity and spatial smoothness.
+
+The algorithm operates on a Gaussian image pyramid: flow is computed coarse-to-fine, with each level refining the estimate from the level above. At each pyramid level, image warping aligns the target frame with the source, and a Jacobi iterative solver computes the incremental flow update.
+
+The program computes optical flow on both the CPU (`flowGold`) and GPU (`flowHIP`), compares the results via L1 norm, and writes two `.flo` files (Middlebury format) for inspection.
+
+## Application Flow
+
+1. Load two consecutive frames (`data/frame10.ppm`, `data/frame11.ppm`) as single-channel FP32 images.
+2. Build a Gaussian pyramid with `nLevels` levels by repeatedly downscaling with a 4-tap filter.
+3. At each pyramid level (coarse to fine):
+   - Upscale the flow estimate from the coarser level.
+   - Warp the target image toward the source using the current flow estimate.
+   - Compute image derivatives (Ix, Iy, Iz) via finite differences.
+   - Run `nSolverIters` Jacobi iterations to solve for the incremental flow update.
+   - Repeat for `nWarpIters` warping passes.
+4. Copy GPU results to host and compare against the CPU reference (L1 norm per pixel).
+5. Write `FlowGPU.flo` and `FlowCPU.flo` to disk.
+
+## Key APIs and Concepts
+
+| Concept | HIP API |
+|---|---|
+| Texture objects with bilinear filtering | `hipCreateTextureObject`, `hipTextureObject_t` |
+| Pitched 2D texture resource | `hipResourceTypePitch2D`, `hipResourceDesc` |
+| Mirror address mode | `hipAddressModeMirror` |
+| Normalized texture coordinates | `texDescr.normalizedCoords = true` |
+| In-kernel texture fetch | `tex2D<float>(tex, x, y)` |
+| Block synchronization | `cg::this_thread_block()`, `cg::sync()` |
+
+### Pitch Alignment Requirement
+
+ROCm requires `pitchInBytes` for `hipResourceTypePitch2D` to be a multiple of **256 bytes** (64 floats × 4 bytes). The `StrideAlignment` constant in `common.h` is set to `64` to satisfy this constraint. CUDA only requires 128 bytes (32 floats), so porting code that used `StrideAlignment = 32` will fail at texture creation.
+
+## Prerequisites
+
+- A ROCm-capable AMD GPU
+- ROCm SDK installed ([installation guide](https://rocm.docs.amd.com/en/latest/index.html) or [TheRock releases](https://github.com/ROCm/TheRock/blob/main/RELEASES.md))
+- `hipcc` on your `PATH`
+
+## Building
+
+### Make
+
+```bash
+cd Applications/optical_flow
+make
+```
+
+### CMake
+
+```bash
+cd Applications/optical_flow
+cmake -B build -DROCM_PATH=<path-to-rocm-sdk> -DCMAKE_HIP_COMPILER=<path-to-hipcc>
+cmake --build build -j$(nproc)
+```
+
+Replace `<path-to-rocm-sdk>` and `<path-to-hipcc>` with the actual paths for your ROCm installation, for example `/opt/rocm` and `/opt/rocm/bin/hipcc`.
+
+## Running
+
+The binary expects two PPM images at `data/frame10.ppm` and `data/frame11.ppm` relative to the working directory. Sample frames from the Middlebury dataset work well.
+
+```bash
+#!/bin/bash
+# From the optical_flow directory (Make build)
+./optical_flow
+
+# Or if built with CMake
+./build/applications_optical_flow
+```
+
+### Expected Output
+
+```text
+HSOpticalFlow Starting...
+
+Using device: <GPU name>
+Loading "data/frame10.ppm" ...
+Loading "data/frame11.ppm" ...
+L1 error : 0.000xxx
+```
+
+The program exits with `EXIT_SUCCESS` when the L1 error between the GPU and CPU results is below `0.05`. Two output files are written:
+
+- `FlowGPU.flo` — GPU optical flow result
+- `FlowCPU.flo` — CPU reference result
+
+Both files use the [Middlebury `.flo` format](http://vision.middlebury.edu/flow/code/flow-code/README.txt) and can be visualized with tools such as `flowiz` or the Middlebury flow utilities.
+
+## Key Notes
+
+- StrideAlignment is 64 since `hipResourceTypePitch2D` requires `pitchInBytes` to be a multiple of `256 bytes` (4 bytes * 64 floats = 256 bytes)
+- If you are running ROCm through Python packages and having trouble with compiling due to the program not being able to find dependencies, please run the following in your virtual environment:
+
+```bash
+#!/bin/bash
+export ROCM_PATH=PATH_TO_VENV/.venv/lib/python3.12/site-packages/_rocm_sdk_devel
+```
@@ -0,0 +1,57 @@
+// MIT License
+//
+// Copyright (c) 2023-2026 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "common.h"
+
+///////////////////////////////////////////////////////////////////////////////
+/// \brief add two vectors of size _count_
+///
+/// HIP kernel
+/// \param[in]  op1   term one
+/// \param[in]  op2   term two
+/// \param[in]  count vector size
+/// \param[out] sum   result
+///////////////////////////////////////////////////////////////////////////////
+__global__ void AddKernel(const float *op1, const float *op2, int count, float *sum)
+{
+    const int pos = threadIdx.x + blockIdx.x * blockDim.x;
+
+    if (pos >= count)
+        return;
+
+    sum[pos] = op1[pos] + op2[pos];
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// \brief add two vectors of size _count_
+/// \param[in]  op1   term one
+/// \param[in]  op2   term two
+/// \param[in]  count vector size
+/// \param[out] sum   result
+///////////////////////////////////////////////////////////////////////////////
+static void Add(const float *op1, const float *op2, int count, float *sum)
+{
+    dim3 threads(256);
+    dim3 blocks(iDivUp(count, threads.x));
+
+    AddKernel<<<blocks, threads>>>(op1, op2, count, sum);
+}
@@ -0,0 +1,50 @@
+// MIT License
+//
+// Copyright (c) 2023-2026 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// HIP hipResourceTypePitch2D requires pitchInBytes to be a multiple of 256 bytes.
+// 64 floats * 4 bytes = 256 bytes satisfies this
+constexpr int STRIDE_ALIGNMENT = 64;
+
+inline int iAlignUp(int n, int m = STRIDE_ALIGNMENT)
+{
+    int mod = n % m;
+    if (mod)
+        return n + m - mod;
+    else
+        return n;
+}
+
+inline int iDivUp(int n, int m) { return (n + m - 1) / m; }
+
+template <typename T> inline void Swap(T &a, T &b)
+{
+    T t = a;
+    a   = b;
+    b   = t;
+}