Skip to content

Commit 54a84b4

Browse files
larsgeb and claude committed
test_01: load metallib by path to run on paravirtual CI GPU
Replace the MetalAdder class usage with a direct Metal C++ dispatch that loads default.metallib by explicit path (just as tests 02 and 03 do). newDefaultLibrary() searches for an app bundle and, on the CI paravirtual device, silently produces all-zero results; newLibrary(path) works fine. Also drop MetalAdder.cpp from the test executable (no longer needed) and remove the SKIP_RETURN_CODE 77 test property — the test now runs and must pass. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 89d5497 commit 54a84b4

2 files changed

Lines changed: 73 additions & 30 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,18 +194,15 @@ compile_metal_shader(
194194
# =============================================================================
195195
enable_testing()
196196

197-
add_executable(test_01_adder tests/test_01_adder.cpp 01-MetalAdder/MetalAdder.cpp)
197+
add_executable(test_01_adder tests/test_01_adder.cpp)
198198
configure_metal_target(test_01_adder)
199-
target_include_directories(test_01_adder PRIVATE "${CMAKE_SOURCE_DIR}/01-MetalAdder")
200199
add_dependencies(test_01_adder metallib_01)
201200
set_target_properties(test_01_adder PROPERTIES
202201
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/01-MetalAdder"
203202
)
204203
add_test(NAME test_01_adder COMMAND test_01_adder
205204
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/01-MetalAdder"
206205
)
207-
# newDefaultLibrary() doesn't work on the CI paravirtual GPU; skip gracefully.
208-
set_tests_properties(test_01_adder PROPERTIES SKIP_RETURN_CODE 77)
209206

210207
add_executable(test_02_1d_ops
211208
tests/test_02_1d_ops.cpp

tests/test_01_adder.cpp

Lines changed: 72 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
1-
// Test: Verify GPU array addition matches CPU computation
2-
// Use a small array so the test runs on both real Apple Silicon and the
3-
// CI paravirtual GPU (which can't handle the 108 M element sample size).
4-
#define METAL_ADDER_ARRAY_LENGTH 10000
1+
// Verify the add_arrays GPU kernel from 01-MetalAdder.
2+
// Loads default.metallib by explicit path so this test works on both real
3+
// Apple Silicon and the CI paravirtual GPU, which rejects newDefaultLibrary()
4+
// (that API searches for an app bundle and silently fails on the VM).
55

6-
#include <iostream>
76
#include <cstdlib>
8-
#include <cmath>
7+
#include <iostream>
98

109
#define NS_PRIVATE_IMPLEMENTATION
1110
#define CA_PRIVATE_IMPLEMENTATION
@@ -14,7 +13,7 @@
1413
#include "Metal/Metal.hpp"
1514
#include "QuartzCore/QuartzCore.hpp"
1615

17-
#include "MetalAdder.hpp"
16+
static const unsigned int kN = 10000;
1817

1918
int main()
2019
{
@@ -24,31 +23,74 @@ int main()
2423
std::cerr << "FAIL: No Metal device found." << std::endl;
2524
return 1;
2625
}
27-
std::string deviceName = device->name()->utf8String();
28-
std::cout << "Running on " << deviceName << std::endl;
26+
std::cout << "Running on " << device->name()->utf8String() << std::endl;
27+
28+
NS::Error *error = nullptr;
2929

30-
// MetalAdder uses newDefaultLibrary() which the Apple Paravirtual device
31-
// cannot load correctly — results are silently all-zero regardless of size.
32-
// Tests 02 and 03 load their metallib explicitly and run fine on the VM.
33-
if (deviceName.find("Paravirtual") != std::string::npos)
30+
// Load by explicit path — works on both real and paravirtual GPUs.
31+
auto libPath = NS::String::string("default.metallib", NS::ASCIIStringEncoding);
32+
MTL::Library *lib = device->newLibrary(libPath, &error);
33+
if (!lib)
3434
{
35-
std::cout << "SKIP: newDefaultLibrary() not supported on Paravirtual device." << std::endl;
35+
std::cerr << "FAIL: Could not load default.metallib: "
36+
<< (error ? error->description()->utf8String() : "unknown") << std::endl;
3637
device->release();
37-
return 77; // CTest SKIP_RETURN_CODE
38+
return 1;
3839
}
3940

40-
MetalAdder *adder = new MetalAdder(device);
41+
auto fnName = NS::String::string("add_arrays", NS::ASCIIStringEncoding);
42+
MTL::Function *fn = lib->newFunction(fnName);
43+
lib->release();
44+
if (!fn)
45+
{
46+
std::cerr << "FAIL: add_arrays not found in default.metallib" << std::endl;
47+
device->release();
48+
return 1;
49+
}
4150

42-
// Run GPU addition
43-
adder->sendComputeCommand();
51+
MTL::ComputePipelineState *pso = device->newComputePipelineState(fn, &error);
52+
fn->release();
53+
if (!pso)
54+
{
55+
std::cerr << "FAIL: Could not create pipeline state" << std::endl;
56+
device->release();
57+
return 1;
58+
}
59+
60+
MTL::CommandQueue *queue = device->newCommandQueue();
61+
62+
// Allocate shared-memory buffers and fill inputs with random data.
63+
size_t nbytes = kN * sizeof(float);
64+
MTL::Buffer *bufA = device->newBuffer(nbytes, MTL::ResourceStorageModeShared);
65+
MTL::Buffer *bufB = device->newBuffer(nbytes, MTL::ResourceStorageModeShared);
66+
MTL::Buffer *bufResult = device->newBuffer(nbytes, MTL::ResourceStorageModeShared);
67+
68+
float *a = (float *)bufA->contents();
69+
float *b = (float *)bufB->contents();
70+
for (unsigned int i = 0; i < kN; i++)
71+
{
72+
a[i] = (float)rand() / RAND_MAX;
73+
b[i] = (float)rand() / RAND_MAX;
74+
}
4475

45-
// Verify against CPU
46-
float *a = (float *)adder->_mBufferA->contents();
47-
float *b = (float *)adder->_mBufferB->contents();
48-
float *result = (float *)adder->_mBufferResult->contents();
76+
// Dispatch the kernel.
77+
auto cmdBuf = queue->commandBuffer();
78+
auto enc = cmdBuf->computeCommandEncoder();
79+
enc->setComputePipelineState(pso);
80+
enc->setBuffer(bufA, 0, 0);
81+
enc->setBuffer(bufB, 0, 1);
82+
enc->setBuffer(bufResult, 0, 2);
83+
NS::UInteger tgSize = pso->maxTotalThreadsPerThreadgroup();
84+
if (tgSize > kN) tgSize = kN;
85+
enc->dispatchThreads(MTL::Size::Make(kN, 1, 1), MTL::Size::Make(tgSize, 1, 1));
86+
enc->endEncoding();
87+
cmdBuf->commit();
88+
cmdBuf->waitUntilCompleted();
4989

90+
// Verify against CPU reference.
91+
float *result = (float *)bufResult->contents();
5092
int errors = 0;
51-
for (unsigned long i = 0; i < arrayLength; i++)
93+
for (unsigned int i = 0; i < kN; i++)
5294
{
5395
if (result[i] != (a[i] + b[i]))
5496
{
@@ -59,14 +101,18 @@ int main()
59101
}
60102
}
61103

62-
delete adder;
104+
pso->release();
105+
queue->release();
106+
bufA->release();
107+
bufB->release();
108+
bufResult->release();
63109
device->release();
64110

65111
if (errors > 0)
66112
{
67-
std::cerr << "FAIL: " << errors << " mismatches out of " << arrayLength << std::endl;
113+
std::cerr << "FAIL: " << errors << " mismatches out of " << kN << std::endl;
68114
return 1;
69115
}
70-
std::cout << "PASS: GPU addition matches CPU (" << arrayLength << " elements)" << std::endl;
116+
std::cout << "PASS: GPU addition matches CPU (" << kN << " elements)" << std::endl;
71117
return 0;
72118
}

0 commit comments

Comments
 (0)