diff --git a/include/infinicore/tensor.hpp b/include/infinicore/tensor.hpp index b3570e9fe..52d8350aa 100644 --- a/include/infinicore/tensor.hpp +++ b/include/infinicore/tensor.hpp @@ -27,6 +27,7 @@ struct TensorMetaData { infiniopTensorDescriptor_t desc; TensorMetaData(const Shape &shape, const Strides &strides, const DataType &dtype); + ~TensorMetaData(); }; struct TensorData { diff --git a/src/infinicore-test/README.md b/src/infinicore-test/README.md new file mode 100644 index 000000000..2b128aaca --- /dev/null +++ b/src/infinicore-test/README.md @@ -0,0 +1,277 @@ +# InfiniCore Memory Management Test Suite + +This test suite provides comprehensive testing for the InfiniCore memory management system, focusing on the critical issues identified in the memory management architecture analysis. + +## Overview + +The test suite includes six main test categories: + +1. **Basic Memory Tests** - Basic allocation, deallocation, and memory operations +2. **Concurrency Tests** - Thread safety and concurrent access testing +3. **Exception Safety Tests** - Exception handling and safety testing +4. **Memory Leak Tests** - Memory leak detection and prevention +5. **Performance Tests** - Performance benchmarks and optimization validation +6. 
**Stress Tests** - High-load stress testing and edge cases + +## Building + +### Using XMake (if integrated with main build) +```bash +# From InfiniCore root directory +xmake build infinicore-test +``` + +## Running Tests + +### Run All Tests +```bash +./infinicore-test +``` + +### Run Specific Test Categories +```bash +# Basic memory tests +./infinicore-test --test basic + +# Concurrency tests +./infinicore-test --test concurrency + +# Exception safety tests +./infinicore-test --test exception + +# Memory leak tests +./infinicore-test --test leak + +# Performance tests +./infinicore-test --test performance + +# Stress tests +./infinicore-test --test stress +``` + +### Run with Specific Device +```bash +# Run on CPU +./infinicore-test --cpu + +# Run on NVIDIA GPU +./infinicore-test --nvidia + +# Run on other devices +./infinicore-test --cambricon +./infinicore-test --ascend +./infinicore-test --metax +./infinicore-test --moore +./infinicore-test --iluvatar +./infinicore-test --kunlun +./infinicore-test --hygon +``` + +### Customize Test Parameters +```bash +# Run with custom thread count +./infinicore-test --threads 8 + +# Run with custom iteration count +./infinicore-test --iterations 5000 + +# Combine options +./infinicore-test --nvidia --test concurrency --threads 16 --iterations 2000 +``` + +## Test Categories + +### 1. Basic Memory Tests +Tests fundamental memory operations: +- Memory allocation and deallocation +- Memory size and device properties +- Memory read/write operations +- Pinned memory allocation +- Memory data integrity + +### 2. Concurrency Tests +Tests thread safety and concurrent access: +- **Concurrent Allocations**: Multiple threads allocating memory simultaneously +- **Concurrent Device Switching**: Multiple threads switching device contexts +- **Memory Allocation Race**: Race condition testing for memory operations + +### 3. 
Exception Safety Tests +Tests exception handling and safety: +- **Allocation Failure**: Tests behavior when allocation fails +- **Deallocation Exception**: Tests exception safety during deallocation +- **Context Switch Exception**: Tests exception handling during device switching + +> **Note:** This suite is currently disabled: its registration in `main.cc` is commented out, so `--test exception` runs no tests. + +### 4. Memory Leak Tests +Tests memory leak detection and prevention: +- **Basic Leak Detection**: Basic memory leak detection +- **Cross-Device Leak Detection**: Memory leaks in cross-device scenarios +- **Exception Leak Detection**: Memory leaks during exception handling + +### 5. Performance Tests +Tests performance and benchmarks: +- **Allocation Performance**: Memory allocation speed benchmarks +- **Concurrent Performance**: Performance under concurrent load +- **Memory Copy Performance**: Memory copy bandwidth tests + +### 6. Stress Tests +Tests high-load scenarios and edge cases: +- **High Frequency Allocations**: Rapid allocation/deallocation cycles +- **Large Memory Allocations**: Large memory block allocation +- **Cross-Device Stress**: Stress testing across multiple devices + +## Expected Results + +### Critical Issues to Watch For + +The tests are designed to detect the critical issues identified in the memory management analysis: + +1. **Thread Safety Violations** + - Race conditions in concurrent allocations + - Inconsistent device context switching + - Global state corruption + +2. **Memory Leaks** + - Unfreed memory after deallocation + - Cross-device memory not properly cleaned up + - Exception-related memory leaks + +3. **Exception Safety Issues** + - Exceptions during allocation causing resource leaks + - Exceptions in destructors causing `std::terminate` + - Incomplete cleanup on exceptions + +4. 
**Performance Issues** + - Slow allocation/deallocation performance + - Poor concurrent performance + - Inefficient memory copy operations + +### Performance Thresholds + +The tests include performance thresholds: + +- **Allocation Performance**: < 100μs per allocation +- **Concurrent Performance**: < 200μs per allocation under load +- **Memory Bandwidth**: > 100 MB/s for memory copies + +## Test Output + +### Successful Test Run +``` +============================================== +InfiniCore Memory Management Test Suite +============================================== +Device: 0 +Threads: 4 +Iterations: 1000 +============================================== + +[SUITE] Running: BasicMemoryTest +[TEST] Starting: BasicMemoryTest +[TEST] PASSED: BasicMemoryTest (Duration: 1234μs) + +[SUITE] Running: ConcurrencyTest +[TEST] Starting: ConcurrencyTest +[TEST] PASSED: ConcurrencyTest (Duration: 5678μs) + +... + +============================================== +Test Summary +============================================== +Total Tests: 6 +Passed: 6 +Failed: 0 +Total Time: 12345μs +============================================== + +✅ All tests passed! +``` + +### Failed Test Run +``` +[TEST] FAILED: ConcurrencyTest - Concurrent allocation test failed: expected 8000 successes, got 7995 successes and 5 failures + +============================================== +Final Results +============================================== +Total Tests: 6 +Passed: 5 +Failed: 1 +============================================== + +❌ Some tests failed. Please review the output above. +``` + +## Debugging Failed Tests + +### Common Issues and Solutions + +1. **Thread Safety Failures** + - Check for race conditions in global state access + - Verify proper synchronization in allocators + - Review device context switching logic + +2. **Memory Leak Failures** + - Check deallocation logic in allocators + - Verify cross-device cleanup mechanisms + - Review exception safety in destructors + +3. 
**Performance Failures** + - Profile allocation/deallocation paths + - Check for unnecessary context switching + - Review memory copy implementations + +4. **Exception Safety Failures** + - Verify no-throw guarantees in destructors + - Check exception handling in allocation paths + - Review resource cleanup on exceptions + +## Integration with CI/CD + +### GitHub Actions Example +```yaml +- name: Run Memory Tests + run: | + cd src/infinicore-test + mkdir build && cd build + cmake .. + make + ./infinicore-test --test all +``` + +### Custom Test Targets +```bash +# Run specific test categories +make test-memory-basic +make test-memory-concurrency +make test-memory-exception +make test-memory-leak +make test-memory-performance +make test-memory-stress +make test-memory-all +``` + +## Contributing + +When adding new tests: + +1. Follow the existing test framework pattern +2. Add appropriate error messages and logging +3. Include performance thresholds where applicable +4. Test both success and failure scenarios +5. 
Update this README with new test descriptions + +## Dependencies + +- InfiniCore library (infinicore, infiniop, infinirt, infiniccl) +- C++17 compatible compiler +- Threading library (pthread on Linux) +- CMake 3.16+ (for CMake build) + +## Notes + +- Tests are designed to be deterministic where possible +- Some tests may have timing dependencies +- Performance tests may vary based on system load +- Memory leak detection is basic and may not catch all leaks +- Tests assume proper InfiniCore initialization diff --git a/src/infinicore-test/main.cc b/src/infinicore-test/main.cc new file mode 100644 index 000000000..39f70de80 --- /dev/null +++ b/src/infinicore-test/main.cc @@ -0,0 +1,234 @@ +#include "memory_test.h" +#include "test_tensor_destructor.h" +#include +#include +#include +#include + +struct ParsedArgs { + infiniDevice_t device_type = INFINI_DEVICE_CPU; + bool run_basic = true; + bool run_concurrency = true; + bool run_exception_safety = true; + bool run_memory_leak = true; + bool run_performance = true; + bool run_stress = true; + int num_threads = 4; + int iterations = 1000; +}; + +void printUsage() { + std::cout << "Usage:" << std::endl + << " infinicore-test [--] [--test ] [--threads ] [--iterations ]" << std::endl + << std::endl + << "Options:" << std::endl + << " -- Specify the device type (default: cpu)" << std::endl + << " --test Run specific test (basic|concurrency|exception|leak|performance|stress|all)" << std::endl + << " --threads Number of threads for concurrency tests (default: 4)" << std::endl + << " --iterations Number of iterations for stress tests (default: 1000)" << std::endl + << " --help Show this help message" << std::endl + << std::endl + << "Available devices:" << std::endl + << " cpu - Default" << std::endl + << " nvidia" << std::endl + << " cambricon" << std::endl + << " ascend" << std::endl + << " metax" << std::endl + << " moore" << std::endl + << " iluvatar" << std::endl + << " kunlun" << std::endl + << " hygon" << std::endl + << 
std::endl + << "Available tests:" << std::endl + << " basic - Basic memory allocation and deallocation tests" << std::endl + << " concurrency - Thread safety and concurrent access tests" << std::endl + << " exception - Exception safety tests" << std::endl + << " leak - Memory leak detection tests" << std::endl + << " performance - Performance and benchmark tests" << std::endl + << " stress - Stress tests with high load" << std::endl + << " all - Run all tests (default)" << std::endl + << std::endl; + exit(EXIT_SUCCESS); +} + +ParsedArgs parseArgs(int argc, char *argv[]) { + ParsedArgs args; + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--help" || arg == "-h") { + printUsage(); + } else if (arg == "--cpu") { + args.device_type = INFINI_DEVICE_CPU; + } else if (arg == "--nvidia") { + args.device_type = INFINI_DEVICE_NVIDIA; + } else if (arg == "--cambricon") { + args.device_type = INFINI_DEVICE_CAMBRICON; + } else if (arg == "--ascend") { + args.device_type = INFINI_DEVICE_ASCEND; + } else if (arg == "--metax") { + args.device_type = INFINI_DEVICE_METAX; + } else if (arg == "--moore") { + args.device_type = INFINI_DEVICE_MOORE; + } else if (arg == "--iluvatar") { + args.device_type = INFINI_DEVICE_ILUVATAR; + } else if (arg == "--kunlun") { + args.device_type = INFINI_DEVICE_KUNLUN; + } else if (arg == "--hygon") { + args.device_type = INFINI_DEVICE_HYGON; + } else if (arg == "--test") { + if (i + 1 >= argc) { + std::cerr << "Error: --test requires a test name" << std::endl; + exit(EXIT_FAILURE); + } + + std::string test_name = argv[++i]; + args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = false; + + if (test_name == "basic") { + args.run_basic = true; + } else if (test_name == "concurrency") { + args.run_concurrency = true; + } else if (test_name == "exception") { + args.run_exception_safety = true; + } else if (test_name == "leak") { + 
args.run_memory_leak = true; + } else if (test_name == "performance") { + args.run_performance = true; + } else if (test_name == "stress") { + args.run_stress = true; + } else if (test_name == "all") { + args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = true; + } else { + std::cerr << "Error: Unknown test name: " << test_name << std::endl; + exit(EXIT_FAILURE); + } + } else if (arg == "--threads") { + if (i + 1 >= argc) { + std::cerr << "Error: --threads requires a number" << std::endl; + exit(EXIT_FAILURE); + } + args.num_threads = std::stoi(argv[++i]); + if (args.num_threads <= 0) { + std::cerr << "Error: Number of threads must be positive" << std::endl; + exit(EXIT_FAILURE); + } + } else if (arg == "--iterations") { + if (i + 1 >= argc) { + std::cerr << "Error: --iterations requires a number" << std::endl; + exit(EXIT_FAILURE); + } + args.iterations = std::stoi(argv[++i]); + if (args.iterations <= 0) { + std::cerr << "Error: Number of iterations must be positive" << std::endl; + exit(EXIT_FAILURE); + } + } else { + std::cerr << "Error: Unknown argument: " << arg << std::endl; + exit(EXIT_FAILURE); + } + } + + return args; +} + +int main(int argc, char *argv[]) { + try { + // Initialize spdlog for debugging + spdlog::set_level(spdlog::level::debug); + spdlog::info("Starting InfiniCore Memory Management Test Suite"); + + ParsedArgs args = parseArgs(argc, argv); + spdlog::debug("Arguments parsed successfully"); + + std::cout << "==============================================\n" + << "InfiniCore Memory Management Test Suite\n" + << "==============================================\n" + << "Device: " << static_cast(args.device_type) << "\n" + << "Threads: " << args.num_threads << "\n" + << "Iterations: " << args.iterations << "\n" + << "==============================================" << std::endl; + + spdlog::debug("About to initialize InfiniCore context"); + // Initialize InfiniCore 
context + infinicore::context::setDevice(infinicore::Device(static_cast(args.device_type), 0)); + spdlog::debug("InfiniCore context initialized successfully"); + + spdlog::debug("Creating test runner"); + // Create test runner + infinicore::test::MemoryTestRunner runner; + spdlog::debug("Test runner created successfully"); + + // Add tests based on arguments + if (args.run_basic) { + spdlog::debug("Adding BasicMemoryTest"); + runner.addTest(std::make_unique()); + spdlog::debug("BasicMemoryTest added successfully"); + + spdlog::debug("Adding TensorDestructorTest"); + runner.addTest(std::make_unique()); + spdlog::debug("TensorDestructorTest added successfully"); + } + + if (args.run_concurrency) { + runner.addTest(std::make_unique()); + } + + if (args.run_exception_safety) { + // runner.addTest(std::make_unique()); + } + + if (args.run_memory_leak) { + runner.addTest(std::make_unique()); + } + + if (args.run_performance) { + runner.addTest(std::make_unique()); + } + + if (args.run_stress) { + runner.addTest(std::make_unique()); + } + + spdlog::debug("About to run all tests"); + // Run all tests + auto results = runner.runAllTests(); + spdlog::debug("All tests completed"); + + // Count results + size_t passed = 0, failed = 0; + for (const auto &result : results) { + if (result.passed) { + passed++; + } else { + failed++; + } + } + + // Print final summary + std::cout << "\n==============================================\n" + << "Final Results\n" + << "==============================================\n" + << "Total Tests: " << results.size() << "\n" + << "Passed: " << passed << "\n" + << "Failed: " << failed << "\n" + << "==============================================" << std::endl; + + // Exit with appropriate code + if (failed > 0) { + std::cout << "\n❌ Some tests failed. Please review the output above." << std::endl; + return EXIT_FAILURE; + } else { + std::cout << "\n✅ All tests passed!" 
<< std::endl; + return EXIT_SUCCESS; + } + + } catch (const std::exception &e) { + std::cerr << "Fatal error: " << e.what() << std::endl; + return EXIT_FAILURE; + } catch (...) { + std::cerr << "Fatal error: Unknown exception" << std::endl; + return EXIT_FAILURE; + } +} diff --git a/src/infinicore-test/memory_test.cc b/src/infinicore-test/memory_test.cc new file mode 100644 index 000000000..d36d12093 --- /dev/null +++ b/src/infinicore-test/memory_test.cc @@ -0,0 +1,903 @@ +#include "memory_test.h" +#include +#include +#include + +namespace infinicore::test { + +// Basic Memory Test Implementation +TestResult BasicMemoryTest::run() { + return measureTime("BasicMemoryTest", [this]() -> bool { + try { + spdlog::debug("BasicMemoryTest: Starting test"); + // Test basic memory allocation + spdlog::debug("BasicMemoryTest: About to allocate memory"); + auto memory = context::allocateMemory(1024); + spdlog::debug("BasicMemoryTest: Memory allocated successfully"); + if (!memory) { + std::cerr << "Failed to allocate memory" << std::endl; + return false; + } + + spdlog::debug("BasicMemoryTest: Testing memory properties"); + // Test memory properties + if (memory->size() != 1024) { + std::cerr << "Memory size mismatch: expected 1024, got " << memory->size() << std::endl; + return false; + } + spdlog::debug("BasicMemoryTest: Memory size check passed"); + + spdlog::debug("BasicMemoryTest: Testing memory access"); + // Test memory access + std::byte *data = memory->data(); + spdlog::debug("BasicMemoryTest: Got memory data pointer: {}", static_cast(data)); + if (!data) { + std::cerr << "Memory data pointer is null" << std::endl; + return false; + } + spdlog::debug("BasicMemoryTest: Memory data pointer is valid"); + + // Check if this is GPU memory that can't be accessed directly + Device current_device = context::getDevice(); + spdlog::debug("BasicMemoryTest: Current device type: {}", static_cast(current_device.getType())); + spdlog::debug("BasicMemoryTest: Memory is pinned: {}", 
memory->is_pinned()); + + // For GPU memory, we shouldn't try to access it directly with memset + if (current_device.getType() != Device::Type::CPU) { + spdlog::debug("BasicMemoryTest: Skipping direct memory access for GPU device"); + spdlog::debug("BasicMemoryTest: GPU memory access test completed (skipped)"); + } else { + spdlog::debug("BasicMemoryTest: Testing memory write/read"); + // Test memory write/read + std::memset(data, 0xAB, 1024); + spdlog::debug("BasicMemoryTest: Memory memset completed"); + for (size_t i = 0; i < 1024; ++i) { + if (data[i] != static_cast(0xAB)) { + std::cerr << "Memory write/read test failed at index " << i << std::endl; + return false; + } + } + spdlog::debug("BasicMemoryTest: Memory write/read test completed"); + } + + spdlog::debug("BasicMemoryTest: Testing pinned memory allocation"); + // Test pinned memory allocation + auto pinned_memory = context::allocatePinnedHostMemory(512); + spdlog::debug("BasicMemoryTest: Pinned memory allocated"); + if (!pinned_memory) { + std::cerr << "Failed to allocate pinned memory" << std::endl; + return false; + } + + spdlog::debug("BasicMemoryTest: Checking pinned memory properties"); + // For CPU devices, pinned memory falls back to regular memory, so it may not be marked as pinned + Device pinned_device = context::getDevice(); + if (pinned_device.getType() != Device::Type::CPU && !pinned_memory->is_pinned()) { + std::cerr << "Pinned memory not marked as pinned" << std::endl; + return false; + } + spdlog::debug("BasicMemoryTest: Pinned memory test completed"); + + return true; + } catch (const std::exception &e) { + std::cerr << "BasicMemoryTest failed with exception: " << e.what() << std::endl; + return false; + } + }); +} + +// Concurrency Test Implementation +TestResult ConcurrencyTest::run() { + return measureTime("ConcurrencyTest", [this]() -> bool { + try { + // Run all concurrency subtests + auto result1 = testConcurrentAllocations(); + if (!result1.passed) { + std::cerr << "Concurrent 
allocations test failed: " << result1.error_message << std::endl; + return false; + } + + auto result2 = testConcurrentDeviceSwitching(); + if (!result2.passed) { + std::cerr << "Concurrent device switching test failed: " << result2.error_message << std::endl; + return false; + } + + auto result3 = testMemoryAllocationRace(); + if (!result3.passed) { + std::cerr << "Memory allocation race test failed: " << result3.error_message << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "ConcurrencyTest failed with exception: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult ConcurrencyTest::testConcurrentAllocations() { + return measureTime("ConcurrentAllocations", [this]() -> bool { + const int num_threads = 8; + const int allocations_per_thread = 100; + std::vector threads; + std::atomic success_count{0}; + std::atomic failure_count{0}; + + for (int i = 0; i < num_threads; ++i) { + threads.emplace_back([&, i]() { + try { + for (int j = 0; j < allocations_per_thread; ++j) { + // Allocate memory of random size + size_t size = 64 + (j % 1024); + auto memory = context::allocateMemory(size); + if (memory && memory->size() == size) { + success_count++; + } else { + failure_count++; + } + + // Small delay to increase chance of race conditions + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + } catch (const std::exception &e) { + failure_count++; + std::cerr << "Thread " << i << " failed: " << e.what() << std::endl; + } + }); + } + + for (auto &thread : threads) { + thread.join(); + } + + int total_expected = num_threads * allocations_per_thread; + if (success_count.load() != total_expected) { + std::cerr << "Concurrent allocation test failed: expected " << total_expected + << " successes, got " << success_count.load() + << " successes and " << failure_count.load() << " failures" << std::endl; + return false; + } + + return true; + }); +} + +TestResult 
ConcurrencyTest::testConcurrentDeviceSwitching() { + return measureTime("ConcurrentDeviceSwitching", [this]() -> bool { + const int num_threads = 4; + std::vector threads; + std::atomic success_count{0}; + std::atomic failure_count{0}; + + // Get available devices + std::vector devices; + for (int type = 0; type < static_cast(Device::Type::COUNT); ++type) { + size_t count = context::getDeviceCount(static_cast(type)); + for (size_t i = 0; i < count; ++i) { + devices.emplace_back(static_cast(type), i); + } + } + + if (devices.size() < 2) { + std::cout << "Skipping device switching test - need at least 2 devices" << std::endl; + return true; + } + + for (int i = 0; i < num_threads; ++i) { + threads.emplace_back([&, i, devices]() { + try { + for (int j = 0; j < 50; ++j) { + // Switch to random device + Device target_device = devices[j % devices.size()]; + context::setDevice(target_device); + + // Verify device was set correctly + Device current_device = context::getDevice(); + if (current_device == target_device) { + success_count++; + } else { + failure_count++; + std::cerr << "Device switching failed: expected " + << static_cast(target_device.getType()) + << ", got " << static_cast(current_device.getType()) << std::endl; + } + + // Allocate memory to test device context + auto memory = context::allocateMemory(256); + if (memory && memory->device() == target_device) { + success_count++; + } else { + failure_count++; + } + + std::this_thread::sleep_for(std::chrono::microseconds(10)); + } + } catch (const std::exception &e) { + failure_count++; + std::cerr << "Thread " << i << " failed: " << e.what() << std::endl; + } + }); + } + + for (auto &thread : threads) { + thread.join(); + } + + if (failure_count.load() > 0) { + std::cerr << "Concurrent device switching test failed: " + << failure_count.load() << " failures out of " + << (success_count.load() + failure_count.load()) << " operations" << std::endl; + return false; + } + + return true; + }); +} + +TestResult 
ConcurrencyTest::testMemoryAllocationRace() { + return measureTime("MemoryAllocationRace", [this]() -> bool { + const int num_threads = 16; + const int allocations_per_thread = 1000; + std::vector threads; + std::atomic success_count{0}; + std::atomic failure_count{0}; + std::vector> all_allocations; + std::mutex allocations_mutex; + + for (int i = 0; i < num_threads; ++i) { + threads.emplace_back([&, i]() { + std::vector> thread_allocations; + try { + for (int j = 0; j < allocations_per_thread; ++j) { + size_t size = 64 + (j % 1024); + auto memory = context::allocateMemory(size); + if (memory) { + thread_allocations.push_back(memory); + success_count++; + } else { + failure_count++; + } + + // Occasionally deallocate some memory to test concurrent alloc/dealloc + if (j % 10 == 0 && !thread_allocations.empty()) { + thread_allocations.pop_back(); + } + } + + // Store remaining allocations + std::lock_guard lock(allocations_mutex); + all_allocations.insert(all_allocations.end(), + thread_allocations.begin(), + thread_allocations.end()); + } catch (const std::exception &e) { + failure_count++; + std::cerr << "Thread " << i << " failed: " << e.what() << std::endl; + } + }); + } + + for (auto &thread : threads) { + thread.join(); + } + + // Verify all allocations are valid + for (const auto &memory : all_allocations) { + if (!memory || !memory->data()) { + std::cerr << "Invalid memory allocation found" << std::endl; + return false; + } + } + + int total_expected = num_threads * allocations_per_thread; + if (success_count.load() < total_expected * 0.9) { // Allow 10% failure rate + std::cerr << "Memory allocation race test failed: expected at least " + << total_expected * 0.9 << " successes, got " << success_count.load() << std::endl; + return false; + } + + return true; + }); +} + +// Exception Safety Test Implementation +TestResult ExceptionSafetyTest::run() { + return measureTime("ExceptionSafetyTest", [this]() -> bool { + try { + auto result1 = 
testAllocationFailure(); + if (!result1.passed) { + std::cerr << "Allocation failure test failed: " << result1.error_message << std::endl; + return false; + } + + auto result2 = testDeallocationException(); + if (!result2.passed) { + std::cerr << "Deallocation exception test failed: " << result2.error_message << std::endl; + return false; + } + + auto result3 = testContextSwitchException(); + if (!result3.passed) { + std::cerr << "Context switch exception test failed: " << result3.error_message << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "ExceptionSafetyTest failed with exception: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult ExceptionSafetyTest::testAllocationFailure() { + return measureTime("AllocationFailure", [this]() -> bool { + try { + // Test allocation with extremely large size (should fail) + try { + auto memory = context::allocateMemory(SIZE_MAX); + std::cerr << "Expected allocation to fail with SIZE_MAX" << std::endl; + return false; + } catch (const std::exception &e) { + // Expected to fail + std::cout << "Allocation correctly failed with SIZE_MAX: " << e.what() << std::endl; + } + + // Test allocation with zero size + try { + auto memory = context::allocateMemory(0); + if (memory) { + std::cerr << "Zero-size allocation should return null or throw" << std::endl; + return false; + } + } catch (const std::exception &e) { + // Also acceptable + std::cout << "Zero-size allocation correctly failed: " << e.what() << std::endl; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "Allocation failure test failed with unexpected exception: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult ExceptionSafetyTest::testDeallocationException() { + return measureTime("DeallocationException", [this]() -> bool { + try { + // Test that deallocation doesn't throw exceptions + std::vector> memories; + + // Allocate some memory + for (int i = 0; i < 
10; ++i) { + auto memory = context::allocateMemory(1024); + if (memory) { + memories.push_back(memory); + } + } + + // Test that destruction doesn't throw + try { + memories.clear(); // This should trigger deallocation + } catch (const std::exception &e) { + std::cerr << "Memory deallocation threw exception: " << e.what() << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "Deallocation exception test failed: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult ExceptionSafetyTest::testContextSwitchException() { + return measureTime("ContextSwitchException", [this]() -> bool { + try { + // Test context switching with invalid device + Device original_device = context::getDevice(); + + try { + // Try to switch to a device that might not exist + Device invalid_device(Device::Type::COUNT, 999); + context::setDevice(invalid_device); + std::cerr << "Expected device switching to fail with invalid device" << std::endl; + return false; + } catch (const std::exception &e) { + // Expected to fail + std::cout << "Device switching correctly failed with invalid device: " << e.what() << std::endl; + } + + // Verify original device is still set + Device current_device = context::getDevice(); + if (current_device != original_device) { + std::cerr << "Device context not restored after exception" << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "Context switch exception test failed: " << e.what() << std::endl; + return false; + } + }); +} + +// Memory Leak Test Implementation +TestResult MemoryLeakTest::run() { + return measureTime("MemoryLeakTest", [this]() -> bool { + try { + auto result1 = testBasicLeakDetection(); + if (!result1.passed) { + std::cerr << "Basic leak detection test failed: " << result1.error_message << std::endl; + return false; + } + + auto result2 = testCrossDeviceLeakDetection(); + if (!result2.passed) { + std::cerr << "Cross-device leak 
detection test failed: " << result2.error_message << std::endl; + return false; + } + + auto result3 = testExceptionLeakDetection(); + if (!result3.passed) { + std::cerr << "Exception leak detection test failed: " << result3.error_message << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "MemoryLeakTest failed with exception: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult MemoryLeakTest::testBasicLeakDetection() { + return measureTime("BasicLeakDetection", [this]() -> bool { + try { + // Reset leak detector + MemoryLeakDetector::instance().reset(); + + // Allocate and deallocate memory + std::vector> memories; + for (int i = 0; i < 100; ++i) { + auto memory = context::allocateMemory(1024); + if (memory) { + memories.push_back(memory); + } + } + + // Clear memories to trigger deallocation + memories.clear(); + + // Force garbage collection if available + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + // Check for leaks (this is a basic test - real leak detection would need more sophisticated tools) + size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory(); + if (leaked_memory > 0) { + std::cerr << "Potential memory leak detected: " << leaked_memory << " bytes" << std::endl; + return false; + } + + return true; + } catch (const std::exception &e) { + std::cerr << "Basic leak detection test failed: " << e.what() << std::endl; + return false; + } + }); +} + +TestResult MemoryLeakTest::testCrossDeviceLeakDetection() { + return measureTime("CrossDeviceLeakDetection", [this]() -> bool { + try { + // Get available devices + std::vector devices; + for (int type = 0; type < static_cast(Device::Type::COUNT); ++type) { + size_t count = context::getDeviceCount(static_cast(type)); + for (size_t i = 0; i < count; ++i) { + devices.emplace_back(static_cast(type), i); + } + } + + if (devices.size() < 2) { + std::cout << "Skipping cross-device leak test - need at least 2 
devices" << std::endl;
+                return true;
+            }
+
+            // Allocate pinned memory on one device
+            context::setDevice(devices[0]);
+            auto pinned_memory = context::allocatePinnedHostMemory(1024);
+
+            if (!pinned_memory) {
+                std::cerr << "Failed to allocate pinned memory" << std::endl;
+                return false;
+            }
+
+            // Switch to another device and deallocate
+            context::setDevice(devices[1]);
+            pinned_memory.reset(); // This should trigger cross-device deallocation
+
+            // Force garbage collection
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+            // Check for leaks
+            size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory();
+            if (leaked_memory > 0) {
+                std::cerr << "Potential cross-device memory leak detected: " << leaked_memory << " bytes" << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Cross-device leak detection test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Allocates ten 1 KiB blocks, throws a simulated exception, releases the
+// blocks in the handler, and fails if MemoryLeakDetector still reports
+// outstanding bytes afterwards.
+TestResult MemoryLeakTest::testExceptionLeakDetection() {
+    return measureTime("ExceptionLeakDetection", [this]() -> bool {
+        try {
+            std::vector<std::shared_ptr<Memory>> memories;
+
+            try {
+                for (int i = 0; i < 10; ++i) {
+                    auto memory = context::allocateMemory(1024);
+                    if (memory) {
+                        memories.push_back(memory);
+                    }
+                }
+
+                // Simulate a failure while the allocations are still alive
+                throw std::runtime_error("Simulated exception");
+
+            } catch (const std::exception &) {
+                // Drop every reference so the blocks can be released
+                memories.clear();
+            }
+
+            // Short pause before sampling the detector
+            // NOTE(review): presumably gives deferred frees time to finish - confirm
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+            size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory();
+            if (leaked_memory > 0) {
+                std::cerr << "Potential exception-related memory leak detected: " << leaked_memory << " bytes" << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Exception leak detection test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Performance Test Implementation
+
+// Runs the three performance sub-tests in order and stops at the first one
+// that fails, printing that sub-test's error message.
+TestResult PerformanceTest::run() {
+    return measureTime("PerformanceTest", [this]() -> bool {
+        try {
+            auto result1 = testAllocationPerformance();
+            if (!result1.passed) {
+                std::cerr << "Allocation performance test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+
+            auto result2 = testConcurrentPerformance();
+            if (!result2.passed) {
+                std::cerr << "Concurrent performance test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+
+            auto result3 = testMemoryCopyPerformance();
+            if (!result3.passed) {
+                std::cerr << "Memory copy performance test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "PerformanceTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Times 10000 allocations of 1 KiB each and fails when the mean cost per
+// allocation exceeds 100 microseconds.
+TestResult PerformanceTest::testAllocationPerformance() {
+    return measureTime("AllocationPerformance", [this]() -> bool {
+        try {
+            const int num_allocations = 10000;
+            const size_t allocation_size = 1024;
+
+            // Reserve before starting the clock so vector growth is not
+            // billed to the allocator under test
+            std::vector<std::shared_ptr<Memory>> memories;
+            memories.reserve(num_allocations);
+
+            auto start = std::chrono::high_resolution_clock::now();
+
+            for (int i = 0; i < num_allocations; ++i) {
+                auto memory = context::allocateMemory(allocation_size);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+            }
+
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+
+            double avg_time_per_allocation = static_cast<double>(duration.count()) / num_allocations;
+            std::cout << "Average allocation time: " << avg_time_per_allocation << "μs" << std::endl;
+
+            // Performance threshold: should be under 100μs per allocation
+            if (avg_time_per_allocation > 100.0) {
+                std::cerr << "Allocation performance too slow: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Allocation
performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Four threads each perform 1000 allocations of 512 bytes. The test fails
+// when a worker throws, when any allocation comes back empty, or when the
+// mean wall-clock cost per allocation exceeds 200 microseconds.
+TestResult PerformanceTest::testConcurrentPerformance() {
+    return measureTime("ConcurrentPerformance", [this]() -> bool {
+        try {
+            const int num_threads = 4;
+            const int allocations_per_thread = 1000;
+
+            auto start = std::chrono::high_resolution_clock::now();
+
+            std::vector<std::thread> threads;
+            threads.reserve(num_threads);
+            std::atomic<int> success_count{0};
+            std::atomic<bool> worker_failed{false};
+
+            for (int i = 0; i < num_threads; ++i) {
+                threads.emplace_back([&]() {
+                    // An exception leaving a thread calls std::terminate,
+                    // so report failure through a flag instead
+                    try {
+                        for (int j = 0; j < allocations_per_thread; ++j) {
+                            auto memory = context::allocateMemory(512);
+                            if (memory) {
+                                success_count++;
+                            }
+                        }
+                    } catch (...) {
+                        worker_failed = true;
+                    }
+                });
+            }
+
+            for (auto &thread : threads) {
+                thread.join();
+            }
+
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+
+            const int total_allocations = num_threads * allocations_per_thread;
+            if (worker_failed || success_count != total_allocations) {
+                std::cerr << "Concurrent allocations succeeded: " << success_count << " of " << total_allocations << std::endl;
+                return false;
+            }
+
+            double avg_time_per_allocation = static_cast<double>(duration.count()) / total_allocations;
+            std::cout << "Concurrent allocation time: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+
+            // Performance threshold: should be under 200μs per allocation under concurrent load
+            if (avg_time_per_allocation > 200.0) {
+                std::cerr << "Concurrent allocation performance too slow: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Concurrent performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Copies a 1 MiB buffer 100 times with memcpyD2D and fails when the measured
+// bandwidth is below 100 MB/s.
+TestResult PerformanceTest::testMemoryCopyPerformance() {
+    return measureTime("MemoryCopyPerformance", [this]() -> bool {
+        try {
+            const size_t data_size = 1024 * 1024; // 1MB
+            const int num_copies = 100;
+
+            // Allocate source and destination memory
+            auto src_memory = context::allocateMemory(data_size);
+            auto dst_memory = context::allocateMemory(data_size);
+
+            if (!src_memory || !dst_memory) {
+                std::cerr << "Failed to allocate memory for copy test" << std::endl;
+                return
false;
+            }
+
+            // Initialize source data
+            // NOTE(review): memset writes through the raw pointer; this assumes
+            // data() is host-accessible on the active device - confirm
+            std::memset(src_memory->data(), 0xAB, data_size);
+
+            auto start = std::chrono::high_resolution_clock::now();
+
+            // Perform memory copies
+            for (int i = 0; i < num_copies; ++i) {
+                context::memcpyD2D(dst_memory->data(), src_memory->data(), data_size);
+            }
+
+            // Synchronize to ensure copies complete
+            context::syncDevice();
+
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+
+            // Clamp to 1μs so a run that rounds down to zero cannot divide by zero
+            const double elapsed_us = std::max(1.0, static_cast<double>(duration.count()));
+            double avg_time_per_copy = elapsed_us / num_copies;
+            double bandwidth = (data_size * num_copies) / (elapsed_us / 1e6) / (1024 * 1024); // MB/s
+
+            std::cout << "Average copy time: " << avg_time_per_copy << "μs" << std::endl;
+            std::cout << "Memory bandwidth: " << bandwidth << " MB/s" << std::endl;
+
+            // Performance threshold: should achieve at least 100 MB/s
+            if (bandwidth < 100.0) {
+                std::cerr << "Memory copy performance too slow: " << bandwidth << " MB/s" << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Memory copy performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Stress Test Implementation
+
+// Runs the three stress sub-tests in order and stops at the first one that
+// fails, printing that sub-test's error message.
+TestResult StressTest::run() {
+    return measureTime("StressTest", [this]() -> bool {
+        try {
+            auto result1 = testHighFrequencyAllocations();
+            if (!result1.passed) {
+                std::cerr << "High frequency allocations test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+
+            auto result2 = testLargeMemoryAllocations();
+            if (!result2.passed) {
+                std::cerr << "Large memory allocations test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+
+            auto result3 = testCrossDeviceStress();
+            if (!result3.passed) {
+                std::cerr << "Cross-device stress test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "StressTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Performs 100000 allocations of 64..1087 bytes and, every 1000 iterations,
+// releases up to 100 of the oldest blocks so allocation and deallocation
+// interleave. Passes as long as no exception is thrown.
+TestResult StressTest::testHighFrequencyAllocations() {
+    return measureTime("HighFrequencyAllocations", [this]() -> bool {
+        try {
+            const int num_allocations = 100000;
+            std::vector<std::shared_ptr<Memory>> memories;
+            memories.reserve(num_allocations);
+
+            auto start = std::chrono::high_resolution_clock::now();
+
+            for (int i = 0; i < num_allocations; ++i) {
+                size_t size = 64 + (i % 1024);
+                auto memory = context::allocateMemory(size);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+
+                // Periodically deallocate some memory to test alloc/dealloc stress
+                if (i % 1000 == 0 && !memories.empty()) {
+                    memories.erase(memories.begin(), memories.begin() + std::min(100, static_cast<int>(memories.size())));
+                }
+            }
+
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+
+            std::cout << "High frequency allocations completed: " << num_allocations
+                      << " allocations in " << duration.count() << "ms" << std::endl;
+
+            // Clear remaining memory
+            memories.clear();
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "High frequency allocations test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Tries to hold up to ten 100 MB blocks at once. An allocation that throws
+// is treated as the expected memory limit and ends the loop without failing
+// the test.
+TestResult StressTest::testLargeMemoryAllocations() {
+    return measureTime("LargeMemoryAllocations", [this]() -> bool {
+        try {
+            const size_t large_size = 100 * 1024 * 1024; // 100MB
+            const int num_allocations = 10;
+
+            std::vector<std::shared_ptr<Memory>> memories;
+
+            for (int i = 0; i < num_allocations; ++i) {
+                try {
+                    auto memory = context::allocateMemory(large_size);
+                    if (memory) {
+                        memories.push_back(memory);
+                        std::cout << "Allocated " << large_size / (1024 * 1024) << "MB memory block " << i + 1 << std::endl;
+                    }
+                } catch (const std::exception &e) {
+                    std::cout << "Large allocation " << i + 1 << " failed (expected): " << e.what() << std::endl;
+                    break; // Expected to fail at some point due to memory limits
+                }
+            }
+
+            std::cout << "Successfully allocated " << memories.size() << " large memory
blocks" << std::endl;
+
+            // Clear memory
+            memories.clear();
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Large memory allocations test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+// Enumerates every device of every type, then performs 1000 pinned-host
+// allocations while cycling through the devices, releasing up to 10 of the
+// oldest blocks every 100 iterations. Skipped (reported as passed) when
+// fewer than two devices are present.
+TestResult StressTest::testCrossDeviceStress() {
+    return measureTime("CrossDeviceStress", [this]() -> bool {
+        try {
+            // Get available devices
+            std::vector<Device> devices;
+            for (int type = 0; type < static_cast<int>(Device::Type::COUNT); ++type) {
+                size_t count = context::getDeviceCount(static_cast<Device::Type>(type));
+                for (size_t i = 0; i < count; ++i) {
+                    devices.emplace_back(static_cast<Device::Type>(type), i);
+                }
+            }
+
+            if (devices.size() < 2) {
+                std::cout << "Skipping cross-device stress test - need at least 2 devices" << std::endl;
+                return true;
+            }
+
+            const int num_operations = 1000;
+            std::vector<std::shared_ptr<Memory>> pinned_memories;
+
+            for (int i = 0; i < num_operations; ++i) {
+                // Cycle through the devices round-robin (deterministic, not random)
+                Device target_device = devices[i % devices.size()];
+                context::setDevice(target_device);
+
+                // Allocate pinned memory
+                auto pinned_memory = context::allocatePinnedHostMemory(1024);
+                if (pinned_memory) {
+                    pinned_memories.push_back(pinned_memory);
+                }
+
+                // Periodically deallocate some memory
+                if (i % 100 == 0 && !pinned_memories.empty()) {
+                    pinned_memories.erase(pinned_memories.begin(),
+                                          pinned_memories.begin() + std::min(10, static_cast<int>(pinned_memories.size())));
+                }
+            }
+
+            std::cout << "Cross-device stress test completed: " << num_operations
+                      << " operations across " << devices.size() << " devices" << std::endl;
+
+            // Clear remaining memory
+            pinned_memories.clear();
+
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Cross-device stress test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+
+} // namespace infinicore::test
diff --git a/src/infinicore-test/memory_test.h b/src/infinicore-test/memory_test.h
new file mode 100644
index 000000000..cd9692066
--- /dev/null
+++ b/src/infinicore-test/memory_test.h
@@ -0,0 +1,276 @@
+#ifndef __INFINICORE_MEMORY_TEST_H__
+#define __INFINICORE_MEMORY_TEST_H__
+
+#include "../infinicore/context/allocators/memory_allocator.hpp"
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace infinicore::test {
+
+// Outcome of one test: its name, pass/fail flag, an optional error message
+// and the measured wall-clock duration in microseconds.
+struct TestResult {
+    std::string test_name;
+    bool passed;
+    std::string error_message;
+    std::chrono::microseconds duration;
+
+    TestResult(const std::string &name, bool pass, const std::string &error = "",
+               std::chrono::microseconds dur = std::chrono::microseconds(0))
+        : test_name(name), passed(pass), error_message(error), duration(dur) {}
+};
+
+// Base class of every test category. Derived classes implement run() and
+// getName(); the protected helpers provide logging and timing.
+class MemoryTestFramework {
+public:
+    virtual ~MemoryTestFramework() = default;
+    virtual TestResult run() = 0;
+    virtual std::string getName() const = 0;
+
+protected:
+    void logTestStart(const std::string &test_name) {
+        std::cout << "[TEST] Starting: " << test_name << std::endl;
+    }
+
+    void logTestResult(const TestResult &result) {
+        std::cout << "[TEST] " << (result.passed ? "PASSED" : "FAILED")
+                  << ": " << result.test_name;
+        if (!result.passed && !result.error_message.empty()) {
+            std::cout << " - " << result.error_message;
+        }
+        std::cout << " (Duration: " << result.duration.count() << "μs)" << std::endl;
+    }
+
+    // Invokes func (a callable returning bool) and wraps its result and
+    // elapsed time in a TestResult. A std::exception thrown by func is
+    // turned into a failed result carrying e.what(); other exception types
+    // propagate to the caller.
+    template <typename Func>
+    TestResult measureTime(const std::string &test_name, Func &&func) {
+        auto start = std::chrono::high_resolution_clock::now();
+        try {
+            bool result = func();
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            return TestResult(test_name, result, "", duration);
+        } catch (const std::exception &e) {
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            return TestResult(test_name, false, e.what(), duration);
+        }
+    }
+};
+
+// Mock allocator for testing exception safety. It can be configured to
+// throw on every allocation or once max_allocations has been reached;
+// otherwise it forwards to std::malloc / std::free.
+class MockAllocator : public infinicore::MemoryAllocator {
+public:
+    MockAllocator(bool should_throw = false, size_t max_allocations = SIZE_MAX)
+        : should_throw_(should_throw), max_allocations_(max_allocations),
+          allocation_count_(0), total_allocated_(0) {}
+
+    std::byte *allocate(size_t size) override {
+        if (should_throw_) {
+            throw std::runtime_error("Mock allocation failure");
+        }
+        if (allocation_count_ >= max_allocations_) {
+            throw std::runtime_error("Mock allocation limit exceeded");
+        }
+        auto *ptr = static_cast<std::byte *>(std::malloc(size));
+        // Count only allocations that actually produced a block
+        if (ptr != nullptr) {
+            allocation_count_++;
+            total_allocated_ += size;
+        }
+        return ptr;
+    }
+
+    void deallocate(std::byte *ptr) override {
+        if (ptr) {
+            std::free(ptr);
+        }
+    }
+
+    size_t getAllocationCount() const { return allocation_count_; }
+    size_t getTotalAllocated() const { return total_allocated_; }
+
+private:
+    bool should_throw_;
+    size_t max_allocations_;
+    std::atomic<size_t> allocation_count_;
+    std::atomic<size_t> total_allocated_;
+};
+
+// Memory leak detector: a process-wide singleton that tracks live
+// allocations by pointer.
+class MemoryLeakDetector {
+public:
+    static MemoryLeakDetector &instance() {
+        static MemoryLeakDetector detector;
+        return detector;
+    }
+
+    void
recordAllocation(void *ptr, size_t size) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        // A pointer recorded twice replaces its previous size instead of
+        // being counted twice in the running total
+        auto it = allocations_.find(ptr);
+        if (it != allocations_.end()) {
+            total_allocated_ -= it->second;
+            it->second = size;
+        } else {
+            allocations_.emplace(ptr, size);
+        }
+        total_allocated_ += size;
+    }
+
+    // Forgets ptr and subtracts its recorded size; unknown pointers are ignored.
+    void recordDeallocation(void *ptr) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        auto it = allocations_.find(ptr);
+        if (it != allocations_.end()) {
+            total_allocated_ -= it->second;
+            allocations_.erase(it);
+        }
+    }
+
+    // Total bytes of all allocations that are still recorded.
+    size_t getLeakedMemory() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return total_allocated_;
+    }
+
+    // Number of allocations that are still recorded.
+    size_t getLeakCount() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return allocations_.size();
+    }
+
+    // Drops every record and zeroes the running total.
+    void reset() {
+        std::lock_guard<std::mutex> lock(mutex_);
+        allocations_.clear();
+        total_allocated_ = 0;
+    }
+
+private:
+    mutable std::mutex mutex_;                      // guards both members below
+    std::unordered_map<void *, size_t> allocations_; // live pointer -> size in bytes
+    size_t total_allocated_ = 0;                    // sum of the sizes in allocations_
+};
+
+// Test categories
+class BasicMemoryTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "BasicMemoryTest"; }
+};
+
+class ConcurrencyTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "ConcurrencyTest"; }
+
+private:
+    TestResult testConcurrentAllocations();
+    TestResult testConcurrentDeviceSwitching();
+    TestResult testMemoryAllocationRace();
+};
+
+class ExceptionSafetyTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "ExceptionSafetyTest"; }
+
+private:
+    TestResult testAllocationFailure();
+    TestResult testDeallocationException();
+    TestResult testContextSwitchException();
+};
+
+class MemoryLeakTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "MemoryLeakTest"; }
+
+private:
+    TestResult testBasicLeakDetection();
+    TestResult testCrossDeviceLeakDetection();
+    TestResult testExceptionLeakDetection();
+};
+
+class PerformanceTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const
override { return "PerformanceTest"; }
+
+private:
+    TestResult testAllocationPerformance();
+    TestResult testConcurrentPerformance();
+    TestResult testMemoryCopyPerformance();
+};
+
+class StressTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "StressTest"; }
+
+private:
+    TestResult testHighFrequencyAllocations();
+    TestResult testLargeMemoryAllocations();
+    TestResult testCrossDeviceStress();
+};
+
+// Test runner: owns the registered tests, runs them in registration order
+// and prints a pass/fail summary.
+class MemoryTestRunner {
+public:
+    // Takes ownership of test and appends it to the run list.
+    void addTest(std::unique_ptr<MemoryTestFramework> test) {
+        tests_.push_back(std::move(test));
+    }
+
+    // Runs every registered test, prints the summary and returns one
+    // TestResult per test.
+    std::vector<TestResult> runAllTests() {
+        std::vector<TestResult> results;
+
+        std::cout << "==============================================\n"
+                  << "InfiniCore Memory Management Test Suite\n"
+                  << "==============================================" << std::endl;
+
+        for (auto &test : tests_) {
+            logTestStart(test->getName());
+            TestResult result = test->run();
+            logTestResult(result);
+            results.push_back(result);
+        }
+
+        printSummary(results);
+        return results;
+    }
+
+private:
+    std::vector<std::unique_ptr<MemoryTestFramework>> tests_;
+
+    void logTestStart(const std::string &test_name) {
+        std::cout << "\n[SUITE] Running: " << test_name << std::endl;
+    }
+
+    void logTestResult(const TestResult &result) {
+        std::cout << "[SUITE] " << (result.passed ? "PASSED" : "FAILED")
+                  << ": " << result.test_name << std::endl;
+    }
+
+    // Prints pass/fail counts and the summed duration of all results.
+    void printSummary(const std::vector<TestResult> &results) {
+        size_t passed = 0, failed = 0;
+        std::chrono::microseconds total_time(0);
+
+        for (const auto &result : results) {
+            if (result.passed) {
+                passed++;
+            } else {
+                failed++;
+            }
+            total_time += result.duration;
+        }
+
+        std::cout << "\n==============================================\n"
+                  << "Test Summary\n"
+                  << "==============================================\n"
+                  << "Total Tests: " << results.size() << "\n"
+                  << "Passed: " << passed << "\n"
+                  << "Failed: " << failed << "\n"
+                  << "Total Time: " << total_time.count() << "μs\n"
+                  << "==============================================" << std::endl;
+    }
+};
+
+} // namespace infinicore::test
+
+#endif // __INFINICORE_MEMORY_TEST_H__
diff --git a/src/infinicore-test/test_tensor_destructor.cc b/src/infinicore-test/test_tensor_destructor.cc
new file mode 100644
index 000000000..f61ddc169
--- /dev/null
+++ b/src/infinicore-test/test_tensor_destructor.cc
@@ -0,0 +1,303 @@
+#include "test_tensor_destructor.h"
+
+namespace infinicore::test {
+
+// Test 1: Basic tensor creation and destruction
+TestResult TensorDestructorTest::testBasicTensorDestruction() {
+    return measureTime("BasicTensorDestruction", [this]() {
+        {
+            // Create a tensor in a scope to test automatic destruction
+            auto tensor = Tensor::empty({2, 3}, DataType::F32, Device::Type::CPU);
+
+            // Verify tensor was created successfully
+            if (!tensor.operator->()) {
+                return false;
+            }
+            if (tensor->shape().size() != 2) {
+                return false;
+            }
+            if (tensor->shape()[0] != 2) {
+                return false;
+            }
+            if (tensor->shape()[1] != 3) {
+                return false;
+            }
+            if (tensor->dtype() != DataType::F32) {
+                return false;
+            }
+
+            std::cout << "Tensor created successfully with shape: ";
+            for (auto dim : tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        // Tensor should be destroyed when it goes out of scope
+        // This should trigger the
TensorMetaData destructor
+        std::cout << "Tensor destroyed successfully - destructor called" << std::endl;
+        return true;
+    });
+}
+
+// Test 2: Multiple tensor creation and destruction
+// Holds four CPU tensors of different shapes and dtypes in one vector and
+// lets the vector's destruction release them all.
+TestResult TensorDestructorTest::testMultipleTensorDestruction() {
+    return measureTime("MultipleTensorDestruction", [this]() {
+        std::vector<Tensor> tensors;
+
+        // Create multiple tensors with different shapes and types
+        tensors.push_back(Tensor::empty({1, 2, 3}, DataType::F32, Device::Type::CPU));
+        tensors.push_back(Tensor::empty({4, 5}, DataType::F64, Device::Type::CPU));
+        tensors.push_back(Tensor::zeros({2, 2, 2}, DataType::I32, Device::Type::CPU));
+        tensors.push_back(Tensor::ones({3, 4}, DataType::F16, Device::Type::CPU));
+
+        // Verify all tensors were created
+        if (tensors.size() != 4) {
+            return false;
+        }
+        for (size_t i = 0; i < tensors.size(); ++i) {
+            if (!tensors[i].operator->()) {
+                return false;
+            }
+            std::cout << "Tensor " << i << " created with shape: ";
+            for (auto dim : tensors[i]->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        std::cout << "All " << tensors.size() << " tensors created successfully" << std::endl;
+
+        // All tensors will be destroyed when the vector goes out of scope
+        // This should trigger all TensorMetaData destructors
+        return true;
+    });
+}
+
+// Test 3: Different data types
+// Creates and immediately destroys one 2x2 CPU tensor per listed dtype,
+// checking that the tensor reports the dtype it was created with.
+TestResult TensorDestructorTest::testDifferentDataTypes() {
+    return measureTime("DifferentDataTypes", [this]() {
+        std::vector<std::pair<DataType, std::string>> data_types = {
+            {DataType::F32, "F32"},
+            {DataType::F64, "F64"},
+            {DataType::F16, "F16"},
+            {DataType::I32, "I32"},
+            {DataType::I64, "I64"},
+            {DataType::I8, "I8"},
+            {DataType::U8, "U8"},
+            {DataType::BOOL, "BOOL"}};
+
+        for (const auto &[dtype, name] : data_types) {
+            {
+                auto tensor = Tensor::empty({2, 2}, dtype, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+                if (tensor->dtype() != dtype) {
+                    return false;
+                }
+                std::cout << "Created tensor with data type: " << name << std::endl;
+            }
+
std::cout << "Destroyed tensor with data type: " << name << std::endl;
+        }
+
+        return true;
+    });
+}
+
+// Test 4: Different shapes
+// Creates and immediately destroys one F32 CPU tensor per listed shape,
+// checking that the tensor reports the shape it was created with.
+TestResult TensorDestructorTest::testDifferentShapes() {
+    return measureTime("DifferentShapes", [this]() {
+        std::vector<Shape> shapes = {
+            {1},             // 1D
+            {2, 3},          // 2D
+            {4, 5, 6},       // 3D
+            {1, 2, 3, 4},    // 4D
+            {2, 3, 4, 5, 6}, // 5D
+            {1000},          // Large 1D
+            {100, 100},      // Large 2D
+            {10, 10, 10, 10} // Large 4D
+        };
+
+        for (const auto &shape : shapes) {
+            {
+                auto tensor = Tensor::empty(shape, DataType::F32, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+                if (tensor->shape() != shape) {
+                    return false;
+                }
+                std::cout << "Created tensor with shape: ";
+                for (auto dim : shape) {
+                    std::cout << dim << " ";
+                }
+                std::cout << std::endl;
+            }
+            std::cout << "Destroyed tensor with shape: ";
+            for (auto dim : shape) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        return true;
+    });
+}
+
+// Test 5: Tensor from blob
+// Wraps a caller-owned float buffer in a 2x3 tensor; the buffer is declared
+// outside the inner scope, so it outlives the tensor.
+TestResult TensorDestructorTest::testTensorFromBlob() {
+    return measureTime("TensorFromBlob", [this]() {
+        // Create a blob of data
+        std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+        {
+            // Create tensor from blob
+            auto tensor = Tensor::from_blob(data.data(), {2, 3}, DataType::F32, Device::Type::CPU);
+            if (!tensor.operator->()) {
+                return false;
+            }
+            if (tensor->shape() != Shape({2, 3})) {
+                return false;
+            }
+            if (tensor->dtype() != DataType::F32) {
+                return false;
+            }
+
+            std::cout << "Created tensor from blob with shape: ";
+            for (auto dim : tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        std::cout << "Destroyed tensor from blob successfully" << std::endl;
+        return true;
+    });
+}
+
+// Test 6: Strided tensor
+TestResult TensorDestructorTest::testStridedTensor() {
+    return measureTime("StridedTensor", [this]() {
+        {
+            // Create a strided tensor
+            auto tensor = Tensor::empty({4, 4}, DataType::F32, Device::Type::CPU);
+            if
(!tensor.operator->()) {
+                return false;
+            }
+
+            // Create a narrowed view
+            std::vector<TensorSliceParams> slices = {
+                {0, 0, 2}, // dimension 0: start at 0, length 2
+                {1, 0, 2}  // dimension 1: start at 0, length 2
+            };
+            auto strided_tensor = tensor->narrow(slices);
+            if (!strided_tensor.operator->()) {
+                return false;
+            }
+
+            std::cout << "Created strided tensor with shape: ";
+            for (auto dim : strided_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        std::cout << "Destroyed strided tensor successfully" << std::endl;
+        return true;
+    });
+}
+
+// Test 7: Memory leak detection
+// Creates and destroys 100 tensors and compares the detector's live
+// allocation count before and after.
+TestResult TensorDestructorTest::testMemoryLeakDetection() {
+    return measureTime("MemoryLeakDetection", [this]() {
+        // Reset memory leak detector
+        MemoryLeakDetector::instance().reset();
+
+        // reset() has just emptied the detector, so this baseline is 0
+        size_t initial_leaks = MemoryLeakDetector::instance().getLeakCount();
+
+        // Create and destroy many tensors
+        for (int i = 0; i < 100; ++i) {
+            {
+                auto tensor = Tensor::empty({10, 10}, DataType::F32, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+            }
+        }
+
+        size_t final_leaks = MemoryLeakDetector::instance().getLeakCount();
+
+        std::cout << "Initial leaks: " << initial_leaks << std::endl;
+        std::cout << "Final leaks: " << final_leaks << std::endl;
+
+        // Should not have more leaks than we started with
+        return final_leaks <= initial_leaks;
+    });
+}
+
+// Test 8: Tensor copy destruction
+TestResult TensorDestructorTest::testTensorCopyDestruction() {
+    return measureTime("TensorCopyDestruction", [this]() {
+        {
+            auto original_tensor = Tensor::empty({3, 3}, DataType::F32, Device::Type::CPU);
+            if (!original_tensor.operator->()) {
+                return false;
+            }
+
+            // Create a copy (this is copy construction, not assignment)
+            auto copied_tensor = original_tensor;
+            if (!copied_tensor.operator->()) {
+                return false;
+            }
+
+            std::cout << "Created original and copied tensors" << std::endl;
+            std::cout << "Original tensor shape: ";
+            for (auto dim : original_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+            std::cout << "Copied tensor shape: ";
+            for (auto dim : copied_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        std::cout << "Destroyed original and copied tensors successfully" << std::endl;
+        return true;
+    });
+}
+
+// Main test runner
+// Runs all eight sub-tests (none is skipped after a failure) and folds them
+// into one result: it passes only if every sub-test passed, names the
+// failing sub-tests in the error message, and carries the summed duration.
+TestResult TensorDestructorTest::run() {
+    std::vector<TestResult> results;
+
+    std::cout << "==============================================\n"
+              << "Tensor Destructor Test Suite\n"
+              << "==============================================" << std::endl;
+
+    // Run all tests
+    results.push_back(testBasicTensorDestruction());
+    results.push_back(testMultipleTensorDestruction());
+    results.push_back(testDifferentDataTypes());
+    results.push_back(testDifferentShapes());
+    results.push_back(testTensorFromBlob());
+    results.push_back(testStridedTensor());
+    results.push_back(testMemoryLeakDetection());
+    results.push_back(testTensorCopyDestruction());
+
+    // Aggregate: collect the names of failing sub-tests and total the time
+    bool all_passed = true;
+    std::string failed_names;
+    std::chrono::microseconds total_duration(0);
+    for (const auto &result : results) {
+        total_duration += result.duration;
+        if (!result.passed) {
+            all_passed = false;
+            failed_names += (failed_names.empty() ? "" : ", ") + result.test_name;
+        }
+    }
+
+    return TestResult("TensorDestructorTest", all_passed,
+                      all_passed ? "" : "Some tensor destructor tests failed: " + failed_names,
+                      total_duration);
+}
+
+} // namespace infinicore::test
diff --git a/src/infinicore-test/test_tensor_destructor.h b/src/infinicore-test/test_tensor_destructor.h
new file mode 100644
index 000000000..a453b1a54
--- /dev/null
+++ b/src/infinicore-test/test_tensor_destructor.h
@@ -0,0 +1,31 @@
+#ifndef __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
+#define __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
+
+#include "infinicore/context/context.hpp"
+#include "infinicore/tensor.hpp"
+#include "memory_test.h"
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace infinicore::test {
+
+class TensorDestructorTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "TensorDestructorTest"; }
+
+private:
+    TestResult testBasicTensorDestruction();
+    TestResult testMultipleTensorDestruction();
+    TestResult testDifferentDataTypes();
+    TestResult testDifferentShapes();
+    TestResult testTensorFromBlob();
+    TestResult testStridedTensor();
+    TestResult testMemoryLeakDetection();
+    TestResult testTensorCopyDestruction();
+};
+
+} // namespace infinicore::test
+
+#endif // __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
diff --git a/src/infinicore/context/runtime/runtime.cc b/src/infinicore/context/runtime/runtime.cc
index 8a7c37b0a..1f192011d 100644
--- a/src/infinicore/context/runtime/runtime.cc
+++ b/src/infinicore/context/runtime/runtime.cc
@@ -63,6 +63,10 @@ std::shared_ptr<Memory> Runtime::allocateMemory(size_t size) {
 }
 
 std::shared_ptr<Memory> Runtime::allocatePinnedHostMemory(size_t size) {
+    if (!pinned_host_memory_allocator_) {
+        spdlog::warn("For CPU devices, pinned memory is not supported, falling back to regular host memory");
+        return allocateMemory(size);
+    }
     std::byte *data_ptr = pinned_host_memory_allocator_->allocate(size);
     return std::make_shared<Memory>(
         data_ptr, size, device_,
diff --git a/src/infinicore/device.cc b/src/infinicore/device.cc
index 1553f48c9..3430327c1 100644
--- a/src/infinicore/device.cc
+++
b/src/infinicore/device.cc
@@ -39,10 +39,11 @@ std::string Device::toString(const Type &type) {
         return "KUNLUN";
     case Type::HYGON:
         return "HYGON";
+    case Type::COUNT:
+        return "COUNT";
+    default:
+        return "UNKNOWN";
     }
-
-    // TODO: Add error handling.
-    return "";
 }
 
 bool Device::operator==(const Device &other) const {
diff --git a/src/infinicore/tensor/tensor.cc b/src/infinicore/tensor/tensor.cc
index f1cce6e42..5454bb8e4 100644
--- a/src/infinicore/tensor/tensor.cc
+++ b/src/infinicore/tensor/tensor.cc
@@ -65,6 +65,13 @@ TensorMetaData::TensorMetaData(const Shape &_shape, const Strides &_strides, con
     INFINICORE_CHECK_ERROR(infiniopCreateTensorDescriptor(&desc, shape.size(), shape.data(), strides.data(), (infiniDtype_t)dtype));
 }
 
+TensorMetaData::~TensorMetaData() {
+    if (desc) {
+        infiniopDestroyTensorDescriptor(desc);
+        desc = nullptr;
+    }
+}
+
 TensorImpl::TensorImpl(const Shape &shape, const DataType &dtype)
     : meta_(TensorMetaData(shape, calculate_contiguous_strides(shape), dtype)) {}
 
diff --git a/xmake.lua b/xmake.lua
index e517aeec2..10c47aa69 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -298,13 +298,14 @@ target("infiniccl")
     if has_config("moore-gpu") then
         add_deps("infiniccl-moore")
     end
+
     if has_config("kunlun-xpu") then
         add_deps("infiniccl-kunlun")
     end
     if has_config("hygon-dcu") then
         add_deps("infiniccl-hygon")
     end
-    
+
     set_languages("cxx17")
 
     add_files("src/infiniccl/*.cc")
diff --git a/xmake/test.lua b/xmake/test.lua
index 92af965d6..550b9afe6 100644
--- a/xmake/test.lua
+++ b/xmake/test.lua
@@ -4,7 +4,7 @@ target("infiniutils-test")
 
     set_warnings("all", "error")
     set_languages("cxx17")
-    
+
     add_files(os.projectdir().."/src/utils-test/*.cc")
     set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
 target_end()
@@ -18,7 +18,7 @@ target("infiniop-test")
 
     set_languages("cxx17")
     set_warnings("all", "error")
-    
+
     add_includedirs(INFINI_ROOT.."/include")
     add_linkdirs(INFINI_ROOT.."/lib")
     add_links("infiniop", "infinirt")
@@ -27,7 +27,7 @@ target("infiniop-test")
         add_cxflags("-fopenmp")
         add_ldflags("-fopenmp")
     end
-    
+
     add_includedirs(os.projectdir().."/src/infiniop-test/include")
     add_files(os.projectdir().."/src/infiniop-test/src/*.cpp")
     add_files(os.projectdir().."/src/infiniop-test/src/ops/*.cpp")
@@ -63,3 +63,31 @@ target("infinirt-test")
     add_files(os.projectdir().."/src/infinirt-test/*.cc")
     set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
 target_end()
+
+target("infinicore-test")
+    set_kind("binary")
+    add_deps("infiniop", "infinirt", "infiniccl")
+    set_default(false)
+
+    set_languages("cxx17")
+    set_warnings("all", "error")
+
+    local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
+    add_includedirs(INFINI_ROOT.."/include")
+    add_linkdirs(INFINI_ROOT.."/lib")
+    add_links("infiniop", "infinirt", "infiniccl")
+
+    -- Add spdlog support (anchored at the project root, like the paths below, since this script lives in xmake/)
+    add_includedirs(os.projectdir().."/third_party/spdlog/include")
+    add_defines("SPDLOG_ACTIVE_LEVEL=0")  -- Enable all log levels
+
+    add_files(os.projectdir().."/src/infinicore/*.cc")
+    add_files(os.projectdir().."/src/infinicore/context/*.cc")
+    add_files(os.projectdir().."/src/infinicore/context/*/*.cc")
+    add_files(os.projectdir().."/src/infinicore/tensor/*.cc")
+    add_files(os.projectdir().."/src/infinicore/op/*/*.cc")
+
+    add_files(os.projectdir().."/src/infinicore-test/*.cc")
+
+    set_installdir(INFINI_ROOT)
+target_end()