Skip to content

Commit 0517d43

Browse files
CongMa13 and Copilot authored
[CK TILE] remove dependency on std chrono (#3599)
* [CK TILE] remove dependency on std chrono * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent f3aafb9 commit 0517d43

3 files changed

Lines changed: 103 additions & 9 deletions

File tree

include/ck_tile/host.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "ck_tile/host/device_prop.hpp"
1212
#include "ck_tile/host/fill.hpp"
1313
#include "ck_tile/host/flush_icache.hpp"
14+
#include "ck_tile/host/high_res_cpu_clock.hpp"
1415
#include "ck_tile/host/hip_check_error.hpp"
1516
#include "ck_tile/host/host_tensor.hpp"
1617
#include "ck_tile/host/joinable_thread.hpp"
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
// SPDX-License-Identifier: MIT
3+
4+
#pragma once
5+
6+
#include <stdint.h>

// Platform headers are included here, at global scope, so that their declarations
// are not nested inside namespace ck_tile.
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#else
#include <time.h>
#endif
7+
8+
namespace ck_tile {

// A point in time stored as a signed nanosecond count. The origin depends on the clock
// high_res_now() uses on the current platform, so treat it as arbitrary and only compare
// or subtract values obtained from high_res_now().
struct timepoint_t
{
    // Zero by default so that a timepoint which was never assigned has a defined value.
    int64_t nanoseconds = 0;
};

// Platform-specific implementations of high_res_now(). The system headers they need are
// included at the top of this file, outside the namespace.
#if defined(_WIN32) || defined(_WIN64)
// Windows

// Returns the current value of the performance counter converted to nanoseconds.
static inline timepoint_t high_res_now()
{
    // Cache the performance counter frequency; it is constant for the system lifetime.
    static const LARGE_INTEGER frequency = []() {
        LARGE_INTEGER f;
        QueryPerformanceFrequency(&f);
        return f;
    }();

    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);

    // Split into whole seconds and leftover ticks so the conversion stays in exact integer
    // arithmetic. counter * 1e9 could overflow 64 bits, and a floating-point conversion is
    // inexact where long double is no wider than double (e.g. with MSVC). The remainder is
    // below ticks_per_sec, so remainder * 1e9 fits in int64_t for frequencies up to ~9.2 GHz.
    const int64_t ticks_per_sec = frequency.QuadPart;
    const int64_t whole_seconds = counter.QuadPart / ticks_per_sec;
    const int64_t leftover      = counter.QuadPart % ticks_per_sec;

    timepoint_t tp;
    tp.nanoseconds = whole_seconds * 1000000000LL + (leftover * 1000000000LL) / ticks_per_sec;
    return tp;
}

#elif defined(__linux__) || defined(__unix__) || defined(_POSIX_VERSION)
// Linux/Unix/POSIX

// Returns the current CLOCK_MONOTONIC reading in nanoseconds, or a zero timepoint if the
// clock could not be read.
static inline timepoint_t high_res_now()
{
    struct timespec ts = {};

    // Use CLOCK_MONOTONIC for consistent timing unaffected by system time changes
    // Use CLOCK_REALTIME if you need wall-clock time
    if(clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
    {
        // Do not build a timestamp from a timespec the call did not fill in.
        return timepoint_t{};
    }

    timepoint_t tp;
    tp.nanoseconds = static_cast<int64_t>(ts.tv_sec) * 1000000000LL + ts.tv_nsec;
    return tp;
}

#else
// Fallback for other platforms

// Returns time(nullptr) scaled to nanoseconds. The value only advances in whole-second
// steps, so short intervals measure as zero on this path.
static inline timepoint_t high_res_now()
{
    timepoint_t tp;
    tp.nanoseconds = static_cast<int64_t>(time(nullptr)) * 1000000000LL;
    return tp;
}

#endif

// Duration calculation functions. All of them compute end - start; the integer variants
// discard the fractional part of the requested unit (integer division).

// Elapsed nanoseconds from start to end.
static inline int64_t duration_ns(timepoint_t start, timepoint_t end)
{
    return end.nanoseconds - start.nanoseconds;
}

// Elapsed whole microseconds from start to end.
static inline int64_t duration_us(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000LL;
}

// Elapsed whole milliseconds from start to end.
static inline int64_t duration_ms(timepoint_t start, timepoint_t end)
{
    return (end.nanoseconds - start.nanoseconds) / 1000000LL;
}

// Elapsed seconds from start to end as a double, keeping the sub-second fraction.
static inline double duration_sec(timepoint_t start, timepoint_t end)
{
    return static_cast<double>(end.nanoseconds - start.nanoseconds) / 1000000000.0;
}

} // namespace ck_tile

include/ck_tile/host/timer.hpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
#include "ck_tile/core/config.hpp"
77
#include "ck_tile/host/hip_check_error.hpp"
8+
#include "ck_tile/host/high_res_cpu_clock.hpp"
89
#include <hip/hip_runtime.h>
910
#include <cstddef>
10-
#include <chrono>
1111

1212
namespace ck_tile {
1313

@@ -54,26 +54,24 @@ struct cpu_timer
5454
CK_TILE_HOST void start(const hipStream_t& s)
5555
{
5656
HIP_CHECK_ERROR(hipStreamSynchronize(s));
57-
start_tick = std::chrono::high_resolution_clock::now();
57+
start_tick = high_res_now();
5858
}
5959
// torch.utils.benchmark.Timer(), there is a sync inside each timer callback
6060
CK_TILE_HOST void stop(const hipStream_t& s)
6161
{
6262
HIP_CHECK_ERROR(hipStreamSynchronize(s));
63-
stop_tick = std::chrono::high_resolution_clock::now();
63+
stop_tick = high_res_now();
6464
}
6565
// return in ms
6666
CK_TILE_HOST float duration() const
{
    // Convert from the full nanosecond difference in floating point. Going through
    // duration_us() would drop the sub-microsecond part before scaling to milliseconds.
    return static_cast<float>(duration_sec(start_tick, stop_tick) * 1e3);
}
7371

7472
private:
75-
std::chrono::time_point<std::chrono::high_resolution_clock> start_tick;
76-
std::chrono::time_point<std::chrono::high_resolution_clock> stop_tick;
73+
timepoint_t start_tick;
74+
timepoint_t stop_tick;
7775
};
7876

7977
} // namespace ck_tile

0 commit comments

Comments
 (0)