Skip to content

Commit 75e4c28

Browse files
authored
Reduce allocs + improve benchmark table (#74)
- Reduce allocations by using `deleteat!` instead of `filter` to remove a `gRPCRequest` from `gRPCCURL`
- Format URL with `IOBuffer()` and `string()` instead of interpolation
- Format `grpc-timeout` with `string()` instead of interpolation
- Remove the need to check for / replace grpc/grpcs with http/https
- Update benchmark table format to be easier to understand
1 parent eb373de commit 75e4c28

5 files changed

Lines changed: 53 additions & 55 deletions

File tree

README.md

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,33 +22,33 @@ By default Julia 1.12 starts with just one thread. The closer to `@async` we get
2222
However, it is unlikely Julia will be used this way in the real world.
2323

2424
```
25-
╭──────────────────────────────────┬─────────────────┬─────────────┬──────────┬────────────┬──────────────┬─────────┬──────┬──────╮
26-
│ Benchmark │ N │ Memory │ Allocations │ Duration │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
27-
│ │ calls │ MiB │ │ s │ calls/s │ μs │ μs │ μs │ μs │
28-
├──────────────────────────────────┼─────────────────┼─────────────┼──────────┼────────────┼──────────────┼─────────┼──────┼──────┤
29-
│ workload_smol │ 94000 │ 3.7485110 │ 5.011875653 4.24771
30-
│ workload_32_224_224_uint8 │ 2800 │ 63.789230 │ 5.11 │ 548 │ 1826 │ 378.615982657
31-
│ workload_streaming_request │ 2566000 │ 0.61 │ 6615 │ 4.99514001 │ 2 │ 0.61 │ 1 │ 16
32-
│ workload_streaming_response │ 985000 │ 13.027721 │ 5.0197101 │ 5 │ 0.48 │ 4 │ 7
33-
│ workload_streaming_bidirectional │ 2568000 │ 1.98 │ 25503 │ 4.99514539 │ 2 │ 0.5 │ 1 │ 12
34-
╰──────────────────────────────────┴─────────────────┴─────────────┴──────────┴────────────┴──────────────┴─────────┴──────┴──────╯
25+
╭──────────────────────────────────┬─────────────┬────────────────┬────────────┬──────────────┬─────────┬──────┬──────╮
26+
│ Benchmark │ Avg Memory │ Avg Allocs │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
27+
│ │ KiB/message │ allocs/message │ calls/s │ μs │ μs │ μs │ μs │
28+
├──────────────────────────────────┼─────────────┼────────────────┼────────────┼──────────────┼─────────┼──────┼──────┤
29+
│ workload_smol │ 2.95 72.518424543.544866
30+
│ workload_32_224_224_uint8 │ 637.0 79.1 │ 548 │ 1826 │ 405.4816022730
31+
│ workload_streaming_request │ 0.61 │ 6.6508983 │ 2 │ 0.67 │ 1 │ 15
32+
│ workload_streaming_response │ 12.99 27.6194689 │ 5 │ 0.52 │ 4 │ 9
33+
│ workload_streaming_bidirectional │ 1.98 │ 25.5490718 │ 2 │ 0.59 │ 1 │ 13
34+
╰──────────────────────────────────┴─────────────┴────────────────┴────────────┴──────────────┴─────────┴──────┴──────╯
3535
```
3636

3737
### Real World: `julia -t auto`
3838

3939
Using more threads isn't great for async IO, but this is likely how most people will be using `gRPCClient.jl`.
4040

4141
```
42-
╭──────────────────────────────────┬─────────────────┬─────────────┬──────────┬────────────┬──────────────┬─────────┬──────┬──────╮
43-
│ Benchmark │ N │ Memory │ Allocations │ Duration │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
44-
│ │ calls │ MiB │ │ s │ calls/s │ μs │ μs │ μs │ μs │
45-
├──────────────────────────────────┼─────────────────┼─────────────┼──────────┼────────────┼──────────────┼─────────┼──────┼──────┤
46-
│ workload_smol │ 91000 │ 3.7585123 │ 5.031807955 │ 3.964867
47-
│ workload_32_224_224_uint8 │ 2900 │ 63.789188 │ 5.01579172897.8616141899
48-
│ workload_streaming_request │ 1841000 │ 0.896482 │ 4.99368669 │ 3 │ 1.35 │ 2 │ 21
49-
│ workload_streaming_response │ 330000 │ 13.0 │ 27838 │ 5.026577115 5.2 │ 6 │ 37
50-
│ workload_streaming_bidirectional │ 405000 │ 1.4825672 │ 5.0 80948 128.52362
51-
╰──────────────────────────────────┴─────────────────┴─────────────┴──────────┴────────────┴──────────────┴─────────┴──────┴──────╯
42+
╭──────────────────────────────────┬─────────────┬────────────────┬────────────┬──────────────┬─────────┬──────┬──────╮
43+
│ Benchmark │ Avg Memory │ Avg Allocs │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
44+
│ │ KiB/message │ allocs/message │ calls/s │ μs │ μs │ μs │ μs │
45+
├──────────────────────────────────┼─────────────┼────────────────┼────────────┼──────────────┼─────────┼──────┼──────┤
46+
│ workload_smol │ 2.95 72.51801456 │ 3.085064
47+
│ workload_32_224_224_uint8 │ 637.0 79.7567176299.0716281911
48+
│ workload_streaming_request │ 0.86 6.5341851 │ 3 │ 1.68 │ 2 │ 30
49+
│ workload_streaming_response │ 13.0 │ 27.764515165.12 │ 6 │ 33
50+
│ workload_streaming_bidirectional │ 1.41 25.6102072 106.23452
51+
╰──────────────────────────────────┴─────────────┴────────────────┴────────────┴──────────────┴─────────┴──────┴──────╯
5252
```
5353

5454
## Acknowledgement

docs/src/index.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,16 +156,16 @@ benchmark_table()
156156
```
157157

158158
```
159-
╭──────────────────────────────────┬─────────────────┬─────────────┬──────────┬────────────┬──────────────┬─────────┬──────┬──────╮
160-
│ Benchmark │ N │ Memory │ Allocations │ Duration │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
161-
│ │ calls │ MiB │ │ s │ calls/s │ μs │ μs │ μs │ μs │
162-
├──────────────────────────────────┼─────────────────┼─────────────┼──────────┼────────────┼──────────────┼─────────┼──────┼──────┤
163-
│ workload_smol │ 94000 │ 3.7485110 │ 5.011875653 4.24771
164-
│ workload_32_224_224_uint8 │ 2800 │ 63.789230 │ 5.115481826378.615982657
165-
│ workload_streaming_request │ 2566000 │ 0.616615 │ 4.9951400120.61116
166-
│ workload_streaming_response │ 985000 │ 13.0 │ 27721 │ 5.0197101 5 0.484 7
167-
│ workload_streaming_bidirectional │ 2568000 │ 1.9825503 │ 4.99514539 2 0.5112
168-
╰──────────────────────────────────┴─────────────────┴─────────────┴──────────┴────────────┴──────────────┴─────────┴──────┴──────╯
159+
╭──────────────────────────────────┬─────────────┬────────────────┬────────────┬──────────────┬─────────┬──────┬──────╮
160+
│ Benchmark │ Avg Memory │ Avg Allocs │ Throughput │ Avg duration │ Std-dev │ Min │ Max │
161+
│ │ KiB/message │ allocs/message │ calls/s │ μs │ μs │ μs │ μs │
162+
├──────────────────────────────────┼─────────────┼────────────────┼────────────┼──────────────┼─────────┼──────┼──────┤
163+
│ workload_smol │ 2.95 72.518014563.085064
164+
│ workload_32_224_224_uint8 │ 637.0 79.7567176299.0716281911
165+
│ workload_streaming_request │ 0.86 6.534185131.68230
166+
│ workload_streaming_response │ 13.0 │ 27.7 64515 16 5.12633
167+
│ workload_streaming_bidirectional │ 1.41 25.610207210 6.23452
168+
╰──────────────────────────────────┴─────────────┴────────────────┴────────────┴──────────────┴─────────┴──────┴──────╯
169169
```
170170

171171
### Stress Workloads

src/Curl.jl

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -138,21 +138,21 @@ end
138138
function grpc_timeout_header_val(timeout::Real)
139139
if round(Int, timeout) == timeout
140140
timeout_secs = round(Int64, timeout)
141-
return "$(timeout_secs)S"
141+
return "$(string(timeout_secs))S"
142142
end
143143
timeout *= 1000
144144
if round(Int, timeout) == timeout
145145
timeout_millisecs = round(Int64, timeout)
146-
return "$(timeout_millisecs)m"
146+
return "$(string(timeout_millisecs))m"
147147
end
148148
timeout *= 1000
149149
if round(Int, timeout) == timeout
150150
timeout_microsecs = round(Int64, timeout)
151-
return "$(timeout_microsecs)u"
151+
return "$(string(timeout_microsecs))u"
152152
end
153153
timeout *= 1000
154154
timeout_nanosecs = round(Int64, timeout)
155-
return "$(timeout_nanosecs)n"
155+
return "$(string(timeout_nanosecs))n"
156156
end
157157

158158
mutable struct gRPCRequest
@@ -238,22 +238,19 @@ mutable struct gRPCRequest
238238
# Uncomment this for debugging purposes
239239
# curl_easy_setopt(easy_handle, CURLOPT_VERBOSE, UInt32(1))
240240

241-
http_url = replace(url, "grpc://" => "http://")
242-
http_url = replace(http_url, "grpcs://" => "https://")
243-
244-
curl_easy_setopt(easy_handle, CURLOPT_URL, http_url)
241+
curl_easy_setopt(easy_handle, CURLOPT_URL, url)
245242
curl_easy_setopt(easy_handle, CURLOPT_TIMEOUT, deadline)
246243
curl_easy_setopt(easy_handle, CURLOPT_PIPEWAIT, Clong(1))
247244
curl_easy_setopt(easy_handle, CURLOPT_POST, Clong(1))
248245
curl_easy_setopt(easy_handle, CURLOPT_CUSTOMREQUEST, "POST")
249246

250-
if startswith(http_url, "http://")
247+
if startswith(url, "http://")
251248
curl_easy_setopt(
252249
easy_handle,
253250
CURLOPT_HTTP_VERSION,
254251
CURL_HTTP_VERSION_2_PRIOR_KNOWLEDGE,
255252
)
256-
elseif startswith(http_url, "https://")
253+
elseif startswith(url, "https://")
257254
curl_easy_setopt(easy_handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS)
258255
end
259256

@@ -275,7 +272,7 @@ mutable struct gRPCRequest
275272
easy_handle,
276273
grpc.multi,
277274
headers,
278-
http_url,
275+
url,
279276
request,
280277
0,
281278
response,
@@ -797,8 +794,9 @@ function check_multi_info(grpc::gRPCCURL)
797794
# The actual cleanup/notification happens here
798795
cleanup_request(grpc, req)
799796

800-
# Remove from the list of requests associated
801-
grpc.requests = filter(x -> x !== req, grpc.requests)
797+
# Remove the request from the list of associated requests (in-place, no new vector allocated)
798+
idx = findfirst(x -> x === req, grpc.requests)
799+
!isnothing(idx) && deleteat!(grpc.requests, idx)
802800
else
803801
@error("curl_multi_info_read: unknown message", message, maxlog = 1_000)
804802
end

src/gRPC.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,15 @@ end
8888

8989
function url(client::gRPCServiceClient)
9090
protocol = if client.secure
91-
"grpcs"
91+
"https"
9292
else
93-
"grpc"
93+
"http"
9494
end
95-
"$protocol://$(client.host):$(client.port)$(client.path)"
95+
96+
# "$protocol://$(client.host):$(client.port)$(client.path)"
97+
buffer = IOBuffer()
98+
write(buffer, protocol, "://", client.host, ":", string(client.port), client.path)
99+
String(take!(buffer))
96100
end
97101

98102

utils/gRPCClientUtils.jl/src/Benchmark.jl

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@ function perform_benchmark(f, N)
1515

1616
return [
1717
f,
18-
N_sample,
19-
mem,
20-
b.allocs,
21-
round(timing, digits = 2),
18+
1000*mem / N, # Avg Memory
19+
round(b.allocs / N, digits = 1), # Avg Allocs
2220
round(Int, N_sample/timing), # Throughput
2321
round(Int, mean(timings_us) / N), # Avg duration
2422
round(std(timings_us) / N, digits = 2),
@@ -33,17 +31,15 @@ function benchmark_table()
3331
column_labels = [
3432
[
3533
"Benchmark",
36-
"N",
37-
"Memory",
38-
"Allocations",
39-
"Duration",
34+
"Avg Memory",
35+
"Avg Allocs",
4036
"Throughput",
4137
"Avg duration",
4238
"Std-dev",
4339
"Min",
4440
"Max",
4541
],
46-
["", "calls", "MiB", "", "s", "calls/s", "μs", "μs", "μs", "μs"],
42+
["", "KiB/message", "allocs/message", "calls/s", "μs", "μs", "μs", "μs"],
4743
]
4844
all_benchmarks = [
4945
(workload_smol, 1_000),

0 commit comments

Comments
 (0)