Skip to content

Commit 63277bd

Browse files
committed
2026-02-27 23:04
1 parent 5bb83a8 commit 63277bd

2 files changed

Lines changed: 191 additions & 5 deletions

File tree

src/notes/writeups/2021-interview/checklist.rst

Lines changed: 120 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,18 +140,25 @@ TCP 四次挥手 |o|
140140
TCP Keepalive
141141
默认 7200s,用于回收 TCP 连接,并非给应用层使用。
142142

143-
UDP |_|
143+
UDP |o|
144144
-------
145145

146146
和 TCP 区别
147147
:TCP: 全双工,面向连接,可靠,一对一通信
148148
:UDP: 无连接,不可靠,可多播、广播
149149

150-
用于实时性要求高的场景(RTC/Real-Time Communication 场景)
151-
150+
UDP 的核心贡献:给 IP 层加了端口,让数据能区分不同的应用。其他一切交给上层。
152151

153-
select,epoll |_|
154-
-----------------
152+
常用于实时性要求高的场景:
153+
154+
- 实时通信(RTC/Real-Time Communication):视频通话、直播
155+
- HTTP3/QUIC
156+
- DNS
157+
- IoT
158+
- ...
159+
160+
select、epoll、io_uring |_|
161+
---------------------------
155162

156163
:zhwiki:`Select_(Unix)`
157164
- 是个单独的系统调用
@@ -164,6 +171,110 @@ select,epoll |_|
164171
- 连接数:API 上无限制
165172
- 边沿触发(异步推荐)、状态触发
166173

174+
:enwiki:`Io_uring`
175+
176+
io_uring |_|
177+
------------
178+
179+
.. seealso::
180+
181+
学习工作流程 :ghrepo:`shuveb/io_uring-by-example`,实践上推荐使用 :ghrepo:`axboe/liburing`。
182+
183+
在看 ``02_cat_uring`` 有些疑惑:
184+
185+
``IORING_FEAT_SINGLE_MMAP``
186+
在支持 ``IORING_FEAT_SINGLE_MMAP`` 的系统(Linux 5.4+)上,可以只用一次 mmap
187+
取到 sq 和 cq 的虚拟地址,用同一个 base 配合 ``p.{sq,cq}_off`` 即可,如下:
188+
189+
.. code-block:: c
190+
:caption: https://github.com/shuveb/io_uring-by-example/blob/master/02_cat_uring/main.c#L106-L198
191+
192+
struct io_uring_params p = {0};
193+
void *sq_ptr, *cq_ptr;
194+
195+
// ...
196+
ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
197+
// ...
198+
199+
sq_ptr = mmap(...)
200+
201+
if (p.features & IORING_FEAT_SINGLE_MMAP) {
202+
cq_ptr = sq_ptr;
203+
} else {
204+
cq_ptr = mmap(...)
205+
}
206+
207+
sring->head = sq_ptr + p.sq_off.head;
208+
// ...
209+
cring->head = cq_ptr + p.cq_off.head;
210+
// ...
211+
212+
那要是用户不知道这个 feature 依然 mmap 两次呢?试了一下不会出错。
213+
那之前初始化的 ``p.{sq,cq}_off`` 怎么就能适应这两种情况?
214+
215+
AI(DeepSeek、MiniMax M2.5)一番胡说八道,将信将疑地调查了一番,结论是:
216+
217+
1. 至少在支持 IORING_FEAT_SINGLE_MMAP 的系统上,sq 和 cq 的物理内存是连续的
218+
219+
.. code-block:: c
220+
:caption: https://github.com/torvalds/linux/blob/v5.14/fs/io_uring.c#L140
221+
222+
struct io_rings {
223+
struct io_uring sq, cq;
224+
// ...
225+
}
226+
227+
2. ``p.cq_off.head`` 并不是 ``head`` 相对于 ``struct io_uring`` 的偏移,
228+
而是相对于 ``struct io_rings`` 的偏移,那么 ``p.{sq,cq}_off`` 应该都
229+
*对应同一个base ptr*
230+
231+
.. code-block:: c
232+
:caption: https://github.com/torvalds/linux/blob/v5.14/fs/io_uring.c#L9712-L9722
233+
234+
p->sq_off.head = offsetof(struct io_rings, sq.head);
235+
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
236+
// ...
237+
p->cq_off.head = offsetof(struct io_rings, cq.head);
238+
p->cq_off.tail = offsetof(struct io_rings, cq.tail);
239+
240+
3. 在 ``IORING_FEAT_SINGLE_MMAP`` 情况下会有 ``cq_ptr = sq_ptr;``,没有问题,
241+
在两次 mmap 的情况呢?``{sq,cq}_ptr`` 两个不同的虚拟地址其实会指向同一个物理地址,
242+
从 ``io_uring_mmap → io_uring_validate_mmap_request`` 可见:
243+
244+
.. code-block:: c
245+
:caption: https://github.com/torvalds/linux/blob/v5.14/fs/io_uring.c#L9216-L9241
246+
247+
switch (offset) {
248+
case IORING_OFF_SQ_RING:
249+
case IORING_OFF_CQ_RING:
250+
ptr = ctx->rings;
251+
break;
252+
// ...
253+
}
254+
// ...
255+
return ptr;
256+
257+
让 AI 写点代码验证一下:
258+
259+
.. dropdown:: ``io_uring_single_mmap.c``
260+
261+
.. literalinclude:: ./io_uring_single_mmap.c
262+
:language: c
263+
264+
.. code-block:: console
265+
266+
$ gcc io_uring_single_mmap.c
267+
# ./a.out
268+
sq_ptr = 0x7f492a741000 -> phys = 0x000000028173e000
269+
cq_ptr = 0x7f492a740000 -> phys = 0x000000028173e000
270+
have same phys addr? true
271+
*sq_ptr = 0
272+
*cq_ptr = 0
273+
write 0x1234 to sq_ptr, but not cq_ptr
274+
*sq_ptr = 1234
275+
*cq_ptr = 1234
276+
277+
167278
Web
168279
---
169280

@@ -627,6 +738,10 @@ Huge Page |_|
627738

628739
讲一下操作系统死锁是如何发生的,以及如何解决死锁
629740

741+
mmap |_|
742+
--------
743+
744+
630745
Golang
631746
======
632747

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <unistd.h>
5+
#include <fcntl.h>
6+
#include <stdint.h>
7+
#include <inttypes.h>
8+
#include <sys/syscall.h>
9+
#include <sys/mman.h>
10+
#include <linux/io_uring.h>
11+
12+
/* Return the system page size in bytes, as reported by sysconf(). */
static inline uint64_t page_size(void) {
    long bytes = sysconf(_SC_PAGESIZE);

    return (uint64_t)bytes;
}
15+
16+
// Translate a virtual address of the calling process into a physical address
// by reading its entry from /proc/self/pagemap (one 64-bit entry per virtual
// page: bit 63 is the "page present" flag, bits 0-54 hold the page frame
// number).
// See also https://man7.org/linux/man-pages/man5/proc_pid_pagemap.5.html
//
// Returns the physical address, or 0 when no translation is available:
// pagemap could not be opened/read, the page is not present in RAM, or the
// kernel reported a zero PFN (per the man page above, PFNs are zeroed for
// callers without CAP_SYS_ADMIN since Linux 4.2).
uint64_t virt_to_phys(void *virt) {
    const uint64_t present_bit = 1ULL << 63;
    const uint64_t pfn_mask = (1ULL << 55) - 1;
    uint64_t addr = (uint64_t)(uintptr_t)virt, ps = page_size();

    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0) {
        perror("open /proc/self/pagemap failed");
        return 0;
    }

    uint64_t entry = 0;
    off_t offset = (off_t)((addr / ps) * sizeof entry);
    if (lseek(fd, offset, SEEK_SET) < 0) {
        perror("seek pagemap failed");
        close(fd);
        return 0;
    }

    ssize_t got = read(fd, &entry, sizeof entry);
    if (got < 0) {
        // Report before close(), which may overwrite errno.
        perror("read pagemap failed");
        close(fd);
        return 0;
    }
    close(fd);

    // The remaining failures do not set errno, so perror() would print a
    // misleading reason; describe them explicitly instead.
    if ((size_t)got != sizeof entry) {
        fprintf(stderr, "read pagemap failed: short read\n");
        return 0;
    }
    if (!(entry & present_bit)) {
        fprintf(stderr, "read pagemap failed: page not present\n");
        return 0;
    }

    uint64_t pfn = entry & pfn_mask;
    if (pfn == 0) {
        // A zero PFN is not a real frame; do not turn the in-page offset
        // into a bogus "physical address".
        fprintf(stderr, "read pagemap failed: PFN hidden (need CAP_SYS_ADMIN?)\n");
        return 0;
    }
    return pfn * ps + (addr % ps);
}
36+
37+
// Create a minimal io_uring instance (1 entry) and map its submission and
// completion rings with two separate mmap() calls.
//
// On success *sq and *cq hold the two mappings. On any failure both are set
// to NULL and a message is printed to stderr, so the caller can test the
// result instead of reading indeterminate pointers.
//
// NOTE: on success the ring fd is left open, as before; the mappings are
// never unmapped by this function.
void get_sq_cq_ptrs(void **sq, void **cq) {
    *sq = NULL;
    *cq = NULL;

    struct io_uring_params p = {0};
    int fd = (int)syscall(__NR_io_uring_setup, 1, &p);
    if (fd < 0) {
        perror("syscall io_uring_setup failed");
        return;
    }

    // Ring sizes are derived from the offsets the kernel filled in: the last
    // member of each ring plus the space for its entries.
    size_t ssz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
    size_t csz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);

    void *sq_map = mmap(NULL, ssz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
    if (sq_map == MAP_FAILED) {
        perror("mmap sq ring failed");
        close(fd);
        return;
    }

    void *cq_map = mmap(NULL, csz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
    if (cq_map == MAP_FAILED) {
        perror("mmap cq ring failed");
        munmap(sq_map, ssz);
        close(fd);
        return;
    }

    *sq = sq_map;
    *cq = cq_map;
}
51+
52+
// Demo: map the io_uring SQ and CQ rings separately, then show that both
// virtual addresses refer to the same memory, first by comparing their
// physical addresses and then by writing through one mapping and reading
// through the other.
int main(void) {
    // Start from NULL so a failed setup (which may leave the pointers
    // untouched) is detectable; a failed mmap() shows up as MAP_FAILED.
    void *sq = NULL, *cq = NULL;
    get_sq_cq_ptrs(&sq, &cq);
    if (sq == NULL || cq == NULL || sq == MAP_FAILED || cq == MAP_FAILED) {
        fprintf(stderr, "io_uring rings are not available\n");
        return EXIT_FAILURE;
    }

    // Test phys addr eq.
    uint64_t sq_phys = virt_to_phys(sq);
    uint64_t cq_phys = virt_to_phys(cq);
    printf("sq_ptr = %p -> phys = 0x%016" PRIx64 "\n", sq, sq_phys);
    printf("cq_ptr = %p -> phys = 0x%016" PRIx64 "\n", cq, cq_phys);
    if (sq_phys == 0 || cq_phys == 0) {
        // 0 means the translation failed; two failures must not be
        // reported as "same physical address".
        printf("have same phys addr? %s\n", "unknown (translation failed, try running as root)");
    } else {
        printf("have same phys addr? %s\n", sq_phys == cq_phys ? "true" : "false");
    }

    // Test read and write.
    // volatile: the two pointers alias the same memory through different
    // mappings, so every access must really hit memory.
    volatile unsigned *sq_word = sq;
    volatile unsigned *cq_word = cq;
    printf("*sq_ptr = %x\n", *sq_word);
    printf("*cq_ptr = %x\n", *cq_word);
    unsigned v = 0x1234;
    printf("write 0x%x to sq_ptr, but not cq_ptr\n", v);
    *sq_word = v;
    printf("*sq_ptr = %x\n", *sq_word);
    printf("*cq_ptr = %x\n", *cq_word);
    return EXIT_SUCCESS;
}

0 commit comments

Comments
 (0)