-
Notifications
You must be signed in to change notification settings - Fork 465
Expand file tree
/
Copy pathprotocol_inference.h
More file actions
4591 lines (4183 loc) · 140 KB
/
Copy pathprotocol_inference.h
File metadata and controls
4591 lines (4183 loc) · 140 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* This code runs using bpf in the Linux kernel.
* Copyright 2022- The Yunshan Networks Authors.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* SPDX-License-Identifier: GPL-2.0
*/
/*
* Due to the limitation of 4096 eBPF instructions in Linux kernels below version 5.2,
* the protocol inference code can easily exceed this limit when more protocols are added.
* To address this issue, the protocol inference logic has been split into three separate programs.
* The updated workflow is as follows:
*
* [openssl Uprobe] --
* |
* \|/
* [syscall Kprobe/tracepoint] --> [protocol inference 2] --> [protocol inference 3] --> [data submission] --> [data output]
* | /|\
* | |
* |----- general file I/O -----> [I/O event handling] ------------------------------------------------------
*
* Explanation:
* `[openssl Uprobe]` and `[syscall Kprobe/tracepoint]` perform initial setup for eBPF probe entry,
* and contain the first part of Layer 7 (L7) protocol inference logic.
* `protocol inference 2` : part 2 of protocol inference
* `protocol inference 3` : part 3 of protocol inference
* Newly added protocol inference code is recommended to be placed within the `infer_protocol_3()` interface.
*/
#ifndef DF_BPF_PROTO_INFER_H
#define DF_BPF_PROTO_INFER_H
#include "common.h"
#include "socket_trace.h"
#define L7_PROTO_INFER_PROG_1 0
#define L7_PROTO_INFER_PROG_2 1
/*
 * Report whether the current task's command name is exactly "nginx".
 *
 * The name is obtained with bpf_get_current_comm() and compared byte by
 * byte, terminating NUL included, so a name that merely starts with
 * "nginx" does not match.
 */
static __inline bool is_nginx_process(void)
{
	char name[TASK_COMM_LEN];

	bpf_get_current_comm(name, sizeof(name));

	if (name[0] != 'n' || name[1] != 'g' || name[2] != 'i')
		return false;
	if (name[3] != 'n' || name[4] != 'x')
		return false;
	return name[5] == '\0';
}
/*
 * Test whether the bit for `port` is set in `ports`.
 *
 * The ports_bitmap_t object is addressed as a plain byte array: the byte
 * is selected with `port >> 3` and the bit inside it with `port & 0x7`.
 * The byte address is range-checked against the start and end of the
 * object before it is dereferenced.
 *
 * Returns true when the bit is set; false when it is clear or when the
 * computed address falls outside `*ports`.
 */
static __inline bool is_set_ports_bitmap(ports_bitmap_t * ports, __u16 port)
{
/*
* Avoid using the form `ports->bitmap[port >> 3]` to index the
* bitmap, as it may lead to the following error:
*
* 115: (85) call bpf_map_lookup_elem#1
* 116: (15) if r0 == 0x0 goto pc+5
* 117: (79) r1 = *(u64 *)(r10 -168)
* 118: (77) r1 >>= 3
* 119: (0f) r0 += r1
* 120: (71) r1 = *(u8 *)(r0 +0)
* R0 unbounded memory access, make sure to bounds check any array
* access into a map
*
* The error message indicates that we need to perform boundary checks
* for R0.
*/
/* One-past-the-end and first byte of the bitmap object. */
const __u8 *end = (void *)ports + sizeof(*ports);
const __u8 *start = (__u8 *) ports;
/* Address of the byte that holds the bit for `port`. */
const __u8 *addr = start + (port >> 3);
if (addr >= start && addr < end) {
/*
* Here, we must restrict the type of 'mask' to 'u8'; otherwise,
* when compiling as 'u64,' errors will occur upon loading the
* program:
*
* 122: (3d) if r1 >= r0 goto pc+6
* 123: (79) r2 = *(u64 *)(r10 -168)
* 124: (57) r2 &= 7
* 125: (71) r1 = *(u8 *)(r1 +0)
* R1 unbounded memory access, make sure to bounds check any
* array access into a map
*/
const __u8 mask = 1 << (port & 0x7);
if (*addr & mask)
return true;
}
return false;
}
/*
 * Decide whether `proto` may be inferred on the connection described by
 * `conn_info`.
 *
 * @proto:     protocol being considered.
 * @conn_info: connection under inspection; sk_type, tuple.num and
 *             tuple.dport are read.
 * @prog_num:  L7_PROTO_INFER_PROG_1 selects is_set_bitmap(); any other
 *             value selects is_set_ports_bitmap().
 *
 * Returns false when the protocol is disabled, true for SOCK_UNIX
 * sockets, and otherwise true only if a port bitmap exists for `proto`
 * and contains tuple.num or tuple.dport.
 */
static __inline bool
__protocol_port_check(enum traffic_protocol proto,
		      struct conn_info_s *conn_info, __u8 prog_num)
{
	if (!is_protocol_enabled(proto))
		return false;

	if (conn_info->sk_type == SOCK_UNIX)
		return true;

	__u32 key = proto;
	ports_bitmap_t *ports = proto_ports_bitmap__lookup(&key);

	if (!ports)
		return false;

	/*
	 * If the "is_set_ports_bitmap()" function is used in both stages,
	 * there may be the following error when loading an eBPF program in
	 * the 4.14 kernel:
	 * `failed. name: df_T_exit_sendmmsg, Argument list too long errno: 7`
	 * To avoid this situation, it is necessary to differentiate the calls.
	 */
	if (prog_num == L7_PROTO_INFER_PROG_1)
		return is_set_bitmap(ports->bitmap, conn_info->tuple.num) ||
		    is_set_bitmap(ports->bitmap, conn_info->tuple.dport);

	return is_set_ports_bitmap(ports, conn_info->tuple.num) ||
	    is_set_ports_bitmap(ports, conn_info->tuple.dport);
}
/*
 * Port/enable check for `proto` on `conn_info`, always using the
 * L7_PROTO_INFER_PROG_1 variant of __protocol_port_check().
 */
static __inline bool
protocol_port_check_1(enum traffic_protocol proto,
struct conn_info_s *conn_info)
{
return __protocol_port_check(proto, conn_info, L7_PROTO_INFER_PROG_1);
}
/*
 * Port/enable check for `proto` on `conn_info`.
 *
 * When LINUX_VER_KFUNC or LINUX_VER_5_2_PLUS is defined this takes the
 * same L7_PROTO_INFER_PROG_1 path as protocol_port_check_1(); otherwise
 * it takes the L7_PROTO_INFER_PROG_2 path of __protocol_port_check().
 */
static __inline bool
protocol_port_check_2(enum traffic_protocol proto,
struct conn_info_s *conn_info)
{
#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS)
return __protocol_port_check(proto, conn_info, L7_PROTO_INFER_PROG_1);
#else
return __protocol_port_check(proto, conn_info, L7_PROTO_INFER_PROG_2);
#endif
}
/*
 * Tell whether `sk_info` already carries a usable protocol inference.
 *
 * Returns true only when `sk_info` is non-NULL, its uid is non-zero and
 * its l7_proto is neither PROTO_TLS nor PROTO_UNKNOWN.
 */
static __inline bool is_infer_socket_valid(struct socket_info_s *sk_info)
{
	if (sk_info == NULL || sk_info->uid == 0)
		return false;

	/*
	 * The kernel side collects TLS handshake data and marks the socket
	 * as 'PROTO_TLS' while doing so. TLS plaintext collected through
	 * UPROBE has to be inferred again, so a 'PROTO_TLS' socket is
	 * treated as not yet valid.
	 */
	if (sk_info->l7_proto == PROTO_TLS)
		return false;

	/*
	 * 'PROTO_UNKNOWN' must be re-evaluated too. It is common when some
	 * data was pre-stored: the socket information exists but 'l7_proto'
	 * is still 'PROTO_UNKNOWN', and the stored data has to be combined
	 * with the next segment and inferred as a whole.
	 */
	return sk_info->l7_proto != PROTO_UNKNOWN;
}
// When calling this function, count must be a constant, and at this time, the
// compiler can optimize it into an immediate value and write it into the
// instruction.
static __inline void save_prev_data_from_kern(const char *buf,
struct conn_info_s *conn_info,
size_t count)
{
if (is_socket_info_valid(conn_info->socket_info_ptr)) {
bpf_probe_read_kernel(conn_info->socket_info_ptr->prev_data,
count, buf);
conn_info->socket_info_ptr->prev_data_len = count;
/*
* This piece of data needs to be merged with subsequent data, so
* the direction of the previous piece of data needs to be saved here.
*
* For example:
* A --> out
* B1 <-- in
* B2 <-- in
*
* The data of 'B1' and 'B2' will be merged into a single data stream,
* meaning that the data from B1 will be merged into 'B2' for transmission.
* Therefore, the direction of the previously merged data from B2 will be
* the same as the direction of 'A' (out), rather than the direction of 'B1'.
* This is saved using 'pre_direction'.
*/
conn_info->socket_info_ptr->pre_direction =
conn_info->socket_info_ptr->direction;
conn_info->socket_info_ptr->direction = conn_info->direction;
} else {
bpf_probe_read_kernel(conn_info->prev_buf, count, buf);
conn_info->prev_count = count;
}
}
/*
 * Compare two command names over at most 16 bytes.
 *
 * Returns false at the first differing byte, true as soon as both names
 * end with NUL at the same position, and true if all 16 bytes are equal.
 */
static __inline bool is_same_command(char *a, char *b)
{
	static const int COMM_BYTES = 16;
	int pos;

	for (pos = 0; pos < COMM_BYTES; pos++) {
		const char lhs = a[pos];
		const char rhs = b[pos];

		if (lhs != rhs)
			return false;
		if (lhs == '\0')
			return true;
	}

	/*
	 * All 16 bytes matched without meeting a '\0'; in theory this
	 * point should not be reached.
	 */
	return true;
}
/*
 * Check whether the current task's command name equals `comm`.
 *
 * @comm: NUL-terminated name to compare against (at most 16 bytes are
 *        examined by is_same_command()).
 *
 * Returns false if bpf_get_current_comm() reports a non-zero result,
 * otherwise the result of is_same_command().
 */
static __inline bool is_current_comm(char *comm)
{
	static const int KERNEL_COMM_MAX = 16;
	char current_comm[KERNEL_COMM_MAX];

	/* A non-zero return value means the name could not be fetched. */
	if (bpf_get_current_comm(&current_comm, sizeof(current_comm)))
		return false;

	return is_same_command(comm, current_comm);
}
/*
 * Recognise the start of an HTTP/1.x status line.
 *
 * Returns 1 when `data` begins with "HTTP/1." followed by any byte, a
 * space, and a status code whose first digit is not '1'; 0 otherwise.
 * At least 10 bytes of `data` are read on a full match.
 */
static __inline int is_http_response(const char *data)
{
	if (data[0] != 'H' || data[1] != 'T' || data[2] != 'T' ||
	    data[3] != 'P')
		return 0;
	if (data[4] != '/' || data[5] != '1' || data[6] != '.')
		return 0;
	/* data[7] (the minor version digit) is deliberately not checked. */
	if (data[8] != ' ')
		return 0;

	/*
	 * 1xx responses are excluded on purpose: if a server answers one
	 * request with two consecutive responses (e.g. HTTP 100 then
	 * HTTP 200), the upper layer does not process the HTTP 100 one, and
	 * the request would then fail to be merged with the HTTP 200
	 * response.
	 */
	return data[9] != '1';
}
/*
 * Recognise the start of an HTTP/1.x request line.
 *
 * @data:      bytes to inspect.
 * @data_len:  number of valid bytes; only consulted for OPTIONS.
 * @conn_info: no_trace is set to true for a HEAD request when the
 *             current process is nginx.
 *
 * Returns 1 when `data` starts with one of "DELETE ", "GET ", "HEAD ",
 * "OPTIONS ", "PATCH ", "POST " or "PUT "; 0 otherwise.
 */
static __inline int is_http_request(const char *data, int data_len,
				    struct conn_info_s *conn_info)
{
	const char lead = data[0];

	/* DELETE */
	if (lead == 'D')
		return data[1] == 'E' && data[2] == 'L' && data[3] == 'E' &&
		    data[4] == 'T' && data[5] == 'E' && data[6] == ' ';

	/* GET */
	if (lead == 'G')
		return data[1] == 'E' && data[2] == 'T' && data[3] == ' ';

	/* HEAD */
	if (lead == 'H') {
		if (data[1] != 'E' || data[2] != 'A' || data[3] != 'D' ||
		    data[4] != ' ')
			return 0;
		/*
		 * Under NGINX, HEAD requests are not traced: they are often
		 * used for health checks and would otherwise flood the call
		 * chain tree with HEAD entries.
		 */
		if (is_nginx_process())
			conn_info->no_trace = true;
		return 1;
	}

	/* OPTIONS */
	if (lead == 'O')
		return data_len >= 8 && data[1] == 'P' && data[2] == 'T' &&
		    data[3] == 'I' && data[4] == 'O' && data[5] == 'N' &&
		    data[6] == 'S' && data[7] == ' ';

	/* PATCH / POST / PUT */
	if (lead == 'P') {
		const char second = data[1];

		if (second == 'A')
			return data[2] == 'T' && data[3] == 'C' &&
			    data[4] == 'H' && data[5] == ' ';
		if (second == 'O')
			return data[2] == 'S' && data[3] == 'T' &&
			    data[4] == ' ';
		if (second == 'U')
			return data[2] == 'T' && data[3] == ' ';
		return 0;
	}

	return 0;
}
/*
 * Compute the offset of the Header Block Fragment in a HEADERS frame.
 *
 * @fix_sz:         size of the fixed part that precedes the payload.
 * @flags_padding:  non-zero adds 1 byte.
 * @flags_priority: non-zero adds 5 bytes.
 *
 * The result is truncated to 8 bits.
 */
static __inline __u8 get_block_fragment_offset(__u8 fix_sz,
					       __u8 flags_padding,
					       __u8 flags_priority)
{
	const __u8 pad_bytes = flags_padding ? 1 : 0;
	const __u8 prio_bytes = flags_priority ? 5 : 0;

	return (__u8) (fix_sz + pad_bytes + prio_bytes);
}
/*
 * One probing step used by find_idx_from_block_fragment(). It relies on
 * the caller's locals `table_idx`, `max`, `buf` and `offset`: while the
 * current index is 0 or above `max`, advance `offset` by one and take
 * the low 7 bits of the next byte as the new candidate index.
 */
#define try_find__static_table_idx() \
do { \
if (table_idx > max || table_idx == 0) \
table_idx = buf[++offset] & 0x7f; \
} while(0)
/*
 * Look for a static table index in `buf`, starting at `offset`.
 *
 * The low 7 bits of buf[offset] are the first candidate; up to five
 * following bytes are tried while the candidate is 0 or greater than
 * `max`. The last candidate is returned as is, so the caller still has
 * to range-check the result.
 */
static __inline __u8 find_idx_from_block_fragment(const __u8 * buf,
__u8 offset, __u8 max)
{
/*
* Parse a static table index out of the Header Block Fragment, looking
* at no more than its first 6 bytes.
* For example, for Header Block Fragment ddda8386e6e5e4e3e2d0 at most
* 'dd da 83 86 e6 e5' is examined.
*/
__u8 table_idx = buf[offset] & 0x7f;
try_find__static_table_idx();
try_find__static_table_idx();
try_find__static_table_idx();
try_find__static_table_idx();
try_find__static_table_idx();
return table_idx;
}
/*
 * Check whether the user buffer `buf_src` starts with "PRI * HTTP/2".
 *
 * The first 12 bytes are copied with bpf_probe_read_user() into a
 * zero-initialised local buffer and compared with the expected string,
 * terminating NUL included. `count` is not consulted.
 */
static bool is_http2_magic(const char *buf_src, size_t count)
{
	static const char preface[] = "PRI * HTTP/2";
	char head[sizeof(preface)] = { 0 };
	int pos = 0;

	bpf_probe_read_user(head, sizeof(head) - 1, buf_src);

	while (pos < sizeof(preface)) {
		if (head[pos] != preface[pos])
			return false;
		++pos;
	}
	return true;
}
// https://tools.ietf.org/html/rfc7540#section-4.1
// Frame layout:
// +-----------------------------------------------+
// | Length (24) |
// +---------------+---------------+---------------+
// | Type (8) | Flags (8) |
// +-+-------------+---------------+-------------------------------+
// |R| Stream Identifier (31) |
// +=+=============================================================+
// | Frame Payload (0...) ...
// +---------------------------------------------------------------+
//
// HEADERS frame format:
// +---------------+
// |Pad Length? (8)|
// +-+-------------+-----------------------------------------------+
// |E| Stream Dependency? (31) |
// +-+-------------+-----------------------------------------------+
// | Weight? (8) |
// +-+-------------+-----------------------------------------------+
// | Header Block Fragment (*) ...
// +---------------------------------------------------------------+
// | Padding (*) ...
// +---------------------------------------------------------------+
//
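// Pad Length: length of the padding; present only when the PADDING flag is set
// E: a single bit declaring whether the stream dependency is exclusive; present only when the PRIORITY flag is set
// Stream Dependency: a stream identifier naming the stream that the current stream depends on; present only when the PRIORITY flag is set
// Weight: an unsigned 8-bit value giving the priority weight of the current stream (1~256); present only when the PRIORITY flag is set
// Header Block Fragment: a fragment of the header block
// Padding: padding bytes with no semantic meaning, serving the same purpose as the padding of DATA frames; present only when the PADDING flag is set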
//
// request:
// 1 :authority
// 2 :method GET
// 3 :method POST
// 4 :path /
// 5 :path /index.html
// others as response.
/*
 * Walk the HTTP/2 frames at the start of a syscall buffer and classify
 * the data from the first HEADERS frame found.
 *
 * @buf_kern:    kernel-side copy of the beginning of the data.
 * @syscall_len: length reported by the system call.
 * @buf_src:     user-space address of the data; frames are read from
 *               here with bpf_probe_read_user().
 * @count:       number of bytes available at `buf_src`.
 * @conn_info:   connection state; `role` may be set, and 9 bytes may be
 *               pre-stored through save_prev_data_from_kern().
 * @is_first:    true when the socket has not been inferred as HTTP/2
 *               yet.
 *
 * Returns MSG_REQUEST, MSG_RESPONSE, MSG_PRESTORE (only the 9-byte frame
 * header was supplied and has been stored) or MSG_UNKNOWN.
 */
static __inline enum message_type
parse_http2_headers_frame(const char *buf_kern, size_t syscall_len,
			  const char *buf_src, size_t count,
			  struct conn_info_s *conn_info, const bool is_first)
{
#define HTTPV2_FRAME_PROTO_SZ           0x9
#define HTTPV2_FRAME_TYPE_DATA          0x0
#define HTTPV2_FRAME_TYPE_HEADERS       0x1
// In some cases, the compiled binary instructions exceed the limit, the
// specific reason is unknown, reduce the number of cycles of http2, which
// may cause http2 packet loss
#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS)
#define HTTPV2_LOOP_MAX 8
#else
#define HTTPV2_LOOP_MAX 5
#endif
/*
 * HTTPV2_FRAME_READ_SZ is made up of three parts:
 * (1) the fixed 9-octet frame header
 *
 * for a HEADERS frame:
 * (2) Pad Length (8) + E(1) + Stream Dependency(31) + Weight(8) = 6 bytes
 * (3) the first 6 bytes of the Header Block Fragment (*)
 */
#define HTTPV2_FRAME_READ_SZ            21
#define HTTPV2_STATIC_TABLE_IDX_MAX     61

	/*
	 * If the server reads data in multiple passes, and the previous pass
	 * has already read the first 9 bytes of the protocol header, and it
	 * has been determined as HEADER, then the current data is directly
	 * PUSHed to the upper layer.
	 */
	if (conn_info->prev_count == HTTPV2_FRAME_PROTO_SZ) {
		return MSG_REQUEST;
	}

	// fixed 9-octet header
	if (count < HTTPV2_FRAME_PROTO_SZ)
		return MSG_UNKNOWN;

	__u32 offset = 0;
	__u8 flags_unset = 0, flags_padding = 0, flags_priority = 0;
	__u8 type = 0, reserve = 0, static_table_idx, i, block_fragment_offset;
	__u8 msg_type = MSG_UNKNOWN;
	__u8 buf[HTTPV2_FRAME_READ_SZ] = { 0 };

	// When Magic and header are in the same TCP packet, it will cause
	// packet loss. When Magic is detected, the offset is corrected to the
	// starting position of the header.
	if (is_first && is_http2_magic(buf_src, count)) {
		static const int HTTP2_MAGIC_SIZE = 24;
		offset = HTTP2_MAGIC_SIZE;
	} else {
		/*
		 * The frame payload length (excluding the initial 9 bytes) must not
		 * exceed the actual length of the system call.
		 */
		if ((__bpf_ntohl(*(__u32 *) buf_kern) >> 8) > syscall_len - HTTPV2_FRAME_PROTO_SZ)
			return MSG_UNKNOWN;

		/*
		 * The highest bit of the 5th byte (i.e., the first byte of the Stream
		 * Identifier) must be 0, indicating that the reserved bit (R) is 0;
		 * otherwise, it violates the HTTP/2 specification.
		 */
		if (buf_kern[5] >> 7 != 0)
			return MSG_UNKNOWN;
	}

	/*
	 * Use '#pragma unroll' to avoid the following error during the
	 * loading process in Linux 5.2.x:
	 * bpf load "socket-trace-bpf-linux-5.2_plus" failed, error:Invalid argument (22)
	 */
#pragma unroll
	for (i = 0; i < HTTPV2_LOOP_MAX; i++) {
		/*
		 * Take the iovec case into account: the data handed over for
		 * protocol inference is the first iovec (&args->iov[0]) and
		 * `count` is the length of that first iovec only. The length
		 * derived from the protocol may therefore exceed `count`, so
		 * the test here cannot be "offset == count".
		 */
		if (offset >= count)
			break;

		bpf_probe_read_user(buf, sizeof(buf), buf_src + offset);
		/* Advance past this frame: 24-bit payload length + header. */
		offset += (__bpf_ntohl(*(__u32 *) buf) >> 8) +
		    HTTPV2_FRAME_PROTO_SZ;
		type = buf[3];

		if (type == HTTPV2_FRAME_TYPE_DATA && !is_first)
			return MSG_REQUEST;

		// Not a HEADERS frame: keep looking at the next frame.
		if (type != HTTPV2_FRAME_TYPE_HEADERS)
			continue;

		flags_unset = buf[4] & 0xd2;
		flags_padding = buf[4] & 0x08;
		flags_priority = buf[4] & 0x20;
		/*
		 * NOTE(review): the reserved bit R is the most significant
		 * bit of buf[5] (0x80, as tested on buf_kern above); this
		 * masks 0x01 instead. Left unchanged - confirm the intent.
		 */
		reserve = buf[5] & 0x01;

		// flags_unset and reserve must both be 0; otherwise give up.
		if (flags_unset || reserve)
			return MSG_UNKNOWN;

		if (syscall_len == HTTPV2_FRAME_PROTO_SZ) {
			msg_type = MSG_PRESTORE;
			break;
		}

		/*
		 * If the protocol inference is complete, it can be directly
		 * pushed to the upper layer.
		 */
		if (!is_first)
			return MSG_REQUEST;

		/*
		 * Use the frame flags (the PADDING and PRIORITY bits) to work
		 * out the layout of the HEADERS frame and hence the offset of
		 * the Header Block Fragment.
		 */
		block_fragment_offset =
		    get_block_fragment_offset(HTTPV2_FRAME_PROTO_SZ,
					      flags_padding, flags_priority);

		// Analyse the Header Block Fragment to get a static table index.
		static_table_idx =
		    find_idx_from_block_fragment(buf, block_fragment_offset,
						 HTTPV2_STATIC_TABLE_IDX_MAX);

		/*
		 * Valid static table indexes are in [1, 61]. An index outside
		 * that range says nothing about this frame, so move on to the
		 * next one. (The two conditions must be OR-ed: no value can be
		 * both above the maximum and equal to 0.)
		 */
		if (static_table_idx > HTTPV2_STATIC_TABLE_IDX_MAX ||
		    static_table_idx == 0)
			continue;

		/*
		 * ref : https://datatracker.ietf.org/doc/html/rfc7541#appendix-A
		 * Static Table Entries:
		 * +-------+-----------------------------+---------------+
		 * | Index | Header Name                 | Header Value  |
		 * +-------+-----------------------------+---------------+
		 * | 1     | :authority                  |               |
		 * | 2     | :method                     | GET           |
		 * | 3     | :method                     | POST          |
		 * | 4     | :path                       | /             |
		 * | 5     | :path                       | /index.html   |
		 * | 6     | :scheme                     | http          |
		 * | 7     | :scheme                     | https         |
		 * | 8     | :status                     | 200           |
		 * | 9     | :status                     | 204           |
		 * | 10    | :status                     | 206           |
		 * | 11    | :status                     | 304           |
		 * | 12    | :status                     | 400           |
		 * | 13    | :status                     | 404           |
		 * | 14    | :status                     | 500           |
		 */
		if (static_table_idx >= 1 && static_table_idx <= 7) {
			msg_type = MSG_REQUEST;
			conn_info->role =
			    (conn_info->direction ==
			     T_INGRESS) ? ROLE_SERVER : ROLE_CLIENT;
		} else if (static_table_idx >= 8 && static_table_idx <= 14) {
			conn_info->role =
			    (conn_info->direction ==
			     T_EGRESS) ? ROLE_SERVER : ROLE_CLIENT;
			msg_type = MSG_RESPONSE;
		}

		break;
	}

	/* Only the 9-byte frame header arrived: keep it for the next call. */
	if (msg_type == MSG_PRESTORE)
		save_prev_data_from_kern(buf_kern, conn_info,
					 HTTPV2_FRAME_PROTO_SZ);

	return msg_type;
}
/*
 * Entry point for HTTP/2 inference.
 *
 * Returns MSG_UNKNOWN when the PROTO_HTTP2 port check fails, when
 * skip_http2_kprobe() is true, or when the socket has already been
 * inferred as a protocol other than HTTP/2. Otherwise the result of
 * parse_http2_headers_frame() is returned.
 *
 * When skip_http2_kprobe() is true and the data is TCP ingress, an entry
 * keyed by (tgid, fd, tcp_seq_end) is added to http2_tcp_seq_map before
 * returning.
 */
static __inline enum message_type infer_http2_message(const char *buf_kern,
size_t syscall_len,
const char *buf_src,
size_t count,
struct conn_info_s
*conn_info)
{
if (!protocol_port_check_1(PROTO_HTTP2, conn_info))
return MSG_UNKNOWN;
// When go uprobe http2 cannot be used, use kprobe/tracepoint to collect data
if (skip_http2_kprobe()) {
if (conn_info->direction == T_INGRESS &&
conn_info->tuple.l4_protocol == IPPROTO_TCP) {
struct http2_tcp_seq_key tcp_seq_key = {
.tgid = bpf_get_current_pid_tgid() >> 32,
.fd = conn_info->fd,
.tcp_seq_end =
get_tcp_read_seq(conn_info->fd, NULL, NULL),
};
// make linux 4.14 validator happy
// The stored value is tcp_seq_end minus the number of bytes in this read.
__u32 tcp_seq = tcp_seq_key.tcp_seq_end - count;
// BPF_NOEXIST: an existing entry for the same key is left untouched.
bpf_map_update_elem(&http2_tcp_seq_map, &tcp_seq_key,
&tcp_seq, BPF_NOEXIST);
}
return MSG_UNKNOWN;
}
bool is_first = true; // Is it the first inference?
if (is_infer_socket_valid(conn_info->socket_info_ptr)) {
// Already inferred as some other protocol: not HTTP/2.
if (conn_info->socket_info_ptr->l7_proto != PROTO_HTTP2)
return MSG_UNKNOWN;
is_first = false;
}
enum message_type ret =
parse_http2_headers_frame(buf_kern, syscall_len, buf_src, count,
conn_info, is_first);
return ret;
}
/*
 * Entry point for HTTP/1.x inference.
 *
 * Returns MSG_RESPONSE or MSG_REQUEST when `buf` starts like an HTTP/1.x
 * status line or request line, and MSG_UNKNOWN when the data is shorter
 * than 14 bytes, the PROTO_HTTP1 port check fails, the socket has already
 * been inferred as another protocol, or nothing matches.
 */
static __inline enum message_type infer_http_message(const char *buf,
						     size_t count,
						     struct conn_info_s
						     *conn_info)
{
	/*
	 * HTTP/1.1 200 OK\r\n (HTTP response is 17 characters)
	 * GET x HTTP/1.1\r\n  (HTTP request is 16 characters)
	 * The response MAY come without "OK",
	 * ref: https://www.rfc-editor.org/rfc/rfc7231
	 */
	if (count < 14)
		return MSG_UNKNOWN;

	if (!protocol_port_check_1(PROTO_HTTP1, conn_info))
		return MSG_UNKNOWN;

	/* A socket already classified as something else is not HTTP/1. */
	if (is_infer_socket_valid(conn_info->socket_info_ptr) &&
	    conn_info->socket_info_ptr->l7_proto != PROTO_HTTP1)
		return MSG_UNKNOWN;

	if (is_http_response(buf))
		return MSG_RESPONSE;

	return is_http_request(buf, count, conn_info) ?
	    MSG_REQUEST : MSG_UNKNOWN;
}
// MySQL and Kafka need the previous n bytes of data for inference
static __inline __u32 check_and_fetch_prev_data(struct conn_info_s *conn_info)
{
if (conn_info->socket_info_ptr != NULL &&
conn_info->socket_info_ptr->prev_data_len > 0) {
/*
* For adjacent read/write in the same direction.
*/
if (conn_info->direction ==
conn_info->socket_info_ptr->direction) {
bpf_probe_read_kernel(conn_info->prev_buf,
sizeof(conn_info->prev_buf),
conn_info->socket_info_ptr->
prev_data);
conn_info->prev_count =
conn_info->socket_info_ptr->prev_data_len;
/*
* When data is merged, that is, when two or more data with the same
* direction are merged together and processed as one data, the previously
* saved direction needs to be restored.
*
* At the beginning of the inference stage, 'socket_info_ptr->direction'
* represents the direction of the previously sent data. During the final
* data transmission stage, it will be updated to reflect the direction of
* the current data.
*/
conn_info->socket_info_ptr->direction =
conn_info->socket_info_ptr->pre_direction;
}
/*
* Clean up previously stored data.
*/
conn_info->socket_info_ptr->prev_data_len = 0;
}
return conn_info->prev_count;
}
// MySQL packet:
// 0 8 16 24 32
// +---------+---------+---------+---------+
// | payload_length | seq_id |
// +---------+---------+---------+---------+
// | |
// . ... body ... .
// . .
// . .
// +----------------------------------------
// ref : https://dev.mysql.com/doc/internals/en/com-process-kill.html
/*
 * Entry point for MySQL inference.
 *
 * @buf:       kernel-side copy of the beginning of the data.
 * @count:     number of bytes in this syscall.
 * @conn_info: connection state; a 4-byte packet header from the previous
 *             call may be found in prev_buf/prev_count, and keep_trace
 *             may be set.
 *
 * Returns MSG_PRESTORE when exactly 4 bytes arrived (they are stored for
 * the next call), MSG_REQUEST / MSG_RESPONSE when the data is accepted
 * as MySQL, and MSG_UNKNOWN otherwise.
 */
static __inline enum message_type infer_mysql_message(const char *buf,
size_t count,
struct conn_info_s
*conn_info)
{
if (!protocol_port_check_1(PROTO_MYSQL, conn_info))
return MSG_UNKNOWN;
// Exactly 4 bytes: store them and wait for the rest of the packet.
if (count == 4) {
save_prev_data_from_kern(buf, conn_info, 4);
return MSG_PRESTORE;
}
/*
* ref: https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_com_query.html
*/
static const __u8 kComQuery = 0x03;
static const __u8 kComConnect = 0x0b;
static const __u8 kComStmtPrepare = 0x16;
static const __u8 kComStmtExecute = 0x17;
static const __u8 kComStmtClose = 0x19;
static const __u8 kComStmtQuit = 0x01;
if (is_infer_socket_valid(conn_info->socket_info_ptr)) {
if (conn_info->socket_info_ptr->l7_proto != PROTO_MYSQL)
return MSG_UNKNOWN;
}
if (!conn_info->sk)
return MSG_UNKNOWN;
__u32 len;
__u8 seq, com, point_1, point_2;
// Header taken from this buffer: low 24 bits of the first word are the
// payload length, byte 3 the sequence id, byte 4 the first payload byte.
len = *((__u32 *) buf) & 0x00ffffff;
seq = buf[3];
com = buf[4];
point_1 = buf[6];
point_2 = buf[8];
// The 4-byte header was pre-stored by the previous call, so this buffer
// starts at the payload and every index shifts down by 4.
if (conn_info->prev_count == 4) {
len = *(__u32 *) conn_info->prev_buf & 0x00ffffff;
seq = conn_info->prev_buf[3];
count += 4;
com = buf[0];
point_1 = buf[2];
point_2 = buf[4];
}
if (count < 5 || len == 0)
return MSG_UNKNOWN;
/*
* To prevent stale data from a previous map value remaining in
* the unused portion of `__infer_buf->data` when the current
* syscall provides fewer than 9 bytes of actual data.
*/
if (count < 9) {
point_1 = point_2 = 0;
}
bool is_mysqld = is_current_comm("mysqld");
if (is_socket_info_valid(conn_info->socket_info_ptr)) {
/*
* When MySQL reassembly is enabled, all related traffic must be forced into
* the reassembly processing pipeline.
*
* Otherwise, under a single-packet-based protocol detection model, the
* detection capability is limited, and consecutive same-direction packets may
* be incorrectly identified as complete MySQL requests or responses.
*
* In such cases, these packets will continue to be parsed as valid MySQL
* traffic, and MSG_UNKNOWN will not be returned, thus preventing entry into
* the reassembly process.
*
* This leads to behavior inconsistent with expectations, since correct
* reassembly logic relies on returning MSG_UNKNOWN for incomplete packets in
* order to trigger reassembly.
*
* The goal of this design is to ensure that, when reassembly is enabled,
* consecutive same-direction data is not reported as independent complete
* MySQL messages (requests or responses).
*/
if (conn_info->enable_reasm) {
return MSG_UNKNOWN;
}
/*
* Ensure the authentication response packet is captured
* and distinguish it based on the 5th byte (Payload start):
*
* - **Authentication Success (OK Packet):** `0x00`
* - **Authentication Failure (ERR Packet):** `0xFF`
* - **Authentication Switch Request (Auth Switch Request):** `0xFE`
*/
if (seq <= 1 || (seq == 2 && (com == 0x0 || com == 0xFF || com == 0xFE)))
goto out;
return MSG_UNKNOWN;
}
/*
* When initially determining the process, if it is a 'mysqld' process,
* the judgment is completed as the MySQL protocol.
*/
if (is_mysqld) {
return conn_info->direction ==
T_INGRESS ? MSG_REQUEST : MSG_RESPONSE;
}
/*
* Strengthen length checking, such as the following MYSQL protocol data:
* MySQL Protocol
* - Packet Length: 15 --- len
* - Packet Number: 0
* - Request Command Query
* - Command: Query (3)
* - Statement: show databases
*/
if (count != (len + 4))
return MSG_UNKNOWN;
if (seq != 0)
return MSG_UNKNOWN;
// Check the request length to improve inference accuracy.
if (len > 10000) {
return MSG_UNKNOWN;
}
/*
* After establishing a connection, the MySQL server sends a handshake packet.
* The process is as follows:
* - **Server > Client (Handshake Packet)**
* The server sends this handshake packet, which includes the MySQL version,
* thread ID, authentication method, and other information.
* - **Client > Server (Login Request Packet)**
* The client computes the encrypted password based on `auth-plugin-data` and
* sends it back to the server for verification.
* - **Server > Client (Login Success or Failure)**
* The server verifies the client's identity and returns either an **OK Packet** or an **ERR Packet**.
*
* The handshake packet sent by the server is used for identification.
* 0x0A indicates the current mainstream protocol version (MySQL 4.1+).
* e.g.: 4A(J) 00 00 00 0A 35(5) 2E(.) 37(7) 2E(.) 31(1) 38(8) 00
* **35 2E 37 2E 31 38 00 ASCII decoding results in 5.7.18 (MySQL 5.7.18).**
* If the data contains a version string in the format x.x.x, it is highly likely to be MySQL.
*/
if (com == 0x0A && point_1 == 0x2e && point_2 == 0x2e) {
return MSG_REQUEST;
}
if (com != kComConnect && com != kComQuery &&
com != kComStmtPrepare && com != kComStmtExecute &&
com != kComStmtClose && com != kComStmtQuit) {
return MSG_UNKNOWN;
}
out:
if (com == kComStmtClose || com == kComStmtQuit)
conn_info->keep_trace = 1;
// For a mysqld process ingress data is the request; for any other
// process the mapping is reversed.
if (is_mysqld)
return conn_info->direction ==
T_INGRESS ? MSG_REQUEST : MSG_RESPONSE;
else
return conn_info->direction ==
T_INGRESS ? MSG_RESPONSE : MSG_REQUEST;
// NOTE(review): unreachable - both branches above return.
return MSG_UNKNOWN;
/*
e.g:
-----------------------------------------------------------
Query:
MySQL Protocol
Packet Length: 33 (21 00 00) ------> these four bytes are read first
Packet Number: 0 (00) ------------> /
---------------------------------- the rest below is read by the next call
Request Command Query
Command: Query (3) (03)
Statement: select user,host from mysql.user
-----------------------------------------------------------
Response:
MySQL Protocol
Packet Length: 1
Packet Number: 1
Number of fields: 2
MySQL Protocol
Packet Length: 43
Packet Number: 2
Catalog: def
Database: mysql
Table: user
Original table: user
Name: user
Original name: User
Charset number: utf8 COLLATE utf8_general_ci (33)
Length: 48
Type: FIELD_TYPE_STRING (254)
Flags: 0x4083
Decimals: 0
MySQL Protocol
Packet Length: 43
Packet Number: 3
Catalog: def
Database: mysql
Table: user
Original table: user
Name: host
Original name: Host
*/
}
/*
 * Recognise a PostgreSQL startup message.
 *
 * Layout (ref: https://developer.aliyun.com/article/751984#slide-5):
 *   int32 len | int32 protocol | "user" string 4 bytes
 *
 * Returns true when at least 12 bytes are available, the big-endian
 * length field is within [12, 10240], the protocol field is 00 03 00 00
 * and each of the next four bytes is >= 'A'.
 */
static __inline bool infer_pgsql_startup_message(const char *buf, size_t count)
{
	static const __u8 shortest_msg = 12;
	// A startup message won't be larger than 10240 (10KiB).
	static const __u32 longest_msg = 10240;

	if (count < shortest_msg)
		return false;

	__u32 declared_len = __bpf_ntohl(*(__u32 *) & buf[0]);

	if (declared_len < shortest_msg || declared_len > longest_msg)
		return false;

	// PostgreSQL 3.0
	if (buf[4] != 0 || buf[5] != 3 || buf[6] != 0 || buf[7] != 0)
		return false;

	/*
	 * The "user" string: a relaxed check that the four bytes look like
	 * letters. It is loose on purpose and still lets through some
	 * non-alphabetic characters (e.g. `\`).
	 */
	return buf[8] >= 'A' && buf[9] >= 'A' && buf[10] >= 'A' &&
	    buf[11] >= 'A';
}
/*
* ref: https://developer.aliyun.com/article/751984
* | char tag | int32 len | payload |
* tag ref: src/flow_generator/protocol_logs/sql/postgresql.rs
*
* Message flow patterns in PostgreSQL protocol:
* 'P' (Parse) is usually followed by 'B' (Bind), but sometimes directly followed by 'S' (Sync).
* 'B' (Bind) is usually followed by 'E' (Execute), or sometimes 'S' (Sync).
* 'E' (Execute) is usually followed by 'S' (Sync).
* 'S' (Sync) generally does not have any message following it; it signals the end of a batch of messages.
* The 'Q' (Query) and 'C' (Close) messages always end with a null terminator character '\0'.
*/
static __inline enum message_type infer_pgsql_query_message(const char *buf,
const char *s_buf,
size_t count)
{
// In the protocol format, the size of the "len" field is 4 bytes,
// and the minimum command length is 4 bytes for "COPY/MOVE",
// The minimal length is therefore 8.
static const __u32 min_payload_len = 8;
// Typical query message size is below an artificial limit.
// 30000 is copied from postgres code base:
// https://github.com/postgres/postgres/tree/master/src/interfaces/libpq/fe-protocol3.c#L94
static const __u32 max_payload_len = 30000;
// Minimum length = tag(char) + len(int32)
static const int min_msg_len = 1 + sizeof(__u32);
// Msg length check