Skip to content

Latest commit

 

History

History
779 lines (623 loc) · 16.8 KB

File metadata and controls

779 lines (623 loc) · 16.8 KB

11.5 网络优化

📍 导航:返回目录 | 上一节:内存优化 | 下一节:数据库优化


TCP 参数调优

1. 内核参数优化

# /etc/sysctl.conf settings
#
# NOTE: sysctl.conf only treats lines that BEGIN with '#' or ';' as comments.
# A comment placed after a value becomes part of the value, so every
# explanation below sits on its own line.

# TCP buffer sizes
# Maximum receive buffer: 16 MB
net.core.rmem_max = 16777216
# Maximum send buffer: 16 MB
net.core.wmem_max = 16777216
# TCP receive buffer (min, default, max)
net.ipv4.tcp_rmem = 4096 87380 16777216
# TCP send buffer (min, default, max)
net.ipv4.tcp_wmem = 4096 65536 16777216

# Connection queues
# Maximum length of the listen (accept) queue
net.core.somaxconn = 4096
# Length of the SYN queue
net.ipv4.tcp_max_syn_backlog = 8192

# TIME_WAIT tuning
# Reuse TIME_WAIT connections
net.ipv4.tcp_tw_reuse = 1
# FIN_WAIT_2 timeout in seconds
net.ipv4.tcp_fin_timeout = 30

# Keep-Alive parameters
# Send the first probe after 600 seconds of idleness
net.ipv4.tcp_keepalive_time = 600
# Interval between probes: 30 seconds
net.ipv4.tcp_keepalive_intvl = 30
# Number of probes: 3
net.ipv4.tcp_keepalive_probes = 3

# Congestion control
# Use the BBR algorithm
net.ipv4.tcp_congestion_control = bbr

# Apply the configuration (shell command, not a sysctl.conf entry)
sysctl -p

2. Go 程序 TCP 调优

import (
    "net"
    "time"
)

// optimizeTCPConn applies low-latency socket options to conn: it disables
// Nagle's algorithm, enables TCP keep-alive with a 30 second period, and sets
// both socket buffers to 1 MB.
//
// The options are applied in that order. The first failure is returned
// unchanged and the remaining options are left untouched.
func optimizeTCPConn(conn *net.TCPConn) error {
	const socketBufferSize = 1024 * 1024 // 1MB

	tuningSteps := []func() error{
		// Disable Nagle's algorithm (favor latency over batching).
		func() error { return conn.SetNoDelay(true) },
		// Enable keep-alive probes and set their period.
		func() error { return conn.SetKeepAlive(true) },
		func() error { return conn.SetKeepAlivePeriod(30 * time.Second) },
		// Size the socket buffers.
		func() error { return conn.SetReadBuffer(socketBufferSize) },
		func() error { return conn.SetWriteBuffer(socketBufferSize) },
	}

	for _, apply := range tuningSteps {
		if err := apply(); err != nil {
			return err
		}
	}
	return nil
}

连接管理优化

1. 连接池

import (
    "context"
    "errors"
    "net"
    "sync"
    "time"
)

// errPoolClosed is returned by Get once the pool has been closed.
var errPoolClosed = errors.New("connection pool is closed")

// ConnectionPool keeps up to maxIdle idle TCP connections for reuse and dials
// new ones through factory when no usable idle connection is available.
//
// maxActive and idleTimeout are recorded by NewConnectionPool but are not
// enforced by any method of this type.
type ConnectionPool struct {
	mu          sync.Mutex // guards closed and serializes Put against Close
	conns       chan *net.TCPConn
	factory     func() (*net.TCPConn, error)
	maxIdle     int
	maxActive   int
	idleTimeout time.Duration
	closed      bool
}

// NewConnectionPool returns a pool that holds at most maxIdle idle
// connections and creates new ones by calling factory.
func NewConnectionPool(factory func() (*net.TCPConn, error), maxIdle, maxActive int) *ConnectionPool {
	return &ConnectionPool{
		conns:       make(chan *net.TCPConn, maxIdle),
		factory:     factory,
		maxIdle:     maxIdle,
		maxActive:   maxActive,
		idleTimeout: 5 * time.Minute,
	}
}

// Get returns a usable connection, preferring an idle pooled one.
//
// It never blocks waiting for a connection to be returned: stale idle
// connections are closed and skipped, and when no idle connection is left a
// new one is created with the factory. Get returns ctx.Err() if ctx is
// already done, and an error if the pool has been closed.
func (p *ConnectionPool) Get(ctx context.Context) (*net.TCPConn, error) {
	for {
		// Check cancellation explicitly: in a select, a ready idle
		// connection and a done context would be picked at random.
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		select {
		case conn, ok := <-p.conns:
			if !ok {
				return nil, errPoolClosed
			}
			if p.isConnValid(conn) {
				return conn, nil
			}
			// Stale connection: discard it and try the next idle one.
			conn.Close()
		default:
			// No idle connection available: dial a new one.
			return p.factory()
		}
	}
}

// Put returns conn to the pool. The connection is closed instead when the
// pool is full or has already been closed. A nil conn is ignored.
func (p *ConnectionPool) Put(conn *net.TCPConn) {
	if conn == nil {
		return
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	if !p.closed {
		select {
		case p.conns <- conn:
			return
		default:
			// Pool is full; fall through and close the connection.
		}
	}
	conn.Close()
}

// isConnValid probes conn with a 1ms read and reports whether it still looks
// usable.
//
// Only a read timeout counts as healthy: it means the peer has neither closed
// the connection nor sent anything. Successfully reading a byte means
// unsolicited data was pending (and has now been consumed), and any other
// error, including io.EOF from a peer that closed the connection, means the
// connection must not be reused.
func (p *ConnectionPool) isConnValid(conn *net.TCPConn) bool {
	if conn == nil {
		return false
	}
	if err := conn.SetReadDeadline(time.Now().Add(1 * time.Millisecond)); err != nil {
		return false
	}

	var probe [1]byte
	_, readErr := conn.Read(probe[:])

	// Clear the deadline so the next user of the connection is unaffected.
	if err := conn.SetReadDeadline(time.Time{}); err != nil {
		return false
	}

	var netErr net.Error
	return errors.As(readErr, &netErr) && netErr.Timeout()
}

// Close closes the pool and every idle connection in it. Calling Close more
// than once is a no-op.
func (p *ConnectionPool) Close() {
	p.mu.Lock()
	if p.closed {
		p.mu.Unlock()
		return
	}
	p.closed = true
	close(p.conns)
	p.mu.Unlock()

	for conn := range p.conns {
		conn.Close()
	}
}

性能对比

  • 短连接(每次创建):100 ms/req(含 TCP 三次握手)
  • 连接池:10 ms/req
  • 性能提升:10x

2. HTTP 连接复用

import (
    "net/http"
    "time"
)

// optimizedHTTPClient is a shared HTTP client with a 30 second overall
// request timeout and a transport tuned for connection reuse.
var optimizedHTTPClient = func() *http.Client {
	// Dialer settings: connect timeout and TCP keep-alive period.
	dialer := &net.Dialer{
		Timeout:   5 * time.Second,
		KeepAlive: 30 * time.Second,
	}

	transport := &http.Transport{
		DialContext: dialer.DialContext,

		// Connection pool limits.
		MaxIdleConns:        100,              // idle connections across all hosts
		MaxIdleConnsPerHost: 10,               // idle connections per host
		MaxConnsPerHost:     100,              // total connections per host
		IdleConnTimeout:     90 * time.Second, // how long an idle connection is kept

		// Keep-alive and compression stay enabled.
		DisableKeepAlives:  false,
		DisableCompression: false,

		// Per-phase timeouts.
		TLSHandshakeTimeout:   10 * time.Second,
		ResponseHeaderTimeout: 10 * time.Second,
	}

	return &http.Client{
		Timeout:   30 * time.Second,
		Transport: transport,
	}
}()

// makeHTTPRequest issues a GET for url through optimizedHTTPClient. On
// success the caller is responsible for closing the response body.
func makeHTTPRequest(url string) (*http.Response, error) {
	resp, err := optimizedHTTPClient.Get(url)
	return resp, err
}

3. WebSocket 连接优化

import (
    "github.com/gorilla/websocket"
    "net/http"
)

// WSConnectionPool caches one WebSocket connection per URL.
type WSConnectionPool struct {
	mu    sync.Mutex
	conns map[string]*websocket.Conn
}

// NewWSConnectionPool returns an empty pool.
func NewWSConnectionPool() *WSConnectionPool {
	return &WSConnectionPool{
		conns: make(map[string]*websocket.Conn),
	}
}

// refreshWSDeadlines sets conn's read deadline to 60 seconds and its write
// deadline to 10 seconds from now.
func refreshWSDeadlines(conn *websocket.Conn) error {
	now := time.Now()
	if err := conn.SetReadDeadline(now.Add(60 * time.Second)); err != nil {
		return err
	}
	return conn.SetWriteDeadline(now.Add(10 * time.Second))
}

// Get returns the cached connection for url, dialing and caching a new one
// when none exists.
//
// Deadlines are absolute points in time, so they are refreshed on every call
// rather than only when the connection is dialed; otherwise a cached
// connection would be handed out with deadlines that have already expired.
// Callers that keep a connection for longer than these windows must set
// their own deadlines before each read or write.
//
// The pool lock is held while dialing, so only one connection is ever created
// per URL, at the cost of blocking concurrent Get calls during the dial.
func (pool *WSConnectionPool) Get(url string) (*websocket.Conn, error) {
	pool.mu.Lock()
	defer pool.mu.Unlock()

	conn, cached := pool.conns[url]
	if !cached {
		dialed, _, err := websocket.DefaultDialer.Dial(url, nil)
		if err != nil {
			return nil, err
		}
		// Tolerate a zero-value pool that was not built with the constructor.
		if pool.conns == nil {
			pool.conns = make(map[string]*websocket.Conn)
		}
		pool.conns[url] = dialed
		conn = dialed
	}

	if err := refreshWSDeadlines(conn); err != nil {
		// The connection cannot be configured; drop it so the next Get redials.
		delete(pool.conns, url)
		conn.Close()
		return nil, err
	}
	return conn, nil
}

数据传输优化

1. 批量发送

// sendMessagesSlow writes each message, followed by a newline, with its own
// conn.Write call. It is the deliberately inefficient counterpart of
// sendMessagesFast: one write per message. It stops at the first write error
// and returns it.
func sendMessagesSlow(conn net.Conn, messages []string) error {
	for i := range messages {
		line := []byte(messages[i] + "\n")
		if _, err := conn.Write(line); err != nil {
			return err
		}
	}
	return nil
}

// sendMessagesFast joins all messages, each terminated by a newline, into a
// single buffer and sends it with one conn.Write call. The write is issued
// even when messages is empty.
func sendMessagesFast(conn net.Conn, messages []string) error {
	payload := &bytes.Buffer{}
	for i := range messages {
		payload.WriteString(messages[i])
		payload.WriteByte('\n')
	}

	// One Write for the whole batch instead of one per message.
	if _, err := conn.Write(payload.Bytes()); err != nil {
		return err
	}
	return nil
}

性能对比(发送 1000 条消息):

  • 逐条发送:1000 次系统调用,100ms
  • 批量发送:1 次系统调用,5ms
  • 性能提升:20x

2. 数据压缩

import (
    "compress/gzip"
    "io"
)

// sendCompressed gzip-compresses data and writes it to conn.
//
// The gzip writer buffers output and only emits the remaining compressed
// bytes and the trailer when it is closed, so the error from Close is part of
// the result: a failure while flushing means the peer did not receive a
// complete stream.
func sendCompressed(conn net.Conn, data []byte) error {
	writer := gzip.NewWriter(conn)

	if _, err := writer.Write(data); err != nil {
		// The stream is already broken; report the original write error.
		_ = writer.Close()
		return err
	}
	return writer.Close()
}

// receiveCompressed reads a gzip stream from conn until the stream ends and
// returns the decompressed bytes. It returns an error if the gzip header
// cannot be read or decompression fails. The error from closing the gzip
// reader is ignored.
func receiveCompressed(conn net.Conn) ([]byte, error) {
	zr, err := gzip.NewReader(conn)
	if err != nil {
		return nil, err
	}

	payload, readErr := io.ReadAll(zr)
	zr.Close()
	return payload, readErr
}

性能对比(传输 1MB JSON 数据):

  • 未压缩:1MB 传输时间
  • Gzip 压缩:200KB 传输时间(压缩比 5:1)
  • 带宽节省:80%

3. 零拷贝传输

import (
    "io"
    "net"
    "os"
)

// sendFileOptimized streams the file at filename to conn with io.Copy and
// returns the first error from opening or copying the file.
func sendFileOptimized(conn net.Conn, filename string) error {
	src, openErr := os.Open(filename)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	// io.Copy uses the sendfile system call on Linux, which avoids copying
	// the data through user space.
	if _, copyErr := io.Copy(conn, src); copyErr != nil {
		return copyErr
	}
	return nil
}

负载均衡

1. 客户端负载均衡

import (
    "math/rand"
    "sync/atomic"
)

// RoundRobinBalancer hands out servers in order, wrapping around at the end
// of the list. Next uses an atomic counter, so it may be called from multiple
// goroutines as long as servers is not modified concurrently.
type RoundRobinBalancer struct {
	servers []string
	current uint32
}

// Next returns the next server in rotation, or "" when no servers are
// configured.
func (b *RoundRobinBalancer) Next() string {
	count := uint32(len(b.servers))
	if count == 0 {
		return ""
	}

	// Keep the arithmetic unsigned: converting the counter to int first
	// could produce a negative index on 32-bit platforms.
	ticket := atomic.AddUint32(&b.current, 1) - 1
	return b.servers[ticket%count]
}

// RandomBalancer picks a server uniformly at random on every call.
type RandomBalancer struct {
	servers []string
}

// Next returns a randomly chosen server, or "" when no servers are
// configured (rand.Intn panics when given 0).
func (b *RandomBalancer) Next() string {
	if len(b.servers) == 0 {
		return ""
	}
	return b.servers[rand.Intn(len(b.servers))]
}

// WeightedRoundRobinBalancer cycles through servers so that, over one full
// cycle of totalWeight calls, each server is returned Weight times.
//
// It keeps its position in a plain int, so it is not safe for concurrent use
// without external synchronization.
type WeightedRoundRobinBalancer struct {
	servers []Server
	current int
}

// Server is a backend address with its relative weight. Servers whose weight
// is zero or negative are never selected by the weighted rotation.
type Server struct {
	Addr   string
	Weight int
}

// Next returns the address of the next server in the weighted rotation.
//
// It returns "" when no servers are configured, and falls back to the first
// server when no server has a positive weight.
func (b *WeightedRoundRobinBalancer) Next() string {
	if len(b.servers) == 0 {
		return ""
	}

	totalWeight := 0
	for _, s := range b.servers {
		if s.Weight > 0 {
			totalWeight += s.Weight
		}
	}
	if totalWeight == 0 {
		// Nothing to weigh by; avoid a modulo by zero.
		return b.servers[0].Addr
	}

	// Advance the position, then find the server whose cumulative weight
	// range contains it.
	b.current = (b.current + 1) % totalWeight

	sum := 0
	for _, s := range b.servers {
		if s.Weight <= 0 {
			continue
		}
		sum += s.Weight
		if b.current < sum {
			return s.Addr
		}
	}
	return b.servers[0].Addr
}

// LeastConnectionBalancer tracks the number of active connections per server
// and selects the server with the fewest. The zero value is ready to use;
// servers become known to the balancer the first time IncConn is called for
// them.
type LeastConnectionBalancer struct {
	mu      sync.RWMutex
	servers map[string]int // server -> active connections
}

// Next returns the server with the fewest active connections, or "" when no
// server is known. Ties are broken by choosing the lexicographically smallest
// server name, so the result does not depend on map iteration order.
func (b *LeastConnectionBalancer) Next() string {
	b.mu.RLock()
	defer b.mu.RUnlock()

	var minServer string
	minConns := int(^uint(0) >> 1) // max int

	for server, conns := range b.servers {
		if conns < minConns || (conns == minConns && server < minServer) {
			minConns = conns
			minServer = server
		}
	}

	return minServer
}

// IncConn records one more active connection for server, registering the
// server if it was not known before.
func (b *LeastConnectionBalancer) IncConn(server string) {
	b.mu.Lock()
	defer b.mu.Unlock()

	if b.servers == nil {
		b.servers = make(map[string]int)
	}
	b.servers[server]++
}

// DecConn records one fewer active connection for server. The count never
// drops below zero, and calling DecConn for an unknown server is a no-op, so
// an unmatched DecConn cannot make a server look artificially idle.
func (b *LeastConnectionBalancer) DecConn(server string) {
	b.mu.Lock()
	defer b.mu.Unlock()

	if b.servers[server] > 0 {
		b.servers[server]--
	}
}

2. 健康检查

import (
    "context"
    "time"
)

// HealthChecker periodically probes a fixed set of servers with checkFunc and
// remembers which of them are currently healthy.
type HealthChecker struct {
	servers   []string
	healthy   map[string]bool
	mu        sync.RWMutex
	checkFunc func(string) bool
	done      chan struct{} // closed by Stop to end the background loop
	stopOnce  sync.Once
}

// NewHealthChecker creates a checker for servers and starts a background
// goroutine that calls checkFunc for every server every 5 seconds. Call Stop
// to end that goroutine.
//
// All servers start out marked healthy. The servers slice is copied, so later
// changes by the caller do not affect the checker. When checkFunc is nil no
// background checking is started and the servers simply stay healthy.
func NewHealthChecker(servers []string, checkFunc func(string) bool) *HealthChecker {
	hc := &HealthChecker{
		servers:   append([]string(nil), servers...),
		healthy:   make(map[string]bool, len(servers)),
		checkFunc: checkFunc,
		done:      make(chan struct{}),
	}

	// Every server is considered healthy until the first check says otherwise.
	for _, server := range hc.servers {
		hc.healthy[server] = true
	}

	if checkFunc != nil {
		go hc.startChecking()
	}

	return hc
}

// Stop ends the background checking goroutine. It is safe to call more than
// once.
func (hc *HealthChecker) Stop() {
	hc.stopOnce.Do(func() {
		if hc.done != nil {
			close(hc.done)
		}
	})
}

// startChecking runs the periodic check loop until Stop is called.
func (hc *HealthChecker) startChecking() {
	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-hc.done:
			return
		case <-ticker.C:
			for _, server := range hc.servers {
				// Probe outside the lock so readers are not blocked
				// while a check is in flight.
				healthy := hc.checkFunc(server)

				hc.mu.Lock()
				hc.healthy[server] = healthy
				hc.mu.Unlock()
			}
		}
	}
}

// GetHealthyServers returns the servers currently marked healthy, in the
// order they were passed to NewHealthChecker, each listed once.
func (hc *HealthChecker) GetHealthyServers() []string {
	hc.mu.RLock()
	defer hc.mu.RUnlock()

	var healthy []string
	seen := make(map[string]struct{}, len(hc.servers))
	for _, server := range hc.servers {
		if _, dup := seen[server]; dup {
			continue
		}
		seen[server] = struct{}{}
		if hc.healthy[server] {
			healthy = append(healthy, server)
		}
	}
	return healthy
}

// tcpHealthCheck reports whether a TCP connection to addr can be established
// within 2 seconds. The probe connection is closed immediately.
func tcpHealthCheck(addr string) bool {
	if probe, err := net.DialTimeout("tcp", addr, 2*time.Second); err == nil {
		probe.Close()
		return true
	}
	return false
}

// httpHealthCheck reports whether a GET request to url completes within
// 2 seconds and answers with status 200 OK.
func httpHealthCheck(url string) bool {
	probe := http.Client{Timeout: 2 * time.Second}

	resp, err := probe.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

协议优化

1. HTTP/2 vs HTTP/1.1

import (
	"crypto/tls"
	"errors"
	"log"
	"net/http"

	"golang.org/x/net/http2"
)

// createHTTP2Client returns an HTTP client whose transport is an
// http2.Transport. Certificate verification is left enabled
// (InsecureSkipVerify is false).
func createHTTP2Client() *http.Client {
	tlsConfig := &tls.Config{
		InsecureSkipVerify: false,
	}
	transport := &http2.Transport{
		TLSClientConfig: tlsConfig,
	}
	return &http.Client{Transport: transport}
}

// startHTTP2Server serves handler over HTTPS on :8443 with HTTP/2 configured,
// using cert.pem and key.pem from the working directory. It blocks until the
// server stops.
//
// Configuration and serve failures are logged rather than dropped, so a
// missing certificate or an occupied port no longer fails silently. A
// graceful shutdown (http.ErrServerClosed) is not treated as an error.
func startHTTP2Server() {
	srv := &http.Server{
		Addr:    ":8443",
		Handler: handler,
	}

	// Configure HTTP/2 on the server.
	if err := http2.ConfigureServer(srv, &http2.Server{}); err != nil {
		log.Printf("configuring HTTP/2 on %s: %v", srv.Addr, err)
		return
	}

	// Serve over TLS (HTTP/2 requires TLS here).
	if err := srv.ListenAndServeTLS("cert.pem", "key.pem"); err != nil && !errors.Is(err, http.ErrServerClosed) {
		log.Printf("serving HTTPS on %s: %v", srv.Addr, err)
	}
}

HTTP/2 优势

  • 多路复用(一个连接多个请求)
  • 头部压缩(HPACK)
  • 服务器推送
  • 二进制协议

性能对比

  • HTTP/1.1:每个连接同一时刻只能处理一个请求,100 个并发请求需要多个连接(或排队等待)
  • HTTP/2:100 个请求共用 1 个连接
  • 延迟降低:50%

2. gRPC 优化

import (
    "google.golang.org/grpc"
    "google.golang.org/grpc/keepalive"
)

// createGRPCClient dials addr with grpc.WithInsecure, client keepalive pings,
// and a 100MB limit on both sent and received messages.
func createGRPCClient(addr string) (*grpc.ClientConn, error) {
	const maxMessageBytes = 100 * 1024 * 1024 // 100MB

	keepaliveParams := keepalive.ClientParameters{
		Time:                10 * time.Second, // interval between pings
		Timeout:             3 * time.Second,  // how long to wait for a ping reply
		PermitWithoutStream: true,             // ping even when no stream is active
	}

	dialOptions := []grpc.DialOption{
		grpc.WithInsecure(),
		grpc.WithKeepaliveParams(keepaliveParams),
		grpc.WithDefaultCallOptions(
			grpc.MaxCallRecvMsgSize(maxMessageBytes),
			grpc.MaxCallSendMsgSize(maxMessageBytes),
		),
	}

	return grpc.Dial(addr, dialOptions...)
}

// createGRPCServer builds a gRPC server with keepalive parameters, a
// keepalive enforcement policy, and a 100MB limit on both received and sent
// messages.
func createGRPCServer() *grpc.Server {
	const maxMessageBytes = 100 * 1024 * 1024

	serverKeepalive := keepalive.ServerParameters{
		Time:    10 * time.Second,
		Timeout: 3 * time.Second,
	}
	enforcement := keepalive.EnforcementPolicy{
		MinTime:             5 * time.Second,
		PermitWithoutStream: true,
	}

	serverOptions := []grpc.ServerOption{
		grpc.KeepaliveParams(serverKeepalive),
		grpc.KeepaliveEnforcementPolicy(enforcement),
		grpc.MaxRecvMsgSize(maxMessageBytes),
		grpc.MaxSendMsgSize(maxMessageBytes),
	}

	return grpc.NewServer(serverOptions...)
}

3. WebSocket vs HTTP

适用场景对比

| 场景 | HTTP | WebSocket |
| --- | --- | --- |
| 请求-响应 | ✅ 推荐 | ❌ 过度 |
| 实时推送 | ❌ 轮询低效 | ✅ 推荐 |
| 双向通信 | ❌ 复杂 | ✅ 简单 |
| 低延迟 | ❌ 每次握手 | ✅ 持久连接 |

// upgrader upgrades HTTP requests to WebSocket connections with 4096-byte
// read and write buffers.
var upgrader = websocket.Upgrader{
	ReadBufferSize:  4096,
	WriteBufferSize: 4096,
	CheckOrigin: func(r *http.Request) bool {
		// SECURITY: accepts every origin. Production code must validate
		// the Origin header here.
		return true
	},
}

// wsHandler upgrades the request to a WebSocket connection and echoes every
// message back to the client until the connection fails or goes idle.
//
// Deadlines are absolute points in time, so they are renewed per operation:
// the write deadline is set before every write, and the read deadline is
// extended whenever a message or a pong arrives. A ping is sent every
// 30 seconds to keep the connection alive. All writes go through one mutex
// because the connection supports only one concurrent writer.
func wsHandler(w http.ResponseWriter, r *http.Request) {
	conn, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		return
	}
	defer conn.Close()

	const (
		readTimeout  = 60 * time.Second
		writeTimeout = 10 * time.Second
		pingInterval = 30 * time.Second
	)

	// write serializes the echo loop and the heartbeat goroutine and gives
	// every write a fresh deadline.
	var writeMu sync.Mutex
	write := func(messageType int, data []byte) error {
		writeMu.Lock()
		defer writeMu.Unlock()
		if err := conn.SetWriteDeadline(time.Now().Add(writeTimeout)); err != nil {
			return err
		}
		return conn.WriteMessage(messageType, data)
	}

	// The peer must send a message or answer a ping within readTimeout.
	if err := conn.SetReadDeadline(time.Now().Add(readTimeout)); err != nil {
		return
	}
	conn.SetPongHandler(func(string) error {
		return conn.SetReadDeadline(time.Now().Add(readTimeout))
	})

	// done stops the heartbeat goroutine as soon as the handler returns.
	done := make(chan struct{})
	defer close(done)

	// Heartbeat.
	go func() {
		ticker := time.NewTicker(pingInterval)
		defer ticker.Stop()

		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				if err := write(websocket.PingMessage, nil); err != nil {
					return
				}
			}
		}
	}()

	// Echo loop.
	for {
		messageType, message, err := conn.ReadMessage()
		if err != nil {
			return
		}
		if err := conn.SetReadDeadline(time.Now().Add(readTimeout)); err != nil {
			return
		}

		// Handle the message (echo it back).
		if err := write(messageType, message); err != nil {
			return
		}
	}
}

网络监控

1. 连接数监控

import (
    "net/http"
    "sync/atomic"
)

// ConnectionMonitor keeps an atomically updated count of active connections.
type ConnectionMonitor struct {
	activeConns int64
}

// addConns atomically adjusts the active count by delta.
func (m *ConnectionMonitor) addConns(delta int64) {
	atomic.AddInt64(&m.activeConns, delta)
}

// IncConn adds one to the active count.
func (m *ConnectionMonitor) IncConn() {
	m.addConns(1)
}

// DecConn subtracts one from the active count.
func (m *ConnectionMonitor) DecConn() {
	m.addConns(-1)
}

// GetActiveConns returns the current active count.
func (m *ConnectionMonitor) GetActiveConns() int64 {
	return atomic.LoadInt64(&m.activeConns)
}

// Middleware wraps next so that the active count is incremented for the
// duration of each request and decremented when the handler returns.
func (m *ConnectionMonitor) Middleware(next http.Handler) http.Handler {
	counted := func(w http.ResponseWriter, r *http.Request) {
		m.IncConn()
		defer m.DecConn()
		next.ServeHTTP(w, r)
	}
	return http.HandlerFunc(counted)
}

2. 网络流量监控

# Real-time network traffic on interface eth0
iftop -i eth0

# Network statistics
netstat -s

# Connection state statistics
ss -s
# Count TCP sockets per state: take column 6 of every line starting with
# "tcp", then sort and count the distinct values.
netstat -an | awk '/^tcp/ {print $6}' | sort | uniq -c

网络优化检查清单

TCP 层

  • 是否调优了内核参数?(缓冲区、队列大小)
  • 是否启用了 TCP Keep-Alive?
  • 是否优化了 TIME_WAIT?
  • 是否使用了 BBR 拥塞控制?

连接管理

  • 是否使用了连接池?
  • 是否复用了 HTTP 连接?
  • 是否配置了合理的超时?
  • 是否实现了健康检查?

数据传输

  • 是否批量发送数据?
  • 是否启用了数据压缩?
  • 是否使用了零拷贝?
  • 是否禁用了 Nagle 算法(低延迟场景)?

协议选择

  • 是否使用了 HTTP/2?
  • 是否考虑了 gRPC?
  • 是否适合使用 WebSocket?

本章小结

核心要点

  1. TCP 调优:内核参数、Keep-Alive、拥塞控制
  2. 连接复用:连接池、HTTP Keep-Alive 减少握手
  3. 批量传输:合并数据、减少系统调用
  4. 协议升级:HTTP/2、gRPC 提升性能
  5. 负载均衡:合理分配请求、健康检查

优化优先级

连接复用 > TCP调优 > 批量传输 > 协议升级 > 负载均衡

⏮️ 上一节:内存优化 | ⏭️ 下一节:数据库优化