Skip to content
Open

4.3.5 #172

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# 更新日志

4.3.5-rc.0 - 2025-09-14
---
- PRE-RELEASE: v4.3.5-rc.0是v4.3.5的预发布版本,请勿在生产环境中使用;
- CHANGE: 改进`nest`实现, 减少内存分配`10371 B/op -> 1852 B/op` `43 allocs/op -> 14 allocs/op`
- CHANGE: 为`nest`加入`dispatcher`实现, 为不同情况分配适合的处理器以保证性能与兼容性
- CHANGE: 改进路径匹配热点的内存分配

4.3.4 - 2025-09-14
---
- CHANGE: 改进嵌套加速实现, 增强稳定性
Expand All @@ -25,7 +32,7 @@

4.3.0-rc.0 - 2025-08-11
---
- PRE-RELEASE: v4.3.0-rc.0是v4.3.0发布版本,请勿在生产环境中使用;
- PRE-RELEASE: v4.3.0-rc.0是v4.3.0的预发布版本,请勿在生产环境中使用;
- CHANGE: 为OCI镜像(Docker)代理带来自动library附加功能
- CHANGE(refactor): 改进OCI镜像(Docker)代理路径组成流程
- ADD: 新增[WANF](https://github.com/WJQSERVER/wanf)配置文件格式支持
Expand Down
2 changes: 1 addition & 1 deletion DEV-VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.3.0-rc.0
4.3.5-rc.0
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/fenthope/ipfilter v0.0.1
github.com/fenthope/reco v0.0.4
github.com/fenthope/record v0.0.4
github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b
github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/infinite-iroha/touka v0.3.7
github.com/wjqserver/modembed v0.0.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ github.com/fenthope/record v0.0.4 h1:/1JHNCxiXGLL/qCh4LEGaAvhj4CcKsb6siTxjLmjdO4
github.com/fenthope/record v0.0.4/go.mod h1:G0a6KCiCDyX2SsC3nfzSN651fJKxH482AyJvzlnvAJU=
github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b h1:6Q4zRHXS/YLOl9Ng1b1OOOBWMidAQZR3Gel0UKPC/KU=
github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3 h1:02WINGfSX5w0Mn+F28UyRoSt9uvMhKguwWMlOAh6U/0=
github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3/go.mod h1:uNVvRXArCGbZ508SxYYTC5v1JWoz2voff5pm25jU1Ok=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/infinite-iroha/touka v0.3.7 h1:bIIZW5Weh7lVpyOWh4FmyR9UOfb5FOt+cR9yQ30FJLA=
Expand Down
2 changes: 1 addition & 1 deletion proxy/chunkreq.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func ChunkedProxyRequest(ctx context.Context, c *touka.Context, u string, cfg *c

var reader io.Reader

reader, _, err = processLinks(bodyReader, c.Request.Host, cfg, c)
reader, _, err = processLinks(bodyReader, c.Request.Host, cfg, c, bodySize)
c.WriteStream(reader)
if err != nil {
c.Errorf("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), c.Request.Method, u, c.UserAgent(), c.Request.Proto, err)
Expand Down
43 changes: 30 additions & 13 deletions proxy/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,32 @@ import (
"github.com/infinite-iroha/touka"
)

// buildHandlerPath 使用 strings.Builder 来高效地构建最终的 URL.
// 这避免了使用标准字符串拼接时发生的多次内存分配.
// buildHandlerPath assembles the final upstream URL for a matched request
// path using a single strings.Builder, avoiding the repeated allocations of
// naive string concatenation.
//
// For matcher == "blob" on a "github.com/..." path, the URL is rewritten to
// the raw.githubusercontent.com host with the first "/blob/" segment removed;
// every other path is simply prefixed with "https://".
func buildHandlerPath(path, matcher string) string {
	var sb strings.Builder
	sb.Grow(len(path) + 50)

	// Require the "/" boundary so lookalike hosts such as
	// "github.com.evil.com" are not treated as github.com.
	if matcher == "blob" && strings.HasPrefix(path, "github.com/") {
		sb.WriteString("https://raw.githubusercontent.com")
		pathSegment := path[len("github.com"):] // keeps the leading "/"
		if i := strings.Index(pathSegment, "/blob/"); i != -1 {
			sb.WriteString(pathSegment[:i])
			sb.WriteString("/")
			sb.WriteString(pathSegment[i+len("/blob/"):])
		} else {
			sb.WriteString(pathSegment)
		}
	} else {
		sb.WriteString("https://")
		sb.WriteString(path)
	}

	return sb.String()
}
Comment on lines +12 to +36
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

这个 buildHandlerPath 函数与 proxy/routing.go 中的 buildRoutingPath 函数功能非常相似,导致了代码重复。建议将它们合并为一个通用的函数,以提高代码的可维护性。合并后的函数可以放在一个共享的文件中,例如 proxy/pathbuilder.go


var re = regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 匹配http://或https://开头的路径

func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
Expand All @@ -32,21 +58,16 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
}

// 制作url
rawPath = "https://" + matches[2]

var (
user string
repo string
matcher string
)

path := matches[2]
var matcherErr *GHProxyErrors
user, repo, matcher, matcherErr = Matcher(rawPath, cfg)
user, repo, matcher, matcherErr := Matcher("https://"+path, cfg)
if matcherErr != nil {
ErrorPage(c, matcherErr)
return
}

rawPath = buildHandlerPath(path, matcher)

shoudBreak = listCheck(cfg, c, user, repo, rawPath)
if shoudBreak {
return
Expand All @@ -57,11 +78,7 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
return
}

// 处理blob/raw路径
if matcher == "blob" {
rawPath = rawPath[18:]
rawPath = "https://raw.githubusercontent.com" + rawPath
rawPath = strings.Replace(rawPath, "/blob/", "/", 1)
matcher = "raw"
}

Expand Down
219 changes: 161 additions & 58 deletions proxy/nest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,78 @@ package proxy

import (
"bufio"
"bytes"
"fmt"
"ghproxy/config"
"io"
"strings"
"sync"

"github.com/infinite-iroha/touka"
)

// Upstream prefixes used for zero-allocation byte matching of links found in
// proxied response bodies. Each host prefix ends with "/" so the comparison
// carries a host boundary: lookalike domains such as
// "https://github.com.evil.com/..." must not match. The bare scheme prefixes
// (prefixHTTP/prefixHTTPS) are used only for stripping and rebuilding
// schemes, so they intentionally have no trailing slash.
var (
	prefixGithub   = []byte("https://github.com/")
	prefixRawUser  = []byte("https://raw.githubusercontent.com/")
	prefixRaw      = []byte("https://raw.github.com/")
	prefixGistUser = []byte("https://gist.githubusercontent.com/")
	prefixGist     = []byte("https://gist.github.com/")
	prefixAPI      = []byte("https://api.github.com/")
	prefixHTTP     = []byte("http://")
	prefixHTTPS    = []byte("https://")
)
Comment on lines +15 to +24
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

域名匹配缺少主机边界,可能误改写相似域名(安全/正确性)

HasPrefix("https://github.com") 会匹配 https://github.com.evil.com/... 等相似域名,存在误改写风险。建议将前缀改为包含斜杠的主机边界,或显式解析并校验 Host。

应用最小改动(增加结尾 /):

-	prefixGithub   = []byte("https://github.com")
-	prefixRawUser  = []byte("https://raw.githubusercontent.com")
-	prefixRaw      = []byte("https://raw.github.com")
-	prefixGistUser = []byte("https://gist.githubusercontent.com")
-	prefixGist     = []byte("https://gist.github.com")
-	prefixAPI      = []byte("https://api.github.com")
+	prefixGithub   = []byte("https://github.com/")
+	prefixRawUser  = []byte("https://raw.githubusercontent.com/")
+	prefixRaw      = []byte("https://raw.github.com/")
+	prefixGistUser = []byte("https://gist.githubusercontent.com/")
+	prefixGist     = []byte("https://gist.github.com/")
+	prefixAPI      = []byte("https://api.github.com/")

并同步调整字符串版 EditorMatcher 对应常量。

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
var (
prefixGithub = []byte("https://github.com")
prefixRawUser = []byte("https://raw.githubusercontent.com")
prefixRaw = []byte("https://raw.github.com")
prefixGistUser = []byte("https://gist.githubusercontent.com")
prefixGist = []byte("https://gist.github.com")
prefixAPI = []byte("https://api.github.com")
prefixHTTP = []byte("http://")
prefixHTTPS = []byte("https://")
)
var (
prefixGithub = []byte("https://github.com/")
prefixRawUser = []byte("https://raw.githubusercontent.com/")
prefixRaw = []byte("https://raw.github.com/")
prefixGistUser = []byte("https://gist.githubusercontent.com/")
prefixGist = []byte("https://gist.github.com/")
prefixAPI = []byte("https://api.github.com/")
prefixHTTP = []byte("http://")
prefixHTTPS = []byte("https://")
)
🤖 Prompt for AI Agents
In proxy/nest.go around lines 15 to 24, the byte-slice host prefixes lack a
trailing slash and so HasPrefix checks can wrongly match domains like
"https://github.com.evil.com"; update the prefixes to include the host boundary
by appending "/" (e.g. "https://github.com/") or alternatively perform explicit
URL parsing and check the Host field; make the minimal change by adding the
trailing "/" to the byte-slice constants here and ensure any corresponding
string constants used by EditorMatcher are updated to the same values so
matching behavior remains consistent.


// EditorMatcherBytes reports whether rawPath starts with one of the upstream
// prefixes whose links should be rewritten through the proxy. It is the
// []byte counterpart of EditorMatcher and performs no allocations.
func EditorMatcherBytes(rawPath []byte, cfg *config.Config) (bool, error) {
	switch {
	case bytes.HasPrefix(rawPath, prefixGithub),
		bytes.HasPrefix(rawPath, prefixRawUser),
		bytes.HasPrefix(rawPath, prefixRaw),
		bytes.HasPrefix(rawPath, prefixGistUser),
		bytes.HasPrefix(rawPath, prefixGist):
		return true, nil
	}
	// API links are only rewritten when explicitly enabled in the config.
	if cfg.Shell.RewriteAPI && bytes.HasPrefix(rawPath, prefixAPI) {
		return true, nil
	}
	return false, nil
}

func modifyURLBytes(url []byte, host []byte, cfg *config.Config) []byte {
matched, err := EditorMatcherBytes(url, cfg)
if err != nil || !matched {
return url
}

var u []byte
if bytes.HasPrefix(url, prefixHTTPS) {
u = url[len(prefixHTTPS):]
} else if bytes.HasPrefix(url, prefixHTTP) {
u = url[len(prefixHTTP):]
} else {
u = url
}

newLen := len(prefixHTTPS) + len(host) + 1 + len(u)
newURL := make([]byte, newLen)

written := 0
written += copy(newURL[written:], prefixHTTPS)
written += copy(newURL[written:], host)
written += copy(newURL[written:], []byte("/"))
copy(newURL[written:], u)
Comment on lines +68 to +72
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

这段手动构建 []byte 的代码虽然性能不错,但略显繁琐。可以简化一下,使其更易读。例如,使用一个变量来追踪写入的位置,而不是在每一步都累加 written

Suggested change
written := 0
written += copy(newURL[written:], prefixHTTPS)
written += copy(newURL[written:], host)
written += copy(newURL[written:], []byte("/"))
copy(newURL[written:], u)
n := copy(newURL, prefixHTTPS)
n += copy(newURL[n:], host)
n += copy(newURL[n:], []byte("/"))
copy(newURL[n:], u)


return newURL
}

func EditorMatcher(rawPath string, cfg *config.Config) (bool, error) {
// 匹配 "https://github.com"开头的链接
if strings.HasPrefix(rawPath, "https://github.com") {
Expand Down Expand Up @@ -64,87 +128,126 @@ func modifyURL(url string, host string, cfg *config.Config) string {
return url
}

// processLinks 处理链接,返回包含处理后数据的 io.Reader
func processLinks(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
pipeReader, pipeWriter := io.Pipe() // 创建 io.Pipe
var bufferPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}

// processLinksStreamingInternal is a link processing function that reads the input line by line.
// It is memory-safe for large files but less performant due to numerous small allocations.
func processLinksStreamingInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
pipeReader, pipeWriter := io.Pipe()
readerOut = pipeReader

go func() { // 在 Goroutine 中执行写入操作
go func() {
defer func() {
if pipeWriter != nil { // 确保 pipeWriter 关闭,即使发生错误
if err != nil {
if closeErr := pipeWriter.CloseWithError(err); closeErr != nil { // 如果有错误,传递错误给 reader
c.Errorf("pipeWriter close with error failed: %v, original error: %v", closeErr, err)
}
} else {
if closeErr := pipeWriter.Close(); closeErr != nil { // 没有错误,正常关闭
c.Errorf("pipeWriter close failed: %v", closeErr)
if err == nil { // 如果之前没有错误,记录关闭错误
err = closeErr
}
}
}
if err != nil {
pipeWriter.CloseWithError(err)
} else {
pipeWriter.Close()
}
}()
defer input.Close()
Comment on lines 144 to +151
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

processLinksStreamingInternal 函数中,defer 块里的 pipeWriter.Close()input.Close() 调用没有处理可能返回的错误。这与 processLinksBufferedInternal 中的实现不一致,后者会记录关闭失败的错误。这是一个潜在的回归,可能会隐藏一些资源泄露或未完全写入的问题。建议保持错误处理逻辑的一致性,对 Close() 的错误进行记录。

defer func() {
    if err != nil {
        if closeErr := pipeWriter.CloseWithError(err); closeErr != nil && c != nil {
            c.Errorf("pipeWriter close with error failed: %v", closeErr)
        }
    } else {
        if closeErr := pipeWriter.Close(); closeErr != nil && c != nil {
            c.Errorf("pipeWriter close failed: %v", closeErr)
        }
    }
}()
defer func() {
    if closeErr := input.Close(); closeErr != nil && c != nil {
        c.Errorf("input close failed: %v", closeErr)
    }
}()


defer func() {
if err := input.Close(); err != nil {
c.Errorf("input close failed: %v", err)
bufReader := bufio.NewReader(input)
bufWriter := bufio.NewWriterSize(pipeWriter, 4096)
defer bufWriter.Flush()

for {
line, readErr := bufReader.ReadString('\n')
if readErr != nil && readErr != io.EOF {
err = fmt.Errorf("read error: %w", readErr)
return
}

}()
modifiedLine := urlPattern.ReplaceAllStringFunc(line, func(originalURL string) string {
return modifyURL(originalURL, host, cfg)
})

var bufReader *bufio.Reader
var n int
n, err = bufWriter.WriteString(modifiedLine)
written += int64(n)
if err != nil {
err = fmt.Errorf("write error: %w", err)
return
}

bufReader = bufio.NewReader(input)
if readErr == io.EOF {
break
}
}
}()

var bufWriter *bufio.Writer
return readerOut, written, nil
}

bufWriter = bufio.NewWriterSize(pipeWriter, 4096) // 使用 pipeWriter
// processLinks acts as a dispatcher, choosing the best link-rewriting
// strategy based on body size. Large or unknown-size bodies use a
// memory-safe streaming processor; small bodies use a faster buffered one.
//
// bodySize is the response body length in bytes; any negative value (the
// callers pass -1) means the length is unknown.
func processLinks(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context, bodySize int) (readerOut io.Reader, written int64, err error) {
	const sizeThreshold = 256 * 1024 // 256KB

	// Stream large or unknown-size bodies to bound memory use (avoid OOM).
	// Accept any negative sentinel, not just -1, as "unknown".
	if bodySize < 0 || bodySize > sizeThreshold {
		c.Debugf("Using streaming processor for large/unknown size file (%d bytes)", bodySize)
		return processLinksStreamingInternal(input, host, cfg, c)
	}

	c.Debugf("Using buffered processor for small file (%d bytes)", bodySize)
	return processLinksBufferedInternal(input, host, cfg, c)
}

// processLinksBufferedInternal a link processing function that reads the entire content into a buffer.
// It is optimized for performance on smaller files but carries an OOM risk for large files.
func processLinksBufferedInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
pipeReader, pipeWriter := io.Pipe()
readerOut = pipeReader
hostBytes := []byte(host)

//确保writer关闭
go func() {
// 在 goroutine 退出时, 根据 err 是否为 nil, 带错误或正常关闭 pipeWriter
defer func() {
if flushErr := bufWriter.Flush(); flushErr != nil {
c.Errorf("writer flush failed %v", flushErr)
// 如果已经存在错误,则保留。否则,记录此错误。
if err == nil {
err = flushErr
}
if closeErr := input.Close(); closeErr != nil {
c.Errorf("input close failed: %v", closeErr)
}
}()

// 使用正则表达式匹配 http 和 https 链接
for {
line, readErr := bufReader.ReadString('\n')
if readErr != nil {
if readErr == io.EOF {
break // 文件结束
defer func() {
if err != nil {
if closeErr := pipeWriter.CloseWithError(err); closeErr != nil {
c.Errorf("pipeWriter close with error failed: %v", closeErr)
}
} else {
if closeErr := pipeWriter.Close(); closeErr != nil {
c.Errorf("pipeWriter close failed: %v", closeErr)
}
err = fmt.Errorf("读取行错误: %v", readErr) // 传递错误
return // Goroutine 中使用 return 返回错误
}
}()

// 替换所有匹配的 URL
modifiedLine := urlPattern.ReplaceAllStringFunc(line, func(originalURL string) string {
return modifyURL(originalURL, host, cfg) // 假设 modifyURL 函数已定义
})
buf := bufferPool.Get().(*bytes.Buffer)
buf.Reset()
defer bufferPool.Put(buf)

n, writeErr := bufWriter.WriteString(modifiedLine)
written += int64(n) // 更新写入的字节数
if writeErr != nil {
err = fmt.Errorf("写入文件错误: %v", writeErr) // 传递错误
return // Goroutine 中使用 return 返回错误
}
// 将全部输入读入复用的缓冲区
if _, err = buf.ReadFrom(input); err != nil {
err = fmt.Errorf("reading input failed: %w", err)
return
}

// 在返回之前,再刷新一次 (虽然 defer 中已经有 flush,但这里再加一次确保及时刷新)
if flushErr := bufWriter.Flush(); flushErr != nil {
if err == nil { // 避免覆盖之前的错误
err = flushErr
}
return // Goroutine 中使用 return 返回错误
// 使用 ReplaceAllFunc 和字节版本辅助函数, 实现准零分配
modifiedBytes := urlPattern.ReplaceAllFunc(buf.Bytes(), func(originalURL []byte) []byte {
return modifyURLBytes(originalURL, hostBytes, cfg)
})

// 将处理后的字节写回管道
var n int
n, err = pipeWriter.Write(modifiedBytes)
if err != nil {
err = fmt.Errorf("writing to pipe failed: %w", err)
return
}
written = int64(n)
}()

return readerOut, written, nil // 返回 reader 和 written,error 由 Goroutine 通过 pipeWriter.CloseWithError 传递
return readerOut, written, nil
}
Loading
Loading