Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: '1.20'
go-version: '1.25'

- name: Build
run: go build -v ./...
Expand Down
9 changes: 6 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
module grepturbo

go 1.20
go 1.25.0

require (
github.com/spf13/cobra v1.10.2
golang.org/x/sys v0.30.0
)

require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/cobra v1.10.2 // indirect
github.com/spf13/pflag v1.0.9 // indirect
golang.org/x/sys v0.42.0 // indirect
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiT
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
57 changes: 55 additions & 2 deletions internal/index/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"io/fs"
"os"
"path/filepath"
"runtime"
"sync"
"unicode/utf8"

"grepturbo/internal/posting"
Expand Down Expand Up @@ -65,7 +67,12 @@ var defaultSkipDirs = map[string]bool{
".fastregex": true,
}

// Build walks all files under rootDir and indexes each one.
// extractResult is the message a Build worker goroutine sends to the
// collector goroutine for one file that was read successfully: the file's
// path together with the trigrams extracted from its contents.
type extractResult struct {
// path is the file path as received from the directory walk; the collector
// appends it to the Builder's Files list.
path string
// trigrams is the output of trigram.Extract on the file's contents; the
// collector records each one in the posting lists under the file's ID.
trigrams []trigram.T
}

// Build walks all files under rootDir and indexes each one concurrently.
// Directories listed in skip are skipped entirely (e.g. "node_modules").
// Directories and files that fail to read are silently skipped.
func (b *Builder) Build(rootDir string, skip ...string) error {
Expand All @@ -77,6 +84,48 @@ func (b *Builder) Build(rootDir string, skip ...string) error {
skipSet[s] = true
}

paths := make(chan string, 100)
results := make(chan extractResult, 100)

// Worker pool: read files and extract trigrams
var wg sync.WaitGroup
numWorkers := runtime.GOMAXPROCS(0)
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for path := range paths {
data, err := os.ReadFile(path)
if err != nil || !utf8.Valid(data) || len(data) > maxFileSize {
continue
}
results <- extractResult{
path: path,
trigrams: trigram.Extract(string(data)),
}
}
}()
}

// Signal workers are done
go func() {
wg.Wait()
close(results)
}()

// Collector: update Builder state (sequential, lock-free)
done := make(chan struct{})
go func() {
for res := range results {
fileID := uint32(len(b.Files))
b.Files = append(b.Files, res.path)
for _, t := range res.trigrams {
b.Posts.AddBatch(t, []uint32{fileID})
}
}
close(done)
}()

err := filepath.WalkDir(rootDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return nil
Expand All @@ -87,12 +136,16 @@ func (b *Builder) Build(rootDir string, skip ...string) error {
}
return nil
}
b.Add(path)
paths <- path
return nil
})
close(paths)

if err != nil {
return err
}

<-done
b.Posts.Finalize()
return nil
}
62 changes: 51 additions & 11 deletions scripts/benchcmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"io/fs"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"unicode/utf8"

"grepturbo/internal/index"
Expand All @@ -28,6 +30,11 @@ type result struct {
TotalMs float64 `json:"total_ms"`
}

// extractResult carries one successfully read file from a worker goroutine
// to the collector goroutine during the walk + extract phase of the
// benchmark: the file's path plus the trigrams extracted from its contents.
type extractResult struct {
// path is the file path as received from the directory walk; the collector
// appends it to b.Files.
path string
// trigrams is the output of trigram.Extract on the file's contents; the
// collector adds each one to b.Posts under the file's ID.
trigrams []trigram.T
}

func main() {
root := flag.String("root", ".", "root directory to index")
out := flag.String("out", "/tmp/grepturbo_bench_idx", "output index dir")
Expand All @@ -51,6 +58,46 @@ func main() {

// ── Phase: walk + extract ─────────────────────────────────────────────────
walkStart := time.Now()

paths := make(chan string, 100)
results := make(chan extractResult, 100)

var wg sync.WaitGroup
numWorkers := runtime.GOMAXPROCS(0)
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for path := range paths {
data, err := os.ReadFile(path)
if err != nil || !utf8.Valid(data) || len(data) > maxFileSize {
continue
}
results <- extractResult{
path: path,
trigrams: trigram.Extract(string(data)),
}
}
}()
}

go func() {
wg.Wait()
close(results)
}()

done := make(chan struct{})
go func() {
for res := range results {
fileID := uint32(len(b.Files))
b.Files = append(b.Files, res.path)
for _, t := range res.trigrams {
b.Posts.AddBatch(t, []uint32{fileID})
}
}
close(done)
}()

filepath.WalkDir(*root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return nil
Expand All @@ -61,19 +108,12 @@ func main() {
}
return nil
}

data, err := os.ReadFile(path)
if err != nil || !utf8.Valid(data) || len(data) > maxFileSize {
return nil
}

fileID := uint32(len(b.Files))
b.Files = append(b.Files, path)
for _, t := range trigram.Extract(string(data)) {
b.Posts.AddBatch(t, []uint32{fileID})
}
paths <- path
return nil
})
close(paths)
<-done

walkExtractMs := ms(time.Since(walkStart))

// ── Phase: finalize ───────────────────────────────────────────────────────
Expand Down
Loading