Skip to content

Commit 8f8b843

Browse files
fix: unicode handling, tweak: session save/restore
1 parent 845cf54 commit 8f8b843

14 files changed

Lines changed: 283 additions & 105 deletions

File tree

CHANGELOG.md

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1-
### v0.5.0; 2026-03-16
2-
```
3-
initial github release
1+
### v0.5.1; 2026-03-17
2+
### Session Save/Restore
3+
- Save on SIGINT/SIGTERM
4+
- Resume with `-l` now accumulates stats
5+
- Preserve `first_started`; update only `last_updated` on resume
6+
- Fix session file deletion issue during writability checks
7+
- Faster restore via parallel loading
8+
- Improved session path handling and save path visibility
9+
- Add warning for long-running session restores
10+
### Bug Fixes
11+
- Fix session save failures when downstream pipe exits early ([#2](https://github.com/cyclone-github/pcfg-go/issues/2))
12+
- Address Unicode handling issues in OMEN and parser logic ([#3](https://github.com/cyclone-github/pcfg-go/issues/3))
13+
- Replace byte slicing with rune-safe handling
14+
- Fix context parsing edge case (e.g., `pass#123`)
15+
- Correct UTF-8 handling in website/TLD parsing
416

17+
### v0.5.0; 2026-03-16
18+
- Initial GitHub release
519
### Overview
6-
Pure Go rewrite of the Python3 `pcfg_cracker`, designed as a near drop-in replacement with significant performance gains and expanded features
7-
20+
- Pure Go rewrite of the Python3 `pcfg_cracker`, designed as a near drop-in replacement with significant performance gains and expanded features
821
### Highlights
9-
~3× faster trainer
10-
~40× faster pcfg_guesser
11-
`$HEX[]` input/output support
12-
Full multi-byte / Unicode support (not supported in Compiled C Edition)
13-
Improved and expanded keyboard detection: Fixed/tuned: QWERTY, JCUKEN Added: AZERTY, QWERTZ, Dvorak
14-
Expanded detection for TLDs, URLs, and emails in trainer
15-
Auto-save and resume support in pcfg_guesser
16-
Multi-threaded for high-throughput performance
17-
```
22+
- ~3× faster trainer
23+
- ~40× faster pcfg_guesser
24+
- `$HEX[]` input/output support
25+
- Full multi-byte / Unicode support (not supported in Compiled C Edition)
26+
- Improved and expanded keyboard detection (fixed/tuned: QWERTY, JCUKEN; added: AZERTY, QWERTZ, Dvorak)
27+
- Expanded detection for TLDs, URLs, and emails in trainer
28+
- Auto-save and resume support in pcfg_guesser
29+
- Multi-threaded for high-throughput performance

cmd/pcfg_guesser/main.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
URL: https://github.com/cyclone-github/
55
Repo: https://github.com/cyclone-github/pcfg-go/
66
Credits: https://github.com/lakiw/pcfg_cracker/
7-
Version: 0.5.0 (Go)
7+
Version: 0.5.1 (Go)
88
*/
99

1010
package main
@@ -16,12 +16,13 @@ import (
1616
"os"
1717
"path/filepath"
1818
"runtime"
19+
"strings"
1920

2021
"github.com/cyclone-github/pcfg-go/guesser"
2122
"github.com/cyclone-github/pcfg-go/guesser/omen"
2223
)
2324

24-
const version = "0.5.0 (Go)"
25+
const version = "0.5.1 (Go)"
2526

2627
func main() {
2728
runtime.GOMAXPROCS(runtime.NumCPU())
@@ -51,7 +52,7 @@ func main() {
5152
os.Exit(0)
5253
}
5354
if *versionFlag {
54-
fmt.Fprintln(os.Stderr, "PCFG Guesser v0.5.0 (Go)")
55+
fmt.Fprintln(os.Stderr, "PCFG Guesser v0.5.1 (Go)")
5556
fmt.Fprintln(os.Stderr, "https://github.com/cyclone-github/pcfg-go/")
5657
os.Exit(0)
5758
}
@@ -92,6 +93,23 @@ func main() {
9293

9394
exeDir := filepath.Dir(exe)
9495
savePath := filepath.Join(exeDir, *session+".sav")
96+
// Fall back to cwd if exe is in a temp dir (e.g. go run) or exe dir isn't writable
97+
if cwd, err := os.Getwd(); err == nil {
98+
useCwd := strings.HasPrefix(exeDir, "/tmp") || strings.Contains(exeDir, "go-build")
99+
if !useCwd {
100+
// test writability without truncating existing session file
101+
testPath := savePath + ".writetest"
102+
if f, err := os.Create(testPath); err != nil {
103+
useCwd = true
104+
} else {
105+
f.Close()
106+
os.Remove(testPath)
107+
}
108+
}
109+
if useCwd {
110+
savePath = filepath.Join(cwd, *session+".sav")
111+
}
112+
}
95113

96114
var gen *guesser.ParallelGuessGenerator
97115
if *loadSession {
@@ -106,8 +124,9 @@ func main() {
106124
os.Exit(1)
107125
}
108126
fmt.Fprintln(os.Stderr, "Restoring saved progress...")
127+
fmt.Fprintln(os.Stderr, "Note: Restore may take a long time for sessions that ran for hours or days.")
109128
queue := guesser.NewPcfgQueueFromSave(g, base, sav.MinProbability, sav.MaxProbability)
110-
gen = guesser.NewParallelGuessGeneratorWithQueue(g, base, queue, omenGrammar, *debug)
129+
gen = guesser.NewParallelGuessGeneratorWithQueueAndRestore(g, base, queue, omenGrammar, *debug, sav)
111130
} else {
112131
gen = guesser.NewParallelGuessGenerator(g, base, omenGrammar, *debug)
113132
}

cmd/trainer/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
URL: https://github.com/cyclone-github/
55
Repo: https://github.com/cyclone-github/pcfg-go/
66
Credits: https://github.com/lakiw/pcfg_cracker/
7-
Version: 0.5.0 (Go)
7+
Version: 0.5.1 (Go)
88
*/
99

1010
package main
@@ -99,7 +99,7 @@ func main() {
9999

100100
info := &trainer.ProgramInfo{
101101
Name: "PCFG Trainer",
102-
Version: "0.5.0 (Go)",
102+
Version: "0.5.1 (Go)",
103103
Author: "cyclone",
104104
Contact: "https://github.com/cyclone-github/",
105105
RuleName: "Default",
@@ -135,7 +135,7 @@ func main() {
135135
os.Exit(0)
136136
}
137137
if *versionFlag {
138-
fmt.Fprintln(os.Stderr, "PCFG Trainer v0.5.0 (Go)")
138+
fmt.Fprintln(os.Stderr, "PCFG Trainer v0.5.1 (Go)")
139139
fmt.Fprintln(os.Stderr, "https://github.com/cyclone-github/pcfg-go/")
140140
os.Exit(0)
141141
}

guesser/generator_parallel.go

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ type ParallelGuessGenerator struct {
4545
numParseTrees atomic.Int64
4646
probCoverage atomic.Int64 // scaled by 1e15 for precision
4747
startTime time.Time
48+
49+
// from previous session when resuming with -l (accumulated stats)
50+
prevRunningTime int64
51+
originalFirstStarted string // RFC3339, preserved when resuming
4852
}
4953

5054
// creates a generator that uses parallel workers
@@ -73,13 +77,35 @@ func NewParallelGuessGeneratorWithQueue(grammar pcfg.Grammar, base []pcfg.BaseSt
7377
}
7478
}
7579

80+
// creates a generator with a pre-built queue and restores accumulated stats from a previous session
81+
func NewParallelGuessGeneratorWithQueueAndRestore(grammar pcfg.Grammar, base []pcfg.BaseStructure, queue *PcfgQueue, omenGrammar *omen.Grammar, debug bool, sav *SessionConfig) *ParallelGuessGenerator {
82+
g := &ParallelGuessGenerator{
83+
Grammar: grammar,
84+
Base: base,
85+
Queue: queue,
86+
Debug: debug,
87+
OmenGrammar: omenGrammar,
88+
outputChan: make(chan []byte, outputChanSize),
89+
startTime: time.Now(),
90+
prevRunningTime: sav.RunningTime,
91+
originalFirstStarted: sav.FirstStarted,
92+
}
93+
g.totalGuesses.Store(sav.NumGuesses)
94+
g.numParseTrees.Store(sav.NumParseTrees)
95+
return g
96+
}
97+
7698
// generates guesses using all CPU cores
7799
func (g *ParallelGuessGenerator) RunParallel(limit int64) (int64, error) {
78-
return g.runParallelWithCtx(context.Background(), limit)
100+
return g.runParallelWithCtx(context.Background(), limit, nil)
79101
}
80102

81-
// runs with session save/load. On Ctrl+C, saves and exits gracefully
103+
// runs with session save/load. On Ctrl+C, saves and exits gracefully.
104+
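// Save runs on every exit path of this function: normal completion, signal (SIGINT/SIGTERM), or a panic that unwinds through this goroutine (a panic in another goroutine terminates the process without running this deferred save).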
82105
func (g *ParallelGuessGenerator) RunParallelWithSession(limit int64, savePath, ruleName, ruleUUID string, skipBrute, skipCase bool) (int64, error) {
106+
// Ignore SIGPIPE so piping to pv, head, etc. doesn't kill us before save on Ctrl+C
107+
signal.Ignore(syscall.SIGPIPE)
108+
83109
ctx, cancel := context.WithCancel(context.Background())
84110
sigCh := make(chan os.Signal, 1)
85111
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
@@ -90,24 +116,28 @@ func (g *ParallelGuessGenerator) RunParallelWithSession(limit int64, savePath, r
90116
cancel()
91117
}()
92118

93-
total, err := g.runParallelWithCtx(ctx, limit)
119+
// Always save on exit: normal, signal, or panic. Works for first run and -l (load).
120+
defer func() {
121+
currentRunTime := int64(time.Since(g.startTime).Seconds())
122+
cfg := &SessionConfig{
123+
NumGuesses: g.totalGuesses.Load(),
124+
NumParseTrees: g.numParseTrees.Load(),
125+
ProbCoverage: 0,
126+
RunningTime: g.prevRunningTime + currentRunTime,
127+
MinProbability: g.Queue.MinProbability,
128+
MaxProbability: g.Queue.MaxProbability,
129+
}
130+
if saveErr := SaveSession(savePath, cfg, ruleName, ruleUUID, skipBrute, skipCase, g.originalFirstStarted); saveErr != nil {
131+
fmt.Fprintf(os.Stderr, "Warning: could not save session: %v\n", saveErr)
132+
} else {
133+
fmt.Fprintf(os.Stderr, "Session saved to %s\n", savePath)
134+
}
135+
}()
94136

95-
// save session on exit (normal or signal)
96-
cfg := &SessionConfig{
97-
NumGuesses: g.totalGuesses.Load(),
98-
NumParseTrees: g.numParseTrees.Load(),
99-
ProbCoverage: 0,
100-
RunningTime: int64(time.Since(g.startTime).Seconds()),
101-
MinProbability: g.Queue.MinProbability,
102-
MaxProbability: g.Queue.MaxProbability,
103-
}
104-
if saveErr := SaveSession(savePath, cfg, ruleName, ruleUUID, skipBrute, skipCase); saveErr != nil {
105-
fmt.Fprintf(os.Stderr, "Warning: could not save session: %v\n", saveErr)
106-
}
107-
return total, err
137+
return g.runParallelWithCtx(ctx, limit, cancel)
108138
}
109139

110-
func (g *ParallelGuessGenerator) runParallelWithCtx(ctx context.Context, limit int64) (int64, error) {
140+
func (g *ParallelGuessGenerator) runParallelWithCtx(ctx context.Context, limit int64, cancelOnPipe func()) (int64, error) {
111141
numWorkers := runtime.NumCPU()
112142
if numWorkers < 1 {
113143
numWorkers = 1
@@ -118,12 +148,16 @@ func (g *ParallelGuessGenerator) runParallelWithCtx(ctx context.Context, limit i
118148
var wg sync.WaitGroup
119149

120150
// writer goroutine: consume batches from outputChan
151+
// on broken pipe (e.g. pv exits), cancel so we save instead of spinning
121152
wg.Add(1)
122153
go func() {
123154
defer wg.Done()
124155
defer writer.Flush()
125156
for buf := range g.outputChan {
126-
writer.Write(buf)
157+
if _, err := writer.Write(buf); err != nil && cancelOnPipe != nil {
158+
// broken pipe, reader exited - cancel to trigger save
159+
cancelOnPipe()
160+
}
127161
}
128162
}()
129163

guesser/grammar.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,17 @@ func loadTerminals(info *RulesetInfo, grammar pcfg.Grammar, baseDir string, conf
127127
}
128128
} else {
129129
capConfig := config["CAPITALIZATION"]
130-
var filenames []string
131-
if err := json.Unmarshal([]byte(capConfig["filenames"]), &filenames); err == nil {
132-
name := capConfig["name"]
133-
for _, file := range filenames {
134-
lenStr := strings.Split(file, ".")[0]
135-
length, _ := strconv.Atoi(lenStr)
136-
key := name + lenStr
137-
allLower := strings.Repeat("L", length)
138-
grammar[key] = []pcfg.GrammarEntry{{Values: []string{allLower}, Prob: 1.0}}
130+
if capConfig != nil {
131+
var filenames []string
132+
if err := json.Unmarshal([]byte(capConfig["filenames"]), &filenames); err == nil {
133+
name := capConfig["name"]
134+
for _, file := range filenames {
135+
lenStr := strings.Split(file, ".")[0]
136+
length, _ := strconv.Atoi(lenStr)
137+
key := name + lenStr
138+
allLower := strings.Repeat("L", length)
139+
grammar[key] = []pcfg.GrammarEntry{{Values: []string{allLower}, Prob: 1.0}}
140+
}
139141
}
140142
}
141143
}
@@ -334,8 +336,11 @@ func containsMarkov(replacements []string) bool {
334336
func addCaseMangling(base *pcfg.BaseStructure) {
335337
var newReplacements []string
336338
for _, r := range base.Replacements {
339+
if len(r) == 0 {
340+
continue
341+
}
337342
newReplacements = append(newReplacements, r)
338-
if len(r) > 0 && r[0] == 'A' {
343+
if r[0] == 'A' {
339344
lenStr := r[1:]
340345
newReplacements = append(newReplacements, "C"+lenStr)
341346
}

guesser/omen/grammar.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,12 @@ func loadNgrams(omenDir, filename string, g *Grammar, name string) error {
117117
case "ep":
118118
g.EP[ngram] = level
119119
case "cp":
120-
if len(ngram) < 1 {
120+
runes := []rune(ngram)
121+
if len(runes) < 1 {
121122
continue
122123
}
123-
searchStr := ngram[:len(ngram)-1]
124-
lastChar := ngram[len(ngram)-1:]
124+
searchStr := string(runes[:len(runes)-1])
125+
lastChar := string(runes[len(runes)-1])
125126
if g.CP[searchStr] == nil {
126127
g.CP[searchStr] = make(map[int][]string)
127128
}

guesser/omen/structure.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@ func (gs *GuessStructure) NextGuess() string {
5757
for {
5858
cpChars := gs.cp[last.IP][depthLevel]
5959
for last.Index < len(cpChars) {
60-
newIP := element.IP[:len(element.IP)-1] + cpChars[last.Index]
60+
elRunes := []rune(element.IP)
61+
prefix := ""
62+
if len(elRunes) > 1 {
63+
prefix = string(elRunes[:len(elRunes)-1])
64+
}
65+
newIP := prefix + cpChars[last.Index]
6166
newElements := gs.fillOutParseTree(newIP, reqLength, reqLevel-depthLevel)
6267
if newElements != nil {
6368
gs.parseTree = append(gs.parseTree, newElements...)
@@ -108,9 +113,10 @@ func (gs *GuessStructure) fillOutParseTree(ip string, length, targetLevel int) [
108113
if cpIndex == nil {
109114
return nil
110115
}
116+
ipRunes := []rune(ip)
111117
prefix := ip
112-
if len(ip) > 1 {
113-
prefix = ip[:len(ip)-1]
118+
if len(ipRunes) > 1 {
119+
prefix = string(ipRunes[:len(ipRunes)-1])
114120
}
115121
return []ParseTreeNode{{IP: prefix, Level: cpLevel, Index: 0}}
116122
}
@@ -134,12 +140,18 @@ func (gs *GuessStructure) fillOutParseTree(ip string, length, targetLevel int) [
134140
}
135141

136142
nextLength := length - 1
143+
ipRunes := []rune(ip)
137144
prefix := ip
138-
if len(ip) > 1 {
139-
prefix = ip[:len(ip)-1]
145+
if len(ipRunes) > 1 {
146+
prefix = string(ipRunes[:len(ipRunes)-1])
140147
}
141148
for curIndex := 0; curIndex < len(cpIndex); curIndex++ {
142-
nextIP := ip[1:] + cpIndex[curIndex]
149+
nextIP := ""
150+
if len(ipRunes) > 1 {
151+
nextIP = string(ipRunes[1:]) + cpIndex[curIndex]
152+
} else {
153+
nextIP = cpIndex[curIndex]
154+
}
143155
workingParseTree := gs.fillOutParseTree(nextIP, nextLength, targetLevel-cpLevel)
144156
if workingParseTree != nil {
145157
result := append([]ParseTreeNode{{IP: prefix, Level: cpLevel, Index: curIndex}}, workingParseTree...)
@@ -160,9 +172,10 @@ func (gs *GuessStructure) fillOutParseTree(ip string, length, targetLevel int) [
160172
}
161173

162174
func (gs *GuessStructure) findCP(ip string, topLevel, bottomLevel int) ([]string, int) {
175+
ipRunes := []rune(ip)
163176
prefix := ip
164-
if len(ip) > 1 {
165-
prefix = ip[:len(ip)-1]
177+
if len(ipRunes) > 1 {
178+
prefix = string(ipRunes[:len(ipRunes)-1])
166179
}
167180
if gs.maxLevel < topLevel {
168181
topLevel = gs.maxLevel

0 commit comments

Comments
 (0)