From 42755665e08852cd4b4d4171a14a30e18fffd28f Mon Sep 17 00:00:00 2001 From: RuiO <139987905+cx-rui-oliveira@users.noreply.github.com> Date: Mon, 26 May 2025 15:23:52 +0100 Subject: [PATCH 1/4] fix: use line regarding file instead of chunk --- engine/engine.go | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/engine/engine.go b/engine/engine.go index 5904351c..30295478 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -54,6 +54,8 @@ type IEngine interface { const ( customRegexRuleIdFormat = "custom-regex-%d" CxFileEndMarker = ";cx-file-end" + totalLinesField = "totalLines" + linesInChunkField = "linesInChunk" ) type EngineConfig struct { @@ -109,7 +111,7 @@ func (e *Engine) DetectFragment(item plugins.ISourceItem, secretsChannel chan *s FilePath: item.GetSource(), } - return e.detectSecrets(item, fragment, secretsChannel, pluginName) + return e.detectSecrets(context.Background(), item, fragment, secretsChannel, pluginName) } // DetectFile reads the given file and detects secrets in it @@ -137,7 +139,7 @@ func (e *Engine) DetectFile(ctx context.Context, item plugins.ISourceItem, secre } defer e.semaphore.ReleaseMemoryWeight(weight) - return e.detectChunks(item, secretsChannel) + return e.detectChunks(ctx, item, secretsChannel) } // fileSize * 2 -> data file bytes and its conversion to string weight := fileSize * 2 @@ -156,11 +158,11 @@ func (e *Engine) DetectFile(ctx context.Context, item plugins.ISourceItem, secre FilePath: item.GetSource(), } - return e.detectSecrets(item, fragment, secretsChannel, "filesystem") + return e.detectSecrets(ctx, item, fragment, secretsChannel, "filesystem") } // detectChunks reads the given file in chunks and detects secrets in each chunk -func (e *Engine) detectChunks(item plugins.ISourceItem, secretsChannel chan *secrets.Secret) error { +func (e *Engine) detectChunks(ctx context.Context, item plugins.ISourceItem, secretsChannel chan *secrets.Secret) error { f, err := os.Open(item.GetSource()) if err != nil { return fmt.Errorf("failed to open file %s: %w", item.GetSource(), err) @@ -194,20 +196,22 @@ func (e *Engine) detectChunks(item plugins.ISourceItem, secretsChannel chan *sec Raw: chunkStr, FilePath: item.GetSource(), } - if detectErr := e.detectSecrets(item, fragment, secretsChannel, "filesystem"); detectErr != nil { + ctx = context.WithValue(ctx, totalLinesField, totalLines) + ctx = context.WithValue(ctx, linesInChunkField, linesInChunk) + if detectErr := e.detectSecrets(ctx, item, fragment, secretsChannel, "filesystem"); detectErr != nil { return fmt.Errorf("failed to detect secrets: %w", detectErr) } } } // detectSecrets detects secrets and sends them to the secrets channel -func (e *Engine) detectSecrets(item plugins.ISourceItem, fragment detect.Fragment, secrets chan *secrets.Secret, +func (e *Engine) detectSecrets(ctx context.Context, item plugins.ISourceItem, fragment detect.Fragment, secrets chan *secrets.Secret, pluginName string) error { fragment.Raw += CxFileEndMarker + "\n" values := e.detector.Detect(fragment) for _, value := range values { - secret, buildErr := buildSecret(item, value, pluginName) + secret, buildErr := buildSecret(ctx, item, value, pluginName) if buildErr != nil { return fmt.Errorf("failed to build secret: %w", buildErr) } @@ -306,10 +310,10 @@ func GetRulesCommand(engineConfig *EngineConfig) *cobra.Command { } // buildSecret creates a secret object from the given source item and finding -func buildSecret(item plugins.ISourceItem, value report.Finding, pluginName string) (*secrets.Secret, error) { +func buildSecret(ctx context.Context, item plugins.ISourceItem, value report.Finding, pluginName string) (*secrets.Secret, error) { gitInfo := item.GetGitInfo() itemId := getFindingId(item, value) - startLine, endLine, err := getStartAndEndLines(pluginName, gitInfo, value) + startLine, endLine, err := getStartAndEndLines(ctx, pluginName, gitInfo, value) if err != nil { return nil, fmt.Errorf("failed to get start and end lines for source %s: %w", item.GetSource(), err) } @@ -342,13 +346,15 @@ func getFindingId(item plugins.ISourceItem, finding report.Finding) string { return fmt.Sprintf("%x", sha) } -func getStartAndEndLines(pluginName string, gitInfo *plugins.GitInfo, value report.Finding) (int, int, error) { +func getStartAndEndLines(ctx context.Context, pluginName string, gitInfo *plugins.GitInfo, value report.Finding) (int, int, error) { var startLine, endLine int var err error if pluginName == "filesystem" { - startLine = value.StartLine + 1 - endLine = value.EndLine + 1 + totalLines := ctx.Value(totalLinesField).(int) + linesInChunk := ctx.Value(linesInChunkField).(int) + startLine = value.StartLine + (totalLines - linesInChunk) + 1 + endLine = value.EndLine + (totalLines - linesInChunk) + 1 } else if pluginName == "git" { startLine, endLine, err = plugins.GetGitStartAndEndLine(gitInfo, value.StartLine, value.EndLine) if err != nil { From e5d2b8f6498d0744cbd103b35735f93c61bed070 Mon Sep 17 00:00:00 2001 From: RuiO <139987905+cx-rui-oliveira@users.noreply.github.com> Date: Mon, 26 May 2025 18:32:03 +0100 Subject: [PATCH 2/4] fix: check if context variables exist before using them --- engine/engine.go | 21 +++++++++++++-------- engine/engine_test.go | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/engine/engine.go b/engine/engine.go index 30295478..55eab79d 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -51,11 +51,13 @@ type IEngine interface { GetRuleBaseRiskScore(ruleId string) float64 } +type ctxKey int + const ( - customRegexRuleIdFormat = "custom-regex-%d" - CxFileEndMarker = ";cx-file-end" - totalLinesField = "totalLines" - linesInChunkField = "linesInChunk" + customRegexRuleIdFormat = "custom-regex-%d" + CxFileEndMarker = ";cx-file-end" + totalLinesKey ctxKey = iota + linesInChunkKey ) type EngineConfig struct { @@ -196,8 +198,8 @@ func (e *Engine) detectChunks(ctx context.Context, item plugins.ISourceItem, sec Raw: chunkStr, FilePath: item.GetSource(), } - ctx = context.WithValue(ctx, totalLinesField, totalLines) - ctx = context.WithValue(ctx, linesInChunkField, linesInChunk) + ctx = context.WithValue(ctx, totalLinesKey, totalLines) + ctx = context.WithValue(ctx, linesInChunkKey, linesInChunk) if detectErr := e.detectSecrets(ctx, item, fragment, secretsChannel, "filesystem"); detectErr != nil { return fmt.Errorf("failed to detect secrets: %w", detectErr) } @@ -351,8 +353,11 @@ func getStartAndEndLines(ctx context.Context, pluginName string, gitInfo *plugin var err error if pluginName == "filesystem" { - totalLines := ctx.Value(totalLinesField).(int) - linesInChunk := ctx.Value(linesInChunkField).(int) + var totalLines, linesInChunk int + if ctx.Value(totalLinesKey) != nil && ctx.Value(linesInChunkKey) != nil { + totalLines = ctx.Value(totalLinesKey).(int) + linesInChunk = ctx.Value(linesInChunkKey).(int) + } startLine = value.StartLine + (totalLines - linesInChunk) + 1 endLine = value.EndLine + (totalLines - linesInChunk) + 1 } else if pluginName == "git" { diff --git a/engine/engine_test.go b/engine/engine_test.go index d76bf02c..8d8f1751 100644 --- a/engine/engine_test.go +++ b/engine/engine_test.go @@ -423,7 +423,7 @@ func TestDetectChunks(t *testing.T) { tmp := t.TempDir() src := tc.makeFile(tmp) - err := engine.detectChunks(&item{source: src}, make(chan *secrets.Secret, 1)) + err := engine.detectChunks(context.Background(), &item{source: src}, make(chan *secrets.Secret, 1)) loggedMessage := logsBuffer.String() if tc.expectedErr != nil { require.ErrorContains(t, err, tc.expectedErr.Error()) From aa41e436ea2ee3224e8ce8dac100ca6474079f6a Mon Sep 17 00:00:00 2001 From: RuiO <139987905+cx-rui-oliveira@users.noreply.github.com> Date: Tue, 27 May 2025 12:11:37 +0100 Subject: [PATCH 3/4] refactor: address PR suggestions --- engine/engine.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/engine/engine.go b/engine/engine.go index 55eab79d..52786e5a 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -51,13 +51,13 @@ type IEngine interface { GetRuleBaseRiskScore(ruleId string) float64 } -type ctxKey int +type ctxKey string const ( customRegexRuleIdFormat = "custom-regex-%d" CxFileEndMarker = ";cx-file-end" - totalLinesKey ctxKey = iota - linesInChunkKey + totalLinesKey ctxKey = "totalLines" + linesInChunkKey ctxKey = "linesInChunk" ) type EngineConfig struct { @@ -353,13 +353,16 @@ func getStartAndEndLines(ctx context.Context, pluginName string, gitInfo *plugin var err error if pluginName == "filesystem" { - var totalLines, linesInChunk int - if ctx.Value(totalLinesKey) != nil && ctx.Value(linesInChunkKey) != nil { - totalLines = ctx.Value(totalLinesKey).(int) - linesInChunk = ctx.Value(linesInChunkKey).(int) + totalLines, totalOK := ctx.Value(totalLinesKey).(int) + chunkLines, chunkOK := ctx.Value(linesInChunkKey).(int) + + offset := 1 + if totalOK && chunkOK { + offset = (totalLines - chunkLines) + 1 } - startLine = value.StartLine + (totalLines - linesInChunk) + 1 - endLine = value.EndLine + (totalLines - linesInChunk) + 1 + + startLine = value.StartLine + offset + endLine = value.EndLine + offset } else if pluginName == "git" { startLine, endLine, err = plugins.GetGitStartAndEndLine(gitInfo, value.StartLine, value.EndLine) if err != nil { From a4cb46c9c4e881a0250a0ca5f00d0c7d100541ab Mon Sep 17 00:00:00 2001 From: RuiO <139987905+cx-rui-oliveira@users.noreply.github.com> Date: Thu, 29 May 2025 11:21:07 +0100 Subject: [PATCH 4/4] fix: ensure that reader can read data into peekedBuf --- engine/chunk/chunk.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/engine/chunk/chunk.go b/engine/chunk/chunk.go index e3e8d23b..4011d7d3 100644 --- a/engine/chunk/chunk.go +++ b/engine/chunk/chunk.go @@ -106,6 +106,11 @@ func (c *Chunk) PutBuf(window *bytes.Buffer) { // GetPeekedBuf returns a fixed-size []byte from the pool func (c *Chunk) GetPeekedBuf() (*[]byte, bool) { b, ok := c.peekedBufPool.Get().(*[]byte) + if !ok { + return nil, false + } + // reslice to its full capacity so Read can fill it + *b = (*b)[:cap(*b)] return b, ok }