Skip to content

Commit a88eb7e

Browse files
authored
Merge pull request #207 from githubnext/copilot/refactor-secret-sanitization-logic
2 parents f5dff3f + 2facb1d commit a88eb7e

5 files changed

Lines changed: 438 additions & 59 deletions

File tree

internal/logger/jsonl_logger.go

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"path/filepath"
88
"sync"
99
"time"
10+
11+
"github.com/githubnext/gh-aw-mcpg/internal/logger/sanitize"
1012
)
1113

1214
// JSONLLogger manages logging RPC messages to a JSONL file (one JSON object per line)
@@ -88,35 +90,10 @@ func (jl *JSONLLogger) Close() error {
8890

8991
// sanitizePayload sanitizes a payload by applying regex patterns to the entire string
9092
// It takes raw bytes, applies regex sanitization in one pass, and returns sanitized bytes
93+
// This function is deprecated and will be removed in a future version.
94+
// Use sanitize.SanitizeJSON() directly instead.
9195
func sanitizePayload(payloadBytes []byte) json.RawMessage {
92-
// Apply regex sanitization to the entire string in one pass
93-
sanitized := sanitizeSecrets(string(payloadBytes))
94-
95-
// Validate that the result is valid JSON for RawMessage
96-
// If not valid, wrap it in a JSON object
97-
if !json.Valid([]byte(sanitized)) {
98-
// Create a valid JSON object with the invalid content as a string
99-
wrapped := map[string]string{
100-
"_error": "invalid JSON",
101-
"_raw": sanitized,
102-
}
103-
wrappedBytes, _ := json.Marshal(wrapped)
104-
return json.RawMessage(wrappedBytes)
105-
}
106-
107-
// Marshal and unmarshal to ensure single-line JSON (removes newlines/whitespace)
108-
var tmp interface{}
109-
if err := json.Unmarshal([]byte(sanitized), &tmp); err != nil {
110-
// Should not happen since we validated above, but handle gracefully
111-
wrapped := map[string]string{
112-
"_error": "failed to parse JSON",
113-
"_raw": sanitized,
114-
}
115-
wrappedBytes, _ := json.Marshal(wrapped)
116-
return json.RawMessage(wrappedBytes)
117-
}
118-
compactBytes, _ := json.Marshal(tmp)
119-
return json.RawMessage(compactBytes)
96+
return sanitize.SanitizeJSON(payloadBytes)
12097
}
12198

12299
// LogMessage logs an RPC message to the JSONL file

internal/logger/markdown_logger.go

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7-
"regexp"
87
"strings"
98
"sync"
9+
10+
"github.com/githubnext/gh-aw-mcpg/internal/logger/sanitize"
1011
)
1112

1213
// MarkdownLogger manages logging to a markdown file for GitHub workflow previews
@@ -22,20 +23,6 @@ type MarkdownLogger struct {
2223
var (
2324
globalMarkdownLogger *MarkdownLogger
2425
globalMarkdownMu sync.RWMutex
25-
// Patterns for detecting potential secrets (simple heuristics)
26-
secretPatterns = []*regexp.Regexp{
27-
regexp.MustCompile(`(?i)(token|key|secret|password|auth)[=:]\s*[^\s]{8,}`),
28-
regexp.MustCompile(`ghp_[a-zA-Z0-9]{36,}`), // GitHub PATs
29-
regexp.MustCompile(`github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}`), // GitHub fine-grained PATs
30-
regexp.MustCompile(`(?i)bearer\s+[a-zA-Z0-9\-._~+/]+=*`), // Bearer tokens
31-
regexp.MustCompile(`(?i)authorization:\s*[a-zA-Z0-9\-._~+/]+=*`), // Auth headers
32-
regexp.MustCompile(`[a-f0-9]{32,}`), // Long hex strings (API keys)
33-
regexp.MustCompile(`(?i)(apikey|api_key|access_key)[=:]\s*[^\s]{8,}`), // API keys
34-
regexp.MustCompile(`(?i)(client_secret|client_id)[=:]\s*[^\s]{8,}`), // OAuth secrets
35-
regexp.MustCompile(`[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+`), // JWT tokens
36-
// JSON-specific patterns for field:value pairs
37-
regexp.MustCompile(`(?i)"(token|password|passwd|pwd|apikey|api_key|api-key|secret|client_secret|api_secret|authorization|auth|key|private_key|public_key|credentials|credential|access_token|refresh_token|bearer_token)"\s*:\s*"[^"]{1,}"`),
38-
}
3926
)
4027

4128
// InitMarkdownLogger initializes the global markdown logger
@@ -114,22 +101,10 @@ func (ml *MarkdownLogger) Close() error {
114101
}
115102

116103
// sanitizeSecrets replaces potential secrets with [REDACTED]
104+
// This function is deprecated and will be removed in a future version.
105+
// Use sanitize.SanitizeString() directly instead.
117106
func sanitizeSecrets(message string) string {
118-
result := message
119-
for _, pattern := range secretPatterns {
120-
result = pattern.ReplaceAllStringFunc(result, func(match string) string {
121-
// Keep the prefix (key name) but redact the value
122-
if strings.Contains(match, "=") || strings.Contains(match, ":") {
123-
parts := regexp.MustCompile(`[=:]\s*`).Split(match, 2)
124-
if len(parts) == 2 {
125-
return parts[0] + "=[REDACTED]"
126-
}
127-
}
128-
// For tokens without key=value format, redact entirely
129-
return "[REDACTED]"
130-
})
131-
}
132-
return result
107+
return sanitize.SanitizeString(message)
133108
}
134109

135110
// getEmojiForLevel returns the appropriate emoji for the log level

internal/logger/rpc_logger.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"encoding/json"
55
"fmt"
66
"strings"
7+
8+
"github.com/githubnext/gh-aw-mcpg/internal/logger/sanitize"
79
)
810

911
// RPCMessageType represents the direction of an RPC message
@@ -47,7 +49,7 @@ type RPCMessageInfo struct {
4749
// truncateAndSanitize truncates the payload to max length and sanitizes secrets
4850
func truncateAndSanitize(payload string, maxLength int) string {
4951
// First sanitize secrets
50-
sanitized := sanitizeSecrets(payload)
52+
sanitized := sanitize.SanitizeString(payload)
5153

5254
// Then truncate if needed
5355
if len(sanitized) > maxLength {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package sanitize
2+
3+
import (
4+
"encoding/json"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// SecretPatterns contains regex patterns for detecting potential secrets
10+
var SecretPatterns = []*regexp.Regexp{
11+
regexp.MustCompile(`(?i)(token|key|secret|password|auth)[=:]\s*[^\s]{8,}`),
12+
regexp.MustCompile(`ghp_[a-zA-Z0-9]{36,}`), // GitHub PATs
13+
regexp.MustCompile(`github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}`), // GitHub fine-grained PATs
14+
regexp.MustCompile(`(?i)bearer\s+[a-zA-Z0-9\-._~+/]+=*`), // Bearer tokens
15+
regexp.MustCompile(`(?i)authorization:\s*[a-zA-Z0-9\-._~+/]+=*`), // Auth headers
16+
regexp.MustCompile(`[a-f0-9]{32,}`), // Long hex strings (API keys)
17+
regexp.MustCompile(`(?i)(apikey|api_key|access_key)[=:]\s*[^\s]{8,}`), // API keys
18+
regexp.MustCompile(`(?i)(client_secret|client_id)[=:]\s*[^\s]{8,}`), // OAuth secrets
19+
regexp.MustCompile(`[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+`), // JWT tokens
20+
// JSON-specific patterns for field:value pairs
21+
regexp.MustCompile(`(?i)"(token|password|passwd|pwd|apikey|api_key|api-key|secret|client_secret|api_secret|authorization|auth|key|private_key|public_key|credentials|credential|access_token|refresh_token|bearer_token)"\s*:\s*"[^"]{1,}"`),
22+
}
23+
24+
// SanitizeString replaces potential secrets in a string with [REDACTED]
25+
func SanitizeString(message string) string {
26+
result := message
27+
for _, pattern := range SecretPatterns {
28+
result = pattern.ReplaceAllStringFunc(result, func(match string) string {
29+
// Keep the prefix (key name) but redact the value
30+
if strings.Contains(match, "=") || strings.Contains(match, ":") {
31+
parts := regexp.MustCompile(`[=:]\s*`).Split(match, 2)
32+
if len(parts) == 2 {
33+
return parts[0] + "=[REDACTED]"
34+
}
35+
}
36+
// For tokens without key=value format, redact entirely
37+
return "[REDACTED]"
38+
})
39+
}
40+
return result
41+
}
42+
43+
// SanitizeJSON sanitizes a JSON payload by applying regex patterns to the entire string
44+
// It takes raw bytes, applies regex sanitization in one pass, and returns sanitized bytes
45+
func SanitizeJSON(payloadBytes []byte) json.RawMessage {
46+
// Apply regex sanitization to the entire string in one pass
47+
sanitized := SanitizeString(string(payloadBytes))
48+
49+
// Validate that the result is valid JSON for RawMessage
50+
// If not valid, wrap it in a JSON object
51+
if !json.Valid([]byte(sanitized)) {
52+
// Create a valid JSON object with the invalid content as a string
53+
wrapped := map[string]string{
54+
"_error": "invalid JSON",
55+
"_raw": sanitized,
56+
}
57+
wrappedBytes, _ := json.Marshal(wrapped)
58+
return json.RawMessage(wrappedBytes)
59+
}
60+
61+
// Marshal and unmarshal to ensure single-line JSON (removes newlines/whitespace)
62+
var tmp interface{}
63+
if err := json.Unmarshal([]byte(sanitized), &tmp); err != nil {
64+
// Should not happen since we validated above, but handle gracefully
65+
wrapped := map[string]string{
66+
"_error": "failed to parse JSON",
67+
"_raw": sanitized,
68+
}
69+
wrappedBytes, _ := json.Marshal(wrapped)
70+
return json.RawMessage(wrappedBytes)
71+
}
72+
compactBytes, _ := json.Marshal(tmp)
73+
return json.RawMessage(compactBytes)
74+
}

0 commit comments

Comments
 (0)