Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@redhat-developer/vscode-redhat-telemetry",
"version": "0.10.0",
"version": "0.10.1",
"description": "Provides Telemetry APIs for Red Hat applications",
"main": "lib/index.js",
"types": "lib",
Expand Down
132 changes: 108 additions & 24 deletions src/common/utils/telemetryUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,60 +8,144 @@
// Matches a `node_modules` (or `node_modules.asar`) path segment, with an optional leading separator.
const NODE_MODULES_REGEX = /[\\\/]?(node_modules|node_modules\.asar)[\\\/]/;
// Matches file-path-like text: optional `file://`, optional drive/root prefix, then at least
// one `segment<separator>` followed by a final segment. It can never match the empty string.
const FILE_REGEX_PATTERN = /(file:\/\/)?([a-zA-Z]:(\\\\|\\|\/)|(\\\\|\\|\/))?([\w-\._]+(\\\\|\\|\/))+[\w-\._]+/g;

// Marker substituted for every anonymized path (or for the user-specific part of one).
const REDACTED_PATH = '<REDACTED: user-file-path>';

// Upper bound on matches collected per cleanup pattern and per line; guards against
// caller-supplied patterns that never advance.
const MAX_CLEANUP_MATCHES_PER_LINE = 300;

// Default cleanup patterns to preserve common system/library paths
const DEFAULT_CLEANUP_PATTERNS: readonly RegExp[] = [
  /java\.\S*/, // Java standard library and related packages (any non-whitespace chars)
];

/**
 * Cleans a given stack of possible paths.
 *
 * The input is split on `\n` and every line is processed independently, so a cleanup
 * pattern can only preserve paths located on the same line as its match.
 *
 * @param stack The stack to sanitize
 * @param cleanupPatterns Cleanup patterns to preserve (paths overlapping a match of one of these
 *                        patterns won't be anonymized). If not provided, uses default patterns for
 *                        common system/library paths. The patterns themselves are never mutated.
 * @returns The cleaned stack; the input is returned unchanged when it is empty or contains
 *          neither `/` nor `\`
 */
export function anonymizeFilePaths(stack: string, cleanupPatterns: readonly RegExp[] = DEFAULT_CLEANUP_PATTERNS): string {
  // Fast check to see if it is a file path to avoid doing unnecessary heavy regex work
  if (!stack || (!stack.includes('/') && !stack.includes('\\'))) {
    return stack;
  }

  // Process multiline strings line by line
  return stack
    .split('\n')
    .map(line => anonymizeFilePathsInLine(line, cleanupPatterns))
    .join('\n');
}

/**
 * Finds the `[start, end)` ranges of `line` matched by the cleanup patterns.
 *
 * A pattern that throws, exceeds the match budget, or gets stuck on the same position is
 * reported through `console.warn` and contributes no ranges at all; the remaining patterns
 * are still evaluated.
 *
 * @param line The line to scan
 * @param cleanupPatterns Patterns whose matches mark text to preserve
 * @returns The collected match ranges, in pattern order
 */
function collectPreservedRanges(line: string, cleanupPatterns: readonly RegExp[]): [number, number][] {
  const ranges: [number, number][] = [];

  for (const regexp of cleanupPatterns) {
    try {
      // Work on a private global copy so the caller's `lastIndex` is never touched. Add the
      // `g` flag only when it is missing: duplicated flags make the RegExp constructor throw,
      // which would silently disable any pattern the caller already declared as global.
      const flags = regexp.flags.includes('g') ? regexp.flags : regexp.flags + 'g';
      const pattern = new RegExp(regexp.source, flags);
      const patternMatches: [number, number][] = [];
      let patternFailed = false;
      let iterationCount = 0;
      let match: RegExpExecArray | null;

      while ((match = pattern.exec(line)) !== null) {
        // Guard against infinite loops from bad regex patterns
        if (++iterationCount > MAX_CLEANUP_MATCHES_PER_LINE) {
          console.warn(`Warning: Cleanup pattern ${regexp.source} exceeded ${MAX_CLEANUP_MATCHES_PER_LINE} iterations, breaking to prevent infinite loop`);
          patternFailed = true;
          break;
        }

        // Additional guard: a match starting where the previous one started means the
        // pattern is not advancing (typically a repeated zero-length match).
        if (iterationCount > 1 && match.index === patternMatches[patternMatches.length - 1]?.[0]) {
          console.warn(`Warning: Cleanup pattern ${regexp.source} is stuck at position ${match.index}, breaking to prevent infinite loop`);
          patternFailed = true;
          break;
        }

        patternMatches.push([match.index, pattern.lastIndex]);
      }

      // Only keep matches from patterns that completed without tripping a guard
      if (!patternFailed) {
        ranges.push(...patternMatches);
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      console.warn(`Warning: Invalid cleanup pattern ${regexp.source}: ${errorMessage}`);
      // Continue with other patterns
    }
  }

  return ranges;
}

/**
 * Anonymizes file paths in a single line.
 *
 * For each path-like match:
 * - a path containing a `node_modules` segment keeps everything from that segment onwards;
 *   whatever precedes it is replaced by the redaction marker (nothing is replaced when the
 *   path starts with the segment);
 * - otherwise, a path overlapping any cleanup-pattern match is kept verbatim;
 * - every other path is replaced by the redaction marker.
 *
 * @param line The line to sanitize
 * @param cleanupPatterns Cleanup patterns to preserve
 * @returns The cleaned line
 */
function anonymizeFilePathsInLine(line: string, cleanupPatterns: readonly RegExp[]): string {
  // Fast check to see if it is a file path to avoid doing unnecessary heavy regex work
  if (!line || (!line.includes('/') && !line.includes('\\'))) {
    return line;
  }

  const preservedRanges = collectPreservedRanges(line, cleanupPatterns);

  // Fresh instance per call: the shared pattern is global, so reusing it would leak `lastIndex`.
  const fileRegex = new RegExp(FILE_REGEX_PATTERN);
  let updatedLine = '';
  let lastIndex = 0;
  let result: RegExpExecArray | null;

  // No iteration cap here: every match of the file-path regex is non-empty, so the loop always
  // advances and ends. Bailing out early would leave the rest of the line un-anonymized.
  while ((result = fileRegex.exec(line)) !== null) {
    const matchStart = result.index;
    const matchEnd = fileRegex.lastIndex;
    const matchedPath = result[0];
    const textBeforeMatch = line.substring(lastIndex, matchStart);
    const nodeModulesMatch = NODE_MODULES_REGEX.exec(matchedPath);

    if (nodeModulesMatch) {
      // For node_modules paths, anonymize the user part but preserve the node_modules part
      updatedLine += nodeModulesMatch.index === 0
        ? textBeforeMatch + matchedPath
        : textBeforeMatch + REDACTED_PATH + matchedPath.substring(nodeModulesMatch.index);
    } else if (preservedRanges.some(([start, end]) => matchStart < end && start < matchEnd)) {
      // For cleanup pattern matches, preserve the entire path
      updatedLine += textBeforeMatch + matchedPath;
    } else {
      // Anonymize user file paths that don't match cleanup patterns or node_modules
      updatedLine += textBeforeMatch + REDACTED_PATH;
    }
    lastIndex = matchEnd;
  }

  return updatedLine + line.substring(lastIndex);
}
136 changes: 136 additions & 0 deletions src/tests/utils/telemetryUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,140 @@ suite('Test anonymizeFilePaths', () => {
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
});

// Tests for default cleanup patterns
test('should preserve Java standard library paths with default patterns', () => {
const input = 'Error at java.base/java.lang.String.<init>(String.java:123)';
const result = anonymizeFilePaths(input);
assert.strictEqual(result, input);
});

test('should anonymize user paths but preserve Java paths with default patterns', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and java.base/java.lang.String.<init>(String.java:123)';
const result = anonymizeFilePaths(input);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and java.base/java.lang.String.<init>(String.java:123)');
});

test('should handle complex Java stack trace with default patterns', () => {
const stackTrace = `java.lang.NullPointerException
at java.base/java.lang.String.<init>(String.java:123)
at /Users/john/project/src/main.ts:45
at java.util.ArrayList.add(ArrayList.java:456)
at /home/user/app/index.js:12
at org.springframework.context.ApplicationContext.getBean(ApplicationContext.java:789)`;

const result = anonymizeFilePaths(stackTrace);
assert(result.includes('java.base/java.lang.String.<init>(String.java:123)'));
assert(result.includes('java.util.ArrayList.add(ArrayList.java:456)'));
assert(result.includes('org.springframework.context.ApplicationContext.getBean(ApplicationContext.java:789)'));
assert(result.includes('<REDACTED: user-file-path>'));
assert(!result.includes('/Users/john/project/src/main.ts:45'));
assert(!result.includes('/home/user/app/index.js:12'));
});

// Tests for custom cleanup patterns
test('should use custom cleanup patterns when provided', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and java.base/java.lang.String.<init>(String.java:123)';
const customPatterns = [/java\.base\//];
const result = anonymizeFilePaths(input, customPatterns);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and java.base/java.lang.String.<init>(String.java:123)');
});

test('should not preserve patterns not in custom cleanup patterns', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and java.base/java.lang.String.<init>(String.java:123)';
const customPatterns = [/java\.util\.\S*/]; // Only preserve java.util, not java.lang
const result = anonymizeFilePaths(input, customPatterns);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and <REDACTED: user-file-path><init>(String.java:123)');
});

test('should handle empty custom cleanup patterns array', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and java.base/java.lang.String.<init>(String.java:123)';
const result = anonymizeFilePaths(input, []);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and <REDACTED: user-file-path><init>(String.java:123)');
});

test('should handle multiple custom cleanup patterns', () => {
const input = 'Error at /Users/john/project/src/main.ts:45, java.base/java.lang.String.<init>(String.java:123), and org.springframework.context.ApplicationContext.getBean(ApplicationContext.java:456)';
const customPatterns = [/java\.base\//, /org\.springframework\..*/];
const result = anonymizeFilePaths(input, customPatterns);
assert(result.includes('java.base/java.lang.String.<init>(String.java:123)'));
assert(result.includes('org.springframework.context.ApplicationContext.getBean(ApplicationContext.java:456)'));
assert(result.includes('Error at <REDACTED: user-file-path>:45'));
assert(!result.includes('/Users/john/project/src/main.ts:45'));
});

test('should handle custom cleanup patterns with complex regex', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and my.custom.library/SomeClass.method(SomeClass.java:123)';
const customPatterns = [/my\.custom\..*/];
const result = anonymizeFilePaths(input, customPatterns);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and my.custom.library/SomeClass.method(SomeClass.java:123)');
});

test('should handle overlapping cleanup patterns', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and java.base/java.util.ArrayList.add(ArrayList.java:123)';
const customPatterns = [/java\.base\//, /java\.util\..*/];
const result = anonymizeFilePaths(input, customPatterns);
assert(result.includes('<REDACTED: user-file-path>'));
assert(!result.includes('/Users/john/project/src/main.ts:45'));
assert(result.includes('java.base/java.util.ArrayList.add(ArrayList.java:123)'));
});

test('should preserve node_modules even with custom cleanup patterns', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and /Users/john/project/node_modules/package/index.js:12';
const customPatterns = [/java\.base\//];
const result = anonymizeFilePaths(input, customPatterns);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and <REDACTED: user-file-path>/node_modules/package/index.js:12');
});

test('should handle cleanup patterns that match partial paths', () => {
const input = 'Error at /Users/john/project/src/main.ts:45 and some.java.package.Class.method(Class.java:123)';
const customPatterns = [/java\.package\..*/];
const result = anonymizeFilePaths(input, customPatterns);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45 and some.java.package.Class.method(Class.java:123)');
});

test('should guard against infinite loops from bad regex patterns', () => {
// Verifies that a cleanup pattern which stops advancing is abandoned with a warning
// instead of looping forever.
const input = 'Error at /Users/john/project/src/main.ts:45';
// /(.*)*/ can match the empty string: after consuming the line, a repeated global exec()
// is expected to keep returning a zero-length match at the same index, tripping the guard.
const badPatterns = [/(.*)*/];

// Capture console.warn to verify the warning is issued
const originalWarn = console.warn;
let warningIssued = false;
console.warn = (message: string) => {
// Accept either guard message: "stuck at position" or the iteration-limit "exceeded" one
if (message.includes('stuck at position') || message.includes('exceeded')) {
warningIssued = true;
}
originalWarn(message);
};

try {
const result = anonymizeFilePaths(input, badPatterns);
// The function should still return a result (even if it's not perfect)
assert(typeof result === 'string');
// The warning should have been issued
assert(warningIssued, 'Expected warning about infinite loop prevention to be issued');
} finally {
// Always restore the real console.warn, even when an assertion above fails
console.warn = originalWarn;
}
});

test('should handle empty cleanup patterns gracefully', () => {
const input = 'Error at /Users/john/project/src/main.ts:45';
const result = anonymizeFilePaths(input, []);
assert.strictEqual(result, 'Error at <REDACTED: user-file-path>:45');
});

test('should handle malformed regex patterns gracefully', () => {
const input = 'Error at /Users/john/project/src/main.ts:45';
// The pattern is syntactically valid; it is "problematic" only because nested quantifiers
// like (a+)+ are prone to catastrophic backtracking on adversarial input.
const problematicPatterns = [/(a+)+/];

// This should not throw an error and should complete without hanging
const result = anonymizeFilePaths(input, problematicPatterns);
assert(typeof result === 'string');
// The pattern (a+)+ matches runs of 'a'. Any file path overlapping such a match is preserved
// in full, so here the whole user path (it contains the 'a' of "main") is expected to stay
// un-redacted. Only the leading text is asserted, so this test does not pin that outcome.
assert(result.includes('Error at'));
});

});