Skip to content

Commit ee74234

Browse files
fix(web): Handle ambiguous languages (#1026)
* feat(web): replace language param with path in symbol tools, make repo mandatory
  - find_symbol_definitions and find_symbol_references now accept `path` (the file where the symbol was encountered) instead of `language`
  - Language is derived internally via detectLanguageFromFilename
  - `repo` is now required in both tool schemas
  - Also fixes ambiguous extension overrides in languageDetection.ts

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* delete
* changelog

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e0d4ff9 commit ee74234

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
- Changed language detection so that file extensions that map to multiple languages (e.g., `.md`) resolve to the most common language. [#1026](https://github.com/sourcebot-dev/sourcebot/pull/1026)
12+
1013
## [4.15.11] - 2026-03-20
1114

1215
### Changed

packages/web/src/lib/languageDetection.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
11
import * as linguistLanguages from 'linguist-languages';
22
import path from 'path';
33

4+
// Override map for extensions that are ambiguous in linguist-languages.
5+
// These are extensions where linguist maps to multiple languages, but one
6+
// is overwhelmingly more common in practice.
7+
const ambiguousExtensionOverrides: Record<string, string> = {
8+
'.cs': 'C#', // Not Smalltalk
9+
'.fs': 'F#', // Not Forth, GLSL, or Filterscript
10+
'.html': 'HTML', // Not Ecmarkup
11+
'.json': 'JSON', // Not OASv2-json, OASv3-json
12+
'.md': 'Markdown', // Not GCC Machine Description
13+
'.rs': 'Rust', // Not RenderScript (deprecated)
14+
'.tsx': 'TSX', // Not XML
15+
'.ts': 'TypeScript', // Not XML
16+
'.txt': 'Text', // Not Adblock Filter List, Vim Help File
17+
'.yaml': 'YAML', // Not MiniYAML, OASv2-yaml, OASv3-yaml
18+
'.yml': 'YAML',
19+
};
20+
421
const extensionToLanguage = new Map<string, string>();
522

623
for (const [languageName, languageData] of Object.entries(linguistLanguages)) {
@@ -31,6 +48,12 @@ export const detectLanguageFromFilename = (filename: string): string => {
3148

3249
// Check for extension match
3350
const ext = path.extname(filename).toLowerCase();
51+
52+
// Check override map first for ambiguous extensions
53+
if (ext && ext in ambiguousExtensionOverrides) {
54+
return ambiguousExtensionOverrides[ext];
55+
}
56+
3457
if (ext && extensionToLanguage.has(ext)) {
3558
return extensionToLanguage.get(ext)!;
3659
}

0 commit comments

Comments
 (0)