Commit 8720ee1

Merge branch 'main' into cursor/SOU-762-search-assistant-repo-filter-4b36

2 parents 56f9330 + 2fa86ff commit 8720ee1

21 files changed: +351 −29 lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions

```diff
@@ -10,6 +10,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - Fixed AI Search Assist incorrectly using the `repo:` filter when searching for content within files. [#1045](https://github.com/sourcebot-dev/sourcebot/pull/1045)
 
+## [4.16.3] - 2026-03-27
+
+### Added
+- Added support for `.gitattributes` `linguist-language` overrides in the file viewer ([#1048](https://github.com/sourcebot-dev/sourcebot/pull/1048))
+- Added Basic language syntax highlighting in the file viewer ([#1054](https://github.com/sourcebot-dev/sourcebot/pull/1054))
+
+### Fixed
+- Fixed Ask GitHub landing page chat box placement to be centered on the page instead of at the bottom. [#1046](https://github.com/sourcebot-dev/sourcebot/pull/1046)
+- Fixed issue where local git connections (`file://`) would fail when matching a file instead of a directory. [#1049](https://github.com/sourcebot-dev/sourcebot/pull/1049)
+- Fixed regex queries containing parentheses (e.g. `(test|render)<`) being incorrectly split into multiple search terms instead of treated as a single regex pattern. [#1050](https://github.com/sourcebot-dev/sourcebot/pull/1050)
+
 ## [4.16.2] - 2026-03-25
 
 ### Fixed
```

docs/api-reference/sourcebot-public.openapi.json

Lines changed: 1 addition & 1 deletion

```diff
@@ -2,7 +2,7 @@
   "openapi": "3.0.3",
   "info": {
     "title": "Sourcebot Public API",
-    "version": "v4.16.2",
+    "version": "v4.16.3",
     "description": "OpenAPI description for the public Sourcebot REST endpoints used for search, repository listing, and file browsing."
   },
   "tags": [
```

docs/docs/configuration/idp.mdx

Lines changed: 48 additions & 1 deletion

````diff
@@ -418,12 +418,16 @@ A Keycloak connection can be used for [authentication](/docs/configuration/auth)
 </Steps>
 </Accordion>
 
-### Microsoft Entra ID
+### Microsoft Entra ID (Azure AD)
 
 [Auth.js Microsoft Entra ID Provider Docs](https://authjs.dev/getting-started/providers/microsoft-entra-id)
 
 A Microsoft Entra ID connection can be used for [authentication](/docs/configuration/auth).
 
+<Info>
+Microsoft renamed Azure Active Directory (Azure AD) to Microsoft Entra ID in 2023. If you have an existing Azure AD setup, these instructions will work for you. The underlying authentication infrastructure is the same.
+</Info>
+
 <Accordion title="instructions">
 <Steps>
 <Step title="Register an OAuth Application">
@@ -570,4 +574,47 @@ A JumpCloud connection can be used for [authentication](/docs/configuration/auth
 </Steps>
 </Accordion>
 
+### Google Cloud IAP
+
+[Google Cloud IAP Documentation](https://cloud.google.com/iap/docs)
+
+Google Cloud Identity-Aware Proxy (IAP) can be used for [authentication](/docs/configuration/auth). IAP provides a layer of security for applications deployed on Google Cloud, allowing you to control access based on user identity and context.
+
+<Info>
+GCP IAP works differently from other identity providers. Instead of redirecting users to an OAuth flow, IAP intercepts requests at the infrastructure level and adds a signed JWT header that Sourcebot validates. This means users are automatically authenticated when accessing Sourcebot through an IAP-protected endpoint.
+</Info>
+
+<Accordion title="instructions">
+<Steps>
+<Step title="Enable IAP for your application">
+Your Sourcebot deployment must be behind Google Cloud IAP. Follow [this guide](https://cloud.google.com/iap/docs/enabling-on-premises-howto) by Google to enable IAP for your application.
+
+After enabling IAP, note the **Signed Header JWT Audience**. You can find this in the Google Cloud Console under **Security → Identity-Aware Proxy → (your application) → Edit OAuth Client → Application settings**.
+
+The audience will be in the format: `/projects/<project-number>/global/backendServices/<service-id>` or `/projects/<project-number>/apps/<project-id>`.
+</Step>
+<Step title="Define environment variables">
+Set the IAP audience as an environment variable. This can be named whatever you like (ex. `GCP_IAP_AUDIENCE`).
+</Step>
+<Step title="Define the identity provider config">
+Create a `identityProvider` object in the [config file](/docs/configuration/config-file) with the following fields:
+
+```json wrap icon="code"
+{
+  "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
+  "identityProviders": [
+    {
+      "provider": "gcp-iap",
+      "purpose": "sso",
+      "audience": {
+        "env": "GCP_IAP_AUDIENCE"
+      }
+    }
+  ]
+}
+```
+</Step>
+</Steps>
+</Accordion>
+
````
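The Info block in the diff above says IAP injects a signed JWT header that Sourcebot validates. As a rough illustration of that model (a hypothetical sketch, not Sourcebot's actual implementation), the audience check on IAP's documented `x-goog-iap-jwt-assertion` header could look like this. Note a real implementation must also verify the token's signature against Google's published public keys; this sketch only decodes the payload:

```typescript
// Hypothetical sketch: check the audience claim on the JWT that Google Cloud
// IAP injects into each request. Signature verification is intentionally
// omitted here and is REQUIRED in production.

function base64UrlDecode(segment: string): string {
    // JWTs use base64url; convert to standard base64 before decoding.
    const b64 = segment.replace(/-/g, "+").replace(/_/g, "/");
    return Buffer.from(b64, "base64").toString("utf8");
}

function checkIapAudience(
    headers: Record<string, string | undefined>,
    expectedAudience: string, // e.g. "/projects/<project-number>/apps/<project-id>"
): boolean {
    const token = headers["x-goog-iap-jwt-assertion"];
    if (!token) return false;

    const parts = token.split(".");
    if (parts.length !== 3) return false; // not a JWT

    try {
        const payload = JSON.parse(base64UrlDecode(parts[1]));
        return payload.aud === expectedAudience;
    } catch {
        return false; // malformed payload
    }
}
```

This is why the config step above only needs the audience string: the rest of the identity (email, subject) arrives inside the same signed header.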

packages/backend/src/repoCompileUtils.test.ts

Lines changed: 28 additions & 0 deletions

```diff
@@ -14,17 +14,29 @@ vi.mock('glob', () => ({
     glob: vi.fn(),
 }));
 
+// Mock fs/promises so tests don't touch the real filesystem.
+// By default, stat resolves as a directory; individual tests can override this.
+vi.mock('fs/promises', () => ({
+    default: {
+        stat: vi.fn().mockResolvedValue({ isDirectory: () => true }),
+    },
+}));
+
 import { isPathAValidGitRepoRoot, getOriginUrl, isUrlAValidGitRepo } from './git.js';
 import { glob } from 'glob';
+import fs from 'fs/promises';
 
 const mockedGlob = vi.mocked(glob);
 const mockedIsPathAValidGitRepoRoot = vi.mocked(isPathAValidGitRepoRoot);
 const mockedGetOriginUrl = vi.mocked(getOriginUrl);
 const mockedIsUrlAValidGitRepo = vi.mocked(isUrlAValidGitRepo);
+const mockedFsStat = vi.mocked(fs.stat);
 
 describe('compileGenericGitHostConfig_file', () => {
     beforeEach(() => {
         vi.clearAllMocks();
+        // Default: all paths exist and are directories. Override per-test as needed.
+        mockedFsStat.mockResolvedValue({ isDirectory: () => true } as any);
     });
 
     afterEach(() => {
@@ -47,6 +59,22 @@ describe('compileGenericGitHostConfig_file', () => {
         expect(result.warnings[0]).toContain('/path/to/nonexistent/repo');
     });
 
+    test('should return warning when path is a file, not a directory', async () => {
+        mockedGlob.mockResolvedValue(['/path/to/a-file.txt']);
+        mockedFsStat.mockResolvedValue({ isDirectory: () => false } as any);
+
+        const config = {
+            type: 'git' as const,
+            url: 'file:///path/to/a-file.txt',
+        };
+
+        const result = await compileGenericGitHostConfig_file(config, 1);
+
+        expect(result.repoData).toHaveLength(0);
+        expect(result.warnings.length).toBeGreaterThanOrEqual(1);
+        expect(result.warnings.some(w => w.includes('not a directory'))).toBe(true);
+    });
+
     test('should return warning when path is not a valid git repo', async () => {
         mockedGlob.mockResolvedValue(['/path/to/not-a-repo']);
         mockedIsPathAValidGitRepoRoot.mockResolvedValue(false);
```

packages/backend/src/repoCompileUtils.ts

Lines changed: 9 additions & 0 deletions

```diff
@@ -14,6 +14,7 @@ import { createLogger } from '@sourcebot/shared';
 import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
 import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
 import path from 'path';
+import fs from 'fs/promises';
 import { glob } from 'glob';
 import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
 import assert from 'assert';
@@ -611,6 +612,14 @@ export const compileGenericGitHostConfig_file = async (
     logger.info(`Found ${repoPaths.length} path(s) matching pattern '${configUrl.pathname}'`);
 
     await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
+        const stat = await fs.stat(repoPath).catch(() => null);
+        if (!stat || !stat.isDirectory()) {
+            const warning = `Skipping ${repoPath} - path is not a directory.`;
+            logger.warn(warning);
+            warnings.push(warning);
+            return;
+        }
+
         const isGitRepo = await isPathAValidGitRepoRoot({
             path: repoPath,
         });
```

packages/queryLanguage/src/parser.terms.ts

Lines changed: 2 additions & 1 deletion

```diff
@@ -23,4 +23,5 @@ export const
   RepoSetExpr = 16,
   ParenExpr = 17,
   QuotedTerm = 18,
-  Term = 19
+  Term = 19,
+  Dialect_regex = 0
```

packages/queryLanguage/src/parser.ts

Lines changed: 1 addition & 0 deletions

```diff
@@ -13,6 +13,7 @@ export const parser = LRParser.deserialize({
   tokenData: "/U~R_XY!QYZ!Qpq!Qrs!`#T#U$S#V#W%i#Y#Z'R#`#a(_#b#c(|#c#d)X#d#e)p#f#g+]#g#h,w#j#k-`#m#n.s~!VRm~XY!QYZ!Qpq!Q~!cWOY!`Zr!`rs!{s#O!`#O#P#Q#P;'S!`;'S;=`#|<%lO!`~#QOw~~#TRO;'S!`;'S;=`#^;=`O!`~#aXOY!`Zr!`rs!{s#O!`#O#P#Q#P;'S!`;'S;=`#|;=`<%l!`<%lO!`~$PP;=`<%l!`~$VQ#b#c$]#f#g$h~$`P#m#n$c~$hO!R~~$kP#V#W$n~$qP#[#]$t~$wP#]#^$z~$}P#j#k%Q~%TP#X#Y%W~%ZP#W#X%^~%aP![!]%d~%iOq~~%lQ![!]%r#c#d%w~%wOx~~%zP#b#c%}~&QP#h#i&T~&WP#X#Y&Z~&^Q#b#c&d#l#m&p~&gP#h#i&j~&mP![!]%r~&sP#h#i&v~&yP![!]&|~'ROy~~'UR![!]'_#]#^'d#c#d'v~'dOz~~'gP#`#a'j~'mP#X#Y'p~'sP![!]'_~'yP#f#g'|~(PP#_#`(S~(VP![!](Y~(_O{~~(bP#T#U(e~(hP#b#c(k~(nP#Z#[(q~(tP![!](w~(|O!T~~)PP#c#d)S~)XOs~~)[P#b#c)_~)bP#`#a)e~)hP#m#n)k~)pOt~~)sQ#f#g)y#i#j*n~)|P#]#^*P~*SP#j#k*V~*YP#T#U*]~*`P#h#i*c~*fP#X#Y*i~*nO!Q~~*qP#U#V*t~*wP#`#a*z~*}P#]#^+Q~+TP#V#W+W~+]O!P~~+`Q![!]+f#X#Y+k~+kO!S~~+nQ#d#e+t#j#k,l~+wP#c#d+z~+}Q![!]+f#g#h,T~,WP#X#Y,Z~,^P#h#i,a~,dP![!],g~,lO!V~~,oP![!],r~,wOu~~,zP#m#n,}~-QP#a#b-T~-WP![!]-Z~-`O!U~~-cP#]#^-f~-iP#g#h-l~-oP#]#^-r~-uP#U#V-x~-{P#]#^.O~.RP#`#a.U~.XP#]#^.[~._P#h#i.b~.eP#m#n.h~.kP![!].n~.sO}~~.vP#X#Y.y~.|P#g#h/P~/UOr~",
   tokenizers: [negateToken, parenToken, wordToken, closeParenToken, orToken, 0],
   topRules: {"Program":[0,1]},
+  dialects: {regex: 0},
   tokenPrec: 200,
   termNames: {"0":"⚠","1":"@top","2":"OrExpr","3":"AndExpr","4":"NegateExpr","5":"PrefixExpr","6":"ArchivedExpr","7":"RevisionExpr","8":"ContentExpr","9":"ContextExpr","10":"FileExpr","11":"ForkExpr","12":"VisibilityExpr","13":"RepoExpr","14":"LangExpr","15":"SymExpr","16":"RepoSetExpr","17":"ParenExpr","18":"QuotedTerm","19":"Term","20":"expr+","21":"(or andExpr)+","22":"␄","23":"negate","24":"openParen","25":"word","26":"closeParen","27":"or","28":"%mainskip","29":"space","30":"query","31":"andExpr","32":"expr","33":"archivedKw","34":"\"yes\"","35":"\"no\"","36":"\"only\"","37":"revisionKw","38":"value","39":"quotedString","40":"contentKw","41":"contextKw","42":"fileKw","43":"forkKw","44":"forkValue","45":"visibilityKw","46":"visibilityValue","47":"\"public\"","48":"\"private\"","49":"\"any\"","50":"repoKw","51":"langKw","52":"symKw","53":"reposetKw"}
 })
```

packages/queryLanguage/src/query.grammar

Lines changed: 2 additions & 0 deletions

```diff
@@ -4,6 +4,8 @@
 @external tokens closeParenToken from "./tokens" { closeParen }
 @external tokens orToken from "./tokens" { or }
 
+@dialects { regex }
+
 @top Program { query }
 
 @precedence {
```

packages/queryLanguage/src/tokens.ts

Lines changed: 41 additions & 14 deletions

```diff
@@ -1,5 +1,5 @@
 import { ExternalTokenizer, InputStream, Stack } from "@lezer/lr";
-import { negate, openParen, closeParen, word, or, ParenExpr } from "./parser.terms";
+import { negate, openParen, closeParen, word, or, Dialect_regex } from "./parser.terms";
 
 // Character codes
 const SPACE = 32;
@@ -243,9 +243,14 @@ function isInsideParenExpr(input: InputStream, stack: Stack): boolean {
  * This allows words like "(pr" or "func(arg)" to be parsed as single terms
  * while "(foo bar)" is parsed as a ParenExpr.
  */
-export const parenToken = new ExternalTokenizer((input) => {
+export const parenToken = new ExternalTokenizer((input, stack) => {
     if (input.next !== OPEN_PAREN) return;
-
+
+    // In regex mode, parens are just word characters — don't emit openParen
+    if (stack.dialectEnabled(Dialect_regex)) {
+        return;
+    }
+
     if (hasBalancedParensAt(input, 0)) {
         // Found balanced parens - emit openParen (just the '(')
         input.advance();
@@ -263,6 +268,11 @@
 export const closeParenToken = new ExternalTokenizer((input, stack) => {
     if (input.next !== CLOSE_PAREN) return;
 
+    // In regex mode, parens are just word characters — don't emit closeParen
+    if (stack.dialectEnabled(Dialect_regex)) {
+        return;
+    }
+
     // Check if we should emit closeParen (when inside a ParenExpr)
     if (isInsideParenExpr(input, stack)) {
         input.advance();
@@ -312,7 +322,20 @@ export const wordToken = new ExternalTokenizer((input, stack) => {
     if (startsWithPrefix(input)) {
         return;
     }
-
+
+    // In regex mode: consume all non-whitespace characters as a single word.
+    // Parens and | are valid regex metacharacters, not query syntax in this mode.
+    if (stack.dialectEnabled(Dialect_regex)) {
+        const startPos = input.pos;
+        while (input.next !== EOF && !isWhitespace(input.next)) {
+            input.advance();
+        }
+        if (input.pos > startPos) {
+            input.acceptToken(word);
+        }
+        return;
+    }
+
     // If starts with '(' and has balanced parens, determine whether this is a
     // regex alternation value (e.g. file:(test|spec)) or a ParenExpr grouping.
     // We're in a value context when the immediately preceding non-whitespace char
@@ -419,24 +442,28 @@ export const orToken = new ExternalTokenizer((input) => {
  * External tokenizer for negation.
  * Only tokenizes `-` as negate when followed by a prefix keyword or balanced `(`.
  */
-export const negateToken = new ExternalTokenizer((input) => {
+export const negateToken = new ExternalTokenizer((input, stack) => {
     if (input.next !== DASH) return;
-
+
     // Look ahead using peek to see what follows the dash (skipping whitespace)
     let offset = 1;
     while (isWhitespace(input.peek(offset))) {
        offset++;
     }
-
+
     const chAfterDash = input.peek(offset);
-
-    // Check if followed by opening paren that starts a balanced ParenExpr
-    if (chAfterDash === OPEN_PAREN && hasBalancedParensAt(input, offset)) {
-        input.advance();
-        input.acceptToken(negate);
-        return;
+
+    // In normal mode: also check for balanced paren (negated group e.g. -(foo bar))
+    // In regex mode: skip this — parens are not query grouping operators, so emitting
+    // negate before a '(' would leave the parser without a matching ParenExpr to parse.
+    if (!stack.dialectEnabled(Dialect_regex)) {
+        if (chAfterDash === OPEN_PAREN && hasBalancedParensAt(input, offset)) {
+            input.advance();
+            input.acceptToken(negate);
+            return;
+        }
     }
-
+
     // Check if followed by a prefix keyword (by checking for keyword followed by colon)
     let foundColon = false;
     let peekOffset = offset;
```
Lines changed: 15 additions & 0 deletions

```diff
@@ -0,0 +1,15 @@
+import { parser as _parser } from "../src/parser";
+import { fileTests } from "@lezer/generator/dist/test";
+import { describe, it } from "vitest";
+import { fileURLToPath } from "url";
+import * as fs from "fs";
+import * as path from "path";
+
+const regexParser = _parser.configure({ dialect: "regex" });
+const caseDir = path.dirname(fileURLToPath(import.meta.url));
+
+describe("regex", () => {
+    for (const { name, run } of fileTests(fs.readFileSync(path.join(caseDir, "regex.txt"), "utf8"), "regex.txt")) {
+        it(name, () => run(regexParser));
+    }
+});
```

0 commit comments