|
#!/bin/bash
# Scrape the list of GitHub Copilot supported models from the public docs
# page.  Produces: scraped-models.json (pretty JSON), models.txt (one model
# per line), page-content.html (debug copy of the scraped section).
#
# -e : abort on any command failure
# -u : treat use of unset variables as an error
# -o pipefail : a pipeline fails if ANY stage fails (plain `set -e` only
#   sees the LAST stage's status, so e.g. `node … | tee` would mask a
#   node crash behind tee's success).
set -euo pipefail

echo "Installing Puppeteer..."
npm install puppeteer

echo "Creating scraper script..."
# The quoted delimiter ('SCRAPE_EOF') disables shell expansion inside the
# heredoc so the embedded JavaScript's ${...} template literals survive.
cat > scrape.js << 'SCRAPE_EOF'
| 12 | +const puppeteer = require('puppeteer'); |
| 13 | +const fs = require('fs'); |
| 14 | +
|
| 15 | +(async () => { |
| 16 | + try { |
| 17 | + const browser = await puppeteer.launch({ |
| 18 | + headless: 'new', |
| 19 | + args: ['--no-sandbox', '--disable-setuid-sandbox'] |
| 20 | + }); |
| 21 | + const page = await browser.newPage(); |
| 22 | + |
| 23 | + console.error('Navigating to page...'); |
| 24 | + await page.goto('https://docs.github.com/en/copilot/reference/ai-models/supported-models', { |
| 25 | + waitUntil: 'networkidle0', |
| 26 | + timeout: 60000 |
| 27 | + }); |
| 28 | + |
| 29 | + console.error('Content loaded, extracting models...'); |
| 30 | + |
| 31 | + // Extract model names from the specific section |
| 32 | + const models = await page.evaluate(() => { |
| 33 | + const modelNames = []; |
| 34 | + |
| 35 | + // Find the "Supported AI models in Copilot" section |
| 36 | + const headings = Array.from(document.querySelectorAll('h2, h3')); |
| 37 | + const targetHeading = headings.find(h => h.textContent.includes('Supported AI models in Copilot')); |
| 38 | + |
| 39 | + if (!targetHeading) { |
| 40 | + console.error('ERROR: Could not find "Supported AI models in Copilot" heading'); |
| 41 | + return []; |
| 42 | + } |
| 43 | + |
| 44 | + console.error('Found target heading:', targetHeading.textContent); |
| 45 | + |
| 46 | + // Get the content section that contains this heading |
| 47 | + let contentSection = targetHeading.closest('div[class*="content"]') || targetHeading.parentElement; |
| 48 | + console.error('Content section found:', contentSection ? 'yes' : 'no'); |
| 49 | + |
| 50 | + // Find all tables within this section (or after the heading) |
| 51 | + let tables = []; |
| 52 | + let currentElement = targetHeading.nextElementSibling; |
| 53 | + |
| 54 | + // Traverse siblings until we hit another h2 or run out of elements |
| 55 | + while (currentElement) { |
| 56 | + if (currentElement.tagName === 'H2') { |
| 57 | + break; // Stop at the next major section |
| 58 | + } |
| 59 | + |
| 60 | + if (currentElement.tagName === 'TABLE') { |
| 61 | + tables.push(currentElement); |
| 62 | + } else if (currentElement.querySelectorAll) { |
| 63 | + // Check for tables within this element |
| 64 | + const nestedTables = currentElement.querySelectorAll('table'); |
| 65 | + tables.push(...nestedTables); |
| 66 | + } |
| 67 | + |
| 68 | + currentElement = currentElement.nextElementSibling; |
| 69 | + } |
| 70 | + |
| 71 | + console.error(`Found ${tables.length} tables in the target section`); |
| 72 | + |
| 73 | + tables.forEach((table, tableIndex) => { |
| 74 | + const rows = table.querySelectorAll('tbody tr'); |
| 75 | + console.error(`Table ${tableIndex}: Found ${rows.length} rows`); |
| 76 | + |
| 77 | + rows.forEach((row, rowIndex) => { |
| 78 | + // Look for the row header (th with scope="row") which contains the model name |
| 79 | + const rowHeader = row.querySelector('th[scope="row"]'); |
| 80 | + if (rowHeader) { |
| 81 | + let text = rowHeader.textContent.trim(); |
| 82 | + console.error(`Table ${tableIndex}, Row ${rowIndex}: "${text}"`); |
| 83 | + |
| 84 | + if (text && text.length > 0) { |
| 85 | + // Normalize model name: lowercase and replace spaces with dashes |
| 86 | + const normalizedName = text.toLowerCase().replace(/\s+/g, '-'); |
| 87 | + console.error(` Normalized: "${normalizedName}"`); |
| 88 | + modelNames.push(normalizedName); |
| 89 | + } |
| 90 | + } else { |
| 91 | + // Fallback to first td if no row header exists |
| 92 | + const cells = row.querySelectorAll('td'); |
| 93 | + if (cells.length > 0) { |
| 94 | + let text = cells[0].textContent.trim(); |
| 95 | + console.error(`Table ${tableIndex}, Row ${rowIndex} (fallback): "${text}"`); |
| 96 | + |
| 97 | + if (text && text.length > 0) { |
| 98 | + // Normalize model name: lowercase and replace spaces with dashes |
| 99 | + const normalizedName = text.toLowerCase().replace(/\s+/g, '-'); |
| 100 | + console.error(` Normalized: "${normalizedName}"`); |
| 101 | + modelNames.push(normalizedName); |
| 102 | + } |
| 103 | + } |
| 104 | + } |
| 105 | + }); |
| 106 | + }); |
| 107 | + |
| 108 | + // Remove duplicates |
| 109 | + return [...new Set(modelNames)]; |
| 110 | + }); |
| 111 | + |
| 112 | + // Save only the relevant section HTML for debugging |
| 113 | + const relevantHTML = await page.evaluate(() => { |
| 114 | + const headings = Array.from(document.querySelectorAll('h2, h3')); |
| 115 | + const targetHeading = headings.find(h => h.textContent.includes('Supported AI models in Copilot')); |
| 116 | + |
| 117 | + if (!targetHeading) { |
| 118 | + return '<p>Could not find target section</p>'; |
| 119 | + } |
| 120 | + |
| 121 | + let html = '<h2>' + targetHeading.textContent + '</h2>\n'; |
| 122 | + let currentElement = targetHeading.nextElementSibling; |
| 123 | + |
| 124 | + while (currentElement && currentElement.tagName !== 'H2') { |
| 125 | + html += currentElement.outerHTML + '\n'; |
| 126 | + currentElement = currentElement.nextElementSibling; |
| 127 | + } |
| 128 | + |
| 129 | + return html; |
| 130 | + }); |
| 131 | + |
| 132 | + fs.writeFileSync('page-content.html', relevantHTML); |
| 133 | + console.error('Saved relevant section HTML to page-content.html'); |
| 134 | + |
| 135 | + console.error(`Extracted ${models.length} unique models`); |
| 136 | + |
| 137 | + // Save models as JSON |
| 138 | + const modelsJson = JSON.stringify(models, null, 2); |
| 139 | + fs.writeFileSync('scraped-models.json', modelsJson); |
| 140 | + console.error('Saved scraped models to scraped-models.json'); |
| 141 | + |
| 142 | + // Output for the workflow |
| 143 | + console.log(JSON.stringify(models)); |
| 144 | + |
| 145 | + await browser.close(); |
| 146 | + } catch (error) { |
| 147 | + console.error('Error:', error.message); |
| 148 | + console.error('Stack trace:', error.stack); |
| 149 | + process.exit(1); |
| 150 | + } |
| 151 | +})(); |
SCRAPE_EOF

echo "Running scraper..."
# Keep stdout (the single JSON result line) separate from stderr (debug
# logging).  The original merged both with `2>&1 | tee` and then took
# `tail -n 1`, hoping the JSON landed last — interleaved/buffered stderr
# can break that, and tee's exit status masked node failures under `set -e`.
# Capturing stdout directly is robust and still fails the script if node
# exits non-zero; the debug log is preserved in scraper.log.
MODELS_JSON=$(node scrape.js 2> scraper.log)
echo "Scraped models JSON: $MODELS_JSON"

# Store the models, one per line
echo "$MODELS_JSON" | jq -r '.[]' > models.txt
echo "Models extracted to models.txt:"
cat models.txt

echo "Scraping complete!"
0 commit comments