Skip to content

Commit 0513e30

Browse files
authored
Merge pull request #77 from rajbos/token-cost-estimates
Token cost estimates
2 parents 3c12798 + c55f84d commit 0513e30

File tree

11 files changed

+1158
-28
lines changed

11 files changed

+1158
-28
lines changed

.github/copilot-instructions.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ The entire extension's logic is contained within the `CopilotTokenTracker` class
4040
- `CopilotTokenTracker`: The main class.
4141
- `calculateDetailedStats()`: The primary data aggregation method.
4242
- `getDetailsHtml()`: The method responsible for rendering the webview's HTML content. All styling is inlined within this method's template string.
43-
- `tokenEstimators`: A critical class property. To add or modify token calculation for a model, this object must be updated.
43+
- **`src/README.md`**: **IMPORTANT**: Contains detailed instructions for updating the JSON data files. Always consult this file when updating tokenEstimators.json or modelPricing.json. It includes structure definitions, update procedures, and current pricing information.
44+
- **`src/tokenEstimators.json`**: Character-to-token ratio estimators for different AI models. See `src/README.md` for update instructions.
45+
- **`src/modelPricing.json`**: Model pricing data with input/output costs per million tokens. Includes metadata about pricing sources and last update date. See `src/README.md` for detailed update instructions and current pricing sources.
4446
- **`package.json`**: Defines activation events, commands, and build scripts.
45-
- **`esbuild.js`**: The build script that bundles the TypeScript source.
47+
- **`esbuild.js`**: The build script that bundles the TypeScript source and JSON data files.

.github/scripts/scrape-models.sh

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#!/bin/bash
# Scrape the list of GitHub Copilot supported models from the public
# documentation page. Puppeteer is used to load the page and extract the
# model names.
#
# -e          : abort on the first failing command
# -o pipefail : a pipeline fails when ANY stage fails, so a crashing
#               `node scrape.js` is not masked by a succeeding `tee`
set -eo pipefail

echo "Installing Puppeteer..."
# --no-save keeps this CI-only tool out of the repository's package.json
# and lockfile.
npm install --no-save puppeteer

echo "Creating scraper script..."
11+
cat > scrape.js << 'SCRAPE_EOF'
12+
'use strict';

const puppeteer = require('puppeteer');
const fs = require('fs');

const DOCS_URL = 'https://docs.github.com/en/copilot/reference/ai-models/supported-models';
const SECTION_TITLE = 'Supported AI models in Copilot';

/**
 * Extracts the model names and the raw HTML of the documentation section
 * whose heading contains `sectionTitle`.
 *
 * This function is serialized and executed INSIDE the browser page by
 * `page.evaluate`, so it must not reference anything from the Node.js scope.
 * Console output produced in the page never reaches the Node.js process, so
 * log lines are collected and returned to the caller instead.
 *
 * @param {string} sectionTitle - Text the target h2/h3 heading must contain.
 * @returns {{models: string[], html: string, diagnostics: string[]}}
 *   `models` are de-duplicated, lowercased names with whitespace runs
 *   replaced by dashes; `html` is the section markup kept for debugging;
 *   `diagnostics` are log lines for the caller to print.
 */
function extractSection(sectionTitle) {
  const diagnostics = [];
  const heading = Array.from(document.querySelectorAll('h2, h3'))
    .find((h) => h.textContent.includes(sectionTitle));

  if (!heading) {
    diagnostics.push(`ERROR: Could not find "${sectionTitle}" heading`);
    return { models: [], html: '<p>Could not find target section</p>', diagnostics };
  }
  diagnostics.push(`Found target heading: ${heading.textContent}`);

  // Walk the siblings that follow the heading until the next major (h2)
  // section, collecting both the debug HTML and every table on the way.
  const tables = [];
  let html = `<h2>${heading.textContent}</h2>\n`;
  for (let el = heading.nextElementSibling; el && el.tagName !== 'H2'; el = el.nextElementSibling) {
    html += `${el.outerHTML}\n`;
    if (el.tagName === 'TABLE') {
      tables.push(el);
    } else {
      tables.push(...el.querySelectorAll('table'));
    }
  }
  diagnostics.push(`Found ${tables.length} tables in the target section`);

  const modelNames = [];
  tables.forEach((table, tableIndex) => {
    const rows = table.querySelectorAll('tbody tr');
    diagnostics.push(`Table ${tableIndex}: Found ${rows.length} rows`);

    rows.forEach((row, rowIndex) => {
      // The model name lives in the row header; fall back to the first
      // data cell only when the row has no header at all.
      const rowHeader = row.querySelector('th[scope="row"]');
      const cell = rowHeader || row.querySelector('td');
      if (!cell) {
        return;
      }

      const text = cell.textContent.trim();
      const label = rowHeader ? '' : ' (fallback)';
      diagnostics.push(`Table ${tableIndex}, Row ${rowIndex}${label}: "${text}"`);
      if (text.length === 0) {
        return;
      }

      // Normalize model name: lowercase and replace spaces with dashes
      const normalizedName = text.toLowerCase().replace(/\s+/g, '-');
      diagnostics.push(`  Normalized: "${normalizedName}"`);
      modelNames.push(normalizedName);
    });
  });

  return { models: [...new Set(modelNames)], html, diagnostics };
}

/**
 * Loads the documentation page, writes `page-content.html` and
 * `scraped-models.json`, and prints the model list as one JSON line on
 * stdout. All progress/diagnostic output goes to stderr.
 */
async function main() {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage();

    console.error('Navigating to page...');
    await page.goto(DOCS_URL, {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });

    console.error('Content loaded, extracting models...');
    const { models, html, diagnostics } = await page.evaluate(extractSection, SECTION_TITLE);
    for (const line of diagnostics) {
      console.error(line);
    }

    // Save only the relevant section HTML for debugging
    fs.writeFileSync('page-content.html', html);
    console.error('Saved relevant section HTML to page-content.html');

    console.error(`Extracted ${models.length} unique models`);

    // Save models as JSON
    fs.writeFileSync('scraped-models.json', JSON.stringify(models, null, 2));
    console.error('Saved scraped models to scraped-models.json');

    // Output for the workflow: must stay the LAST line written, because the
    // calling shell script reads the final line of the combined log.
    console.log(JSON.stringify(models));
  } finally {
    // Always release the Chromium process, including on failure.
    if (browser) {
      await browser.close();
    }
  }
}

main().catch((error) => {
  console.error('Error:', error.message);
  console.error('Stack trace:', error.stack);
  process.exit(1);
});
152+
SCRAPE_EOF
153+
154+
echo "Running scraper..."
# Without pipefail the pipeline's status would be tee's, hiding a scraper crash.
set -o pipefail
node scrape.js 2>&1 | tee scraper.log

# Read the result from the file the scraper writes rather than from the last
# line of the combined stdout/stderr log, which any late warning would corrupt.
if [ ! -s scraped-models.json ]; then
  echo "Error: scraped-models.json is missing or empty" >&2
  exit 1
fi

MODELS_JSON=$(jq -c '.' scraped-models.json)
echo "Scraped models JSON: $MODELS_JSON"

# Store the models, one per line
jq -r '.[]' scraped-models.json > models.txt
echo "Models extracted to models.txt:"
cat models.txt

echo "Scraping complete!"

.github/workflows/check-models.yml

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Weekly check that every model listed in the GitHub Copilot documentation
# has an entry in src/tokenEstimators.json and src/modelPricing.json.
# Opens a single tracking issue when something is missing.
name: Check for Copilot Model Updates

on:
  workflow_dispatch:
  push:
    paths:
      - .github/workflows/check-models.yml
      - .github/scripts/scrape-models.sh

  schedule:
    - cron: '11 17 * * 1' # Run every Monday at 5:11 PM UTC

jobs:
  check-models:
    runs-on: ubuntu-latest
    # Least privilege: no step requests an OIDC token, so id-token is not granted.
    permissions:
      issues: write
      contents: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Fetch documentation page
        id: fetch_docs
        run: |
          # Run the scraping script
          chmod +x .github/scripts/scrape-models.sh
          .github/scripts/scrape-models.sh

      - name: Upload scraped data as artifact
        # Also upload when scraping failed: that is when the log is needed most.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: scraped-data
          path: |
            scraped-models.json
            models.txt
            page-content.html
            scraper.log

      - name: Extract models from scraped data
        id: fetch_models
        run: |
          # Read the models from the file created by the scraping step
          if [ ! -f models.txt ]; then
            echo "Error: models.txt not found"
            exit 1
          fi

          MODELS=$(sort -u models.txt)

          # Check for emptiness before counting: `wc -l` reports 1 for an
          # empty string piped through echo.
          if [ -z "$MODELS" ]; then
            echo "Error: No models extracted"
            exit 1
          fi

          echo "=== Scraped Models from Documentation ==="
          echo "$MODELS"
          echo ""
          echo "Count: $(echo "$MODELS" | wc -l) models"
          echo ""

          # Store models as a multiline output
          {
            echo "models_list<<EOF"
            echo "$MODELS"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Check for missing models
        id: check_missing
        env:
          FETCHED_MODELS: ${{ steps.fetch_models.outputs.models_list }}
        run: |
          # Load models from local JSON files
          ESTIMATOR_MODELS=$(jq -r '.estimators | keys[]' src/tokenEstimators.json | sort -u)
          PRICING_MODELS=$(jq -r '.pricing | keys[]' src/modelPricing.json | sort -u)

          echo "Models in tokenEstimators.json:"
          echo "$ESTIMATOR_MODELS"
          echo "Models in modelPricing.json:"
          echo "$PRICING_MODELS"

          MISSING_ESTIMATORS=""
          MISSING_PRICING=""
          while IFS= read -r model; do
            [ -n "$model" ] || continue
            # -x -F: whole-line, fixed-string match. Model names contain dots
            # (e.g. version numbers) that must not act as regex wildcards.
            if ! grep -qxF -- "$model" <<< "$ESTIMATOR_MODELS"; then
              MISSING_ESTIMATORS="$MISSING_ESTIMATORS\n- $model"
            fi
            if ! grep -qxF -- "$model" <<< "$PRICING_MODELS"; then
              MISSING_PRICING="$MISSING_PRICING\n- $model"
            fi
          done <<< "$FETCHED_MODELS"

          ISSUE_BODY=""
          NEEDS_UPDATE="false"

          if [ -n "$MISSING_ESTIMATORS" ]; then
            NEEDS_UPDATE="true"
            ISSUE_BODY="$ISSUE_BODY### 🚨 Missing Models in \`tokenEstimators.json\`\nThe following models are listed in the GitHub Copilot documentation but are missing from \`src/tokenEstimators.json\`:\n$MISSING_ESTIMATORS\n\n"
          fi

          if [ -n "$MISSING_PRICING" ]; then
            NEEDS_UPDATE="true"
            ISSUE_BODY="$ISSUE_BODY### 💰 Missing Models in \`modelPricing.json\`\nThe following models are listed in the GitHub Copilot documentation but are missing from \`src/modelPricing.json\`:\n$MISSING_PRICING\n\n"
          fi

          if [ "$NEEDS_UPDATE" = "true" ]; then
            ISSUE_BODY="$ISSUE_BODY**Action Required:**\nPlease update the JSON configuration files with the latest models.\n\n**Source:** [GitHub Copilot Supported Models](https://docs.github.com/en/copilot/reference/ai-models/supported-models)"
            echo "needs_update=true" >> "$GITHUB_OUTPUT"
            # Use a heredoc to handle multiline body
            {
              echo "issue_body<<EOF"
              echo -e "$ISSUE_BODY"
              echo "EOF"
            } >> "$GITHUB_OUTPUT"

            # Create step summary
            {
              echo "## ⚠️ Missing Models Detected"
              echo ""
              if [ -n "$MISSING_ESTIMATORS" ]; then
                echo "### 🚨 Missing in \`tokenEstimators.json\`"
                echo -e "$MISSING_ESTIMATORS"
                echo ""
              fi
              if [ -n "$MISSING_PRICING" ]; then
                echo "### 💰 Missing in \`modelPricing.json\`"
                echo -e "$MISSING_PRICING"
                echo ""
              fi
              echo "**Action Required:** Update the JSON configuration files with the latest models."
              echo ""
              echo "[View Documentation](https://docs.github.com/en/copilot/reference/ai-models/supported-models)"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "needs_update=false" >> "$GITHUB_OUTPUT"
            echo "✅ All models are up-to-date."

            # Create step summary
            {
              echo "## ✅ All Models Up-to-Date"
              echo ""
              echo "All models from the documentation are present in both:"
              echo "- \`tokenEstimators.json\`"
              echo "- \`modelPricing.json\`"
            } >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Create GitHub Issue if models are missing
        if: steps.check_missing.outputs.needs_update == 'true'
        uses: actions/github-script@v7
        env:
          ISSUE_BODY: ${{ steps.check_missing.outputs.issue_body }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const issueBody = process.env.ISSUE_BODY;
            const { owner, repo } = context.repo;
            const title = '🤖 Action Required: Update Copilot Supported Models';

            // Check if a similar open issue already exists. Paginate so an
            // existing issue beyond the first page of results is not missed.
            const openIssues = await github.paginate(github.rest.issues.listForRepo, {
              owner,
              repo,
              state: 'open',
              labels: 'maintenance',
              creator: 'github-actions[bot]',
              per_page: 100,
            });
            const existingIssue = openIssues.find((issue) => issue.title === title);

            if (existingIssue) {
              console.log(`An open issue with the title "${title}" already exists. Skipping creation.`);
            } else {
              await github.rest.issues.create({
                owner,
                repo,
                title,
                body: issueBody,
                labels: ['maintenance', 'autogenerated'],
              });
              console.log('Created a new GitHub issue for missing models.');
            }

.github/workflows/tag-on-issue.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Mentions the user/team configured in the TAG_USER repository variable on
# every newly opened issue.
name: Tag on New Issue

on:
  issues:
    types: [opened]

permissions:
  contents: read

jobs:
  tag-user:
    # Skip when TAG_USER is not configured (e.g. in forks) instead of running
    # the action with an empty mention target.
    if: vars.TAG_USER != ''
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      - name: Tag user on new issue
        uses: devops-actions/issue-comment-tag@ec5e2b8d5f031330ff19bf4ee5b5252b1176995a # v0.1.8
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          team: ${{ vars.TAG_USER }}
          issue: ${{ github.event.issue.number }}
          owner: ${{ github.repository_owner }}
          repo: ${{ github.repository }}

0 commit comments

Comments
 (0)