Skip to content

Commit 09d841d

Browse files
authored
Add benchmarking, documentation updates, and GitHub Packages workflow (#30)
## Summary

Publish GitHub Package using npm

## Checks

- [ ] C++ build still works
- [ ] Backend changes were smoke-tested locally
- [ ] Frontend build still passes
2 parents 98b6983 + 979212e commit 09d841d

10 files changed

Lines changed: 369 additions & 7 deletions
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Publishes the npm package to the GitHub Packages registry.
# Runs on demand (workflow_dispatch) and on every pushed tag matching "v*".
name: Publish GitHub Package

on:
  workflow_dispatch:
  push:
    tags:
      - "v*"

# Least-privilege token: read the repository, write packages.
permissions:
  contents: read
  packages: write

jobs:
  publish-github-package:
    name: Publish to GitHub Packages
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # registry-url + scope make setup-node write an .npmrc that routes the
      # @eamon2009 scope to GitHub Packages and reads NODE_AUTH_TOKEN.
      - name: Set up Node.js for GitHub Packages
        uses: actions/setup-node@v4
        with:
          node-version: "20"
          registry-url: "https://npm.pkg.github.com"
          scope: "@eamon2009"
          cache: "npm"
          cache-dependency-path: frontend/package-lock.json

      - name: Build frontend assets
        run: |
          npm --prefix frontend ci
          npm --prefix frontend run build

      # Rewrites the package.json in the working directory (repository root)
      # so the published name is scoped and targets GitHub Packages.
      - name: Prepare GitHub Packages metadata
        run: |
          npm pkg set name="@eamon2009/quadtrix"
          npm pkg set publishConfig.registry="https://npm.pkg.github.com"

      - name: Publish package
        run: npm publish
        env:
          NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,14 @@ Quadtrix.cpp is a local language model project with several execution paths:
1414

1515
The project is designed as a technical learning implementation. The C++ path exposes the transformer internals directly: tensor operations, attention, layer normalization, cross-entropy, analytical gradients, AdamW, checkpointing, and autoregressive generation.
1616

17-
<img width="1010" height="727" alt="Quadtrix architecture image" src="https://github.com/user-attachments/assets/b52fdbe7-9fe2-4415-9282-d0b97cb00165" />
17+
## v1.1.0
18+
<img width="2442" height="1586" alt="run_20260508_110726" src="https://github.com/user-attachments/assets/ef51d1c3-e28e-4674-8a71-5513e753b174" />
19+
20+
---
21+
22+
<img width="2185" height="829" alt="run_20260430_192930" src="https://github.com/user-attachments/assets/c6db061a-aa8d-4d8d-a1e2-1a81418bb613" />
23+
24+
---
1825

1926
## Contents
2027

benchmark.cpp

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
// Run:
2+
// .\quadtrix_bench.exe data\input.txt
3+
// .\quadtrix_bench.exe data\input.txt --tokens 100 --runs 10 --warmup 3
4+
//
5+
// Flags (all optional):
6+
// --tokens N tokens to generate per run (default: 50)
7+
// --runs N how many timed runs per prompt (default: 5)
8+
// --warmup N un-timed warmup runs per prompt (default: 2)
9+
10+
#include <iostream>
11+
#include <iomanip>
12+
#include <fstream>
13+
#include <vector>
14+
#include <string>
15+
#include <chrono>
16+
#include <numeric>
17+
#include <cmath>
18+
#include <cstdlib>
19+
#include <algorithm>
20+
21+
#include "config/config.h"
22+
#include "include/dataloader.h"
23+
#include "include/gpt.h"
24+
25+
// Reports whether the file at `p` can be opened for reading (binary mode).
static bool file_exists(const std::string &p)
{
    std::ifstream probe;
    probe.open(p.c_str(), std::ios::binary);
    const bool readable = probe.good();
    return readable;
}
30+
31+
// Reads std::chrono::steady_clock and returns the time since that clock's
// epoch as fractional milliseconds. Callers subtract two readings to get an
// elapsed interval.
static double now_ms()
{
    typedef std::chrono::duration<double, std::milli> millis_f;
    const std::chrono::steady_clock::time_point stamp = std::chrono::steady_clock::now();
    return millis_f(stamp.time_since_epoch()).count();
}
38+
39+
// Arithmetic mean of `v`.
// Returns 0.0 for an empty vector instead of dividing by zero (which would
// yield NaN and poison every statistic derived from it).
static double mean(const std::vector<double> &v)
{
    if (v.empty())
        return 0.0;
    return std::accumulate(v.begin(), v.end(), 0.0) / static_cast<double>(v.size());
}
43+
44+
// Population standard deviation of `v` (divides by N, not N - 1) around the
// caller-supplied mean `m`.
// Returns 0.0 for an empty vector instead of computing sqrt(0 / 0) = NaN.
static double stdev(const std::vector<double> &v, double m)
{
    if (v.empty())
        return 0.0;

    double sq = 0.0;
    for (double x : v)
    {
        const double d = x - m;
        sq += d * d;
    }
    return std::sqrt(sq / static_cast<double>(v.size()));
}
51+
52+
// Times the generation of `n_tokens` tokens, issuing one model.generate(ctx, 1)
// call per token and starting from a private copy of `prompt_ctx`. After each
// call the working context is cut back to its last BLOCK_SIZE entries whenever
// it has grown past that length. Returns the elapsed time in milliseconds
// (difference of two now_ms() readings). `dl` is accepted but not used here.
static double timed_run(GPTLanguageModel &model,
                        DataLoader &dl,
                        const std::vector<int> &prompt_ctx,
                        int n_tokens)
{
    std::vector<int> window(prompt_ctx);

    const double started = now_ms();
    int produced = 0;
    while (produced < n_tokens)
    {
        window = model.generate(window, 1);
        if (static_cast<int>(window.size()) > BLOCK_SIZE)
        {
            // Keep only the most recent BLOCK_SIZE tokens.
            std::vector<int> tail(window.end() - BLOCK_SIZE, window.end());
            window.swap(tail);
        }
        ++produced;
    }
    const double finished = now_ms();
    return finished - started;
}
68+
69+
//
70+
71+
// Writes a section heading to std::cout: a leading space, the title, and a
// newline.
static void section(const std::string &title)
{
    std::cout << " " << title << "\n";
}
76+
77+
// Aggregated benchmark statistics for a single prompt, as filled in by
// bench_prompt(). Field order matters: the struct is brace-initialised
// positionally.
struct PromptResult
{
    std::string label;  // prompt text as shown in reports (shortened when long)
    int prompt_tokens;  // encoded prompt length after clipping to the context size
    int gen_tokens;     // tokens generated in each run
    double avg_ms;      // mean time of the timed runs, in milliseconds
    double min_ms;      // fastest timed run, in milliseconds
    double max_ms;      // slowest timed run, in milliseconds
    double std_ms;      // standard deviation of the timed runs, in milliseconds
    double avg_tps;     // tokens per second derived from avg_ms
};
88+
89+
// Benchmarks generation for one prompt.
//
// The prompt is encoded with `dl`, falls back to the single token id 0 when
// the encoding is empty, and is clipped to its last BLOCK_SIZE tokens. It is
// then run `n_warmup` times un-timed and `n_runs` times timed, each run
// generating `n_tokens` tokens via timed_run().
//
// Returns the mean / min / max / standard deviation of the timed runs in
// milliseconds plus the throughput in tokens per second. When no timed run
// was performed (n_runs <= 0) all statistics are 0; throughput is also 0 when
// the mean time is not positive, so no division by zero or dereference of an
// empty range can occur.
static PromptResult bench_prompt(GPTLanguageModel &model,
                                 DataLoader &dl,
                                 const std::string &prompt,
                                 int n_tokens,
                                 int n_runs,
                                 int n_warmup)
{
    // encode
    std::vector<int> ctx = dl.encode(prompt);
    if (ctx.empty())
        ctx = {0};
    if ((int)ctx.size() > BLOCK_SIZE)
        ctx = std::vector<int>(ctx.end() - BLOCK_SIZE, ctx.end());

    const int prompt_len = (int)ctx.size();

    // warmup (un-timed)
    for (int i = 0; i < n_warmup; ++i)
        timed_run(model, dl, ctx, n_tokens);

    // timed runs
    std::vector<double> times;
    if (n_runs > 0) // a negative count would convert to a huge unsigned size
        times.reserve(static_cast<std::vector<double>::size_type>(n_runs));
    for (int i = 0; i < n_runs; ++i)
        times.push_back(timed_run(model, dl, ctx, n_tokens));

    double m = 0.0;
    double sd = 0.0;
    double mn = 0.0;
    double mx = 0.0;
    double tps = 0.0;
    if (!times.empty())
    {
        m = mean(times);
        sd = stdev(times, m);
        mn = *std::min_element(times.begin(), times.end());
        mx = *std::max_element(times.begin(), times.end());
        if (m > 0.0)
            tps = n_tokens / (m / 1000.0);
    }

    // truncate prompt for display
    std::string label = prompt.size() > 30
                            ? prompt.substr(0, 27) + "..."
                            : prompt;

    return PromptResult{label, prompt_len, n_tokens, m, mn, mx, sd, tps};
}
128+
129+
static void print_table(const std::vector<PromptResult> &results)
130+
{
131+
section("RESULTS");
132+
133+
// header
134+
std::cout << std::left
135+
<< std::setw(34) << "Prompt"
136+
<< std::right
137+
<< std::setw(8) << "P.Tok"
138+
<< std::setw(8) << "G.Tok"
139+
<< std::setw(10) << "Avg ms"
140+
<< std::setw(10) << "Min ms"
141+
<< std::setw(10) << "Max ms"
142+
<< std::setw(9) << "Std ms"
143+
<< std::setw(10) << "tok/s"
144+
<< "\n";
145+
std::cout << std::string(99, '-') << "\n";
146+
147+
std::cout << std::fixed;
148+
for (const auto &r : results)
149+
{
150+
std::cout << std::left
151+
<< std::setw(34) << r.label
152+
<< std::right
153+
<< std::setw(8) << r.prompt_tokens
154+
<< std::setw(8) << r.gen_tokens
155+
<< std::setw(10) << std::setprecision(1) << r.avg_ms
156+
<< std::setw(10) << std::setprecision(1) << r.min_ms
157+
<< std::setw(10) << std::setprecision(1) << r.max_ms
158+
<< std::setw(9) << std::setprecision(1) << r.std_ms
159+
<< std::setw(10) << std::setprecision(2) << r.avg_tps
160+
<< "\n";
161+
}
162+
163+
double total_avg_tps = 0.0;
164+
double best_tps = 0.0;
165+
for (const auto &r : results)
166+
{
167+
total_avg_tps += r.avg_tps;
168+
best_tps = std::max(best_tps, r.avg_tps);
169+
}
170+
double overall_tps = total_avg_tps / results.size();
171+
172+
std::cout << "\n Overall avg throughput : "
173+
<< std::setprecision(2) << overall_tps << " tok/s\n";
174+
std::cout << " Peak throughput : "
175+
<< std::setprecision(2) << best_tps << " tok/s\n";
176+
std::cout << " ms per token (avg) : "
177+
<< std::setprecision(2) << 1000.0 / overall_tps << " ms\n";
178+
}
179+
180+
static void save_csv(const std::vector<PromptResult> &results,
181+
const std::string &path)
182+
{
183+
std::ofstream f(path);
184+
if (!f)
185+
{
186+
std::cerr << "[WARN] Could not write CSV to " << path << "\n";
187+
return;
188+
}
189+
f << "prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec\n";
190+
for (const auto &r : results)
191+
{
192+
f << "\"" << r.label << "\","
193+
<< r.prompt_tokens << ","
194+
<< r.gen_tokens << ","
195+
<< r.avg_ms << ","
196+
<< r.min_ms << ","
197+
<< r.max_ms << ","
198+
<< r.std_ms << ","
199+
<< r.avg_tps << "\n";
200+
}
201+
std::cout << "\n CSV saved to: " << path << "\n";
202+
}
203+
204+
int main(int argc, char *argv[])
205+
{
206+
207+
std::string data_path = DEFAULT_CLEANED_PATH;
208+
std::string model_path = BEST_MODEL_PATH;
209+
int n_tokens = 50;
210+
int n_runs = 5;
211+
int n_warmup = 2;
212+
213+
for (int i = 1; i < argc; ++i)
214+
{
215+
std::string a = argv[i];
216+
if (a == "--tokens" && i + 1 < argc)
217+
n_tokens = std::atoi(argv[++i]);
218+
else if (a == "--runs" && i + 1 < argc)
219+
n_runs = std::atoi(argv[++i]);
220+
else if (a == "--warmup" && i + 1 < argc)
221+
n_warmup = std::atoi(argv[++i]);
222+
else
223+
data_path = a;
224+
}
225+
226+
std::cout << " Quadtrix Inference Benchmark\n";
227+
std::cout << " data : " << data_path << "\n";
228+
std::cout << " model : " << model_path << "\n";
229+
std::cout << " tokens : " << n_tokens << " per run\n";
230+
std::cout << " runs : " << n_runs << " timed + "
231+
<< n_warmup << " warmup\n";
232+
233+
DataLoader dl;
234+
try
235+
{
236+
dl.load(data_path);
237+
}
238+
catch (const std::exception &e)
239+
{
240+
std::cerr << "[ERROR] " << e.what() << "\n";
241+
return 1;
242+
}
243+
244+
if (!file_exists(model_path))
245+
{
246+
std::cerr << "[ERROR] Weights not found at " << model_path << "\n";
247+
std::cerr << "[HINT] Train first, or set " << MODEL_PATH_ENV_VAR << "\n";
248+
return 1;
249+
}
250+
251+
GPTLanguageModel model(dl.vocab_size, N_EMBD, N_HEAD, N_LAYER, BLOCK_SIZE, SEED);
252+
model.load(model_path);
253+
254+
std::cout << "\n[OK] Model loaded (" << model.num_params() / 1.0e6f
255+
<< " M params)\n";
256+
257+
std::vector<std::string> prompts = {
258+
"",
259+
"The", // 1-token prompt
260+
"Once upon a time", // short prompt
261+
"The quick brown fox jumps", // medium prompt
262+
std::string(1, 'a'), // long prompt (stress-tests context window)
263+
};
264+
265+
section("RUNNING");
266+
std::vector<PromptResult> results;
267+
results.reserve(prompts.size());
268+
269+
for (size_t i = 0; i < prompts.size(); ++i)
270+
{
271+
std::string display = prompts[i].empty()
272+
? "(empty / BOS)"
273+
: (prompts[i].size() > 30
274+
? prompts[i].substr(0, 27) + "..."
275+
: prompts[i]);
276+
277+
std::cout << " [" << (i + 1) << "/" << prompts.size() << "] \""
278+
<< display << "\" ... " << std::flush;
279+
280+
PromptResult r = bench_prompt(model, dl,
281+
prompts[i],
282+
n_tokens, n_runs, n_warmup);
283+
results.push_back(r);
284+
285+
std::cout << std::fixed << std::setprecision(2)
286+
<< r.avg_tps << " tok/s\n";
287+
}
288+
289+
print_table(results);
290+
save_csv(results, "benchmark_results.csv");
291+
292+
std::cout << "\n";
293+
294+
std::cout << " Done.\n";
295+
return 0;
296+
}

benchmark_results.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec
2+
"",1,50,2027,1848.4,2314.78,168.952,24.667
3+
"The",3,50,2561.63,2241.3,2986.35,306.631,19.5188
4+
"Once upon a time",16,50,3038.74,2885.22,3225.42,126.5,16.4542
5+
"The quick brown fox jumps",25,50,3975,3561.3,4783.34,454.592,12.5786
6+
"a",1,50,1862.99,1808.71,1929.64,46.4738,26.8386
273 KB
Binary file not shown.

docs/run_20260430_192930.png

239 KB
Loading

docs/run_20260508_110726.png

333 KB
Loading

engine/fineweb_dataset.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30):
3636
current_bytes += sample_size
3737

3838
if (i + 1) % 100 == 0:
39-
print(f"Collected {i + 1} samples (~{current_bytes / (1024*1024):.2f} MB)")
39+
print(f"Collected {i + 1} samples ({current_bytes / (1024*1024):.2f} MB)")
4040

4141
print(f"\nDownloaded {len(samples)} samples ({current_bytes / (1024*1024):.2f} MB)")
42-
output_file = os.path.join(output_dir, "fineweb_30mb.txt")
42+
output_file = os.path.join(output_dir, "input.txt")
4343
with open(output_file, 'w', encoding='utf-8') as f:
4444
for sample in samples:
4545
f.write(sample['text'])
@@ -54,7 +54,7 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30):
5454
if __name__ == "__main__":
5555
try:
5656
download_fineweb_sample()
57-
print("\n✓ Download completed successfully!")
57+
print("\nDownload completed successfully!")
5858
except Exception as e:
5959
print(f"\ Error: {e}")
6060
print("\nMake sure you have the 'datasets' library installed:")

engine/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def success(msg): log(f" ok {msg}")
6565
# CONFIGURATION
6666

6767

68-
cleaned_path = Path(os.environ.get("QUADTRIX_TRAIN_DATA", SCRIPT_DIR / "input.txt"))
68+
cleaned_path = Path(os.environ.get("data", SCRIPT_DIR / "input.txt"))
6969
train_split = 0.9
7070
seed = 1337
7171

0 commit comments

Comments
 (0)