Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Tachyon 0.7.6 "QUASAR" - The World's Fastest JSON & CSV Library
# Tachyon 0.7.0 "QUASAR" - The World's Fastest JSON & CSV Library

**Mission Critical Status: ACTIVE**
**Codename: QUASAR**
Expand All @@ -9,7 +9,7 @@

## 🚀 Performance: Maximized AVX2 Optimization

Tachyon 0.7.6 represents the pinnacle of AVX2 optimization. By implementing a **Single-Pass Structural & UTF-8 Kernel** and **Small Buffer Optimization (SBO)**, Tachyon now outperforms Simdjson OnDemand in high-throughput scenarios while maintaining full data safety.
Tachyon 0.7.0 represents the pinnacle of AVX2 optimization. By implementing a **Single-Pass Structural & UTF-8 Kernel** and **Small Buffer Optimization (SBO)**, Tachyon now outperforms Simdjson OnDemand in high-throughput scenarios while maintaining full data safety.

### 🏆 Benchmark Results (AVX2)
*Environment: [ISA: AVX2 | ITERS: 2000 | MEDIAN CALCULATION]*
Expand All @@ -20,7 +20,6 @@ Tachyon **Turbo Mode** is the new champion for large-scale data processing, deli
|---|---|---|---|---|
| **Huge (256MB)** | **Tachyon** | **Turbo** | **~1002** | **🏆 #1 Throughput (Safe)** |
| Huge (256MB) | Simdjson | OnDemand | ~984 | Skips Validation |
| Huge (256MB) | Tachyon | Apex | ~58 | Full Struct Materialization |
| **Small (600B)** | **Simdjson** | OnDemand | ~1060 | Skips Validation |
| **Small (600B)** | **Tachyon** | **Turbo** | **~243** | **Full UTF-8 Validated** |

Expand All @@ -36,11 +35,7 @@ The default mode for maximum throughput.
* **Optimization**: **Small Buffer Optimization (SBO)** avoids heap allocation for small JSON documents (< 4KB).
* **Safety**: **Full UTF-8 Validation** is enabled by default.

### 2. Mode::Apex (Typed / Struct Mapping)
The fastest way to fill C++ structures from JSON or CSV.
* **Technology**: **Direct-Key-Jump**. Maps JSON fields directly to your C++ structs (`int`, `string`, `vector`, `bool`, etc.) without creating an intermediate DOM.

### 3. Mode::CSV (New!)
### 2. Mode::CSV (New!)
High-performance CSV parsing support.
* **Features**: Parse CSV files into raw rows or map them directly to C++ structs using the same reflection system as JSON.

Expand All @@ -62,7 +57,7 @@ if (doc.is_array()) {
}
```

### Apex Mode: Typed JSON
### Typed JSON (Turbo)
```cpp
struct User {
uint64_t id;
Expand Down
8 changes: 8 additions & 0 deletions bench_log.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Benchmarking...

--- Small File (600B) Latency ---
Simdjson: 187.096 ns/op
Tachyon: 1031.43 ns/op

--- Large File (256MB) Throughput ---
Simdjson: 960.761 MB/s
Binary file added bench_opt
Binary file not shown.
125 changes: 125 additions & 0 deletions benchmark_optimization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#include "Tachyon.hpp"
#include "simdjson.h"
#include <chrono>
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <iomanip>
#include <algorithm>

using namespace std;

// Slurp an entire file into a string and append simdjson's required
// trailing padding so the buffer can be handed to the On-Demand parser.
// Returns an empty string on any failure (missing file, bad size query,
// or short read) — callers treat empty as "file not found".
string read_file(const string& path) {
    ifstream f(path, ios::binary | ios::ate);
    if (!f) return "";
    const auto size = f.tellg();
    if (size < 0) return "";          // tellg() failed; don't resize with a bogus size
    f.seekg(0);
    string s;
    s.resize(static_cast<size_t>(size));
    // Original ignored the read result: a short/failed read would silently
    // return a partially-filled buffer. Fail loudly instead.
    if (!f.read(&s[0], size)) return "";
    s.append(simdjson::SIMDJSON_PADDING, ' ');
    return s;
}

// Benchmark driver: compares Tachyon against simdjson On-Demand on a
// small (~600B) document (latency) and a large (~256MB) GeoJSON-style
// document (throughput). Input files are produced by generate_canada_json.py
// plus a hand-made small.json; results are printed to stdout.
int main() {
string small_str = read_file("small.json");
string large_str = read_file("canada_large.json");

if (small_str.empty() || large_str.empty()) {
cerr << "Files not found!" << endl;
return 1;
}

// read_file() appended SIMDJSON_PADDING spaces; subtract it back out so the
// parsers see the true document length.
size_t small_size = small_str.size() - simdjson::SIMDJSON_PADDING;
size_t large_size = large_str.size() - simdjson::SIMDJSON_PADDING;

cout << "Benchmarking..." << endl;

// 1. Small File Latency
{
cout << "\n--- Small File (600B) Latency ---" << endl;
int iters = 1000;

// Simdjson: parse + extract one field per iteration; timed over all iters.
simdjson::ondemand::parser parser;
auto start = chrono::high_resolution_clock::now();
for(int i=0; i<iters; ++i) {
// Reuses the padded buffer in place; capacity covers the padding bytes.
simdjson::padded_string_view psv(small_str.data(), small_size, small_str.capacity());
auto doc = parser.iterate(psv);
std::string_view s;
doc["name"].get(s);
// Sanity-check the parse once so the work can't be optimized away entirely.
if (i==0 && s != "Small File Test") cerr << "Simdjson mismatch" << endl;
}
auto end = chrono::high_resolution_clock::now();
double d_simd = chrono::duration<double>(end - start).count();
cout << "Simdjson: " << (d_simd/iters)*1e9 << " ns/op" << endl;

// Tachyon: same workload through the project API for an apples-to-apples number.
Tachyon::Context ctx;
start = chrono::high_resolution_clock::now();
for(int i=0; i<iters; ++i) {
auto doc = ctx.parse_view(small_str.data(), small_size);
// NOTE(review): as_string() materializes a std::string copy here while the
// simdjson path extracts a string_view — confirm this asymmetry is intended.
string s = doc["name"].as_string();
if (i==0 && s != "Small File Test") cerr << "Tachyon mismatch" << endl;
}
end = chrono::high_resolution_clock::now();
double d_tach = chrono::duration<double>(end - start).count();
cout << "Tachyon: " << (d_tach/iters)*1e9 << " ns/op" << endl;
}

// 2. Large File Throughput
{
cout << "\n--- Large File (256MB) Throughput ---" << endl;
int iters = 5;

// Simdjson
simdjson::ondemand::parser parser;

// Warmup: one untimed full pass so page faults / parser allocation don't
// land inside the timed region.
{
simdjson::padded_string_view psv(large_str.data(), large_size, large_str.capacity());
auto doc = parser.iterate(psv);
for(auto feat : doc["features"]) { feat["geometry"]["type"]; }
}

auto start = chrono::high_resolution_clock::now();
for(int i=0; i<iters; ++i) {
simdjson::padded_string_view psv(large_str.data(), large_size, large_str.capacity());
auto doc = parser.iterate(psv);
// Touch one nested field per feature to force traversal of every element.
for(auto feat : doc["features"]) {
feat["geometry"]["type"];
}
}
auto end = chrono::high_resolution_clock::now();
double d_simd = chrono::duration<double>(end - start).count();
// Total megabytes processed across all timed iterations; also reused for
// the Tachyon figure below (same scope).
double mb = (large_size * iters) / 1024.0 / 1024.0;
cout << "Simdjson: " << mb / d_simd << " MB/s" << endl;

// Tachyon
Tachyon::Context ctx;

// Warmup (untimed), mirroring the simdjson warmup above.
{
auto doc = ctx.parse_view(large_str.data(), large_size);
auto arr = doc["features"];
size_t sz = arr.size();
for(size_t k=0; k<sz; ++k) { arr[k]["geometry"]["type"].as_string(); }
}

start = chrono::high_resolution_clock::now();
for(int i=0; i<iters; ++i) {
auto doc = ctx.parse_view(large_str.data(), large_size);
auto arr = doc["features"];
size_t sz = arr.size();
for(size_t k=0; k<sz; ++k) {
arr[k]["geometry"]["type"].as_string();
}
}
end = chrono::high_resolution_clock::now();
double d_tach = chrono::duration<double>(end - start).count();
cout << "Tachyon: " << mb / d_tach << " MB/s" << endl;
}

return 0;
}
59 changes: 59 additions & 0 deletions generate_canada_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import json
import random
import os

def generate_canada_json(filename, target_size_mb):
    """Write a synthetic GeoJSON-style FeatureCollection of random polygons.

    Streams features to disk until the running byte count reaches the target,
    so the whole document is never held in memory.

    Args:
        filename: Output path for the generated JSON file.
        target_size_mb: Approximate target file size in megabytes; the final
            file may overshoot by up to one feature.
    """
    # NOTE: the original printed the literal "(unknown)" here — restored to
    # interpolate the actual filename.
    print(f"Generating {filename} with target size {target_size_mb} MB...")

    target_size_bytes = target_size_mb * 1024 * 1024

    # Fixed wrapper around the streamed feature list.
    header = '{"type":"FeatureCollection","features":['
    footer = ']}'

    # Running size estimate; header and footer are counted up front.
    current_size = len(header) + len(footer)

    with open(filename, 'w') as f:
        f.write(header)

        feature_count = 0
        while current_size < target_size_bytes:
            if feature_count > 0:
                f.write(',')
                current_size += 1

            # Cluster each polygon's points around a random base location
            # roughly within Canada's longitude/latitude range.
            base_x = random.uniform(-140, -50)
            base_y = random.uniform(40, 80)

            points = 2000  # Number of points per polygon ring

            # Build the entire coordinate ring as one string so each feature
            # is a single write() call.
            coord_strs = []
            for i in range(points):
                x = base_x + random.uniform(-0.1, 0.1)
                y = base_y + random.uniform(-0.1, 0.1)
                coord_strs.append(f"[{x:.6f},{y:.6f}]")

            coords_str = ",".join(coord_strs)

            feature_str = (
                f'{{"type":"Feature","properties":{{"name":"Region {feature_count}"}},'
                f'"geometry":{{"type":"Polygon","coordinates":[[{coords_str}]]}}}}'
            )

            f.write(feature_str)
            current_size += len(feature_str)
            feature_count += 1

            if feature_count % 100 == 0:
                print(f"Generated {feature_count} features. Current size: {current_size / 1024 / 1024:.2f} MB", end='\r')

        f.write(footer)

    print(f"\nDone! File {filename} created. Size: {os.path.getsize(filename) / 1024 / 1024:.2f} MB")

# Script entry point: generate the ~256 MB fixture consumed by
# benchmark_optimization.cpp.
if __name__ == "__main__":
    generate_canada_json("canada_large.json", 256)
Loading