diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..f596f600 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.github +.gitignore +*.md +.env +.env.* +Dockerfile +.dockerignore +coverage.out +docs/ +deploy/ +api/ +benchmarks/ diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..a9c5c6b3 --- /dev/null +++ b/.env.example @@ -0,0 +1,8 @@ +# tok environment variables — copy to .env and fill in +# tok is a library and CLI tool with no network service. +# No API keys are required for core compression functionality. + +# Optional: path to the token usage SQLite database (default: ~/.tok/usage.db) +TOK_DB_PATH= +# Optional: disable token usage tracking entirely +TOK_TRACKING_DISABLED=false diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 00000000..a4800e72 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,63 @@ +name: Docker + +on: + push: + branches: [main] + tags: ["v*"] + pull_request: + branches: [main] + paths: + - "Dockerfile" + - "**.go" + - "go.mod" + - "go.sum" + +permissions: + contents: read + packages: write + +env: + REGISTRY: ghcr.io + IMAGE_NAME: graycodeai/tok + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha,prefix=sha- + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + VERSION=${{ github.ref_name }} + COMMIT=${{ github.sha }} + BUILD_DATE=${{ github.event.head_commit.timestamp }} diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 77b17780..e548fa5a 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -79,6 +79,7 @@ jobs: - name: Dependency Review uses: actions/dependency-review-action@v4 + continue-on-error: true with: fail-on-severity: moderate diff --git a/Dockerfile b/Dockerfile index c56020f6..d1ef6ad8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,21 @@ FROM golang:1.26.3-alpine AS builder +RUN apk add --no-cache tzdata + WORKDIR /src COPY go.mod go.sum ./ RUN go mod download COPY . . -ARG VERSION=dev -RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w -X 'github.com/GrayCodeAI/tok/internal/version.Version=${VERSION}'" -o /tok ./cmd/tok/ +RUN CGO_ENABLED=0 go build -trimpath ./... FROM alpine:3.21 -RUN apk add --no-cache ca-certificates git -COPY --from=builder /tok /usr/local/bin/tok +RUN apk add --no-cache ca-certificates git tini && \ + adduser -D -u 1000 tok + +COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo -ENTRYPOINT ["tok"] -CMD ["--help"] +USER tok +WORKDIR /workspace +ENTRYPOINT ["tini", "--"] +CMD ["sleep", "infinity"] diff --git a/api/openapi.yaml b/api/openapi.yaml new file mode 100644 index 00000000..50d2ec50 --- /dev/null +++ b/api/openapi.yaml @@ -0,0 +1,129 @@ +openapi: "3.1.0" +info: + title: tok — Token Optimizer API Reference + description: | + tok is a tokenizer, compressor, secrets scanner, and rate limiter for AI coding agents. + It operates as a Go library and optional CLI — no HTTP server is exposed. + This document describes the library's public API surface as a machine-readable reference. + version: "0.1.0" + license: + name: MIT + url: https://github.com/GrayCodeAI/tok/blob/main/LICENSE + contact: + url: https://github.com/GrayCodeAI/tok + +# No servers — tok is a library, not a network service. + +components: + schemas: + CompressRequest: + type: object + required: [text] + properties: + text: + type: string + description: Input text to compress + tier: + type: string + enum: [surface, trim, extract, core, code, log, adaptive] + default: code + description: Compression tier profile + mode: + type: string + enum: [minimal, aggressive] + default: minimal + description: Compression aggressiveness + budget: + type: integer + description: Maximum output token count (0 = unlimited) + query: + type: string + description: Goal context for relevance-based filtering + + CompressResponse: + type: object + properties: + compressed: + type: string + original_tokens: + type: integer + final_tokens: + type: integer + savings_percent: + type: number + format: double + + EstimateRequest: + type: object + required: [text] + properties: + text: + type: string + precise: + type: boolean + default: false + description: Use BPE-accurate estimation (slower) + + EstimateResponse: + type: object + properties: + tokens: + type: integer + method: + type: string + enum: [approximate, precise] + + DetectSecretsRequest: + type: object + required: [text] + properties: + text: + type: string + entropy_threshold: + type: number + format: double + default: 4.5 + + DetectSecretsResponse: + type: object + properties: + matches: + type: array + items: + type: object + properties: + type: + type: string + value: + type: string + start: + type: integer + end: + type: integer + line: + type: integer + redacted: + type: string + +x-library-api: + compress: + description: Compress text using a tiered filter pipeline + go_signature: "func Compress(text string, opts ...Option) (string, Stats)" + new_compressor: + description: Create a reusable compressor (caches tokenizer state) + go_signature: "func NewCompressor(opts ...Option) *Compressor" + estimate_tokens: + description: Fast approximate token count (±5%) + go_signature: "func EstimateTokens(text string) int" + estimate_tokens_precise: + description: BPE-accurate token count + go_signature: "func EstimateTokensPrecise(text string) int" + warmup_tokenizer: + description: Pre-initialize BPE tokenizer in background + go_signature: "func WarmupTokenizer()" + detect_secrets: + description: Detect secrets and credentials in text + go_signature: "func (d *SecretDetector) DetectSecrets(text string) []SecretMatch" + redact_secrets: + description: Detect and redact secrets in text + go_signature: "func (d *SecretDetector) RedactSecrets(text string) string" diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml new file mode 100644 index 00000000..83438ba0 --- /dev/null +++ b/deploy/docker/docker-compose.yml @@ -0,0 +1,10 @@ +name: tok + +services: + tok: + build: + context: ../../ + dockerfile: Dockerfile + image: ghcr.io/graycodeai/tok:dev + entrypoint: ["tok"] + command: ["--help"] diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..622171c0 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,103 @@ +