Skip to content

Commit ad6927f

Browse files
authored
Merge pull request #3 from StacLabs/add-server-logging
- **Intelligent Batch Logging**: Enhanced the `/validate` endpoint with high-visibility logging for both single-item and bulk `ItemCollection` requests.
- **Error Aggregation**: Implemented a frequency-based error summarizer for large batches. Instead of flooding logs, the service now identifies and counts unique failure reasons (e.g., "Top failure reason (99/100): 'datetime' is required").
- **Schema Contextualization**: Validation failures now include the specific `AbsoluteKeywordLocation`, allowing developers to click directly to the failing STAC Extension schema.
- **Performance Metrics**: Real-time execution timing added to all validation logs to monitor throughput and latency.
- **Project Tooling**: Added a `Makefile` and `go test` suite to standardize the contributor workflow and ensure build stability. [#3](https://github.com/StacLabs/gostac-validator/pull/3)
2 parents e3819a7 + 3979984 commit ad6927f

8 files changed

Lines changed: 198 additions & 71 deletions

File tree

.github/workflows/lint.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Lint workflow: runs golangci-lint on pushes to main and on pull requests
# that target main.
name: golangci-lint
2+
on:
3+
push:
4+
branches: [ main ]
5+
pull_request:
6+
branches: [ main ]
7+
8+
jobs:
9+
golangci-lint:
10+
name: lint
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v4
14+
15+
- name: Set up Go
16+
uses: actions/setup-go@v5
17+
with:
18+
# NOTE(review): go.mod and the Dockerfile in this commit use 1.26 without a
# patch version; confirm the exact 1.26.1 pin here is intended.
go-version: '1.26.1'
19+
cache: true
20+
21+
# golangci-lint is built from source at a pinned version and then invoked
# from GOPATH/bin with a 10 minute timeout.
- name: Install and Run golangci-lint
22+
run: |
23+
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.5
24+
$(go env GOPATH)/bin/golangci-lint run --timeout=10m --out-format=github-actions

CHANGELOG.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.1.1] - 2026-03-29
9+
### Added
10+
- **Intelligent Batch Logging**: Enhanced the `/validate` endpoint with high-visibility logging for both single-item and bulk `ItemCollection` requests.
11+
- **Error Aggregation**: Implemented a frequency-based error summarizer for large batches. Instead of flooding logs, the service now identifies and counts unique failure reasons (e.g., "Top failure reason (99/100): 'datetime' is required").
12+
- **Schema Contextualization**: Validation failures now include the specific `AbsoluteKeywordLocation`, allowing developers to click directly to the failing STAC Extension schema.
13+
- **Performance Metrics**: Real-time execution timing added to all validation logs to monitor throughput and latency.
14+
- **Project Tooling**: Added a `Makefile` and `go test` suite to standardize the contributor workflow and ensure build stability. [#3](https://github.com/StacLabs/gostac-validator/pull/3)
15+
816
## [0.1.0] - 2026-03-29
917
### Added
1018
- Core STAC validation engine with PCRE regex (`^(?!eo:)`) support via `regexp2`.
@@ -14,5 +22,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1422
- High-performance CLI tool for local STAC validation.
1523
- Dockerfile for microservice deployment.
1624

17-
[Unreleased]: https://github.com/StacLabs/gostac-validator/compare/v0.1.0...main
18-
[v0.1.0]: https://github.com/StacLabs/gostac-validator/compare/v0.1.0...v0.0.0
25+
[Unreleased]: https://github.com/StacLabs/gostac-validator/compare/v0.1.1...HEAD
26+
[0.1.1]: https://github.com/StacLabs/gostac-validator/compare/v0.1.0...v0.1.1
27+
[0.1.0]: https://github.com/StacLabs/gostac-validator/releases/tag/v0.1.0

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Stage 1: Build the binary
2-
FROM golang:1.26.1-alpine AS builder
2+
FROM golang:1.26-alpine AS builder
33

44
# Install git and ca-certificates
55
RUN apk update && apk add --no-cache git ca-certificates tzdata

Makefile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Makefile for gostac-validator

# Every target here is a command, not a file, so all of them are phony.
.PHONY: all build test tidy check clean

# Default target: run the checks, then build the server binary.
all: check build

# Pre-commit checks. `go mod tidy` and `go fmt` rewrite files in place;
# `go vet` and `go test` only read them.
check:
	go mod tidy
	go fmt ./...
	go vet ./...
	go test ./...

# Sync go.mod/go.sum with the imports the code actually uses.
tidy:
	go mod tidy

# Compile the HTTP server into ./stac-server.
build:
	go build -o stac-server ./cmd/server

# Run the test suite with verbose output.
test:
	go test -v ./...

# Remove the binary produced by `build`.
clean:
	rm -f stac-server

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/StacLabs/gostac-validator
22

3-
go 1.26.1
3+
go 1.26
44

55
require (
66
github.com/dlclark/regexp2 v1.11.0

internal/server/handlers.go

Lines changed: 63 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,124 +7,147 @@ package server
77
import (
88
"bytes"
99
"encoding/json"
10+
"log"
1011
"net/http"
1112
"sync"
13+
"time"
1214

1315
"github.com/StacLabs/gostac-validator/internal/validator"
1416
"github.com/santhosh-tekuri/jsonschema/v6"
1517
)
1618

17-
// Handler holds the dependencies required by the HTTP endpoints.
18-
// It wraps the core STAC validator so that all requests share the same
19-
// thread-safe, in-memory schema cache.
19+
// Handler holds the dependencies for the HTTP server: the STAC validator that
// the endpoints registered by RegisterRoutes use for every request.
2020
type Handler struct {
2121
// validator is the instance passed to NewHandler.
validator *validator.STAC
2222
}
2323

24-
// NewHandler creates a new HTTP Handler injected with the provided STAC validator.
24+
// NewHandler initializes the handler with a shared validator instance.
2525
func NewHandler(v *validator.STAC) *Handler {
2626
return &Handler{validator: v}
2727
}
2828

29-
// RegisterRoutes attaches the API endpoints to the provided HTTP multiplexer.
29+
// RegisterRoutes defines the API endpoints.
3030
func (h *Handler) RegisterRoutes(mux *http.ServeMux) {
3131
mux.HandleFunc("POST /validate", h.Validate)
3232
mux.HandleFunc("GET /health", h.Health)
3333
}
3434

35-
// BatchResponse represents the JSON payload returned by the /validate endpoint.
36-
// It provides a high-level summary of the batch operation alongside the detailed
37-
// validation results for each individual STAC item processed.
35+
// BatchResponse defines the JSON structure returned to the client by the
// /validate endpoint: summary counts for the request plus the validation
// result of every item.
3836
type BatchResponse struct {
3937
// TotalProcessed is the number of items that were submitted for validation.
TotalProcessed int `json:"total_processed"`
4038
// ValidCount is the number of results whose Valid flag is true.
ValidCount int `json:"valid_count"`
4139
// InvalidCount is TotalProcessed minus ValidCount.
InvalidCount int `json:"invalid_count"`
4240
// Results holds one entry per submitted item, in input order.
Results []validator.Result `json:"results"`
4341
}
4442

45-
// Validate is the primary endpoint for STAC validation (POST /validate).
46-
// It intelligently detects the shape of the incoming JSON payload. If the payload
47-
// is a single STAC Item, it wraps it in a slice. If it is an array of items or a
48-
// FeatureCollection/ItemCollection, it extracts the items and validates them all
49-
// concurrently using a Goroutine worker pool.
43+
// Validate handles STAC validation for single items or batches.
5044
func (h *Handler) Validate(w http.ResponseWriter, r *http.Request) {
45+
start := time.Now()
46+
5147
body, err := readBody(w, r)
5248
if err != nil {
49+
log.Printf("❌ Error reading body: %v", err)
5350
writeError(w, http.StatusBadRequest, "could not read request body: "+err.Error())
5451
return
5552
}
5653

57-
// Parse JSON safely using jsonschema's unmarshaler to prevent Go from
58-
// silently truncating highly precise geographic coordinate floats.
5954
instance, err := jsonschema.UnmarshalJSON(bytes.NewReader(body))
6055
if err != nil {
56+
log.Printf("❌ Error parsing JSON: %v", err)
6157
writeError(w, http.StatusBadRequest, "invalid JSON: "+err.Error())
6258
return
6359
}
6460

6561
var itemsToValidate []any
6662

67-
// Smart Routing: Inspect the generic parsed JSON to determine its structure.
63+
// Smart Routing logic
6864
switch data := instance.(type) {
6965
case []any:
70-
// Payload is a raw JSON array of STAC objects.
7166
itemsToValidate = data
7267
case map[string]any:
73-
// Payload is a JSON object. Check if it is a collection of features.
7468
if typ, ok := data["type"].(string); ok && (typ == "FeatureCollection" || typ == "ItemCollection") {
7569
if features, ok := data["features"].([]any); ok {
7670
itemsToValidate = features
7771
} else {
78-
writeError(w, http.StatusBadRequest, "FeatureCollection is missing the 'features' array")
72+
writeError(w, http.StatusBadRequest, "Collection is missing the 'features' array")
7973
return
8074
}
8175
} else {
82-
// Payload is a single STAC Item, Catalog, or Collection. Wrap it for the batch processor.
8376
itemsToValidate = []any{data}
8477
}
8578
default:
8679
writeError(w, http.StatusBadRequest, "Unrecognized JSON structure")
8780
return
8881
}
8982

90-
// Process the entire batch concurrently.
83+
// Process batch concurrently
9184
results := h.validateConcurrent(itemsToValidate)
9285

93-
// Tally the results for the summary payload.
9486
validCount := 0
87+
type errorKey struct {
88+
Message string
89+
Schema string
90+
}
91+
errorFrequencies := make(map[errorKey]int)
92+
9593
for _, res := range results {
9694
if res.Valid {
9795
validCount++
96+
} else if len(res.Errors) > 0 {
97+
// We group errors by Message and the Schema URL that failed
98+
key := errorKey{
99+
Message: res.Errors[0].Message,
100+
Schema: res.Errors[0].AbsoluteKeywordLocation, // Use AbsoluteKeywordLocation here
101+
}
102+
errorFrequencies[key]++
98103
}
99104
}
105+
invalidCount := len(itemsToValidate) - validCount
100106

101-
response := BatchResponse{
102-
TotalProcessed: len(itemsToValidate),
103-
ValidCount: validCount,
104-
InvalidCount: len(itemsToValidate) - validCount,
105-
Results: results,
107+
// Log metrics with the new intelligent titles and error aggregation
108+
duration := time.Since(start)
109+
total := len(itemsToValidate)
110+
111+
if total == 1 {
112+
if invalidCount > 0 && len(results[0].Errors) > 0 {
113+
err := results[0].Errors[0]
114+
log.Printf("📄 SINGLE ITEM | Valid: 0 | Invalid: 1 | Time: %v | Reason: %s (Schema: %s)",
115+
duration, err.Message, err.AbsoluteKeywordLocation)
116+
} else {
117+
log.Printf("📄 SINGLE ITEM | Valid: 1 | Invalid: 0 | Time: %v", duration)
118+
}
119+
} else {
120+
log.Printf("⚡ BATCH PROCESSED | Total: %d | Valid: %d | Invalid: %d | Time: %v",
121+
total, validCount, invalidCount, duration)
122+
123+
if invalidCount > 0 {
124+
log.Printf(" -> Failure Summary (%d unique error types):", len(errorFrequencies))
125+
for key, count := range errorFrequencies {
126+
log.Printf(" - [%d items]: %s", count, key.Message)
127+
log.Printf(" Context: %s", key.Schema)
128+
}
129+
}
106130
}
107131

108-
writeJSON(w, http.StatusOK, response)
132+
writeJSON(w, http.StatusOK, BatchResponse{
133+
TotalProcessed: total,
134+
ValidCount: validCount,
135+
InvalidCount: invalidCount,
136+
Results: results,
137+
})
109138
}
110139

111-
// validateConcurrent validates a slice of STAC objects simultaneously.
112-
// It spawns a Goroutine for every item in the slice, allowing massive batches
113-
// to be processed in the same time it takes to process a single item. It uses a
114-
// sync.WaitGroup to block until all items have finished validating against the RAM cache.
140+
// validateConcurrent runs the validator across multiple goroutines.
115141
func (h *Handler) validateConcurrent(items []any) []validator.Result {
116142
results := make([]validator.Result, len(items))
117143
var wg sync.WaitGroup
118144

119145
for i, item := range items {
120146
wg.Add(1)
121-
122147
go func(index int, stacItem any) {
123148
defer wg.Done()
124-
125149
res, err := h.validator.Validate(stacItem)
126150
if err != nil {
127-
// Fallback for edge cases where the item itself is fundamentally malformed
128151
res = validator.Result{
129152
Valid: false,
130153
Errors: []validator.Error{{Message: err.Error()}},
@@ -133,23 +156,18 @@ func (h *Handler) validateConcurrent(items []any) []validator.Result {
133156
results[index] = res
134157
}(i, item)
135158
}
136-
137159
wg.Wait()
138-
139160
return results
140161
}
141162

142-
// Health is a simple liveness probe endpoint (GET /health) used by Docker
143-
// or Kubernetes to ensure the HTTP server is responsive.
163+
// Health is a liveness probe.
144164
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
145165
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
146166
}
147167

148-
// readBody safely reads the HTTP request body into a byte slice.
149-
// It enforces a 50 MiB limit using http.MaxBytesReader to prevent malicious
150-
// or accidentally massive payloads from causing Out-Of-Memory (OOM) crashes.
168+
// readBody reads the request body into memory, capping it at 150 MiB
// (150 << 20 bytes) with http.MaxBytesReader.
151169
func readBody(w http.ResponseWriter, r *http.Request) ([]byte, error) {
152-
const maxBytes = 150 << 20 // 150 MiB
170+
const maxBytes = 150 << 20
153171
r.Body = http.MaxBytesReader(w, r.Body, maxBytes)
154172
var buf bytes.Buffer
155173
if _, err := buf.ReadFrom(r.Body); err != nil {
@@ -158,16 +176,14 @@ func readBody(w http.ResponseWriter, r *http.Request) ([]byte, error) {
158176
return buf.Bytes(), nil
159177
}
160178

161-
// writeJSON serializes the provided Go data structure into JSON and writes it
162-
// to the HTTP response with the specified status code and headers.
179+
// writeJSON writes v to w as a JSON response with the given HTTP status code
// and a Content-Type of application/json.
//
// The status line and headers are sent before encoding starts, so an encoding
// or write failure can no longer be reported to the client. It is logged
// instead of being silently dropped.
func writeJSON(w http.ResponseWriter, status int, v any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if err := json.NewEncoder(w).Encode(v); err != nil {
		log.Printf("❌ Error writing JSON response: %v", err)
	}
}
168185

169-
// writeError is a convenience helper for formatting human-readable error messages
170-
// into a standardized JSON response.
186+
// writeError is a helper for JSON error messages.
171187
func writeError(w http.ResponseWriter, status int, msg string) {
172188
writeJSON(w, status, map[string]string{"error": msg})
173189
}

0 commit comments

Comments
 (0)