Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
name: ci

on:
push:
branches: [main]
paths:
- 'app/**'
- '.github/workflows/ci.yml'
pull_request:
branches: [main]
paths:
- 'app/**'
- '.github/workflows/ci.yml'

concurrency:
group: ci-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

permissions:
contents: read

jobs:
changes:
name: changes
runs-on: ubuntu-24.04
outputs:
app_code: ${{ steps.filter.outputs.app_code }}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 0
- uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: filter
with:
filters: |
app_code:
- 'app/**'
- '!app/**/*.md'
- '.github/workflows/ci.yml'

vet:
name: vet
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
go-version: ['1.23', '1.24']
defaults:
run:
working-directory: app
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 1
- uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
with:
go-version: ${{ matrix.go-version }}
cache: true
cache-dependency-path: app/go.mod
- run: go vet ./...

test:
name: test
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
go-version: ['1.23', '1.24']
defaults:
run:
working-directory: app
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 1
- uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
with:
go-version: ${{ matrix.go-version }}
cache: true
cache-dependency-path: app/go.mod
- env:
GOFLAGS: -buildvcs=false
run: go test -race -count=1 ./...

govulncheck:
name: govulncheck
runs-on: ubuntu-24.04
defaults:
run:
working-directory: app
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 1
- uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
with:
go-version: '1.26.4'
cache: true
cache-dependency-path: app/go.mod
- name: Install govulncheck
run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4
- name: Run govulncheck
run: $(go env GOPATH)/bin/govulncheck ./...

lint:
name: lint
needs: changes
if: ${{ github.event_name == 'push' || needs.changes.outputs.app_code == 'true' }}
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 1
- uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7.0.0
with:
version: v2.5.0
working-directory: app
cache: true

ci-ok:
name: ci-ok
if: always()
needs: [vet, test, lint, govulncheck]
runs-on: ubuntu-24.04
steps:
- run: |
test "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" = "false"
6 changes: 6 additions & 0 deletions app/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
data/
*.pcap
lab4-trace*
sudo
wsl
.git
32 changes: 32 additions & 0 deletions app/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# syntax=docker/dockerfile:1

FROM golang:1.24.13-alpine AS builder
WORKDIR /src

COPY go.mod ./
RUN go mod download

COPY . .
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
-trimpath -ldflags='-s -w' -o /quicknotes .

# Tiny static HTTP probe — distroless has no shell/curl for HEALTHCHECK
RUN printf '%s\n' \
'package main' \
'import ("net/http"; "os")' \
'func main() {' \
' r, err := http.Get("http://127.0.0.1:8080/health")' \
' if err != nil || r == nil || r.StatusCode != http.StatusOK { os.Exit(1) }' \
'}' \
> /healthcheck.go && \
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
-trimpath -ldflags='-s -w' -o /healthcheck /healthcheck.go

FROM gcr.io/distroless/static:nonroot
COPY --from=builder /quicknotes /quicknotes
COPY --from=builder /healthcheck /healthcheck
COPY seed.json /seed.json

EXPOSE 8080
USER nonroot
ENTRYPOINT ["/quicknotes"]
30 changes: 29 additions & 1 deletion app/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func do(t *testing.T, srv *Server, method, target string, body any) *httptest.Re
}
req := httptest.NewRequest(method, target, &buf)
rec := httptest.NewRecorder()
srv.Routes().ServeHTTP(rec, req)
srv.Handler().ServeHTTP(rec, req)
return rec
}

Expand Down Expand Up @@ -131,3 +131,31 @@ func TestMetrics_ExposesPrometheusFormat(t *testing.T) {
}
}

// TestSecurityHeaders_PresentOnResponses issues GET /health through the do
// helper and reports every baseline security header whose value on the
// recorded response differs from the expected one.
func TestSecurityHeaders_PresentOnResponses(t *testing.T) {
	srv := newTestServer(t)
	resp := do(t, srv, http.MethodGet, "/health", nil)

	expected := []struct{ name, value string }{
		{"X-Content-Type-Options", "nosniff"},
		{"X-Frame-Options", "DENY"},
		{"Content-Security-Policy", "default-src 'none'"},
		{"Referrer-Policy", "no-referrer"},
	}

	headers := resp.Header()
	for _, h := range expected {
		got := headers.Get(h.name)
		if got == h.value {
			continue
		}
		// Errorf (not Fatalf) so one run reports every mismatching header.
		t.Errorf("%s = %q, want %q", h.name, got, h.value)
	}
}

// TestSecurityHeaders_AbsentWithoutMiddleware serves GET /health directly from
// the handler returned by Routes and fails if X-Content-Type-Options is set on
// the recorded response.
func TestSecurityHeaders_AbsentWithoutMiddleware(t *testing.T) {
	bare := newTestServer(t).Routes()

	rec := httptest.NewRecorder()
	bare.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/health", nil))

	got := rec.Header().Get("X-Content-Type-Options")
	if got == "" {
		return
	}
	t.Fatalf("expected no security headers without middleware, got X-Content-Type-Options=%q", got)
}

2 changes: 1 addition & 1 deletion app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func main() {
server := NewServer(store)
srv := &http.Server{
Addr: addr,
Handler: server.Routes(),
Handler: server.Handler(),
ReadHeaderTimeout: 5 * time.Second,
}

Expand Down
19 changes: 19 additions & 0 deletions app/middleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package main

import "net/http"

// SecurityHeaders wraps next in a handler that sets the baseline HTTP security
// headers on the response and then delegates the request to next.
func SecurityHeaders(next http.Handler) http.Handler {
	// Name/value pairs written on every request before next runs.
	baseline := [...][2]string{
		{"X-Content-Type-Options", "nosniff"},
		{"X-Frame-Options", "DENY"},
		{"Content-Security-Policy", "default-src 'none'"},
		{"Referrer-Policy", "no-referrer"},
	}

	withHeaders := func(w http.ResponseWriter, r *http.Request) {
		hdr := w.Header()
		for _, kv := range baseline {
			hdr.Set(kv[0], kv[1])
		}
		next.ServeHTTP(w, r)
	}
	return http.HandlerFunc(withHeaders)
}

// Handler builds the production handler chain: the handler returned by Routes,
// wrapped in the SecurityHeaders middleware.
func (s *Server) Handler() http.Handler {
	routes := s.Routes()
	return SecurityHeaders(routes)
}
69 changes: 69 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
services:
vol-init:
image: busybox:1.36
user: "0"
volumes:
- quicknotes-data:/data
command: ["sh", "-c", "mkdir -p /data && chown 65532:65532 /data"]
restart: "no"

quicknotes:
build:
context: ./app
dockerfile: Dockerfile
image: quicknotes:lab6
depends_on:
vol-init:
condition: service_completed_successfully
ports:
- "8080:8080"
environment:
ADDR: ":8080"
DATA_PATH: "/data/notes.json"
SEED_PATH: "/seed.json"
volumes:
- quicknotes-data:/data
restart: unless-stopped
healthcheck:
test: ["CMD", "/healthcheck"]
interval: 10s
timeout: 3s
retries: 3
start_period: 5s
cap_drop:
- ALL
read_only: true
tmpfs:
- /tmp
security_opt:
- no-new-privileges:true

prometheus:
image: prom/prometheus:v3.2.1
ports:
- "9090:9090"
volumes:
- ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./monitoring/prometheus/rules:/etc/prometheus/rules:ro
depends_on:
quicknotes:
condition: service_healthy
restart: unless-stopped

grafana:
image: grafana/grafana:13.0.3
ports:
- "3000:3000"
environment:
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: lab8-grafana-admin
GF_USERS_ALLOW_SIGN_UP: "false"
volumes:
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
- ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
depends_on:
- prometheus
restart: unless-stopped

volumes:
quicknotes-data:
29 changes: 29 additions & 0 deletions docs/runbook/high-error-rate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Runbook: High HTTP Error Rate

## What this alert means

QuickNotes is returning more than 5% HTTP 4xx/5xx responses sustained for five minutes — users are likely seeing failed requests.

## Triage steps

1. **Confirm the alert** — open Prometheus (`http://localhost:9090/alerts`) or Grafana and verify `HighErrorRate` is `Firing`; note the start time.
2. **Check QuickNotes health** — run `curl -s http://localhost:8080/health` and `docker compose ps quicknotes`; confirm that the container is reported as `healthy` and that the health response shows `status` as `ok`.
3. **Inspect recent logs** — `docker compose logs --tail=100 quicknotes` for panics, permission errors, or repeated 4xx patterns.
4. **Check the error ratio query** — in Prometheus, run:
```promql
sum(rate(quicknotes_http_responses_by_code_total{code=~"4..|5.."}[5m]))
/
sum(rate(quicknotes_http_requests_total[5m]))
```
Break down by the `code` label to see whether errors are mostly 4xx (bad clients) or 5xx (server faults).

## Mitigations

1. **Restart QuickNotes** — `docker compose restart quicknotes` to clear a stuck process or bad in-memory state while you investigate.
2. **Stop bad traffic** — if a script or client is sending malformed `POST /notes` bodies, pause or throttle it; errors should fall below 5% within the next evaluation window.

## Post-incident

1. Write a **blameless postmortem** using the format in [Lecture 1 — postmortems](../../lectures/lec1.md) (what happened, why, action items with owners and dates).
2. Add or tighten tests/alerts if the root cause was preventable (e.g., validation bug, missing rate limit).
3. Update this runbook if any triage step was missing or misleading.
55 changes: 55 additions & 0 deletions monitoring/docs/bonus-checkly-setup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Lab 8 Bonus — Checkly + ngrok setup

## 1. Expose QuickNotes publicly

QuickNotes must be running (`docker compose up -d`).

In a **new PowerShell terminal** (keep it open):

```powershell
ngrok http 8080
```

Copy the **Forwarding** HTTPS URL, e.g. `https://abc123.ngrok-free.app`

Test it:

```powershell
Invoke-RestMethod https://YOUR-NGROK-URL/health
```

## 2. Create Checkly API check (free account)

1. Sign up at https://www.checklyhq.com/
2. **Checks → Add check → API check**
3. Settings:
- **Name:** `QuickNotes health (Lab 8)`
- **URL:** `https://YOUR-NGROK-URL/health`
- **Method:** GET
- **Frequency:** 1 minute
- **Locations:** pick **2 regions** (e.g. `Frankfurt (eu-central-1)` + `Singapore (ap-southeast-1)`)
- **Assertion:** status code equals `200`
- **Assertion:** response time less than `2000` ms
4. Save and enable the check.

## 3. Let it run >= 30 minutes

Leave ngrok + Checkly running. Optionally generate light traffic:

```bash
bash monitoring/scripts/generate-traffic.sh
```

## 4. Collect numbers for `submissions/lab8.md`

**Prometheus (internal):**

```bash
bash monitoring/scripts/bonus-prometheus-snapshot.sh
```

**Checkly (external):** open the check → **Check results** / **Metrics** → note p50/p95 latency and failures per region over the same 30-minute window.

## 5. Stop ngrok when done

`Ctrl+C` in the ngrok terminal.
Loading