|
1 | 1 | # Solution: Level 6 / Project 01 - SQL Connection Simulator |
2 | 2 |
|
3 | | -> **STOP** — Have you attempted this project yourself first? |
| 3 | +> **STOP** -- Have you attempted this project yourself first? |
4 | 4 | > |
5 | 5 | > Learning happens in the struggle, not in reading answers. |
6 | 6 | > Spend at least 20 minutes trying before reading this solution. |
7 | | -> If you are stuck, try the [Walkthrough](./WALKTHROUGH.md) first — it guides |
8 | | -> your thinking without giving away the answer. |
| 7 | +> If you are stuck, try the [README](./README.md) hints or re-read the |
| 8 | +> relevant [concept docs](../../../concepts/) first. |
9 | 9 |
|
10 | 10 | --- |
11 | 11 |
|
12 | | - |
13 | 12 | ## Complete solution |
14 | 13 |
|
15 | 14 | ```python |
16 | | -# WHY health_check: [explain the design reason] |
17 | | -# WHY run_demo_queries: [explain the design reason] |
18 | | -# WHY run: [explain the design reason] |
19 | | -# WHY parse_args: [explain the design reason] |
20 | | -# WHY main: [explain the design reason] |
21 | | -# WHY __init__: [explain the design reason] |
22 | | -# WHY acquire: [explain the design reason] |
23 | | -# WHY release: [explain the design reason] |
24 | | -# WHY close_all: [explain the design reason] |
25 | | -# WHY stats: [explain the design reason] |
26 | | -# WHY _connect_with_retry: [explain the design reason] |
27 | | - |
28 | | -# [paste the complete working solution here] |
29 | | -# Include WHY comments on every non-obvious line. |
| 15 | +"""Level 6 / Project 01 — SQL Connection Simulator. |
| 16 | +
|
| 17 | +Teaches SQLite connection management with context managers, |
| 18 | +connection pooling simulation, retry logic, and health checks. |
| 19 | +""" |
| 20 | + |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import argparse |
| 24 | +import json |
| 25 | +import logging |
| 26 | +import random |
| 27 | +import sqlite3 |
| 28 | +import time |
| 29 | +from dataclasses import dataclass, field |
| 30 | +from pathlib import Path |
| 31 | + |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Default cap on idle connections kept alive for reuse by the pool.
MAX_POOL_SIZE = 5
# Default number of connection attempts before acquire() gives up.
MAX_RETRIES = 3
BASE_BACKOFF_SEC = 0.01  # base wait for exponential backoff; kept small for fast demo runs
| 39 | + |
| 40 | + |
@dataclass(frozen=True)
class ConnectionConfig:
    """Immutable connection configuration.

    WHY frozen? -- The docstring promises immutability; ``frozen=True``
    makes the dataclass actually enforce it, so a config shared between
    pools cannot be mutated behind their back.

    Attributes:
        db_path: SQLite database path (":memory:" for an in-process DB).
        timeout: Seconds sqlite3 waits on a locked database before raising.
        max_retries: Connection attempts made before the pool gives up.
        pool_size: Maximum number of idle connections kept for reuse.
    """

    db_path: str = ":memory:"
    timeout: float = 5.0
    max_retries: int = MAX_RETRIES
    pool_size: int = MAX_POOL_SIZE

    def __post_init__(self) -> None:
        # WHY validate here? -- pool_size=0 silently turns the pool into a
        # no-op (every release closes the connection), and max_retries<1
        # would make every acquire() fail without a single attempt.
        if self.pool_size < 1:
            raise ValueError(f"pool_size must be >= 1, got {self.pool_size}")
        if self.max_retries < 1:
            raise ValueError(f"max_retries must be >= 1, got {self.max_retries}")
| 49 | + |
| 50 | + |
| 51 | +# --------------------------------------------------------------------------- |
| 52 | +# Connection pool |
| 53 | +# --------------------------------------------------------------------------- |
| 54 | + |
| 55 | + |
class ConnectionPool:
    """Simple SQLite connection pool.

    WHY pool connections? -- Creating a new database connection for every
    query involves TCP handshakes, authentication, and memory allocation.
    A pool keeps idle connections ready to reuse, cutting per-query
    overhead from milliseconds to microseconds.
    """

    def __init__(self, config: ConnectionConfig) -> None:
        self.config = config
        # WHY a list for the pool? -- A list acts as a LIFO stack; pop()
        # gives the most recently released connection, which is most likely
        # to still be alive and warm in the OS cache.
        self._pool: list[sqlite3.Connection] = []
        self._created = 0  # connections opened since pool creation
        self._reused = 0   # acquires satisfied from the idle pool

    def acquire(self) -> sqlite3.Connection:
        """Return an existing connection or create a new one."""
        # WHY try the pool first? -- Reusing an idle connection avoids the
        # overhead of sqlite3.connect() entirely. Only when the pool is
        # empty do we pay the cost of opening a new connection.
        if self._pool:
            self._reused += 1
            logging.info("pool_reuse total=%d reused=%d", self._created, self._reused)
            return self._pool.pop()

        conn = self._connect_with_retry()
        self._created += 1
        logging.info("pool_create total=%d reused=%d", self._created, self._reused)
        return conn

    def release(self, conn: sqlite3.Connection) -> None:
        """Return a connection to the pool (or close it if the pool is full)."""
        # WHY check pool_size? -- Without a cap, the pool could grow
        # unboundedly during burst traffic, holding open file descriptors
        # that the OS eventually runs out of.
        if len(self._pool) >= self.config.pool_size:
            conn.close()
            return
        # WHY ping before pooling? -- The caller may hand back a closed or
        # broken connection; pooling it would make a later acquire() return
        # a dead connection that fails on first use.
        try:
            conn.execute("SELECT 1")
        except sqlite3.Error:
            conn.close()  # no-op if already closed; discards broken conns
            return
        self._pool.append(conn)

    def close_all(self) -> None:
        """Drain the pool and close every idle connection."""
        while self._pool:
            self._pool.pop().close()

    def stats(self) -> dict:
        """Return pool health metrics as a JSON-serializable dict."""
        return {
            "created": self._created,
            "reused": self._reused,
            "idle": len(self._pool),
            "pool_size": self.config.pool_size,
        }

    def _connect_with_retry(self) -> sqlite3.Connection:
        """Open a connection, retrying on transient errors.

        Raises:
            ConnectionError: when all ``max_retries`` attempts fail; the
                last underlying error is included in the message.
        """
        last_err: Exception | None = None
        for attempt in range(1, self.config.max_retries + 1):
            try:
                conn = sqlite3.connect(
                    self.config.db_path, timeout=self.config.timeout
                )
                # WHY SELECT 1? -- A connection can be "open" but the
                # database might be locked or corrupted. This lightweight
                # query verifies the connection works end-to-end.
                conn.execute("SELECT 1")
                return conn
            except sqlite3.OperationalError as exc:
                last_err = exc
                # WHY skip the sleep on the last attempt? -- No further
                # retry follows, so backing off would only delay the error.
                if attempt == self.config.max_retries:
                    break
                # WHY exponential backoff? -- A fixed delay hammers a
                # struggling server at constant rate. Doubling the wait
                # gives the server progressively more breathing room.
                wait = BASE_BACKOFF_SEC * (2 ** (attempt - 1))
                logging.warning(
                    "connect_retry attempt=%d wait=%.3fs err=%s",
                    attempt, wait, exc,
                )
                time.sleep(wait)
        raise ConnectionError(
            f"Failed after {self.config.max_retries} retries: {last_err}"
        )
| 140 | + |
| 141 | + |
| 142 | +# --------------------------------------------------------------------------- |
| 143 | +# Health check |
| 144 | +# --------------------------------------------------------------------------- |
| 145 | + |
| 146 | + |
def health_check(conn: sqlite3.Connection) -> dict:
    """Ping the database and report a pass/fail status dict.

    WHY a dedicated health check? -- In production, load balancers and
    monitoring systems poll /health endpoints. This function gives them
    a quick pass/fail signal without running real business queries.
    """
    try:
        (version,) = conn.execute("SELECT sqlite_version()").fetchone()
    except sqlite3.Error as exc:
        return {"status": "unhealthy", "error": str(exc)}
    return {"status": "healthy", "sqlite_version": version}
| 160 | + |
| 161 | + |
| 162 | +# --------------------------------------------------------------------------- |
| 163 | +# Demo workload |
| 164 | +# --------------------------------------------------------------------------- |
| 165 | + |
| 166 | + |
def run_demo_queries(pool: ConnectionPool, labels: list[str]) -> list[dict]:
    """Simulate a workload: create a table, insert rows, query them back.

    WHY try/finally for pool.release? -- If an exception occurs mid-query,
    the connection must still be returned to the pool. Without finally,
    a leaked connection would exhaust the pool under repeated failures.
    """
    conn = pool.acquire()
    try:
        ddl = (
            "CREATE TABLE IF NOT EXISTS events "
            "(id INTEGER PRIMARY KEY, label TEXT NOT NULL)"
        )
        conn.execute(ddl)
        conn.executemany(
            "INSERT INTO events (label) VALUES (?)",
            [(lbl,) for lbl in labels],
        )
        conn.commit()

        cursor = conn.execute("SELECT id, label FROM events ORDER BY id")
        return [{"id": row_id, "label": row_label} for row_id, row_label in cursor]
    finally:
        pool.release(conn)
| 188 | + |
| 189 | + |
| 190 | +# --------------------------------------------------------------------------- |
| 191 | +# Orchestrator |
| 192 | +# --------------------------------------------------------------------------- |
| 193 | + |
| 194 | + |
def run(input_path: Path, output_path: Path, config: ConnectionConfig | None = None) -> dict:
    """Full demo: read labels -> pool -> queries -> stats -> JSON output.

    WHY a single orchestrator? -- Centralizing the pipeline in one function
    makes the flow visible at a glance and gives tests a single entry
    point to exercise the complete happy path.
    """
    if config is None:
        config = ConnectionConfig()
    pool = ConnectionPool(config)

    if not input_path.exists():
        raise FileNotFoundError(f"Input not found: {input_path}")
    raw_lines = input_path.read_text(encoding="utf-8").splitlines()
    labels = [stripped for line in raw_lines if (stripped := line.strip())]

    rows = run_demo_queries(pool, labels)

    # Acquire a second time so the pool's reuse path is exercised too.
    probe = pool.acquire()
    health = health_check(probe)
    pool.release(probe)

    pool.close_all()

    summary = {
        "rows_inserted": len(rows),
        "rows": rows,
        "health": health,
        "pool_stats": pool.stats(),
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")
    logging.info("output=%s rows=%d", output_path, len(rows))
    return summary
| 233 | + |
| 234 | + |
| 235 | +# --------------------------------------------------------------------------- |
| 236 | +# CLI |
| 237 | +# --------------------------------------------------------------------------- |
| 238 | + |
| 239 | + |
def parse_args() -> argparse.Namespace:
    """Build and apply the command-line interface for the demo."""
    cli = argparse.ArgumentParser(
        description="SQL Connection Simulator — connection pooling & retry demo"
    )
    cli.add_argument("--input", default="data/sample_input.txt")
    cli.add_argument("--output", default="data/output_summary.json")
    cli.add_argument("--db", default=":memory:", help="SQLite database path")
    cli.add_argument(
        "--pool-size",
        type=int,
        default=MAX_POOL_SIZE,
        help="Max idle connections",
    )
    return cli.parse_args()
| 251 | + |
| 252 | + |
def main() -> None:
    """CLI entry point: configure logging, run the demo, print the summary."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    args = parse_args()
    cfg = ConnectionConfig(db_path=args.db, pool_size=args.pool_size)
    result = run(Path(args.input), Path(args.output), cfg)
    print(json.dumps(result, indent=2))
| 262 | + |
| 263 | + |
# Run the CLI only when executed as a script, never on import.
if __name__ == "__main__":
    main()
30 | 266 | ``` |
31 | 267 |
|
32 | 268 | ## Design decisions |
33 | 269 |
|
34 | 270 | | Decision | Why | Alternative considered | |
35 | 271 | |----------|-----|----------------------| |
36 | | -| health_check function | [reason] | [alternative] | |
37 | | -| run_demo_queries function | [reason] | [alternative] | |
38 | | -| run function | [reason] | [alternative] | |
| 272 | +| LIFO pool (list with pop/append) | Most-recently-used connection is warmest in OS cache and most likely still valid | FIFO queue -- fairer rotation but older connections are more likely stale | |
| 273 | +| Exponential backoff on retry | Gives a struggling database progressively more breathing room between attempts | Fixed delay -- simpler but can overwhelm a recovering server | |
| 274 | +| `ConnectionConfig` as a dataclass | Groups related settings into one immutable object, easier to pass around and test | Bare kwargs -- flexible but easy to mis-spell or forget a parameter | |
| 275 | +| Separate `health_check` function | Decouples monitoring from business logic; can be reused by different callers | Inline check inside `run` -- couples monitoring to the demo workload | |
39 | 276 |
|
40 | 277 | ## Alternative approaches |
41 | 278 |
|
42 | | -### Approach B: [Name] |
| 279 | +### Approach B: Context manager protocol |
43 | 280 |
|
44 | 281 | ```python |
45 | | -# [Different valid approach with trade-offs explained] |
class PooledConnection:
    """Context manager that borrows a pooled connection for a ``with`` block.

    Guarantees the connection goes back to the pool even when the body
    raises, without the caller writing try/finally by hand.
    """

    def __init__(self, pool: ConnectionPool) -> None:
        self._pool = pool
        self._conn: sqlite3.Connection | None = None

    def __enter__(self) -> sqlite3.Connection:
        self._conn = self._pool.acquire()
        return self._conn

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        borrowed = self._conn
        if borrowed:
            self._conn = None
            self._pool.release(borrowed)

# Usage:
#     with PooledConnection(pool) as conn:
#         conn.execute("SELECT ...")
46 | 301 | ``` |
47 | 302 |
|
48 | | -**Trade-off:** [When you would prefer this approach vs the primary one] |
| 303 | +**Trade-off:** The context manager guarantees release even if the caller forgets `finally`, which is safer in production code. The manual acquire/release approach used in the primary solution is more explicit and better for learning what happens under the hood. |
49 | 304 |
|
50 | | -## What could go wrong |
| 305 | +### Approach C: Thread-safe pool with `queue.Queue` |
51 | 306 |
|
52 | | -| Scenario | What happens | Prevention | |
53 | | -|----------|-------------|------------| |
54 | | -| [bad input] | [error/behavior] | [how to handle] | |
55 | | -| [edge case] | [behavior] | [how to handle] | |
| 307 | +```python |
| 308 | +import queue |
| 309 | + |
class ThreadSafePool:
    """Connection pool safe for concurrent use, backed by a blocking queue.

    WHY queue.Queue? -- It does its own locking, so multiple threads can
    acquire connections without an explicit mutex.
    """

    def __init__(self, config: ConnectionConfig) -> None:
        # BUG FIX: store the config -- acquire() reads self.config.db_path
        # on the queue-empty fallback path, which previously raised
        # AttributeError because self.config was never assigned.
        self.config = config
        self._pool = queue.Queue(maxsize=config.pool_size)

    def acquire(self, timeout: float = 5.0) -> sqlite3.Connection:
        """Take an idle connection, or open a new one if none frees up in time."""
        try:
            return self._pool.get(timeout=timeout)
        except queue.Empty:
            return sqlite3.connect(self.config.db_path)
| 319 | +``` |
| 320 | + |
| 321 | +**Trade-off:** Required when multiple threads share the pool (e.g., a web server). Adds complexity that is unnecessary for single-threaded scripts like this demo. |
56 | 322 |
|
57 | | -## Key takeaways |
| 323 | +## Common pitfalls |
58 | 324 |
|
59 | | -1. [Most important lesson from this project] |
60 | | -2. [Second lesson] |
61 | | -3. [Connection to future concepts] |
| 325 | +| Scenario | What happens | Prevention | |
| 326 | +|----------|-------------|------------| |
| 327 | +| Forgetting to release a connection | Pool drains to zero idle connections; every subsequent acquire creates a new connection, defeating the pool's purpose | Always use `try/finally` or a context manager around `acquire()`/`release()` | |
| 328 | +| Setting `pool_size=0` | Every released connection gets closed immediately -- the pool becomes a no-op and you pay full connect cost every time | Validate `pool_size >= 1` in `ConnectionConfig.__post_init__` | |
| 329 | +| Returning a closed connection to the pool | Next `acquire()` returns a dead connection; the caller's first query raises `ProgrammingError` | Ping the connection (`SELECT 1`) in `release()` before putting it back | |
0 commit comments