diff --git a/SECURITY.md b/SECURITY.md index 8fa6179..dbb0ae3 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -62,10 +62,11 @@ such as Gunicorn behind a reverse proxy. ### Path traversal mitigation -The `utils/file_server.py` module uses `os.path.basename()` to strip any -directory components from starter code paths before resolving them. This -prevents a crafted `starter_code` value in `projects.json` from reading -arbitrary files. +The `utils/file_server.py` module resolves `starter_code` paths only inside the +`starter_code/` directory (including one level of subfolders such as +`realtime_chat/app.py`). It rejects `..` segments and uses `os.path.commonpath` +with the real resolved paths so a crafted `starter_code` value in +`projects.json` cannot read arbitrary files outside that folder. ### No user input is stored diff --git a/data/projects.json b/data/projects.json index 1bad3d3..6dcdb6e 100644 --- a/data/projects.json +++ b/data/projects.json @@ -221,6 +221,84 @@ }, { "id": 8, + "title": "Real-Time Chat Application", + "skills": ["Python", "JavaScript", "HTML", "CSS"], + "level": "Advanced", + "interest": "Web", + "time": "High", + "description": "Build a browser-based chat that pushes messages instantly to every connected client using Flask-SocketIO and the Socket.IO protocol (WebSockets with HTTP long-polling fallback). You will design event names, optional rooms, and a minimal UI while learning concurrency limits and production deployment considerations.", + "features": [ + "Flask-SocketIO server with connect, disconnect, and chat events", + "Browser client using the Socket.IO JS library (emit and on)", + "Broadcast messages to all connected users (or scoped rooms)", + "Simple nickname or session identity shown with each message", + "Optional: typing indicator or join/leave system messages", + "Optional: persist recent messages to SQLite or Redis", + "Basic rate limiting or message length validation on the server", + "Run locally with eventlet/gevent worker for WebSocket support" + ], + "tech_stack": ["Python", "Flask", "Flask-SocketIO", "Socket.IO", "WebSockets", "HTML", "JavaScript", "CSS", "eventlet"], + "roadmap": [ + "Step 1: Study how Socket.IO differs from raw WebSockets and why fallbacks exist", + "Step 2: Create a virtual environment and install Flask, Flask-SocketIO, and an async worker (eventlet)", + "Step 3: Scaffold Flask app with a route that serves a chat HTML template", + "Step 4: Instantiate SocketIO on the Flask app and switch the entrypoint to socketio.run", + "Step 5: Implement connect/disconnect handlers and log or broadcast presence", + "Step 6: Add a chat_message handler that validates payload and emits to all clients", + "Step 7: Wire the front end: io(), form submit emits events, listeners append to the message log", + "Step 8: Add usernames (query param, prompt, or small form) and include them in each payload", + "Step 9: Test with multiple browser tabs; handle edge cases (empty message, disconnect mid-send)", + "Step 10: Document how you would deploy behind a reverse proxy with sticky sessions or Redis message queue" + ], + "resources": [ + "Flask-SocketIO documentation: https://flask-socketio.readthedocs.io", + "Socket.IO protocol overview: https://socket.io/docs/v4", + "Flask WebSockets background: https://flask.palletsprojects.com/en/stable/deploying/asgi", + "MDN WebSockets guide: https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API" + ], + "starter_code": "starter_code/realtime_chat/app.py" + }, + { + "id": 9, + "title": "REST API with JWT Authentication", + "skills": ["Python"], + "level": "Advanced", + "interest": "Web", + "time": "High", + "description": "Design a stateless REST API where clients authenticate with JSON Web Tokens signed by the server. Users register with passwords hashed using bcrypt; login returns a short-lived JWT used on subsequent requests. You will implement registration, login, token verification middleware, and at least one protected resource while following common security practices.", + "features": [ + "POST /register with bcrypt password hashing before storage", + "POST /login returning JWT access token and token type", + "GET /profile (or similar) protected by Bearer JWT in Authorization header", + "Configurable secret key and token expiry via environment variables", + "Consistent JSON error responses with correct HTTP status codes", + "Optional: refresh tokens or token blocklist for logout semantics", + "Optional: SQLite persistence replacing in-memory user store", + "Input validation for username length, password strength, and duplicate users" + ], + "tech_stack": ["Python", "Flask", "PyJWT", "bcrypt", "JSON", "HTTP", "Postman (for testing)"], + "roadmap": [ + "Step 1: Review JWT structure (header, payload, signature) and why secrets must stay server-side", + "Step 2: Install Flask, PyJWT, and bcrypt; set SECRET_KEY from an environment variable", + "Step 3: Define a minimal user model and in-memory or SQLite storage layer", + "Step 4: Implement registration: validate input, bcrypt.hashpw with a generated salt, store hash only", + "Step 5: Implement login: fetch user, bcrypt.checkpw, reject wrong credentials with 401", + "Step 6: On successful login, encode JWT with sub (user id or name) and exp claim", + "Step 7: Write a decorator or helper that extracts Bearer token, decodes JWT, and loads the user", + "Step 8: Protect one or more routes; return 401 for missing/invalid/expired tokens", + "Step 9: Test full flow with curl or Postman (register, login, authorized and unauthorized calls)", + "Step 10: Harden for production: HTTPS, key rotation plan, rate limiting, and never log tokens" + ], + "resources": [ + "PyJWT usage: https://pyjwt.readthedocs.io", + "bcrypt Python bindings: https://github.com/pyca/bcrypt", + "JWT introduction: https://jwt.io/introduction", + "Flask application patterns: https://flask.palletsprojects.com/en/stable/patterns/" + ], + "starter_code": "starter_code/jwt_auth_api.py" + }, + { + "id": 10, "title": "Password Strength Checker", "skills": ["Python"], "level": "Beginner", diff --git a/docs/architecture.md b/docs/architecture.md index ff3ade8..748beb9 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -115,9 +115,11 @@ Functions: - `read_starter_code(project)` — returns `{"filename": ..., "code": ...}` or None - `get_starter_code_dir()` — returns the directory path for `send_from_directory` -The `os.path.basename()` call in `resolve_starter_file` ensures that a -malicious `starter_code` value in the JSON (such as `../../etc/passwd`) cannot -cause a path traversal vulnerability. +Paths in `projects.json` may include subfolders (for example +`starter_code/realtime_chat/app.py`). `resolve_starter_file` strips an optional +`starter_code/` prefix, rejects `..` segments, resolves the path under the real +`starter_code` directory, and verifies containment with `os.path.commonpath` +so a malicious value cannot read files outside that folder. --- diff --git a/routes/main_routes.py b/routes/main_routes.py index 5cb3459..71100f7 100644 --- a/routes/main_routes.py +++ b/routes/main_routes.py @@ -7,7 +7,12 @@ from utils.recommender import get_recommendations, validate_recommendation_inputs from utils.data_loader import find_project_by_id, get_project_stats -from utils.file_server import read_starter_code, resolve_starter_file, get_starter_code_dir +from utils.file_server import ( + read_starter_code, + resolve_starter_file, + get_starter_code_dir, + starter_download_relpath, +) import os # Create the Blueprint that app.py will register @@ -106,6 +111,9 @@ def download_code(project_id): if not full_path: abort(404) - import os - filename = os.path.basename(full_path) - return send_from_directory(get_starter_code_dir(), filename, as_attachment=True) + download_name = starter_download_relpath(full_path) + return send_from_directory( + get_starter_code_dir(), + download_name, + as_attachment=True, + ) diff --git a/starter_code/jwt_auth_api.py b/starter_code/jwt_auth_api.py new file mode 100644 index 0000000..90c231c --- /dev/null +++ b/starter_code/jwt_auth_api.py @@ -0,0 +1,166 @@ +""" +jwt_auth_api.py +=============== +Project: REST API with JWT Authentication (starter) +Stack: Flask, PyJWT, bcrypt, JSON + +What you will build: + A small REST API where users register with a hashed password, log in to + receive a signed JWT, and access protected routes by sending + Authorization: Bearer . + +How to run (from the starter_code directory): + pip install -r jwt_requirements.txt + python jwt_auth_api.py + +Then test with curl or Postman (register -> login -> protected GET with header). + +Learning goals: + - Storing password hashes (never plain text) with bcrypt + - Issuing and verifying JWTs with an expiry claim + - Protecting routes with a decorator or before_request hook + - Refresh tokens or logout strategies (optional stretch goals) + +Security notes: + - Use a strong random SECRET_KEY in production (env var JWT_SECRET_KEY). + - Use HTTPS in production; JWTs in headers over HTTP are vulnerable to MITM. +""" + +import os +from functools import wraps +from datetime import datetime, timedelta, timezone + +import bcrypt +import jwt +from flask import Flask, jsonify, request + +app = Flask(__name__) +app.config["SECRET_KEY"] = os.environ.get("JWT_SECRET_KEY", "dev-only-change-me") + +# In-memory user store for the exercise — replace with a database for production. +USERS = {} # username -> {"password_hash": bytes, "created_at": str} + +JWT_ALGORITHM = "HS256" +TOKEN_HOURS = 24 + + +def utc_now(): + return datetime.now(timezone.utc) + + +def token_required(view_fn): + """Verify Bearer JWT and pass `username` (from claim sub) into the view.""" + + @wraps(view_fn) + def wrapped(*args, **kwargs): + auth = request.headers.get("Authorization", "") + if not auth.startswith("Bearer "): + return jsonify({"error": "Authorization header must be: Bearer "}), 401 + token = auth[7:].strip() + if not token: + return jsonify({"error": "Missing token."}), 401 + try: + payload = jwt.decode( + token, + app.config["SECRET_KEY"], + algorithms=[JWT_ALGORITHM], + ) + username = payload.get("sub") + if not username or not isinstance(username, str): + return jsonify({"error": "Invalid token payload."}), 401 + except jwt.ExpiredSignatureError: + return jsonify({"error": "Token has expired."}), 401 + except jwt.InvalidTokenError: + return jsonify({"error": "Invalid or malformed token."}), 401 + return view_fn(username, *args, **kwargs) + + return wrapped + + +@app.route("/", methods=["GET"]) +def health(): + return jsonify({ + "status": "running", + "message": "JWT Auth API — register, then login, then GET /profile with Bearer token.", + "endpoints": { + "register": "POST /register JSON: {\"username\", \"password\"}", + "login": "POST /login JSON: {\"username\", \"password\"}", + "profile": "GET /profile Header: Authorization: Bearer ", + }, + }), 200 + + +@app.route("/register", methods=["POST"]) +def register(): + """POST /register — create a user with bcrypt-hashed password.""" + data = request.get_json(silent=True) + if not data or not isinstance(data, dict): + return jsonify({"error": "JSON body required."}), 400 + + username = (data.get("username") or "").strip() + password = data.get("password") or "" + + if not username: + return jsonify({"error": "username is required."}), 400 + if len(username) < 3 or len(username) > 64: + return jsonify({"error": "username must be between 3 and 64 characters."}), 400 + if not password: + return jsonify({"error": "password is required."}), 400 + if len(password) < 8: + return jsonify({"error": "password must be at least 8 characters."}), 400 + + if username in USERS: + return jsonify({"error": "Username already taken."}), 409 + + pw_hash = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()) + USERS[username] = { + "password_hash": pw_hash, + "created_at": utc_now().isoformat(), + } + return jsonify({"message": "User created."}), 201 + + +@app.route("/login", methods=["POST"]) +def login(): + """POST /login — verify password and return a JWT.""" + data = request.get_json(silent=True) + if not data or not isinstance(data, dict): + return jsonify({"error": "JSON body required."}), 400 + + username = (data.get("username") or "").strip() + password = data.get("password") or "" + + if not username or not password: + return jsonify({"error": "username and password are required."}), 400 + + user = USERS.get(username) + if not user: + return jsonify({"error": "Invalid username or password."}), 401 + + if not bcrypt.checkpw(password.encode("utf-8"), user["password_hash"]): + return jsonify({"error": "Invalid username or password."}), 401 + + expires = utc_now() + timedelta(hours=TOKEN_HOURS) + payload = { + "sub": username, + "exp": expires, + "iat": utc_now(), + } + token = jwt.encode(payload, app.config["SECRET_KEY"], algorithm=JWT_ALGORITHM) + if isinstance(token, bytes): + token = token.decode("utf-8") + return jsonify({"access_token": token, "token_type": "Bearer"}), 200 + + +@app.route("/profile", methods=["GET"]) +@token_required +def profile(username): + """GET /profile — requires Authorization: Bearer .""" + if username not in USERS: + return jsonify({"error": "User no longer exists."}), 404 + return jsonify({"username": username}), 200 + + +if __name__ == "__main__": + print("JWT Auth API on http://127.0.0.1:5000") + app.run(debug=True) diff --git a/starter_code/jwt_requirements.txt b/starter_code/jwt_requirements.txt new file mode 100644 index 0000000..68172ce --- /dev/null +++ b/starter_code/jwt_requirements.txt @@ -0,0 +1,4 @@ +# Install for jwt_auth_api.py (run from repo root or starter_code/) +flask>=3.0.0 +pyjwt>=2.8.0 +bcrypt>=4.1.0 diff --git a/starter_code/realtime_chat/app.py b/starter_code/realtime_chat/app.py new file mode 100644 index 0000000..0793c2e --- /dev/null +++ b/starter_code/realtime_chat/app.py @@ -0,0 +1,66 @@ +""" +app.py +====== +Project: Real-Time Chat Application (starter) +Stack: Flask, Flask-SocketIO, WebSockets (via engine.io), HTML/JS client + +What you will build: + A multi-client chat room where messages appear instantly without polling. + The browser uses the Socket.IO JavaScript client; the server uses + Flask-SocketIO with event handlers for connect, disconnect, and chat. + +How to run (from this directory): + pip install -r requirements.txt + python app.py + +Then open http://127.0.0.1:5000 in two browser tabs. + +Learning goals: + - Real-time bidirectional communication (not request/response only) + - Broadcasting messages to all connected clients (or rooms) + - Optional: named rooms, private messages, typing indicators, persistence +""" + +from flask import Flask, render_template +from flask_socketio import SocketIO, emit + +app = Flask(__name__) +app.config["SECRET_KEY"] = "change-me-in-production" + +# cors_allowed_origins="*" is convenient for local dev only — tighten for production. +socketio = SocketIO(app, cors_allowed_origins="*") + + +@app.route("/") +def index(): + """Serve the chat page template.""" + return render_template("chat.html") + + +@socketio.on("connect") +def handle_connect(): + emit("system", {"msg": "You are connected. Say hello!"}) + + +@socketio.on("disconnect") +def handle_disconnect(): + # Optional: broadcast that someone left (omit if you prefer quiet disconnects) + pass + + +@socketio.on("chat_message") +def handle_chat_message(data): + """Receive { "user": str, "text": str } and broadcast to every connected client.""" + if not isinstance(data, dict): + return + text = (data.get("text") or "").strip() + user = (data.get("user") or "Guest").strip() or "Guest" + if not text: + return + emit("chat_message", {"user": user, "text": text}, broadcast=True) + + +if __name__ == "__main__": + print("Real-time chat starting at http://127.0.0.1:5000") + print("Open two browser tabs to test broadcasting.\n") + socketio.run(app, debug=True, host="127.0.0.1", port=5000) diff --git a/starter_code/realtime_chat/requirements.txt b/starter_code/realtime_chat/requirements.txt new file mode 100644 index 0000000..d8fd949 --- /dev/null +++ b/starter_code/realtime_chat/requirements.txt @@ -0,0 +1,5 @@ +# Real-Time Chat — install before running: pip install -r requirements.txt +flask>=3.0.0 +flask-socketio>=5.3.0 +python-socketio>=5.11.0 +eventlet>=0.35.0 diff --git a/starter_code/realtime_chat/templates/chat.html b/starter_code/realtime_chat/templates/chat.html new file mode 100644 index 0000000..6d0b2f8 --- /dev/null +++ b/starter_code/realtime_chat/templates/chat.html @@ -0,0 +1,74 @@ + + + + + + Real-Time Chat + + + +

Real-Time Chat

+

Open this page in another tab to see messages sync across clients.

+
+
+
+ +
+
+ + +
+
+ + + + diff --git a/tests/test_basic.py b/tests/test_basic.py index a66c75b..f86b996 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -297,6 +297,31 @@ def test_scoring_weights_has_all_keys(): assert set(SCORING_WEIGHTS.keys()) == expected_keys +def test_view_code_nested_starter_realtime_chat(): + """Nested starter_code paths (e.g. realtime_chat/app.py) must resolve for /code.""" + client = get_client() + response = client.get("/project/8/code") + assert response.status_code == 200 + data = response.get_json() + assert "code" in data and "filename" in data + assert "realtime_chat" in data["filename"] or data["filename"].endswith("app.py") + assert "Flask" in data["code"] or "flask" in data["code"].lower() + + +def test_download_nested_starter_realtime_chat(): + client = get_client() + response = client.get("/project/8/download") + assert response.status_code == 200 + + +def test_view_code_jwt_project(): + client = get_client() + response = client.get("/project/9/code") + assert response.status_code == 200 + data = response.get_json() + assert "jwt" in data["code"].lower() or "JWT" in data["code"] + + # ============================================================ # Run tests directly (no pytest required) # ============================================================ diff --git a/utils/file_server.py b/utils/file_server.py index 5539060..562fddd 100644 --- a/utils/file_server.py +++ b/utils/file_server.py @@ -9,20 +9,63 @@ ) +def _starter_relative_path(raw_path): + """ + Turn a projects.json starter_code value into a path relative to STARTER_CODE_DIR. + + Accepts e.g. "starter_code/task_api.py", "task_api.py", or "realtime_chat/app.py". + Returns None if the value is empty or clearly unsafe (path traversal). + """ + if not raw_path or not isinstance(raw_path, str): + return None + + normalized = raw_path.strip().replace("\\", "/") + if not normalized or normalized.startswith("/"): + return None + + prefix = "starter_code/" + if normalized.lower().startswith(prefix): + relative = normalized[len(prefix) :] + else: + relative = normalized + + if not relative or relative.startswith("/"): + return None + + parts = relative.split("/") + if ".." in parts: + return None + + return "/".join(parts) + + +def _is_inside_starter_dir(full_path): + """True if full_path is a file under STARTER_CODE_DIR (prevents path traversal).""" + try: + full = os.path.realpath(full_path) + root = os.path.realpath(STARTER_CODE_DIR) + except OSError: + return False + try: + common = os.path.commonpath([full, root]) + except ValueError: + return False + return common == root and full != root + + def resolve_starter_file(project): """ Given a project dict, return the absolute path to its starter code file. + Supports files in subfolders (e.g. realtime_chat/app.py). Returns None if the project has no starter_code field or the file does not exist. """ - raw_path = project.get("starter_code", "") - if not raw_path: + relative = _starter_relative_path(project.get("starter_code", "")) + if not relative: return None - # Only use the filename portion — never trust a full path from the data file - filename = os.path.basename(raw_path) - full_path = os.path.join(STARTER_CODE_DIR, filename) + full_path = os.path.normpath(os.path.join(STARTER_CODE_DIR, *relative.split("/"))) - if not os.path.exists(full_path): + if not _is_inside_starter_dir(full_path) or not os.path.isfile(full_path): return None return full_path @@ -31,19 +74,28 @@ def resolve_starter_file(project): def read_starter_code(project): """ Return a dict containing the filename and text content of the starter file. + Filename is relative to starter_code/ (e.g. "jwt_auth_api.py" or "realtime_chat/app.py"). Returns None if the file cannot be found. """ full_path = resolve_starter_file(project) if not full_path: return None - filename = os.path.basename(full_path) + display_name = os.path.relpath(full_path, STARTER_CODE_DIR).replace("\\", "/") with open(full_path, "r", encoding="utf-8") as f: code = f.read() - return {"filename": filename, "code": code} + return {"filename": display_name, "code": code} def get_starter_code_dir(): """Return the absolute path to the starter_code directory for use with send_from_directory.""" return STARTER_CODE_DIR + + +def starter_download_relpath(full_path): + """ + Path segment(s) for send_from_directory(STARTER_CODE_DIR, relpath), using / as separator. + """ + rel = os.path.relpath(full_path, STARTER_CODE_DIR) + return rel.replace("\\", "/")