diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml
deleted file mode 100644
index 036e16aeae..0000000000
--- a/.github/workflows/manual.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Workflow to ensure whenever a Github PR is submitted,
-# a JIRA ticket gets created automatically.
-name: Manual Workflow
-
-# Controls when the action will run.
-on:
- # Triggers the workflow on pull request events but only for the master branch
- pull_request_target:
- types: [opened, reopened]
-
- # Allows you to run this workflow manually from the Actions tab
- workflow_dispatch:
-
-jobs:
- test-transition-issue:
- name: Convert Github Issue to Jira Issue
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@master
-
- - name: Login
- uses: atlassian/gajira-login@master
- env:
- JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
- JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
- JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
-
- - name: Create NEW JIRA ticket
- id: create
- uses: atlassian/gajira-create@master
- with:
- project: CONUPDATE
- issuetype: Task
- summary: |
- Github PR | Repo: ${{ github.repository }} | PR# ${{github.event.number}}
- description: |
- Repo link: https://github.com/${{ github.repository }}
- PR no. ${{ github.event.pull_request.number }}
- PR title: ${{ github.event.pull_request.title }}
- PR description: ${{ github.event.pull_request.description }}
- In addition, please resolve other issues, if any.
- fields: '{"components": [{"name":"nd0821 - ML DevOpsEngineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}'
-
- - name: Log created issue
- run: echo "Issue ${{ steps.create.outputs.issue }} was created"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..a771c23bee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,188 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Machine Learning / Data Science specific
+# Models
+*.pkl
+*.joblib
+*.h5
+*.hdf5
+*.model
+*.weights
+
+# Data files (but keep small sample data)
+*.csv
+!starter/data/census.csv
+
+# Large data directories
+data/large/
+data/raw/
+data/processed/
+
+# Jupyter notebook checkpoints
+.ipynb_checkpoints/
+
+# MLflow
+mlruns/
+mlartifacts/
+
+# DVC
+.dvc/cache/
+.dvc/tmp/
+.dvc/logs/
+
+# Weights & Biases
+wandb/
+
+# TensorBoard
+runs/
+logs/
+tensorboard/
+
+# IDE specific files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS specific files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Heroku specific
+.env.local
+.env.production
+
+# AWS credentials
+.aws/
+
+# Temporary files
+*.tmp
+*.temp
+temp/
+tmp/
+
+# Screenshots (unless specifically needed)
+screenshots/*.png
+screenshots/*.jpg
+screenshots/*.jpeg
+!screenshots/.gitkeep
+
+# API keys and secrets
+*.key
+*.pem
+secrets.json
+config.ini
\ No newline at end of file
diff --git a/PYTHON_3_13_UPDATES.md b/PYTHON_3_13_UPDATES.md
new file mode 100644
index 0000000000..e6de6a330f
--- /dev/null
+++ b/PYTHON_3_13_UPDATES.md
@@ -0,0 +1,20 @@
+# Python 3.13 Compatibility Updates
+
+This document summarizes the changes made to make the codebase compatible with Python 3.13.
+
+## Dependencies Updated
+- Python: 3.8 → 3.13.0
+- NumPy: unpinned → 2.3.3
+- Pandas: unpinned → 2.3.2
+- scikit-learn: unpinned → 1.7.2
+- FastAPI: 0.63.0 → 0.117.1
+- Other dependencies pinned to the exact versions listed in `starter/requirements.txt`
+
+## Code Changes
+1. **ML Data Processing (`ml/data.py`)**
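+ - Updated OneHotEncoder parameters from `sparse=False` to `sparse_output=False` to match newer scikit-learn API, and simplified `X.drop(*[categorical_features], axis=1)` to the equivalent `X.drop(categorical_features, axis=1)`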
+
+## Notes
+- The core ML functionality and boilerplate structure remain unchanged
+- Dependencies updated to ensure compatibility with Python 3.13
+- Starter code structure maintained for student implementation
diff --git a/README.md b/README.md
index b3f0e882db..24c4e38669 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,24 @@
Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended.
# Environment Set up
-* Download and install conda if you don’t have it already.
- * Use the supplied requirements file to create a new environment, or
- * conda create -n [envname] "python=3.8" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge
- * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git.
+* **Option 1: Using pip and venv (Recommended)**
+ * Ensure you have Python 3.13 installed
+ * Create virtual environment: `python3.13 -m venv .venv`
+ * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`)
+ * Install dependencies: `pip install -r starter/requirements.txt`
+
+* **Option 2: Using conda**
+ * Download and install conda if you don't have it already.
+ * conda create -n [envname] "python=3.13" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge
+ * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git.
## Repositories
* Create a directory for the project and initialize git.
* As you work on the code, continually commit changes. Trained models you want to use in production must be committed to GitHub.
* Connect your local git repo to GitHub.
* Setup GitHub Actions on your repo. You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error.
- * Make sure you set up the GitHub Action to have the same version of Python as you used in development.
+ * Make sure you set up the GitHub Action to use Python 3.13 (same version as development).
+ * Note: flake8 is not included in requirements.txt. Install it with `pip install flake8`, and add it to requirements.txt if the GitHub Action should install it for linting.
# Data
* Download census.csv and commit it to dvc.
@@ -40,4 +47,5 @@ Working in a command line environment is recommended for ease of use with git an
* Enable automatic deployments that only deploy if your continuous integration passes.
* Hint: think about how paths will differ in your local environment vs. on Heroku.
* Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes.
+ * Note: Install flake8 separately if needed: `pip install flake8`
* Write a script that uses the requests module to do one POST on your live API.
diff --git a/starter/README.md b/starter/README.md
index ca25260404..9f84bd2fab 100644
--- a/starter/README.md
+++ b/starter/README.md
@@ -1,10 +1,16 @@
Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended.
# Environment Set up
-* Download and install conda if you don’t have it already.
- * Use the supplied requirements file to create a new environment, or
- * conda create -n [envname] "python=3.8" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge
- * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git.
+* **Option 1: Using pip and venv (Recommended)**
+ * Ensure you have Python 3.13 installed
+ * Create virtual environment: `python3.13 -m venv .venv`
+ * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`)
+ * Install dependencies: `pip install -r requirements.txt`
+
+* **Option 2: Using conda**
+ * Download and install conda if you don't have it already.
+ * conda create -n [envname] "python=3.13" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge
+ * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git.
## Repositories
@@ -32,7 +38,8 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w
## GitHub Actions
* Setup GitHub Actions on your repository. You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error.
- * Make sure you set up the GitHub Action to have the same version of Python as you used in development.
+ * Make sure you set up the GitHub Action to use Python 3.13 (same version as development).
+ * Note: flake8 is not included in requirements.txt. Install it with `pip install flake8`, and add it to requirements.txt if the GitHub Action should install it for linting.
* Add your AWS credentials to the Action.
* Set up DVC in the action and specify a command to `dvc pull`.
@@ -70,6 +77,7 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w
* Enable automatic deployments that only deploy if your continuous integration passes.
* Hint: think about how paths will differ in your local environment vs. on Heroku.
* Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes.
+ * Note: Install flake8 separately if needed: `pip install flake8`
* Set up DVC on Heroku using the instructions contained in the starter directory.
* Set up access to AWS on Heroku, if using the CLI: `heroku config:set AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=yyy`
* Write a script that uses the requests module to do one POST on your live API.
diff --git a/starter/data/.gitignore b/starter/data/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/data/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/model/.gitignore b/starter/model/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/model/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/model/.gitkeep b/starter/model/.gitkeep
new file mode 100644
index 0000000000..f9fec6a8be
--- /dev/null
+++ b/starter/model/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the model directory is tracked by git
+# Model files (.pkl, .joblib, etc.) saved here during training are ignored by the root .gitignore; use `git add -f` to commit one
\ No newline at end of file
diff --git a/starter/requirements.txt b/starter/requirements.txt
index fc7fe03093..7683dc56d9 100644
--- a/starter/requirements.txt
+++ b/starter/requirements.txt
@@ -1,9 +1,41 @@
-python==3.8
-numpy
-pandas
-scikit-learn
-pytest
-requests
-fastapi==0.63.0
-uvicorn
-gunicorn
+# Exact versions from working .venv for Python 3.13
+# Generated from pip freeze - September 2025
+
+# Web framework and server
+fastapi==0.117.1
+uvicorn[standard]==0.36.0
+
+# Data validation
+pydantic==2.11.9
+
+# Testing
+pytest==8.4.2
+pytest-asyncio==1.2.0
+
+# HTTP clients
+httpx==0.28.1
+requests==2.32.5
+
+# Data science libraries
+pandas==2.3.2
+numpy==2.3.3
+matplotlib==3.10.6
+seaborn==0.13.2
+scikit-learn==1.7.2
+
+# Jupyter support
+jupyter==1.1.1
+ipykernel==6.30.1
+nbformat==5.10.4
+
+# ML fairness and visualization
+aequitas==0.42.0
+altair==4.1.0
+
+# Flask and extensions (for aequitas)
+Flask==0.12.2
+Flask-Bootstrap==3.3.7.1
+
+# Other utilities
+python-multipart==0.0.20
+httplib2==0.31.0
diff --git a/starter/screenshots/.gitignore b/starter/screenshots/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/screenshots/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/screenshots/.gitkeep b/starter/screenshots/.gitkeep
new file mode 100644
index 0000000000..cc813f13ea
--- /dev/null
+++ b/starter/screenshots/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the screenshots directory is tracked by git
+# Screenshots for documentation will be saved here
\ No newline at end of file
diff --git a/starter/starter/ml/data.py b/starter/starter/ml/data.py
index b46a8f0138..188e3362f6 100644
--- a/starter/starter/ml/data.py
+++ b/starter/starter/ml/data.py
@@ -51,10 +51,10 @@ def process_data(
y = np.array([])
X_categorical = X[categorical_features].values
- X_continuous = X.drop(*[categorical_features], axis=1)
+ X_continuous = X.drop(categorical_features, axis=1)
if training is True:
- encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
+ encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
lb = LabelBinarizer()
X_categorical = encoder.fit_transform(X_categorical)
y = lb.fit_transform(y.values).ravel()
diff --git a/starter/starter/ml/model.py b/starter/starter/ml/model.py
index 61eafb6c06..5f60116558 100644
--- a/starter/starter/ml/model.py
+++ b/starter/starter/ml/model.py
@@ -1,23 +1,21 @@
from sklearn.metrics import fbeta_score, precision_score, recall_score
-# Optional: implement hyperparameter tuning.
def train_model(X_train, y_train):
"""
Trains a machine learning model and returns it.
Inputs
------
- X_train : np.array
+ X_train : np.ndarray
Training data.
- y_train : np.array
+ y_train : np.ndarray
Labels.
Returns
-------
- model
+ model : RandomForestClassifier
Trained machine learning model.
"""
-
pass
@@ -27,9 +25,9 @@ def compute_model_metrics(y, preds):
Inputs
------
- y : np.array
+ y : np.ndarray
Known labels, binarized.
- preds : np.array
+ preds : np.ndarray
Predicted labels, binarized.
Returns
-------
@@ -48,13 +46,13 @@ def inference(model, X):
Inputs
------
- model : ???
+ model : RandomForestClassifier
Trained machine learning model.
- X : np.array
+ X : np.ndarray
Data used for prediction.
Returns
-------
- preds : np.array
+ preds : np.ndarray
Predictions from the model.
"""
pass