From bd40051db80342d02497c8a40b72d4f0eea58ace Mon Sep 17 00:00:00 2001
From: sudkul <sudhanshu.kulshrestha@gmail.com>
Date: Fri, 5 Sep 2025 17:17:41 +0530
Subject: [PATCH 1/3] feat: upgrade to Python 3.13

- Update Python version from 3.8 to 3.13.0
- Update dependencies to latest compatible versions
- Update scikit-learn OneHotEncoder parameter for compatibility
- Add version upgrade documentation

Dependencies updated:
- numpy >= 1.26.0
- pandas >= 2.1.0
- scikit-learn >= 1.3.0
- fastapi >= 0.103.0
- uvicorn >= 0.23.0
- gunicorn >= 21.2.0
---
 PYTHON_3_13_UPDATES.md     | 20 ++++++++++++++++++++
 starter/requirements.txt   | 18 +++++++++---------
 starter/starter/ml/data.py |  2 +-
 3 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 PYTHON_3_13_UPDATES.md

diff --git a/PYTHON_3_13_UPDATES.md b/PYTHON_3_13_UPDATES.md
new file mode 100644
index 0000000000..e6de6a330f
--- /dev/null
+++ b/PYTHON_3_13_UPDATES.md
@@ -0,0 +1,20 @@
+# Python 3.13 Compatibility Updates
+
+This document summarizes the changes made to make the codebase compatible with Python 3.13.
+
+## Dependencies Updated
+- Python: 3.8 → 3.13.0
+- NumPy: Latest compatible (1.26.0+)
+- Pandas: Latest compatible (2.1.0+)
+- scikit-learn: Latest compatible (1.3.0+)
+- FastAPI: 0.63.0 → 0.103.0+
+- Other dependencies updated to latest stable versions
+
+## Code Changes
+1. **ML Data Processing (`ml/data.py`)**
+   - Updated OneHotEncoder parameters from `sparse=False` to `sparse_output=False` to match newer scikit-learn API
+
+## Notes
+- The core ML functionality and boilerplate structure remain unchanged
+- Dependencies updated to ensure compatibility with Python 3.13
+- Starter code structure maintained for student implementation
diff --git a/starter/requirements.txt b/starter/requirements.txt
index fc7fe03093..3988db675e 100644
--- a/starter/requirements.txt
+++ b/starter/requirements.txt
@@ -1,9 +1,9 @@
-python==3.8
-numpy
-pandas
-scikit-learn
-pytest
-requests
-fastapi==0.63.0
-uvicorn
-gunicorn
+python>=3.13.0
+numpy>=1.26.0
+pandas>=2.1.0
+scikit-learn>=1.3.0
+pytest>=7.4.0
+requests>=2.31.0
+fastapi>=0.103.0
+uvicorn>=0.23.0
+gunicorn>=21.2.0
diff --git a/starter/starter/ml/data.py b/starter/starter/ml/data.py
index b46a8f0138..ed70470643 100644
--- a/starter/starter/ml/data.py
+++ b/starter/starter/ml/data.py
@@ -54,7 +54,7 @@ def process_data(
     X_continuous = X.drop(*[categorical_features], axis=1)
 
     if training is True:
-        encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
+        encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
         lb = LabelBinarizer()
         X_categorical = encoder.fit_transform(X_categorical)
         y = lb.fit_transform(y.values).ravel()

From 9dceaf3b61942395b6aac9e65d42ea0c1d1a8a67 Mon Sep 17 00:00:00 2001
From: sudkul <sudhanshu.kulshrestha@gmail.com>
Date: Fri, 5 Sep 2025 17:21:13 +0530
Subject: [PATCH 2/3] remove deprecated action

---
 .github/workflows/manual.yml | 46 ------------------------------------
 starter/starter/ml/model.py  | 18 +++++++-------
 2 files changed, 8 insertions(+), 56 deletions(-)
 delete mode 100644 .github/workflows/manual.yml

diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml
deleted file mode 100644
index 036e16aeae..0000000000
--- a/.github/workflows/manual.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Workflow to ensure whenever a Github PR is submitted, 
-# a JIRA ticket gets created automatically. 
-name: Manual Workflow
-
-# Controls when the action will run. 
-on:
-  # Triggers the workflow on pull request events but only for the master branch
-  pull_request_target:
-    types: [opened, reopened]
-
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-jobs:
-  test-transition-issue:
-    name: Convert Github Issue to Jira Issue
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout
-      uses: actions/checkout@master
-
-    - name: Login
-      uses: atlassian/gajira-login@master
-      env:
-        JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
-        JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-        JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
-        
-    - name: Create NEW JIRA ticket
-      id: create
-      uses: atlassian/gajira-create@master
-      with:
-        project: CONUPDATE
-        issuetype: Task
-        summary: |
-          Github PR | Repo: ${{ github.repository }}  | PR# ${{github.event.number}}
-        description: |
-           Repo link: https://github.com/${{ github.repository }}   
-           PR no. ${{ github.event.pull_request.number }} 
-           PR title: ${{ github.event.pull_request.title }}  
-           PR description: ${{ github.event.pull_request.description }}  
-           In addition, please resolve other issues, if any. 
-        fields: '{"components": [{"name":"nd0821 - ML DevOpsEngineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}'
-
-    - name: Log created issue
-      run: echo "Issue ${{ steps.create.outputs.issue }} was created"
diff --git a/starter/starter/ml/model.py b/starter/starter/ml/model.py
index 61eafb6c06..5f60116558 100644
--- a/starter/starter/ml/model.py
+++ b/starter/starter/ml/model.py
@@ -1,23 +1,21 @@
 from sklearn.metrics import fbeta_score, precision_score, recall_score
 
 
-# Optional: implement hyperparameter tuning.
 def train_model(X_train, y_train):
     """
     Trains a machine learning model and returns it.
 
     Inputs
     ------
-    X_train : np.array
+    X_train : np.ndarray
         Training data.
-    y_train : np.array
+    y_train : np.ndarray
         Labels.
     Returns
     -------
-    model
+    model : RandomForestClassifier
         Trained machine learning model.
     """
-
     pass
 
 
@@ -27,9 +25,9 @@ def compute_model_metrics(y, preds):
 
     Inputs
     ------
-    y : np.array
+    y : np.ndarray
         Known labels, binarized.
-    preds : np.array
+    preds : np.ndarray
         Predicted labels, binarized.
     Returns
     -------
@@ -48,13 +46,13 @@ def inference(model, X):
 
     Inputs
     ------
-    model : ???
+    model : RandomForestClassifier
         Trained machine learning model.
-    X : np.array
+    X : np.ndarray
         Data used for prediction.
     Returns
     -------
-    preds : np.array
+    preds : np.ndarray
         Predictions from the model.
     """
     pass

From f34c97b345792bada30bc8a38e957a6adfde66d7 Mon Sep 17 00:00:00 2001
From: Abhi Ojha <abhiojha8@gmail.com>
Date: Mon, 22 Sep 2025 11:16:58 +0530
Subject: [PATCH 3/3] update deps and documentation

---
 .gitignore                     | 188 +++++++++++++++++++++++++++++++++
 README.md                      |  18 +++-
 starter/README.md              |  18 +++-
 starter/data/.gitignore        |   1 -
 starter/model/.gitignore       |   1 -
 starter/model/.gitkeep         |   2 +
 starter/requirements.txt       |  50 +++++++--
 starter/screenshots/.gitignore |   1 -
 starter/screenshots/.gitkeep   |   2 +
 starter/starter/ml/data.py     |   2 +-
 10 files changed, 260 insertions(+), 23 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 starter/data/.gitignore
 delete mode 100644 starter/model/.gitignore
 create mode 100644 starter/model/.gitkeep
 delete mode 100644 starter/screenshots/.gitignore
 create mode 100644 starter/screenshots/.gitkeep

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..a771c23bee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,188 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Machine Learning / Data Science specific
+# Models
+*.pkl
+*.joblib
+*.h5
+*.hdf5
+*.model
+*.weights
+
+# Data files (ignored, but keep the small census sample dataset)
+*.csv
+!starter/data/census.csv
+
+# Large data directories
+data/large/
+data/raw/
+data/processed/
+
+# Jupyter notebook checkpoints
+.ipynb_checkpoints/
+
+# MLflow
+mlruns/
+mlartifacts/
+
+# DVC
+.dvc/cache/
+.dvc/tmp/
+.dvc/logs/
+
+# Weights & Biases
+wandb/
+
+# TensorBoard
+runs/
+logs/
+tensorboard/
+
+# IDE specific files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS specific files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Heroku specific
+.env.local
+.env.production
+
+# AWS credentials
+.aws/
+
+# Temporary files
+*.tmp
+*.temp
+temp/
+tmp/
+
+# Screenshots (unless specifically needed)
+screenshots/*.png
+screenshots/*.jpg
+screenshots/*.jpeg
+!screenshots/.gitkeep
+
+# API keys and secrets
+*.key
+*.pem
+secrets.json
+config.ini
\ No newline at end of file
diff --git a/README.md b/README.md
index b3f0e882db..24c4e38669 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,24 @@
 Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended.
 
 # Environment Set up
-* Download and install conda if you don’t have it already.
-    * Use the supplied requirements file to create a new environment, or
-    * conda create -n [envname] "python=3.8" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge
-    * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git.
+* **Option 1: Using pip and venv (Recommended)**
+    * Ensure you have Python 3.13 installed
+    * Create virtual environment: `python3.13 -m venv .venv`
+    * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`)
+    * Install dependencies: `pip install -r starter/requirements.txt`
+
+* **Option 2: Using conda**
+    * Download and install conda if you don't have it already.
+    * conda create -n [envname] "python=3.13" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge
+    * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git.
 
 ## Repositories
 * Create a directory for the project and initialize git.
     * As you work on the code, continually commit changes. Trained models you want to use in production must be committed to GitHub.
 * Connect your local git repo to GitHub.
 * Setup GitHub Actions on your repo. You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error.
-    * Make sure you set up the GitHub Action to have the same version of Python as you used in development.
+    * Make sure you set up the GitHub Action to use Python 3.13 (same version as development).
+    * Note: flake8 is not included in requirements.txt. To use it for linting, add it to requirements.txt or install it directly: `pip install flake8`
 
 # Data
 * Download census.csv and commit it to dvc.
@@ -40,4 +47,5 @@ Working in a command line environment is recommended for ease of use with git an
     * Enable automatic deployments that only deploy if your continuous integration passes.
     * Hint: think about how paths will differ in your local environment vs. on Heroku.
     * Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes.
+    * Note: Install flake8 separately if needed: `pip install flake8`
 * Write a script that uses the requests module to do one POST on your live API.
diff --git a/starter/README.md b/starter/README.md
index ca25260404..9f84bd2fab 100644
--- a/starter/README.md
+++ b/starter/README.md
@@ -1,10 +1,16 @@
 Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended.
 
 # Environment Set up
-* Download and install conda if you don’t have it already.
-    * Use the supplied requirements file to create a new environment, or
-    * conda create -n [envname] "python=3.8" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge
-    * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git.
+* **Option 1: Using pip and venv (Recommended)**
+    * Ensure you have Python 3.13 installed
+    * Create virtual environment: `python3.13 -m venv .venv`
+    * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`)
+    * Install dependencies: `pip install -r requirements.txt`
+
+* **Option 2: Using conda**
+    * Download and install conda if you don't have it already.
+    * conda create -n [envname] "python=3.13" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge
+    * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git.
 
 ## Repositories
 
@@ -32,7 +38,8 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w
 ## GitHub Actions
 
 * Setup GitHub Actions on your repository. You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error.
-   * Make sure you set up the GitHub Action to have the same version of Python as you used in development.
+   * Make sure you set up the GitHub Action to use Python 3.13 (same version as development).
+   * Note: flake8 is not included in requirements.txt. To use it for linting, add it to requirements.txt or install it directly: `pip install flake8`
 * Add your <a href="https://github.com/marketplace/actions/configure-aws-credentials-action-for-github-actions" target="_blank">AWS credentials to the Action</a>.
 * Set up <a href="https://github.com/iterative/setup-dvc" target="_blank">DVC in the action</a> and specify a command to `dvc pull`.
 
@@ -70,6 +77,7 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w
    * Enable automatic deployments that only deploy if your continuous integration passes.
    * Hint: think about how paths will differ in your local environment vs. on Heroku.
    * Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes.
+   * Note: Install flake8 separately if needed: `pip install flake8`
 * Set up DVC on Heroku using the instructions contained in the starter directory.
 * Set up access to AWS on Heroku, if using the CLI: `heroku config:set AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=yyy`
 * Write a script that uses the requests module to do one POST on your live API.
diff --git a/starter/data/.gitignore b/starter/data/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/data/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/model/.gitignore b/starter/model/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/model/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/model/.gitkeep b/starter/model/.gitkeep
new file mode 100644
index 0000000000..f9fec6a8be
--- /dev/null
+++ b/starter/model/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the model directory is tracked by git
+# Model files (.pkl, .joblib, etc.) will be saved here during training
\ No newline at end of file
diff --git a/starter/requirements.txt b/starter/requirements.txt
index 3988db675e..7683dc56d9 100644
--- a/starter/requirements.txt
+++ b/starter/requirements.txt
@@ -1,9 +1,41 @@
-python>=3.13.0
-numpy>=1.26.0
-pandas>=2.1.0
-scikit-learn>=1.3.0
-pytest>=7.4.0
-requests>=2.31.0
-fastapi>=0.103.0
-uvicorn>=0.23.0
-gunicorn>=21.2.0
+# Exact versions from working .venv for Python 3.13
+# Generated from pip freeze - September 2025
+
+# Web framework and server
+fastapi==0.117.1
+uvicorn[standard]==0.36.0
+
+# Data validation
+pydantic==2.11.9
+
+# Testing
+pytest==8.4.2
+pytest-asyncio==1.2.0
+
+# HTTP clients
+httpx==0.28.1
+requests==2.32.5
+
+# Data science libraries
+pandas==2.3.2
+numpy==2.3.3
+matplotlib==3.10.6
+seaborn==0.13.2
+scikit-learn==1.7.2
+
+# Jupyter support
+jupyter==1.1.1
+ipykernel==6.30.1
+nbformat==5.10.4
+
+# ML fairness and visualization
+aequitas==0.42.0
+altair==4.1.0
+
+# Flask and extensions (for aequitas)
+Flask==0.12.2
+Flask-Bootstrap==3.3.7.1
+
+# Other utilities
+python-multipart==0.0.20
+httplib2==0.31.0
diff --git a/starter/screenshots/.gitignore b/starter/screenshots/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/starter/screenshots/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/starter/screenshots/.gitkeep b/starter/screenshots/.gitkeep
new file mode 100644
index 0000000000..cc813f13ea
--- /dev/null
+++ b/starter/screenshots/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the screenshots directory is tracked by git
+# Screenshots for documentation will be saved here
\ No newline at end of file
diff --git a/starter/starter/ml/data.py b/starter/starter/ml/data.py
index ed70470643..188e3362f6 100644
--- a/starter/starter/ml/data.py
+++ b/starter/starter/ml/data.py
@@ -51,7 +51,7 @@ def process_data(
         y = np.array([])
 
     X_categorical = X[categorical_features].values
-    X_continuous = X.drop(*[categorical_features], axis=1)
+    X_continuous = X.drop(categorical_features, axis=1)
 
     if training is True:
         encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")