From bd40051db80342d02497c8a40b72d4f0eea58ace Mon Sep 17 00:00:00 2001 From: sudkul Date: Fri, 5 Sep 2025 17:17:41 +0530 Subject: [PATCH 1/3] feat: upgrade to Python 3.13 - Update Python version from 3.8 to 3.13.0 - Update dependencies to latest compatible versions - Update scikit-learn OneHotEncoder parameter for compatibility - Add version upgrade documentation Dependencies updated: - numpy >= 1.26.0 - pandas >= 2.1.0 - scikit-learn >= 1.3.0 - fastapi >= 0.103.0 - uvicorn >= 0.23.0 - gunicorn >= 21.2.0 --- PYTHON_3_13_UPDATES.md | 20 ++++++++++++++++++++ starter/requirements.txt | 18 +++++++++--------- starter/starter/ml/data.py | 2 +- 3 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 PYTHON_3_13_UPDATES.md diff --git a/PYTHON_3_13_UPDATES.md b/PYTHON_3_13_UPDATES.md new file mode 100644 index 0000000000..e6de6a330f --- /dev/null +++ b/PYTHON_3_13_UPDATES.md @@ -0,0 +1,20 @@ +# Python 3.13 Compatibility Updates + +This document summarizes the changes made to make the codebase compatible with Python 3.13. + +## Dependencies Updated +- Python: 3.8 → 3.13.0 +- NumPy: Latest compatible (1.26.0+) +- Pandas: Latest compatible (2.1.0+) +- scikit-learn: Latest compatible (1.3.0+) +- FastAPI: 0.63.0 → 0.103.0+ +- Other dependencies updated to latest stable versions + +## Code Changes +1. 
**ML Data Processing (`ml/data.py`)** + - Updated OneHotEncoder parameters from `sparse=False` to `sparse_output=False` to match newer scikit-learn API + +## Notes +- The core ML functionality and boilerplate structure remain unchanged +- Dependencies updated to ensure compatibility with Python 3.13 +- Starter code structure maintained for student implementation diff --git a/starter/requirements.txt b/starter/requirements.txt index fc7fe03093..3988db675e 100644 --- a/starter/requirements.txt +++ b/starter/requirements.txt @@ -1,9 +1,9 @@ -python==3.8 -numpy -pandas -scikit-learn -pytest -requests -fastapi==0.63.0 -uvicorn -gunicorn +python>=3.13.0 +numpy>=1.26.0 +pandas>=2.1.0 +scikit-learn>=1.3.0 +pytest>=7.4.0 +requests>=2.31.0 +fastapi>=0.103.0 +uvicorn>=0.23.0 +gunicorn>=21.2.0 diff --git a/starter/starter/ml/data.py b/starter/starter/ml/data.py index b46a8f0138..ed70470643 100644 --- a/starter/starter/ml/data.py +++ b/starter/starter/ml/data.py @@ -54,7 +54,7 @@ def process_data( X_continuous = X.drop(*[categorical_features], axis=1) if training is True: - encoder = OneHotEncoder(sparse=False, handle_unknown="ignore") + encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore") lb = LabelBinarizer() X_categorical = encoder.fit_transform(X_categorical) y = lb.fit_transform(y.values).ravel() From 9dceaf3b61942395b6aac9e65d42ea0c1d1a8a67 Mon Sep 17 00:00:00 2001 From: sudkul Date: Fri, 5 Sep 2025 17:21:13 +0530 Subject: [PATCH 2/3] remove deprecated action --- .github/workflows/manual.yml | 46 ------------------------------------ starter/starter/ml/model.py | 18 +++++++------- 2 files changed, 8 insertions(+), 56 deletions(-) delete mode 100644 .github/workflows/manual.yml diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml deleted file mode 100644 index 036e16aeae..0000000000 --- a/.github/workflows/manual.yml +++ /dev/null @@ -1,46 +0,0 @@ -# Workflow to ensure whenever a Github PR is submitted, -# a JIRA ticket gets created 
automatically. -name: Manual Workflow - -# Controls when the action will run. -on: - # Triggers the workflow on pull request events but only for the master branch - pull_request_target: - types: [opened, reopened] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - test-transition-issue: - name: Convert Github Issue to Jira Issue - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@master - - - name: Login - uses: atlassian/gajira-login@master - env: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} - - - name: Create NEW JIRA ticket - id: create - uses: atlassian/gajira-create@master - with: - project: CONUPDATE - issuetype: Task - summary: | - Github PR | Repo: ${{ github.repository }} | PR# ${{github.event.number}} - description: | - Repo link: https://github.com/${{ github.repository }} - PR no. ${{ github.event.pull_request.number }} - PR title: ${{ github.event.pull_request.title }} - PR description: ${{ github.event.pull_request.description }} - In addition, please resolve other issues, if any. - fields: '{"components": [{"name":"nd0821 - ML DevOpsEngineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' - - - name: Log created issue - run: echo "Issue ${{ steps.create.outputs.issue }} was created" diff --git a/starter/starter/ml/model.py b/starter/starter/ml/model.py index 61eafb6c06..5f60116558 100644 --- a/starter/starter/ml/model.py +++ b/starter/starter/ml/model.py @@ -1,23 +1,21 @@ from sklearn.metrics import fbeta_score, precision_score, recall_score -# Optional: implement hyperparameter tuning. def train_model(X_train, y_train): """ Trains a machine learning model and returns it. Inputs ------ - X_train : np.array + X_train : np.ndarray Training data. 
- y_train : np.array + y_train : np.ndarray Labels. Returns ------- - model + model : RandomForestClassifier Trained machine learning model. """ - pass @@ -27,9 +25,9 @@ def compute_model_metrics(y, preds): Inputs ------ - y : np.array + y : np.ndarray Known labels, binarized. - preds : np.array + preds : np.ndarray Predicted labels, binarized. Returns ------- @@ -48,13 +46,13 @@ def inference(model, X): Inputs ------ - model : ??? + model : RandomForestClassifier Trained machine learning model. - X : np.array + X : np.ndarray Data used for prediction. Returns ------- - preds : np.array + preds : np.ndarray Predictions from the model. """ pass From f34c97b345792bada30bc8a38e957a6adfde66d7 Mon Sep 17 00:00:00 2001 From: Abhi Ojha Date: Mon, 22 Sep 2025 11:16:58 +0530 Subject: [PATCH 3/3] update deps and documentation --- .gitignore | 188 +++++++++++++++++++++++++++++++++ README.md | 18 +++- starter/README.md | 18 +++- starter/data/.gitignore | 1 - starter/model/.gitignore | 1 - starter/model/.gitkeep | 2 + starter/requirements.txt | 50 +++++++-- starter/screenshots/.gitignore | 1 - starter/screenshots/.gitkeep | 2 + starter/starter/ml/data.py | 2 +- 10 files changed, 260 insertions(+), 23 deletions(-) create mode 100644 .gitignore delete mode 100644 starter/data/.gitignore delete mode 100644 starter/model/.gitignore create mode 100644 starter/model/.gitkeep delete mode 100644 starter/screenshots/.gitignore create mode 100644 starter/screenshots/.gitkeep diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..a771c23bee --- /dev/null +++ b/.gitignore @@ -0,0 +1,188 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before 
PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Machine Learning / Data Science specific +# Models +*.pkl +*.joblib +*.h5 +*.hdf5 +*.model +*.weights + +# Data files (but keep small sample data, i.e. the census dataset) +*.csv +!starter/data/census.csv + +# Large data directories +data/large/ +data/raw/ +data/processed/ + +# Jupyter notebook checkpoints +.ipynb_checkpoints/ + +# MLflow +mlruns/ +mlartifacts/ + +# DVC +.dvc/cache/ +.dvc/tmp/ +.dvc/logs/ + +# Weights & Biases +wandb/ + +# TensorBoard +runs/ +logs/ +tensorboard/ + +# IDE specific files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS specific files +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Heroku specific +.env.local +.env.production + +# AWS credentials +.aws/ + +# Temporary files +*.tmp +*.temp +temp/ +tmp/ + +# Screenshots (unless specifically needed) +screenshots/*.png +screenshots/*.jpg +screenshots/*.jpeg +!screenshots/.gitkeep + +# API keys and secrets +*.key +*.pem +secrets.json +config.ini \ No newline at end of file diff --git a/README.md b/README.md index b3f0e882db..24c4e38669 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,24 @@ Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended. # Environment Set up -* Download and install conda if you don’t have it already. - * Use the supplied requirements file to create a new environment, or - * conda create -n [envname] "python=3.8" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge - * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git. +* **Option 1: Using pip and venv (Recommended)** + * Ensure you have Python 3.13 installed + * Create virtual environment: `python3.13 -m venv .venv` + * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`) + * Install dependencies: `pip install -r starter/requirements.txt` + +* **Option 2: Using conda** + * Download and install conda if you don't have it already. + * conda create -n [envname] "python=3.13" scikit-learn pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge + * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git. ## Repositories * Create a directory for the project and initialize git. * As you work on the code, continually commit changes. Trained models you want to use in production must be committed to GitHub. * Connect your local git repo to GitHub. * Setup GitHub Actions on your repo. 
You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error. - * Make sure you set up the GitHub Action to have the same version of Python as you used in development. + * Make sure you set up the GitHub Action to use Python 3.13 (same version as development). + * Note: flake8 is not listed in starter/requirements.txt; add it there, or install it separately with `pip install flake8`, if you want to use it for linting. # Data * Download census.csv and commit it to dvc. @@ -40,4 +47,5 @@ Working in a command line environment is recommended for ease of use with git an * Enable automatic deployments that only deploy if your continuous integration passes. * Hint: think about how paths will differ in your local environment vs. on Heroku. * Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes. + * Note: Install flake8 separately if needed: `pip install flake8` * Write a script that uses the requests module to do one POST on your live API. diff --git a/starter/README.md b/starter/README.md index ca25260404..9f84bd2fab 100644 --- a/starter/README.md +++ b/starter/README.md @@ -1,10 +1,16 @@ Working in a command line environment is recommended for ease of use with git and dvc. If on Windows, WSL1 or 2 is recommended. # Environment Set up -* Download and install conda if you don’t have it already. - * Use the supplied requirements file to create a new environment, or - * conda create -n [envname] "python=3.8" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn -c conda-forge - * Install git either through conda (“conda install git”) or through your CLI, e.g. sudo apt-get git. 
+* **Option 1: Using pip and venv (Recommended)** + * Ensure you have Python 3.13 installed + * Create virtual environment: `python3.13 -m venv .venv` + * Activate environment: `source .venv/bin/activate` (On Windows: `.venv\Scripts\activate`) + * Install dependencies: `pip install -r requirements.txt` + +* **Option 2: Using conda** + * Download and install conda if you don't have it already. + * conda create -n [envname] "python=3.13" scikit-learn dvc pandas numpy pytest jupyter jupyterlab fastapi uvicorn pydantic httpx matplotlib seaborn -c conda-forge + * Install git either through conda ("conda install git") or through your CLI, e.g. sudo apt-get install git. ## Repositories @@ -32,7 +38,8 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w ## GitHub Actions * Setup GitHub Actions on your repository. You can use one of the pre-made GitHub Actions if at a minimum it runs pytest and flake8 on push and requires both to pass without error. - * Make sure you set up the GitHub Action to have the same version of Python as you used in development. + * Make sure you set up the GitHub Action to use Python 3.13 (same version as development). + * Note: flake8 is not listed in requirements.txt; add it there, or install it separately with `pip install flake8`, if you want to use it for linting. * Add your AWS credentials to the Action. * Set up DVC in the action and specify a command to `dvc pull`. @@ -70,6 +77,7 @@ To use your new S3 bucket from the AWS CLI you will need to create an IAM user w * Enable automatic deployments that only deploy if your continuous integration passes. * Hint: think about how paths will differ in your local environment vs. on Heroku. * Hint: development in Python is fast! But how fast you can iterate slows down if you rely on your CI/CD to fail before fixing an issue. I like to run flake8 locally before I commit changes. + * Note: Install flake8 separately if needed: `pip install flake8` * Set up DVC on Heroku using the instructions contained in the starter directory. 
* Set up access to AWS on Heroku, if using the CLI: `heroku config:set AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=yyy` * Write a script that uses the requests module to do one POST on your live API. diff --git a/starter/data/.gitignore b/starter/data/.gitignore deleted file mode 100644 index 8b13789179..0000000000 --- a/starter/data/.gitignore +++ /dev/null @@ -1 +0,0 @@ - diff --git a/starter/model/.gitignore b/starter/model/.gitignore deleted file mode 100644 index 8b13789179..0000000000 --- a/starter/model/.gitignore +++ /dev/null @@ -1 +0,0 @@ - diff --git a/starter/model/.gitkeep b/starter/model/.gitkeep new file mode 100644 index 0000000000..f9fec6a8be --- /dev/null +++ b/starter/model/.gitkeep @@ -0,0 +1,2 @@ +# This file ensures the model directory is tracked by git +# Model files (.pkl, .joblib, etc.) will be saved here during training \ No newline at end of file diff --git a/starter/requirements.txt b/starter/requirements.txt index 3988db675e..7683dc56d9 100644 --- a/starter/requirements.txt +++ b/starter/requirements.txt @@ -1,9 +1,41 @@ -python>=3.13.0 -numpy>=1.26.0 -pandas>=2.1.0 -scikit-learn>=1.3.0 -pytest>=7.4.0 -requests>=2.31.0 -fastapi>=0.103.0 -uvicorn>=0.23.0 -gunicorn>=21.2.0 +# Exact versions from working .venv for Python 3.13 +# Generated from pip freeze - September 2025 + +# Web framework and server +fastapi==0.117.1 +uvicorn[standard]==0.36.0 + +# Data validation +pydantic==2.11.9 + +# Testing +pytest==8.4.2 +pytest-asyncio==1.2.0 + +# HTTP clients +httpx==0.28.1 +requests==2.32.5 + +# Data science libraries +pandas==2.3.2 +numpy==2.3.3 +matplotlib==3.10.6 +seaborn==0.13.2 +scikit-learn==1.7.2 + +# Jupyter support +jupyter==1.1.1 +ipykernel==6.30.1 +nbformat==5.10.4 + +# ML fairness and visualization +aequitas==0.42.0 +altair==4.1.0 + +# Flask and extensions (for aequitas) +Flask==0.12.2 +Flask-Bootstrap==3.3.7.1 + +# Other utilities +python-multipart==0.0.20 +httplib2==0.31.0 diff --git a/starter/screenshots/.gitignore 
b/starter/screenshots/.gitignore deleted file mode 100644 index 8b13789179..0000000000 --- a/starter/screenshots/.gitignore +++ /dev/null @@ -1 +0,0 @@ - diff --git a/starter/screenshots/.gitkeep b/starter/screenshots/.gitkeep new file mode 100644 index 0000000000..cc813f13ea --- /dev/null +++ b/starter/screenshots/.gitkeep @@ -0,0 +1,2 @@ +# This file ensures the screenshots directory is tracked by git +# Screenshots for documentation will be saved here \ No newline at end of file diff --git a/starter/starter/ml/data.py b/starter/starter/ml/data.py index ed70470643..188e3362f6 100644 --- a/starter/starter/ml/data.py +++ b/starter/starter/ml/data.py @@ -51,7 +51,7 @@ def process_data( y = np.array([]) X_categorical = X[categorical_features].values - X_continuous = X.drop(*[categorical_features], axis=1) + X_continuous = X.drop(categorical_features, axis=1) if training is True: encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")