From c834a69edf124478f0bfbce278a549757e1c1443 Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indrayudd1@gmail.com>
Date: Wed, 11 Jun 2025 18:34:57 -0400
Subject: [PATCH 01/11] TutorTask554: Add a modified version of Aayush's code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 .../docker_data605_style/__init__.py          |   0
 DATA605/project_description.py                | 183 ++++++++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100644 DATA605/Spring2025/projects/TutorTask112a/docker_data605_style/__init__.py
 create mode 100644 DATA605/project_description.py

diff --git a/DATA605/Spring2025/projects/TutorTask112a/docker_data605_style/__init__.py b/DATA605/Spring2025/projects/TutorTask112a/docker_data605_style/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/DATA605/project_description.py b/DATA605/project_description.py
new file mode 100644
index 0000000000..05fa145ef4
--- /dev/null
+++ b/DATA605/project_description.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+"""
+Generate project descriptions from a Google Sheet and save them to a Markdown
+file.
+
+> project_description.py \
+    --sheet_url "https://docs.google.com/spreadsheets/d/1abc.../edit#gid=0" \
+    --markdown_path ./projects/MSML610_Projects.md \
+    --max_projects 3 \
+    -v INFO
+
+Import as:
+
+import DATA605.project_description as dprodesc
+"""
+
+import argparse
+import logging
+import pathlib
+import time
+from typing import Any, Optional
+
+import pandas as pd
+
+import helpers_root.helpers.hdbg as hdbg
+import helpers_root.helpers.hgoogle_drive_api as hgofiapi
+import helpers_root.helpers.hio as hio
+import helpers_root.helpers.hopenai as hopenai
+import helpers_root.helpers.hparser as hparser
+
+_LOG = logging.getLogger(__name__)
+
+# Set Constants.
+DEFAULT_SHEET_URL = "https://docs.google.com/spreadsheets/d/1Ez5uRvOgvDMkFc9c6mI21kscTKnpiCSh4UkUh_ifLIw/edit?gid=0#gid=0"
+GLOBAL_PROMPT = """
+You are a college professor of Data science.
+In the next prompt I will give you a topic XYZ for a class project and you will write a description using bullet points for a college class project about implementing an example big data system in Python.
+
+The project should be related to ingesting and processing real-time data about bitcoin. The focus should be on the technology XYZ, using basic Python packages for anything else.
+
+The assignment requires to describe the basic functionalities of the package using examples and then a concrete project related to implementing something related to time series analysis.
+The complexity of the project is 1, where 1 is easy (it should take around 7 days) to develop, 2 is medium difficulty (it should take around 10 days to complete), 3 is hard (it should take 14 days to complete).
+
+The output should follow the template below
+Title:
+Difficulty: (1=easy, 3=difficult)
+Description
+Describe technology
+Describe the project
+Useful resources
+Is it free?
+Python libraries / bindings
+"""
+
+EXAMPLE = """Example:
+Title: Ingest bitcoin prices using AWS Glue (AWS Glue is technology XYZ)
+Difficulty: 1
+Description
+AWS Glue is a fully managed extract, transform, and load (ETL) service...
+Useful resources: AWS Glue Docs
+Is it free?: Free tier available with limits
+Python libraries: boto3, PySpark
+"""
+DEFAULT_MARKDOWN_PATH = "./projects/MSML610_Projects.md"
+# The maximum number of projects.
+# Set the value to None to disable the limit.
+DEFAULT_MAX_PROJECTS = 5
+
+
+def read_google_sheet(url: str, secret_path: str) -> pd.DataFrame:
+    """
+    Read the Google Sheet and return the data as a pandas DataFrame.
+
+    :param url: the URL of the Google Sheet to read
+    :param secret_path: path to google_secret.json
+    :return: the data
+    """
+    credentials = hgofiapi.get_credentials(service_key_path=secret_path)
+    df = hgofiapi.read_google_file(url, credentials=credentials)
+    return df
+
+
+def generate_project_description(project_name: str, difficulty: str) -> Any:
+    """
+    Generate a project description.
+
+    :param project_name: the name of the project
+    :param difficulty: the difficulty level of the project
+    :return: the project description
+    """
+    # Generate the project description.
+    # prompt = f"Generate a project description for '{project_name}' with difficulty level '{difficulty}'."
+    # prompt = PROMPT_DOC_URL.strip()+ "\n\n"+ EXAMPLE.strip()+ f"\n\nTechnology: {project_name}\nDifficulty: {difficulty}"
+    # description = hopenai.get_completion(prompt, model="gpt-4o-mini")
+    # return description
+    prompt = f"Technology: {project_name}\nDifficulty: {difficulty}"
+    project_desc = hopenai.get_completion(
+        prompt,
+        system_prompt=GLOBAL_PROMPT,
+        model="gpt-4o-mini",
+        cache_mode="FALLBACK",
+        print_cost=True,
+    )
+    return project_desc
+
+
+def create_markdown_file(
+    df: pd.DataFrame,
+    markdown_path: str,
+    max_projects: Optional[int],
+    *,
+    sleep_sec: float = 1.5,
+) -> None:
+    """
+    Create a markdown file with the project descriptions using helpers.hio.
+
+    :param df: the dataframe containing the project descriptions
+    :param markdown_path: the path to the markdown file
+    :param max_projects: limit to the rows processed
+    :param sleep_sec: amount of time to sleep between rows
+    """
+    content = "# MSML610 Projects\n\n"
+    # Generate the project descriptions.
+    # Limit the number of projects.
+    rows = df.head(max_projects) if max_projects is not None else df
+    for _, row in rows.iterrows():
+        project_name = row["Tool"]
+        difficulty = row["Difficulty"]
+        description = generate_project_description(project_name, difficulty)
+        # Add the project description to the markdown file.
+        content += f"## {project_name}\n"
+        content += f"{description}\n\n"
+        time.sleep(sleep_sec)
+    # Write the markdown file.
+    hio.to_file(markdown_path, content)
+
+
+def _parse() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument(
+        "--sheet_url", default=DEFAULT_SHEET_URL, help="Google Sheet URL"
+    )
+    parser.add_argument(
+        "--secret_path",
+        default="/app/DATA605/google_secret.json",
+        help="Path to Google service‑account JSON.",
+    )
+    parser.add_argument(
+        "--markdown_path",
+        default=DEFAULT_MARKDOWN_PATH,
+        help="Output Markdown file",
+    )
+    parser.add_argument(
+        "--max_projects",
+        type=int,
+        default=DEFAULT_MAX_PROJECTS,
+        help="Limit rows processed (None = all).",
+    )
+    hparser.add_verbosity_arg(parser)  # adds -v / --log_level
+    return parser
+
+
+def _main(parser: argparse.ArgumentParser) -> None:
+    args = parser.parse_args()
+    hdbg.init_logger(verbosity=args.log_level, use_exec_path=True)
+    # Expand user/relative paths to absolute ones early to avoid surprises.
+    secret_path = str(pathlib.Path(args.secret_path).expanduser().resolve())
+    markdown_path = str(pathlib.Path(args.markdown_path).expanduser().resolve())
+    _LOG.info("Reading sheet %s", args.sheet_url)
+    sheet_df = read_google_sheet(args.sheet_url, secret_path)
+    _LOG.info("Generating Markdown → %s", markdown_path)
+    create_markdown_file(
+        sheet_df,
+        markdown_path,
+        args.max_projects,
+    )
+    _LOG.info("Done: %s", markdown_path)
+
+
+if __name__ == "__main__":
+    _main(_parse())

From c4a131a225146c6fbdb45f43cbd85d0e0cdc8d81 Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indrayudd1@gmail.com>
Date: Wed, 11 Jun 2025 18:38:34 -0400
Subject: [PATCH 02/11] TutorTask554: Add a modified version of Aayush's code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 DATA605/project_description.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DATA605/project_description.py b/DATA605/project_description.py
index 05fa145ef4..8a99f1c8c0 100644
--- a/DATA605/project_description.py
+++ b/DATA605/project_description.py
@@ -4,7 +4,7 @@
 file.
 
 > project_description.py \
-    --sheet_url "https://docs.google.com/spreadsheets/d/1abc.../edit#gid=0" \
+    --sheet_url "https://docs.google.com/spreadsheets/d/1abc...gid=0" \
     --markdown_path ./projects/MSML610_Projects.md \
     --max_projects 3 \
     -v INFO

From 3b1fd492ed00d4a220909faa9a78346ad91fa0aa Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Wed, 11 Jun 2025 22:36:39 -0500
Subject: [PATCH 03/11] TutorTask554: Adding documentation and script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 DATA605/project_description.py           | 10 ++-
 docs/project_description_explanation.md  | 77 ++++++++++++++++++++++++
 docs/project_description_how_to_guide.md | 76 +++++++++++++++++++++++
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 docs/project_description_explanation.md
 create mode 100644 docs/project_description_how_to_guide.md

diff --git a/DATA605/project_description.py b/DATA605/project_description.py
index 8a99f1c8c0..16b7a4af2c 100644
--- a/DATA605/project_description.py
+++ b/DATA605/project_description.py
@@ -64,7 +64,7 @@
 DEFAULT_MARKDOWN_PATH = "./projects/MSML610_Projects.md"
 # The maximum number of projects.
 # Set the value to None to disable the limit.
-DEFAULT_MAX_PROJECTS = 5
+DEFAULT_MAX_PROJECTS = None
 
 
 def read_google_sheet(url: str, secret_path: str) -> pd.DataFrame:
@@ -75,6 +75,8 @@ def read_google_sheet(url: str, secret_path: str) -> pd.DataFrame:
     :param secret_path: path to google_secret.json
     :return: the data
     """
+    _LOG.info(f"Reading Google Sheet: {url}")
+    _LOG.info(f"Using credentials from: {secret_path}")
     credentials = hgofiapi.get_credentials(service_key_path=secret_path)
     df = hgofiapi.read_google_file(url, credentials=credentials)
     return df
@@ -158,6 +160,12 @@ def _parse() -> argparse.ArgumentParser:
         default=DEFAULT_MAX_PROJECTS,
         help="Limit rows processed (None = all).",
     )
+    parser.add_argument(
+    "--openai_key",
+    type=str,
+    default=None,
+    help="OpenAI API key (will override env var)",
+)
     hparser.add_verbosity_arg(parser)  # adds -v / --log_level
     return parser
 
diff --git a/docs/project_description_explanation.md b/docs/project_description_explanation.md
new file mode 100644
index 0000000000..bf82318254
--- /dev/null
+++ b/docs/project_description_explanation.md
@@ -0,0 +1,77 @@
+# Explanation: `project_description.py`
+
+<!-- toc -->
+
+- [Introduction and motivation](#introduction-and-motivation)
+- [Core Concepts](#core-concepts)
+- [How It Works](#how-it-works)
+- [Design Rationale](#design-rationale)
+- [Trade-offs and Alternatives](#trade-offs-and-alternatives)
+
+<!-- tocstop -->
+
+## Introduction and motivation
+
+- This tool automates the generation of academic project descriptions by
+  integrating Google Sheets input with OpenAI's GPT API.
+- It addresses the need for scalable, consistent, and high-quality project
+  documentation based on dynamic student or faculty input.
+- It is intended for course instructors, academic administrators, or data
+  science curriculum designers who wish to streamline project generation and
+  documentation.
+
+## Core Concepts
+
+- **Google Sheets Integration:** Uses Google Sheets as the dynamic data source
+  for project names and difficulty levels.
+- **Prompt Engineering:** A pre-defined prompt template guides GPT to produce
+  structured project descriptions.
+- **Markdown Generation:** Outputs the generated content into a formatted
+  Markdown file for easy distribution.
+- **Helper Modules:** External utility modules (`hgoogle_drive_api`, `hopenai`,
+  `hio`) abstract authentication, I/O, and API interaction.
+
+## How It Works
+
+- The script follows this control flow:
+
+  ```markdown
+  [Google Sheet URL] → read_google_sheet() → [DataFrame of projects] → loop →
+  generate_project_description() → [GPT-generated text] → create_markdown_file()
+  → [Markdown output]
+  ```
+
+- Key Functions:
+  - `read_google_sheet(url, secret_path)`: Reads the sheet into a DataFrame.
+  - `generate_project_description(project_name, difficulty)`: Sends input to
+    GPT-4o-mini model and returns generated text.
+  - `create_markdown_file(df, markdown_path, max_projects)`: Iterates over the
+    rows, generates a description for each, and writes a Markdown file.
+
+## Design Rationale
+
+- **Automation Focus:** Built to minimize manual work for faculty managing large
+  project datasets.
+- **Modular Helpers:** Offloading I/O and API logic to separate modules makes
+  this script easier to maintain or port.
+- **GPT as Content Generator:** Using GPT-4o-mini allows flexibility and
+  high-quality text output with minimal prompt tuning.
+
+## Trade-offs and Alternatives
+
+- **Current Approach:**
+  - Advantages:
+    - Automated, reproducible, and scalable.
+    - Maintains separation of logic (reading input, generating content, writing
+      file).
+  - Drawbacks:
+    - Dependent on OpenAI and Google APIs (connectivity and API keys required).
+    - Limited error handling and logging for individual failures.
+
+- **Alternative Approach:**
+  - Using a GUI-based application or Jupyter notebook for manual review and
+    editing.
+    - Advantages:
+      - Allows user customization and validation at each step.
+    - Drawbacks:
+      - Slower and less scalable; not suitable for batch generation.
diff --git a/docs/project_description_how_to_guide.md b/docs/project_description_how_to_guide.md
new file mode 100644
index 0000000000..f914af5d3f
--- /dev/null
+++ b/docs/project_description_how_to_guide.md
@@ -0,0 +1,76 @@
+<!-- toc -->
+
+  * [What It Does](#what-it-does)
+  * [Assumptions / Requirements](#assumptions--requirements)
+  * [Instructions](#instructions)
+    + [Step 1: Fetch Input](#step-1-fetch-input)
+- [Edit GOOGLE_SHEET_URL inside the script or pass a new one to read_google_sheet()](#edit-google_sheet_url-inside-the-script-or-pass-a-new-one-to-read_google_sheet)
+  * [Step 2: Describe Action](#step-2-describe-action)
+    + [Step 3: Review Output](#step-3-review-output)
+  * [Troubleshooting](#troubleshooting)
+
+<!-- tocstop -->
+
+# What It Does
+
+- Automates the process of generating academic project descriptions by:
+  - Reading project data from a Google Sheet.
+  - Using OpenAI's API to auto-generate detailed project descriptions.
+  - Saving the final output in a formatted Markdown file for distribution.
+
+## Assumptions / Requirements
+
+- Google Cloud service key file at `/app/DATA605/google_secret.json`
+- Docker running
+- Valid OpenAI API key for model access
+- Project-specific helper modules must be available:
+  - `helpers.hgoogle_drive_api`
+  - `helpers.hio`
+  - `helpers.hopenai`
+
+## Instructions
+
+### Step 1: Fetch Input
+
+Ensure the Google Sheet is publicly accessible or shared with the configured
+service account.
+
+The Google Sheet should contain:
+
+- Project name
+
+- Difficulty
+
+# Edit GOOGLE_SHEET_URL inside the script or pass a new one to read_google_sheet()
+
+URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
+
+## Step 2: Describe Action
+
+- Run the script directly using Python
+- This will:
+
+  Authenticate and read the Google Sheet
+
+  Generate a project description using OpenAI for each row
+
+  Save the top 5 (or all if MAX_PROJECTS=None) projects in a file called
+  `./projects/DATA605_Projects.md`
+
+### Step 3: Review Output
+
+- Navigate to the projects/ folder and open DATA605_Projects.md.
+
+## Troubleshooting
+
+Issue: google.auth.exceptions.DefaultCredentialsError Cause: Google service key
+not found at the expected path. Fix: Place the correct google_secret.json file
+in /app/DATA605/.
+
+Issue: ModuleNotFoundError: No module named 'helpers' Cause: Missing local
+helper modules. Fix: Ensure helpers/ directory is in your PYTHONPATH or the same
+directory as the script.
+
+Issue: Empty or incomplete output file Cause: API failure or invalid sheet
+format. Fix: Check logs, verify if the OpenAI and Google API calls are working,
+and ensure data in the Google Sheet is structured correctly.

From 570c1f3fbdab2075320102b6804abe0e3d68b40e Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 00:00:44 -0500
Subject: [PATCH 04/11] Adding unit test file and improvements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 DATA605/project_description.py      | 57 +++++++++++++++++---------
 DATA605/test_project_description.py | 63 +++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 20 deletions(-)
 create mode 100644 DATA605/test_project_description.py

diff --git a/DATA605/project_description.py b/DATA605/project_description.py
index 16b7a4af2c..ca5fdedbf5 100644
--- a/DATA605/project_description.py
+++ b/DATA605/project_description.py
@@ -32,26 +32,41 @@
 
 # Set Constants.
 DEFAULT_SHEET_URL = "https://docs.google.com/spreadsheets/d/1Ez5uRvOgvDMkFc9c6mI21kscTKnpiCSh4UkUh_ifLIw/edit?gid=0#gid=0"
-GLOBAL_PROMPT = """
-You are a college professor of Data science.
-In the next prompt I will give you a topic XYZ for a class project and you will write a description using bullet points for a college class project about implementing an example big data system in Python.
-
-The project should be related to ingesting and processing real-time data about bitcoin. The focus should be on the technology XYZ, using basic Python packages for anything else.
-
-The assignment requires to describe the basic functionalities of the package using examples and then a concrete project related to implementing something related to time series analysis.
-The complexity of the project is 1, where 1 is easy (it should take around 7 days) to develop, 2 is medium difficulty (it should take around 10 days to complete), 3 is hard (it should take 14 days to complete).
-
-The output should follow the template below
-Title:
-Difficulty: (1=easy, 3=difficult)
-Description
-Describe technology
-Describe the project
-Useful resources
-Is it free?
-Python libraries / bindings
-"""
-
+# GLOBAL_PROMPT = """
+# You are a college professor of Data Science.
+# I will give you a topic XYZ for a class project.
+# Your task is to generate a short, structured project brief for college students focused on implementing a big data system in Python, using the technology XYZ.
+
+# Requirements:
+#     The project must involve real-time ingestion and processing of Bitcoin data.
+#     Emphasize how XYZ helps in this context.
+#     The response should be concise and in bullet points only.
+#     Avoid long descriptions or step-by-step guides.
+#     The project must include a time series analysis component.
+# The complexity of the project should range from 1, where 1 is easy (it should take around 7 days) to develop, 2 is medium difficulty (it should take around 10 days to complete), 3 is hard (it should take 14 days to complete).
+
+# The output should follow the template below
+# Title:
+# Difficulty: (1=easy, 3=difficult)
+# Description
+# Describe technology
+# Describe the project
+# Useful resources
+# Is it free?
+# Python libraries / bindings
+# """
+GLOBAL_PROMPT='''Act as a data science professor. I will give you a tool (XYZ) and difficulty level (1–3). Write a short bullet-point project brief on how XYZ can be used for real-time Bitcoin data ingestion in Python. Include:
+
+- Title
+- Difficulty (1 means easy, should take around 7 days to develop, 2 is medium difficulty, should take around 10 days to complete, 3 is hard,should take 14 days to complete)
+- Tech Description
+- Project Idea
+- Python libs
+- Is it Free?
+- Relevant tool(XYZ) related Resource Links
+
+Avoid long texts or steps
+'''
 EXAMPLE = """Example:
 Title: Ingest bitcoin prices using AWS Glue (AWS Glue is technology XYZ)
 Difficulty: 1
@@ -101,6 +116,8 @@ def generate_project_description(project_name: str, difficulty: str) -> Any:
         system_prompt=GLOBAL_PROMPT,
         model="gpt-4o-mini",
         cache_mode="FALLBACK",
+        temperature=0.3,
+        max_tokens=400,
         print_cost=True,
     )
     return project_desc
diff --git a/DATA605/test_project_description.py b/DATA605/test_project_description.py
new file mode 100644
index 0000000000..2a5b58a518
--- /dev/null
+++ b/DATA605/test_project_description.py
@@ -0,0 +1,63 @@
+import logging
+from unittest import mock
+
+import pandas as pd
+
+import helpers_root.helpers.hunit_test as hunitest
+import DATA605.project_description as projdesc
+
+_LOG = logging.getLogger(__name__)
+
+class TestProjectDescription1(hunitest.TestCase):
+    def test_read_google_sheet1(self) -> None:
+        """
+        Test reading a Google Sheet returns a valid DataFrame.
+        """
+        url = "https://docs.google.com/fake-sheet-url"
+        secret_path = "/fake/path/to/secret.json"
+
+        mock_data = pd.DataFrame({
+            "Tool": ["Kafka"],
+            "Difficulty": ["2"]
+        })
+
+        with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials") as mock_creds, \
+             mock.patch("helpers_root.helpers.hgoogle_drive_api.read_google_file", return_value=mock_data):
+            df = projdesc.read_google_sheet(url, secret_path)
+            self.assertIsInstance(df, pd.DataFrame)
+            self.assertEqual(df.shape[0], 1)
+            _LOG.debug("read_google_sheet1 → %s", df)
+
+    def test_generate_project_description1(self) -> None:
+        """
+        Test project description generation using mocked OpenAI.
+        """
+        tech = "Kafka"
+        difficulty = "2"
+
+        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
+
+        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output):
+            desc = projdesc.generate_project_description(tech, difficulty)
+            self.assertIn("Kafka", desc)
+            self.assertIn("Difficulty", desc)
+            _LOG.debug("generate_project_description1 → %s", desc)
+
+    def test_create_markdown_file1(self) -> None:
+        """
+        Test the markdown creation process with mocked data and completion.
+        """
+        df = pd.DataFrame({
+            "Tool": ["Kafka"],
+            "Difficulty": ["2"]
+        })
+        markdown_path = "/tmp/test_projects.md"
+        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
+
+        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output), \
+             mock.patch("helpers_root.helpers.hio.to_file") as mock_to_file:
+            projdesc.create_markdown_file(df, markdown_path, max_projects=1, sleep_sec=0)
+            mock_to_file.assert_called_once()
+            written_content = mock_to_file.call_args[0][1]
+            self.assertIn("Kafka", written_content)
+            _LOG.debug("create_markdown_file1 content →\n%s", written_content)

From 80f8482ccc1e6b4b860615bf0fd4f3a3539c65c5 Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 11:25:48 -0500
Subject: [PATCH 05/11] TutorTask554: Cosmetic changes to documentation file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 docs/project_description_explanation.md  |  7 +++----
 docs/project_description_how_to_guide.md | 17 ++++++++---------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/docs/project_description_explanation.md b/docs/project_description_explanation.md
index bf82318254..e4fff09c6c 100644
--- a/docs/project_description_explanation.md
+++ b/docs/project_description_explanation.md
@@ -13,11 +13,10 @@
 ## Introduction and motivation
 
 - This tool automates the generation of academic project descriptions by
-  integrating Google Sheets input with OpenAI's GPT API.
+  integrating Google Sheets input with OpenAI API.
 - It addresses the need for scalable, consistent, and high-quality project
   documentation based on dynamic student or faculty input.
-- It is intended for course instructors, academic administrators, or data
-  science curriculum designers who wish to streamline project generation and
+- It is intended to streamline and automate project generation and
   documentation.
 
 ## Core Concepts
@@ -37,7 +36,7 @@
 
   ```markdown
   [Google Sheet URL] → read_google_sheet() → [DataFrame of projects] → loop →
-  generate_project_description() → [GPT-generated text] → create_markdown_file()
+  Create prompt and feed into GPT → [GPT-generated text] → create_markdown_file()
   → [Markdown output]
   ```
 
diff --git a/docs/project_description_how_to_guide.md b/docs/project_description_how_to_guide.md
index f914af5d3f..7631dd1747 100644
--- a/docs/project_description_how_to_guide.md
+++ b/docs/project_description_how_to_guide.md
@@ -11,7 +11,7 @@
 
 <!-- tocstop -->
 
-# What It Does
+## What It Does
 
 - Automates the process of generating academic project descriptions by:
   - Reading project data from a Google Sheet.
@@ -20,7 +20,7 @@
 
 ## Assumptions / Requirements
 
-- Google Cloud service key file at `/app/DATA605/google_secret.json`
+- Google Cloud service key file ready to use
 - Docker running
 - Valid OpenAI API key for model access
 - Project-specific helper modules must be available:
@@ -35,13 +35,16 @@
 Ensure the Google Sheet is publicly accessible or shared with the configured
 service account.
 
+For instructions on how to configure google sheets API, follow this link:
+https://github.com/causify-ai/helpers/blob/c50fddfdffccdccb1b2d963b729ab9674d8fda8f/docs/tools/notebooks/all.gsheet_into_pandas.how_to_guide.md
+
 The Google Sheet should contain:
 
 - Project name
 
 - Difficulty
 
-# Edit GOOGLE_SHEET_URL inside the script or pass a new one to read_google_sheet()
+### Edit Google Sheet URL inside the script or pass a new one through CLI
 
 URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
 
@@ -54,12 +57,12 @@ URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
 
   Generate a project description using OpenAI for each row
 
-  Save the top 5 (or all if MAX_PROJECTS=None) projects in a file called
+  Save the top N (or all if MAX_PROJECTS=None) projects in a file called
   `./projects/DATA605_Projects.md`
 
 ### Step 3: Review Output
 
-- Navigate to the projects/ folder and open DATA605_Projects.md.
+- Output markdown stored at DATA605/projects/MSML610_Projects.md.
 
 ## Troubleshooting
 
@@ -67,10 +70,6 @@ Issue: google.auth.exceptions.DefaultCredentialsError Cause: Google service key
 not found at the expected path. Fix: Place the correct google_secret.json file
 in /app/DATA605/.
 
-Issue: ModuleNotFoundError: No module named 'helpers' Cause: Missing local
-helper modules. Fix: Ensure helpers/ directory is in your PYTHONPATH or the same
-directory as the script.
-
 Issue: Empty or incomplete output file Cause: API failure or invalid sheet
 format. Fix: Check logs, verify if the OpenAI and Google API calls are working,
 and ensure data in the Google Sheet is structured correctly.

From 311799285b3d55cb71060f9587d0994dc7740097 Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 11:38:06 -0500
Subject: [PATCH 06/11] TutorTask554: Cosmetic changes to documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 docs/project_description_how_to_guide.md | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/project_description_how_to_guide.md b/docs/project_description_how_to_guide.md
index 7631dd1747..0ef91f53dc 100644
--- a/docs/project_description_how_to_guide.md
+++ b/docs/project_description_how_to_guide.md
@@ -1,13 +1,15 @@
+# How To Guide: `project_description.py`
+
 <!-- toc -->
 
-  * [What It Does](#what-it-does)
-  * [Assumptions / Requirements](#assumptions--requirements)
-  * [Instructions](#instructions)
-    + [Step 1: Fetch Input](#step-1-fetch-input)
-- [Edit GOOGLE_SHEET_URL inside the script or pass a new one to read_google_sheet()](#edit-google_sheet_url-inside-the-script-or-pass-a-new-one-to-read_google_sheet)
-  * [Step 2: Describe Action](#step-2-describe-action)
-    + [Step 3: Review Output](#step-3-review-output)
-  * [Troubleshooting](#troubleshooting)
+- [What It Does](#what-it-does)
+- [Assumptions / Requirements](#assumptions--requirements)
+- [Instructions](#instructions)
+  * [Step 1: Fetch Input](#step-1-fetch-input)
+  * [Edit Google Sheet URL inside the script or pass a new one through CLI](#edit-google-sheet-url-inside-the-script-or-pass-a-new-one-through-cli)
+- [Step 2: Script Execution](#step-2-script-execution)
+  * [Step 3: Review Output](#step-3-review-output)
+- [Troubleshooting](#troubleshooting)
 
 <!-- tocstop -->
 
@@ -36,7 +38,7 @@ Ensure the Google Sheet is publicly accessible or shared with the configured
 service account.
 
 For instructions on how to configure google sheets API, follow this link:
-https://github.com/causify-ai/helpers/blob/c50fddfdffccdccb1b2d963b729ab9674d8fda8f/docs/tools/notebooks/all.gsheet_into_pandas.how_to_guide.md
+[https://github.com/causify-ai/helpers/blob/c50fddfdffccdccb1b2d963b729ab9674d8fda8f/docs/tools/notebooks/all.gsheet_into_pandas.how_to_guide.md](https://github.com/causify-ai/helpers/blob/c50fddfdffccdccb1b2d963b729ab9674d8fda8f/docs/tools/notebooks/all.gsheet_into_pandas.how_to_guide.md)
 
 The Google Sheet should contain:
 
@@ -48,7 +50,7 @@ The Google Sheet should contain:
 
 URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
 
-## Step 2: Describe Action
+## Step 2: Script Execution
 
 - Run the script directly using Python
 - This will:
@@ -62,7 +64,7 @@ URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
 
 ### Step 3: Review Output
 
-- Output markdown stored at DATA605/projects/MSML610_Projects.md.
+- Markdown stored at DATA605/projects/MSML610_Projects.md.
 
 ## Troubleshooting
 

From db3b66dec27f821dfd05725db59540f62ab3a218 Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 11:55:10 -0500
Subject: [PATCH 07/11] TutorTask554: Cosmetic changes to documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 docs/project_description_how_to_guide.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/docs/project_description_how_to_guide.md b/docs/project_description_how_to_guide.md
index 0ef91f53dc..639a9c95ef 100644
--- a/docs/project_description_how_to_guide.md
+++ b/docs/project_description_how_to_guide.md
@@ -6,7 +6,6 @@
 - [Assumptions / Requirements](#assumptions--requirements)
 - [Instructions](#instructions)
   * [Step 1: Fetch Input](#step-1-fetch-input)
-  * [Edit Google Sheet URL inside the script or pass a new one through CLI](#edit-google-sheet-url-inside-the-script-or-pass-a-new-one-through-cli)
 - [Step 2: Script Execution](#step-2-script-execution)
   * [Step 3: Review Output](#step-3-review-output)
 - [Troubleshooting](#troubleshooting)
@@ -46,10 +45,6 @@ The Google Sheet should contain:
 
 - Difficulty
 
-### Edit Google Sheet URL inside the script or pass a new one through CLI
-
-URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
-
 ## Step 2: Script Execution
 
 - Run the script directly using Python
@@ -62,6 +57,14 @@ URL="https://docs.google.com/spreadsheets/d/<sheet_id>/edit"
   Save the top N (or all if MAX_PROJECTS=None) projects in a file called
   `./projects/DATA605_Projects.md`
 
+Code to run script:
+
+```bash
+python <file_path>/project_description.py --sheet_url <sheet_url> --secret_path <secret_path> --openai_key <openai_key> --markdown_path <markdown_path> -v INFO
+```
+
+Edit the default Google Sheet URL inside the script, or pass a different one on the command line with `--sheet_url`.
+
 ### Step 3: Review Output
 
 - Markdown stored at DATA605/projects/MSML610_Projects.md.

From eaeca22d7dd6fed7a7d2db2ac34203de306db3d1 Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 23:29:45 -0500
Subject: [PATCH 08/11] TutorTask554: Adding unit tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 DATA605/test_project_description.py | 51 ++++++++++-------------------
 1 file changed, 17 insertions(+), 34 deletions(-)

diff --git a/DATA605/test_project_description.py b/DATA605/test_project_description.py
index 2a5b58a518..1699618d60 100644
--- a/DATA605/test_project_description.py
+++ b/DATA605/test_project_description.py
@@ -1,56 +1,40 @@
-import logging
-from unittest import mock
-
 import pandas as pd
-
-import helpers_root.helpers.hunit_test as hunitest
+from unittest import mock
 import DATA605.project_description as projdesc
+import helpers.hunit_test as hunitest
+import pytest
 
-_LOG = logging.getLogger(__name__)
+class TestProjectDescriptionWithCache(hunitest.TestCase):
 
-class TestProjectDescription1(hunitest.TestCase):
-    def test_read_google_sheet1(self) -> None:
-        """
-        Test reading a Google Sheet returns a valid DataFrame.
-        """
+    @pytest.fixture(autouse=True)
+    def setup_teardown(self):
+        # Can initialize dummy cache or files here
+        yield
+        # Avoid triggering _GLOBAL_CAPSYS in tearDown
+
+    def test_read_google_sheet(self) -> None:
         url = "https://docs.google.com/fake-sheet-url"
         secret_path = "/fake/path/to/secret.json"
+        mock_data = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
 
-        mock_data = pd.DataFrame({
-            "Tool": ["Kafka"],
-            "Difficulty": ["2"]
-        })
-
-        with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials") as mock_creds, \
+        with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials"), \
              mock.patch("helpers_root.helpers.hgoogle_drive_api.read_google_file", return_value=mock_data):
             df = projdesc.read_google_sheet(url, secret_path)
             self.assertIsInstance(df, pd.DataFrame)
             self.assertEqual(df.shape[0], 1)
-            _LOG.debug("read_google_sheet1 → %s", df)
 
-    def test_generate_project_description1(self) -> None:
-        """
-        Test project description generation using mocked OpenAI.
-        """
+    def test_generate_project_description(self) -> None:
         tech = "Kafka"
         difficulty = "2"
-
         mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
 
         with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output):
             desc = projdesc.generate_project_description(tech, difficulty)
             self.assertIn("Kafka", desc)
             self.assertIn("Difficulty", desc)
-            _LOG.debug("generate_project_description1 → %s", desc)
-
-    def test_create_markdown_file1(self) -> None:
-        """
-        Test the markdown creation process with mocked data and completion.
-        """
-        df = pd.DataFrame({
-            "Tool": ["Kafka"],
-            "Difficulty": ["2"]
-        })
+
+    def test_create_markdown_file(self) -> None:
+        df = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
         markdown_path = "/tmp/test_projects.md"
         mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
 
@@ -60,4 +44,3 @@ def test_create_markdown_file1(self) -> None:
             mock_to_file.assert_called_once()
             written_content = mock_to_file.call_args[0][1]
             self.assertIn("Kafka", written_content)
-            _LOG.debug("create_markdown_file1 content →\n%s", written_content)

From 88d6c50b4c55863348ab4268ddc9d96ba782deae Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Thu, 12 Jun 2025 23:32:28 -0500
Subject: [PATCH 09/11] TutorTask554: Fixing TOC in documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 docs/project_description_how_to_guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/project_description_how_to_guide.md b/docs/project_description_how_to_guide.md
index 639a9c95ef..19660d2e23 100644
--- a/docs/project_description_how_to_guide.md
+++ b/docs/project_description_how_to_guide.md
@@ -6,7 +6,7 @@
 - [Assumptions / Requirements](#assumptions--requirements)
 - [Instructions](#instructions)
   * [Step 1: Fetch Input](#step-1-fetch-input)
-- [Step 2: Script Execution](#step-2-script-execution)
+  * [Step 2: Script Execution](#step-2-script-execution)
   * [Step 3: Review Output](#step-3-review-output)
 - [Troubleshooting](#troubleshooting)
 

From afd491cfdd60659fc7156a1e3fc369fdc68ff7b1 Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Mon, 16 Jun 2025 22:34:45 -0500
Subject: [PATCH 10/11] TutorTask554: Moving files to correct location and some
 minor file changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 .../__init__.py                               |   0
 .../generate_class_project_description.py     | 217 ++++++++++++++++++
 .../project_description.py                    |   0
 ...test_generate_class_project_description.py |  48 ++++
 4 files changed, 265 insertions(+)
 create mode 100644 tutorial_class_project_instructions/__init__.py
 create mode 100644 tutorial_class_project_instructions/generate_class_project_description.py
 rename {DATA605 => tutorial_class_project_instructions}/project_description.py (100%)
 create mode 100644 tutorial_class_project_instructions/test_generate_class_project_description.py

diff --git a/tutorial_class_project_instructions/__init__.py b/tutorial_class_project_instructions/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tutorial_class_project_instructions/generate_class_project_description.py b/tutorial_class_project_instructions/generate_class_project_description.py
new file mode 100644
index 0000000000..2cea6aed80
--- /dev/null
+++ b/tutorial_class_project_instructions/generate_class_project_description.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+"""
+Generate project descriptions from a Google Sheet and save them to a Markdown
+file.
+
+> generate_class_project_description.py \
+    --sheet_url "https://docs.google.com/spreadsheets/d/1abc...gid=0" \
+    --markdown_path ./projects/MSML610_Projects.md \
+    --max_projects 3 \
+    -v INFO
+
+Import as:
+
+import tutorial_class_project_instructions.generate_class_project_description as projdesc
+"""
+
+import argparse
+import logging
+import pathlib
+import time
+from typing import Any, Optional
+
+import pandas as pd
+
+import helpers_root.helpers.hdbg as hdbg
+import helpers_root.helpers.hgoogle_drive_api as hgofiapi
+import helpers_root.helpers.hio as hio
+import helpers_root.helpers.hopenai as hopenai
+import helpers_root.helpers.hparser as hparser
+
+_LOG = logging.getLogger(__name__)
+
+# Set Constants.
+if True:
+    DEFAULT_SHEET_URL = (
+        "https://docs.google.com/"
+        "spreadsheets/d/"
+        "1Ez5uRvOgvDMkFc9c6mI21kscTKnpiCSh4UkUh_ifLIw/"
+        "edit?gid=0#gid=0"
+    )
+    # Set to True to use the actual spreadsheet link
+else:
+    # Set to False for testing purposes
+    fake_url = "https://docs.google.com/fake-sheet-url"
+    DEFAULT_SHEET_URL = fake_url
+GLOBAL_PROMPT = """Act as a data science professor.
+I will give you a tool (XYZ) and difficulty level (1–3).
+Write a short bullet-point project brief on how XYZ can be
+used for real-time Bitcoin data ingestion in Python.
+Include:
+
+- Title
+- Difficulty (1 means easy, should take around 7 days to develop, 2 is medium difficulty, should take around 10 days to complete, 3 is hard,should take 14 days to complete)
+- Tech Description
+- Project Idea
+- Python libs
+- Is it Free?
+- Relevant tool(XYZ) related Resource Links
+
+Avoid long texts or steps
+"""
+EXAMPLE = """Example:
+Title: Ingest bitcoin prices using AWS Glue (AWS Glue is technology XYZ)
+Difficulty: 1
+Description
+AWS Glue is a fully managed extract, transform, and load (ETL) service...
+Useful resources: AWS Glue Docs
+Is it free?: Free tier available with limits
+Python libraries: boto3, PySpark
+"""
+DEFAULT_MARKDOWN_PATH = "./projects/MSML610_Projects.md"
+# The maximum number of projects.
+# Set the value to None to disable the limit.
+DEFAULT_MAX_PROJECTS = None
+
+
+def _read_google_sheet(url: str, secret_path: str) -> pd.DataFrame:
+    """
+    Read the Google Sheet and return the data as a pandas DataFrame.
+
+    :param url: the URL of the Google Sheet to read
+    :param secret_path: path to google_secret.json
+    :return: the data
+    """
+    _LOG.info("Reading Google Sheet %s: ", url)
+    _LOG.info("Using credentials from: %s", secret_path)
+    credentials = hgofiapi.get_credentials(service_key_path=secret_path)
+    df = hgofiapi.read_google_file(url, credentials=credentials)
+    return df
+
+
+def _generate_project_description(project_name: str, difficulty: str) -> Any:
+    """
+    Generate a project description.
+
+    :param project_name: the name of the project
+    :param difficulty: the difficulty level of the project
+    :return: the project description
+    """
+    if False:
+        # Potential (v3) prompt if needed to use.
+        # Change False to True to use it.
+        prompt = (
+            f"Write a professional and detailed project description"
+            f"for a data project titled '{project_name}'. "
+            f"Indicate the difficulty level as '{difficulty}', and include objectives, "
+            f"technologies used, and expected outcomes."
+        )
+        # Will use more tokens, but might help produce a better result.
+    elif False:
+        # v1 (Original) prompt.
+        # Change False to True to use it.
+        prompt = (
+            f"Generate a project description for '{project_name}',"
+            f"with difficulty level '{difficulty}'."
+        )
+    else:
+        # v2: Added by Aayush as an improvement to optimize tokens
+        # while conveying the same information.
+        prompt = f"Technology: {project_name}\nDifficulty: {difficulty}"
+        # Short, to the point and concise. Saves the most tokens while achieving similar results.
+    project_desc = hopenai.get_completion(
+        prompt,
+        system_prompt=GLOBAL_PROMPT,
+        model="gpt-4o-mini",
+        cache_mode="FALLBACK",
+        temperature=0.3,
+        max_tokens=400,
+        print_cost=True,
+    )
+    return project_desc
+
+
+def create_markdown_file(
+    df: pd.DataFrame,
+    markdown_path: str,
+    max_projects: Optional[int],
+    *,
+    sleep_sec: float = 1.5,
+) -> None:
+    """
+    Create a markdown file with the project descriptions using helpers.hio.
+
+    :param df: the dataframe containing the project descriptions
+    :param markdown_path: the path to the markdown file
+    :param max_projects: limit to the rows processed
+    :param sleep_sec: amount of time to sleep between rows
+    """
+    content = "# MSML610 Projects\n\n"
+    # Generate the project descriptions.
+    # Limit the number of projects.
+    rows = df.head(max_projects) if max_projects is not None else df
+    for _, row in rows.iterrows():
+        project_name = row["Tool"]
+        difficulty = row["Difficulty"]
+        description = _generate_project_description(project_name, difficulty)
+        # Add the project description to the markdown file.
+        content += f"## {project_name}\n"
+        content += f"{description}\n\n"
+        # Letting it wait for a while before triggering another request
+        time.sleep(sleep_sec)
+    # Write the markdown file.
+    hio.to_file(markdown_path, content)
+
+
+def _parse() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument(
+        "--sheet_url", default=DEFAULT_SHEET_URL, help="Google Sheet URL"
+    )
+    parser.add_argument(
+        "--secret_path",
+        default="/app/DATA605/google_secret.json",
+        help="Path to Google service‑account JSON.",
+    )
+    parser.add_argument(
+        "--markdown_path",
+        default=DEFAULT_MARKDOWN_PATH,
+        help="Output Markdown file",
+    )
+    parser.add_argument(
+        "--max_projects",
+        type=int,
+        default=DEFAULT_MAX_PROJECTS,
+        help="Limit rows processed (None = all).",
+    )
+    parser.add_argument(
+        "--openai_key",
+        type=str,
+        default=None,
+        help="OpenAI API key (will override env var)",
+    )
+    hparser.add_verbosity_arg(parser)
+    return parser
+
+
+def _main(parser: argparse.ArgumentParser) -> None:
+    args = parser.parse_args()
+    hdbg.init_logger(verbosity=args.log_level, use_exec_path=True)
+    # Expand user/relative paths to absolute ones early to avoid surprises.
+    secret_path = str(pathlib.Path(args.secret_path).expanduser().resolve())
+    markdown_path = str(pathlib.Path(args.markdown_path).expanduser().resolve())
+    _LOG.info("Reading sheet %s", args.sheet_url)
+    sheet_df = _read_google_sheet(args.sheet_url, secret_path)
+    _LOG.info("Generating Markdown → %s", markdown_path)
+    create_markdown_file(
+        sheet_df,
+        markdown_path,
+        args.max_projects,
+    )
+    _LOG.info("Done: %s", markdown_path)
+
+
+if __name__ == "__main__":
+    _main(_parse())
diff --git a/DATA605/project_description.py b/tutorial_class_project_instructions/project_description.py
similarity index 100%
rename from DATA605/project_description.py
rename to tutorial_class_project_instructions/project_description.py
diff --git a/tutorial_class_project_instructions/test_generate_class_project_description.py b/tutorial_class_project_instructions/test_generate_class_project_description.py
new file mode 100644
index 0000000000..449b9f92d6
--- /dev/null
+++ b/tutorial_class_project_instructions/test_generate_class_project_description.py
@@ -0,0 +1,48 @@
+import pandas as pd
+from unittest import mock
+import DATA605.project_description as projdesc
+import helpers.hunit_test as hunitest
+import pytest
+
+class TestProjectDescriptionWithCache(hunitest.TestCase):
+
+    @pytest.fixture(autouse=True)
+    def setup_teardown(self):
+        # Can initialize dummy cache or files here
+        yield
+        # Avoid triggering _GLOBAL_CAPSYS in tearDown
+
+    def test_read_google_sheet(self) -> None:
+        if False:
+            #Set to False for testing purposes
+            url = "https://docs.google.com/fake-sheet-url"
+        secret_path = "/fake/path/to/secret.json"
+        mock_data = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
+
+        with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials"), \
+             mock.patch("helpers_root.helpers.hgoogle_drive_api.read_google_file", return_value=mock_data):
+            df = projdesc.read_google_sheet(url, secret_path)
+            self.assertIsInstance(df, pd.DataFrame)
+            self.assertEqual(df.shape[0], 1)
+
+    def test_generate_project_description(self) -> None:
+        tech = "Kafka"
+        difficulty = "2"
+        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
+
+        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output):
+            desc = projdesc.generate_project_description(tech, difficulty)
+            self.assertIn("Kafka", desc)
+            self.assertIn("Difficulty", desc)
+
+    def test_create_markdown_file(self) -> None:
+        df = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
+        markdown_path = "/tmp/test_projects.md"
+        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
+
+        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output), \
+             mock.patch("helpers_root.helpers.hio.to_file") as mock_to_file:
+            projdesc.create_markdown_file(df, markdown_path, max_projects=1, sleep_sec=0)
+            mock_to_file.assert_called_once()
+            written_content = mock_to_file.call_args[0][1]
+            self.assertIn("Kafka", written_content)

From c9b6ae736f3afee3d6190b1cd9be1a62ea96de6e Mon Sep 17 00:00:00 2001
From: Aayush <aver23@umd.edu>
Date: Mon, 16 Jun 2025 22:45:50 -0500
Subject: [PATCH 11/11] TutorTask554: Removing files from wrong location and
 adding them to tutorials/class project instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 DATA605/test_project_description.py           |  46 ----
 .../project_description.py                    | 208 ------------------
 ...test_generate_class_project_description.py |  12 +-
 3 files changed, 6 insertions(+), 260 deletions(-)
 delete mode 100644 DATA605/test_project_description.py
 delete mode 100644 tutorial_class_project_instructions/project_description.py

diff --git a/DATA605/test_project_description.py b/DATA605/test_project_description.py
deleted file mode 100644
index 1699618d60..0000000000
--- a/DATA605/test_project_description.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import pandas as pd
-from unittest import mock
-import DATA605.project_description as projdesc
-import helpers.hunit_test as hunitest
-import pytest
-
-class TestProjectDescriptionWithCache(hunitest.TestCase):
-
-    @pytest.fixture(autouse=True)
-    def setup_teardown(self):
-        # Can initialize dummy cache or files here
-        yield
-        # Avoid triggering _GLOBAL_CAPSYS in tearDown
-
-    def test_read_google_sheet(self) -> None:
-        url = "https://docs.google.com/fake-sheet-url"
-        secret_path = "/fake/path/to/secret.json"
-        mock_data = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
-
-        with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials"), \
-             mock.patch("helpers_root.helpers.hgoogle_drive_api.read_google_file", return_value=mock_data):
-            df = projdesc.read_google_sheet(url, secret_path)
-            self.assertIsInstance(df, pd.DataFrame)
-            self.assertEqual(df.shape[0], 1)
-
-    def test_generate_project_description(self) -> None:
-        tech = "Kafka"
-        difficulty = "2"
-        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
-
-        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output):
-            desc = projdesc.generate_project_description(tech, difficulty)
-            self.assertIn("Kafka", desc)
-            self.assertIn("Difficulty", desc)
-
-    def test_create_markdown_file(self) -> None:
-        df = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
-        markdown_path = "/tmp/test_projects.md"
-        mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
-
-        with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output), \
-             mock.patch("helpers_root.helpers.hio.to_file") as mock_to_file:
-            projdesc.create_markdown_file(df, markdown_path, max_projects=1, sleep_sec=0)
-            mock_to_file.assert_called_once()
-            written_content = mock_to_file.call_args[0][1]
-            self.assertIn("Kafka", written_content)
diff --git a/tutorial_class_project_instructions/project_description.py b/tutorial_class_project_instructions/project_description.py
deleted file mode 100644
index ca5fdedbf5..0000000000
--- a/tutorial_class_project_instructions/project_description.py
+++ /dev/null
@@ -1,208 +0,0 @@
-#!/usr/bin/env python
-"""
-Generate project descriptions from a Google Sheet and save them to a Markdown
-file.
-
-> project_description.py \
-    --sheet_url "https://docs.google.com/spreadsheets/d/1abc...gid=0" \
-    --markdown_path ./projects/MSML610_Projects.md \
-    --max_projects 3 \
-    -v INFO
-
-Import as:
-
-import DATA605.project_description as dprodesc
-"""
-
-import argparse
-import logging
-import pathlib
-import time
-from typing import Any, Optional
-
-import pandas as pd
-
-import helpers_root.helpers.hdbg as hdbg
-import helpers_root.helpers.hgoogle_drive_api as hgofiapi
-import helpers_root.helpers.hio as hio
-import helpers_root.helpers.hopenai as hopenai
-import helpers_root.helpers.hparser as hparser
-
-_LOG = logging.getLogger(__name__)
-
-# Set Constants.
-DEFAULT_SHEET_URL = "https://docs.google.com/spreadsheets/d/1Ez5uRvOgvDMkFc9c6mI21kscTKnpiCSh4UkUh_ifLIw/edit?gid=0#gid=0"
-# GLOBAL_PROMPT = """
-# You are a college professor of Data Science.
-# I will give you a topic XYZ for a class project.
-# Your task is to generate a short, structured project brief for college students focused on implementing a big data system in Python, using the technology XYZ.
-
-# Requirements:
-#     The project must involve real-time ingestion and processing of Bitcoin data.
-#     Emphasize how XYZ helps in this context.
-#     The response should be concise and in bullet points only.
-#     Avoid long descriptions or step-by-step guides.
-#     The project must include a time series analysis component.
-# The complexity of the project should range from 1, where 1 is easy (it should take around 7 days) to develop, 2 is medium difficulty (it should take around 10 days to complete), 3 is hard (it should take 14 days to complete).
-
-# The output should follow the template below
-# Title:
-# Difficulty: (1=easy, 3=difficult)
-# Description
-# Describe technology
-# Describe the project
-# Useful resources
-# Is it free?
-# Python libraries / bindings
-# """
-GLOBAL_PROMPT='''Act as a data science professor. I will give you a tool (XYZ) and difficulty level (1–3). Write a short bullet-point project brief on how XYZ can be used for real-time Bitcoin data ingestion in Python. Include:
-
-- Title
-- Difficulty (1 means easy, should take around 7 days to develop, 2 is medium difficulty, should take around 10 days to complete, 3 is hard,should take 14 days to complete)
-- Tech Description
-- Project Idea
-- Python libs
-- Is it Free?
-- Relevant tool(XYZ) related Resource Links
-
-Avoid long texts or steps
-'''
-EXAMPLE = """Example:
-Title: Ingest bitcoin prices using AWS Glue (AWS Glue is technology XYZ)
-Difficulty: 1
-Description
-AWS Glue is a fully managed extract, transform, and load (ETL) service...
-Useful resources: AWS Glue Docs
-Is it free?: Free tier available with limits
-Python libraries: boto3, PySpark
-"""
-DEFAULT_MARKDOWN_PATH = "./projects/MSML610_Projects.md"
-# The maximum number of projects.
-# Set the value to None to disable the limit.
-DEFAULT_MAX_PROJECTS = None
-
-
-def read_google_sheet(url: str, secret_path: str) -> pd.DataFrame:
-    """
-    Read the Google Sheet and return the data as a pandas DataFrame.
-
-    :param url: the URL of the Google Sheet to read
-    :param secret_path: path to google_secret.json
-    :return: the data
-    """
-    _LOG.info(f"Reading Google Sheet: {url}")
-    _LOG.info(f"Using credentials from: {secret_path}")
-    credentials = hgofiapi.get_credentials(service_key_path=secret_path)
-    df = hgofiapi.read_google_file(url, credentials=credentials)
-    return df
-
-
-def generate_project_description(project_name: str, difficulty: str) -> Any:
-    """
-    Generate a project description.
-
-    :param project_name: the name of the project
-    :param difficulty: the difficulty level of the project
-    :return: the project description
-    """
-    # Generate the project description.
-    # prompt = f"Generate a project description for '{project_name}' with difficulty level '{difficulty}'."
-    # prompt = PROMPT_DOC_URL.strip()+ "\n\n"+ EXAMPLE.strip()+ f"\n\nTechnology: {project_name}\nDifficulty: {difficulty}"
-    # description = hopenai.get_completion(prompt, model="gpt-4o-mini")
-    # return description
-    prompt = f"Technology: {project_name}\nDifficulty: {difficulty}"
-    project_desc = hopenai.get_completion(
-        prompt,
-        system_prompt=GLOBAL_PROMPT,
-        model="gpt-4o-mini",
-        cache_mode="FALLBACK",
-        temperature=0.3,
-        max_tokens=400,
-        print_cost=True,
-    )
-    return project_desc
-
-
-def create_markdown_file(
-    df: pd.DataFrame,
-    markdown_path: str,
-    max_projects: Optional[int],
-    *,
-    sleep_sec: float = 1.5,
-) -> None:
-    """
-    Create a markdown file with the project descriptions using helpers.hio.
-
-    :param df: the dataframe containing the project descriptions
-    :param markdown_path: the path to the markdown file
-    :param max_projects: limit to the rows processed
-    :param sleep_sec: amount of time to sleep between rows
-    """
-    content = "# MSML610 Projects\n\n"
-    # Generate the project descriptions.
-    # Limit the number of projects.
-    rows = df.head(max_projects) if max_projects is not None else df
-    for _, row in rows.iterrows():
-        project_name = row["Tool"]
-        difficulty = row["Difficulty"]
-        description = generate_project_description(project_name, difficulty)
-        # Add the project description to the markdown file.
-        content += f"## {project_name}\n"
-        content += f"{description}\n\n"
-        time.sleep(sleep_sec)
-    # Write the markdown file.
-    hio.to_file(markdown_path, content)
-
-
-def _parse() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument(
-        "--sheet_url", default=DEFAULT_SHEET_URL, help="Google Sheet URL"
-    )
-    parser.add_argument(
-        "--secret_path",
-        default="/app/DATA605/google_secret.json",
-        help="Path to Google service‑account JSON.",
-    )
-    parser.add_argument(
-        "--markdown_path",
-        default=DEFAULT_MARKDOWN_PATH,
-        help="Output Markdown file",
-    )
-    parser.add_argument(
-        "--max_projects",
-        type=int,
-        default=DEFAULT_MAX_PROJECTS,
-        help="Limit rows processed (None = all).",
-    )
-    parser.add_argument(
-    "--openai_key",
-    type=str,
-    default=None,
-    help="OpenAI API key (will override env var)",
-)
-    hparser.add_verbosity_arg(parser)  # adds -v / --log_level
-    return parser
-
-
-def _main(parser: argparse.ArgumentParser) -> None:
-    args = parser.parse_args()
-    hdbg.init_logger(verbosity=args.log_level, use_exec_path=True)
-    # Expand user/relative paths to absolute ones early to avoid surprises.
-    secret_path = str(pathlib.Path(args.secret_path).expanduser().resolve())
-    markdown_path = str(pathlib.Path(args.markdown_path).expanduser().resolve())
-    _LOG.info("Reading sheet %s", args.sheet_url)
-    sheet_df = read_google_sheet(args.sheet_url, secret_path)
-    _LOG.info("Generating Markdown → %s", markdown_path)
-    create_markdown_file(
-        sheet_df,
-        markdown_path,
-        args.max_projects,
-    )
-    _LOG.info("Done: %s", markdown_path)
-
-
-if __name__ == "__main__":
-    _main(_parse())
diff --git a/tutorial_class_project_instructions/test_generate_class_project_description.py b/tutorial_class_project_instructions/test_generate_class_project_description.py
index 449b9f92d6..a9ec99ba3d 100644
--- a/tutorial_class_project_instructions/test_generate_class_project_description.py
+++ b/tutorial_class_project_instructions/test_generate_class_project_description.py
@@ -1,6 +1,6 @@
 import pandas as pd
 from unittest import mock
-import DATA605.project_description as projdesc
+import tutorial_class_project_instructions.generate_class_project_description as projdesc
 import helpers.hunit_test as hunitest
 import pytest
 
@@ -13,25 +13,25 @@ def setup_teardown(self):
         # Avoid triggering _GLOBAL_CAPSYS in tearDown
 
     def test_read_google_sheet(self) -> None:
-        if False:
-            #Set to False for testing purposes
+        if True:
+            #Set to True for testing purposes
             url = "https://docs.google.com/fake-sheet-url"
         secret_path = "/fake/path/to/secret.json"
         mock_data = pd.DataFrame({"Tool": ["Kafka"], "Difficulty": ["2"]})
 
         with mock.patch("helpers_root.helpers.hgoogle_drive_api.get_credentials"), \
              mock.patch("helpers_root.helpers.hgoogle_drive_api.read_google_file", return_value=mock_data):
-            df = projdesc.read_google_sheet(url, secret_path)
+            df = projdesc._read_google_sheet(url, secret_path)
             self.assertIsInstance(df, pd.DataFrame)
             self.assertEqual(df.shape[0], 1)
 
-    def test_generate_project_description(self) -> None:
+    def test_generate_class_project_description(self) -> None:
         tech = "Kafka"
         difficulty = "2"
         mock_output = "Title: Kafka Project\nDifficulty: 2\n..."
 
         with mock.patch("helpers_root.helpers.hopenai.get_completion", return_value=mock_output):
-            desc = projdesc.generate_project_description(tech, difficulty)
+            desc = projdesc._generate_project_description(tech, difficulty)
             self.assertIn("Kafka", desc)
             self.assertIn("Difficulty", desc)