From 9f7c86adb59479771c007623b62aee8b04a16c42 Mon Sep 17 00:00:00 2001 From: mikhail12310 <167066766+mikhail12310@users.noreply.github.com> Date: Sat, 17 May 2025 21:06:16 -0400 Subject: [PATCH 1/3] Analyzing and Forecasting Bitcoin data using griptape --- .../Dockerfile | 46 ++++ .../bashrc | 1 + .../docker_bash.sh | 22 ++ .../docker_build.sh | 4 + .../docker_clean.sh | 11 + .../docker_exec.sh | 5 + .../docker_name.sh | 11 + .../docker_push.sh | 8 + .../docker_start.sh | 6 + .../etc_sudoers | 1 + .../install_jupyter_extensions.sh | 1 + .../pipeline/analysis.py | 117 +++++++++++ .../pipeline/analysis_visualisation.ipynb | 67 ++++++ .../pipeline/checkpoint.yaml | 1 + .../pipeline/config.yaml | 4 + .../pipeline/main.py | 31 +++ .../pipeline/main_orchestation.ipynb | 103 +++++++++ .../pipeline/utils.py | 103 +++++++++ .../run_jupyter.sh | 9 + .../version.sh | 31 +++ .../warehouse/bitcoin_prices.csv | 196 ++++++++++++++++++ 21 files changed, 778 insertions(+) create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/Dockerfile create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/bashrc create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_bash.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_build.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_clean.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_exec.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_name.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_push.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_start.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/etc_sudoers create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/install_jupyter_extensions.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis.py create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis_visualisation.ipynb create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/checkpoint.yaml create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main.py create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/utils.py create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/run_jupyter.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/version.sh create mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/warehouse/bitcoin_prices.csv diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/Dockerfile b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/Dockerfile new file mode 100644 index 0000000000..152fa74229 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/Dockerfile @@ -0,0 +1,46 @@ +# Base Python image +FROM python:3.10-slim + +# Set up environment +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + libatlas-base-dev \ + libglib2.0-0 \ + libgomp1 \ + liblapack-dev \ + libpq-dev \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Install Python packages +RUN pip install --upgrade pip && pip install \ + jupyter \ + pandas \ + numpy==1.23.5 \ + matplotlib \ + seaborn \ + requests \ + openai \ + griptape \ + scikit-learn \ + statsmodels \ + prophet \ + plotly + +# Set working directory +WORKDIR /workspace/griptape + +# Copy local code +COPY . /workspace/griptape + +# Expose ports (Jupyter + optional API-related ports) +EXPOSE 8888 + +# Default command +CMD ["./docker_start.sh"] diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/bashrc b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/bashrc new file mode 100644 index 0000000000..6fbd519b9b --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/bashrc @@ -0,0 +1 @@ +../../../docker_common/bashrc \ No newline at end of file diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_bash.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_bash.sh new file mode 100644 index 0000000000..7a1481cae9 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_bash.sh @@ -0,0 +1,22 @@ +#!/bin/bash +source ./docker_name.sh + +FULL_IMAGE_NAME=${FULL_IMAGE_NAME:-$IMAGE_NAME} +CONTAINER_PATH="/workspace" + + +echo "🔄 Running container from image: $FULL_IMAGE_NAME" +echo "📦 Container name: $CONTAINER_NAME" +echo "📁 Mounting local path: $HOST_PATH → $CONTAINER_PATH" + +echo "docker run --rm -ti \ + --name \"$CONTAINER_NAME\" \ + -p 8888:8888 \ + -v $(pwd):/workspace/griptape \ + $FULL_IMAGE_NAME" + +docker run --rm -ti \ + --name "$CONTAINER_NAME" \ + -p 8888:8888 \ + -v $(pwd):/workspace/griptape \ + $FULL_IMAGE_NAME diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_build.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_build.sh new file mode 100644 index 0000000000..81a9868a77 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_build.sh @@ -0,0 +1,4 @@ +source ./docker_name.sh + +echo "Building Docker image: $IMAGE_NAME" +docker build -t $IMAGE_NAME . diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_clean.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_clean.sh new file mode 100644 index 0000000000..37e461b40b --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_clean.sh @@ -0,0 +1,11 @@ +#!/bin/bash +source ./docker_name.sh + +echo "Stopping container (if running)..." +docker stop $CONTAINER_NAME 2>/dev/null || true + +echo "Removing container..." +docker rm $CONTAINER_NAME 2>/dev/null || true + +echo "Removing image..." +docker rmi $IMAGE_NAME 2>/dev/null || true diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_exec.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_exec.sh new file mode 100644 index 0000000000..198e27514d --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_exec.sh @@ -0,0 +1,5 @@ +#!/bin/bash +source ./docker_name.sh + +echo "Executing into container: $CONTAINER_NAME" +docker exec -it $CONTAINER_NAME bash diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_name.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_name.sh new file mode 100644 index 0000000000..cb583a9226 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_name.sh @@ -0,0 +1,11 @@ +# Set project root +GIT_ROOT=$(git rev-parse --show-toplevel) + +# Optional: Load shared utils if you have them +if [ -f "$GIT_ROOT/docker_common/utils.sh" ]; then + source "$GIT_ROOT/docker_common/utils.sh" +fi + +# Define Docker image and container names for reuse +export IMAGE_NAME="griptape-bitcoin-project" +export CONTAINER_NAME="griptape-bitcoin-container" diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_push.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_push.sh new file mode 100644 index 0000000000..1c96a2d77e --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_push.sh @@ -0,0 +1,8 @@ +#!/bin/bash +source ./docker_name.sh + +DOCKERHUB_USER=your_dockerhub_username_here + +echo "Tagging and pushing image to DockerHub: $DOCKERHUB_USER/$IMAGE_NAME" +docker tag $IMAGE_NAME $DOCKERHUB_USER/$IMAGE_NAME +docker push $DOCKERHUB_USER/$IMAGE_NAME diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_start.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_start.sh new file mode 100644 index 0000000000..6babe57f0f --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/docker_start.sh @@ -0,0 +1,6 @@ +#!/bin/bash +echo "🚀 Running main.py to update data..." +python pipeline/main.py + +echo "✅ Data update complete. Launching Jupyter Notebook..." +jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password='' diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/etc_sudoers b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/etc_sudoers new file mode 100644 index 0000000000..37cf95d1d0 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/etc_sudoers @@ -0,0 +1 @@ +../../../docker_common/etc_sudoers \ No newline at end of file diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/install_jupyter_extensions.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/install_jupyter_extensions.sh new file mode 100644 index 0000000000..6fbea6f280 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/install_jupyter_extensions.sh @@ -0,0 +1 @@ +../../../docker_common/install_jupyter_extensions.sh \ No newline at end of file diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis.py b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis.py new file mode 100644 index 0000000000..cdce1e6933 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis.py @@ -0,0 +1,117 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from statsmodels.tsa.stattools import adfuller +from statsmodels.tsa.seasonal import seasonal_decompose +from prophet import Prophet +from typing import Tuple, Dict + +class BitcoinAnalysisTool: + def load_and_clean_data(self, csv_path: str) -> pd.DataFrame: + df = pd.read_csv(csv_path) + df.columns = [col.strip().lower() for col in df.columns] + if 'price_usd' in df.columns: + df = df.drop(columns=['price_usd']) + df['date'] = pd.to_datetime(df['date']) + df.set_index('date', inplace=True) + df = df.sort_index() + return df + + def plot_price_series(self, df: pd.DataFrame) -> None: + plt.figure(figsize=(12, 5)) + plt.plot(df, label='Price') + plt.title('Bitcoin Price Over Time') + plt.xlabel('Date') + plt.ylabel('Price') + plt.legend() + plt.grid(True) + plt.show() + + def plot_rolling_stats(self, df: pd.DataFrame, window: int = 30) -> None: + rolling_mean = df.rolling(window=window).mean() + rolling_std = df.rolling(window=window).std() + plt.figure(figsize=(12, 5)) + plt.plot(df, label='Price') + plt.plot(rolling_mean, label=f'Rolling Mean ({window}d)') + plt.plot(rolling_std, label=f'Rolling Std ({window}d)') + plt.title('Rolling Statistics') + plt.legend() + plt.grid(True) + plt.show() + + def decompose_series(self, df: pd.DataFrame, period: int = 30) -> None: + decomp = seasonal_decompose(df, model='additive', period=period) + decomp.plot() + plt.show() + + def adf_test(self, df: pd.DataFrame) -> Dict[str, float]: + series = df.dropna().iloc[:, 0] + result = adfuller(series) + return { + "adf_statistic": result[0], + "p_value": result[1], + "is_stationary": result[1] < 0.05 + } + + def run_prophet_forecast(self, df: pd.DataFrame, periods: int = 30) -> Tuple[pd.DataFrame, pd.DataFrame]: + df_prophet = df.reset_index()[['date', 'price']].rename(columns={'date': 'ds', 'price': 'y'}) + model = Prophet() + model.fit(df_prophet) + future = model.make_future_dataframe(periods=periods) + forecast = model.predict(future) + return df_prophet, forecast + + def plot_prophet_forecast(self, df_prophet: pd.DataFrame, forecast: pd.DataFrame) -> None: + df_prophet['ds'] = pd.to_datetime(df_prophet['ds']) + forecast['ds'] = pd.to_datetime(forecast['ds']) + plt.figure(figsize=(12, 5)) + plt.plot(df_prophet['ds'], df_prophet['y'], label='Actual') + plt.plot(forecast['ds'], forecast['yhat'], label='Prophet Forecast') + plt.title('Prophet Forecast vs Actual') + plt.xlabel('Date') + plt.ylabel('Price') + plt.legend() + plt.grid(True) + plt.show() + + def generate_data_summary_report(self, df: pd.DataFrame) -> str: + start_date = df.index.min().strftime("%Y-%m-%d") + end_date = df.index.max().strftime("%Y-%m-%d") + stats = df.describe().T + summary = ( + f"📊 Bitcoin Price Data Summary\n" + f"- Rows: {len(df)}\n" + f"- Date Range: {start_date} to {end_date}\n" + f"- Mean Price: {stats['mean'].values[0]:,.2f}\n" + f"- Std Dev: {stats['std'].values[0]:,.2f}\n" + f"- Min Price: {stats['min'].values[0]:,.2f}\n" + f"- Max Price: {stats['max'].values[0]:,.2f}\n" + ) + return summary + + def generate_stationarity_report(self, df: pd.DataFrame) -> str: + result = self.adf_test(df) + report = ( + f"📈 Stationarity Test (ADF)\n" + f"- ADF Statistic: {result['adf_statistic']:.4f}\n" + f"- p-value: {result['p_value']:.4f}\n" + f"- Conclusion: {'✅ Series is stationary.' if result['is_stationary'] else '❌ Series is not stationary.'}" + ) + return report + + def generate_forecast_report(self, forecast: pd.DataFrame, periods: int = 30) -> str: + forecast_tail = forecast.tail(periods) + min_forecast = forecast_tail["yhat"].min() + max_forecast = forecast_tail["yhat"].max() + mean_forecast = forecast_tail["yhat"].mean() + + report = ( + f"📅 Prophet Forecast Summary (Next {periods} Days)\n" + f"- Forecast Range: {forecast_tail['ds'].min().date()} to {forecast_tail['ds'].max().date()}\n" + f"- Min Forecast Price: {min_forecast:,.2f}\n" + f"- Max Forecast Price: {max_forecast:,.2f}\n" + f"- Avg Forecast Price: {mean_forecast:,.2f}" + ) + return report + + diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis_visualisation.ipynb b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis_visualisation.ipynb new file mode 100644 index 0000000000..d7e1cd8677 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/analysis_visualisation.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5f25eb16", + "metadata": {}, + "outputs": [], + "source": [ + "from analysis import BitcoinAnalysisTool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1ab4535", + "metadata": {}, + "outputs": [], + "source": [ + "tool = BitcoinAnalysisTool()\n", + "df = tool.load_and_clean_data(\"/workspace/griptape/warehouse/bitcoin_prices.csv\")\n", + "tool.plot_price_series(df)\n", + "tool.plot_rolling_stats(df)\n", + "print(tool.adf_test(df))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "690f809d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "df_prophet, forecast = tool.run_prophet_forecast(df)\n", + "tool.plot_prophet_forecast(df_prophet, forecast)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a74783e6", + "metadata": {}, + "outputs": [], + "source": [ + "# 🔹 Generate and display the data summary report\n", + "data_summary = tool.generate_data_summary_report(df)\n", + "print(data_summary)\n", + "\n", + "# 🔹 Generate and display the stationarity report (ADF test)\n", + "stationarity_report = tool.generate_stationarity_report(df)\n", + "print(stationarity_report)\n", + "\n", + "# 🔹 Generate and display the forecast summary report\n", + "forecast_report = tool.generate_forecast_report(forecast)\n", + "print(forecast_report)\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/checkpoint.yaml b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/checkpoint.yaml new file mode 100644 index 0000000000..c6d7b70f77 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/checkpoint.yaml @@ -0,0 +1 @@ +last_updated: '2025-04-30' diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml new file mode 100644 index 0000000000..146d834320 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml @@ -0,0 +1,4 @@ +api_key: "CG-oF6aKFTXhvY2NZcMGQ9QP5fV" +data_csv_path: /workspace/griptape/warehouse/bitcoin_prices.csv +checkpoint_path: pipeline/checkpoint.yaml +default_start_date: 2024-11-01 diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main.py b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main.py new file mode 100644 index 0000000000..0a2d6e1d65 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main.py @@ -0,0 +1,31 @@ +# main.py +import yaml +from utils import BitcoinTool +from analysis import BitcoinAnalysisTool # <- contains reporting functions + +def main(): + # Load config + with open("pipeline/config.yaml", "r") as f: + config = yaml.safe_load(f) + + # Update data pipeline + tool = BitcoinTool(config=config) + print(tool.run({})) # This performs data update + + # Load the updated data + analysis = BitcoinAnalysisTool() + df = analysis.load_and_clean_data("warehouse/bitcoin_prices.csv") # replace with actual path key + + # Run forecast + df_prophet, forecast = analysis.run_prophet_forecast(df) + + # Generate reports + data_report = analysis.generate_data_summary_report(df) + forecast_report = analysis.generate_forecast_report(forecast) + + # Output reports + print(data_report) + print(forecast_report) + +if __name__ == "__main__": + main() diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb new file mode 100644 index 0000000000..c6416151d1 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "de1d2aa8-b0cc-446d-8bde-36168be3da8a", + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Imports\n", + "from utils import BitcoinTool\n", + "from griptape.structures import Agent\n", + "from griptape.tasks import ToolTask\n", + "import yaml\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6966c73f-46a1-4612-89dc-91d94492df7c", + "metadata": {}, + "outputs": [], + "source": [ + "config=[]\n", + "with open(\"config.yaml\", \"r\") as f:\n", + " config = yaml.safe_load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3784ec94-0365-4c3c-b264-1f4bc0fbfa19", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'api_key': 'CG-oF6aKFTXhvY2NZcMGQ9QP5fV', 'data_csv_path': '/workspace/griptape/warehouse/bitcoin_prices.csv', 'checkpoint_path': 'checkpoint.yaml', 'default_start_date': datetime.date(2024, 11, 1)}\n" + ] + } + ], + "source": [ + "print(config)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "01be6f25-924a-4f23-a6f5-b63252414034", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Last update: 2025-05-01 | type: \n", + "Start date: 2025-05-02 00:00:00+00:00 | type: \n", + "End date: 2025-05-14 18:52:25.447803+00:00 | type: \n", + "Fetching data from 2025-05-02 00:00:00+00:00 → 1746144000\n", + "To 2025-05-14 18:52:25.447803+00:00 → 1747248745\n", + "✅ Pipeline complete. Data updated to 2025-05-14.\n" + ] + } + ], + "source": [ + "tool = BitcoinTool(config=config)\n", + "print(tool.run({})) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3556e400-2459-4fba-9773-c1f9e2f4c0f6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/utils.py b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/utils.py new file mode 100644 index 0000000000..6e39415a9d --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/utils.py @@ -0,0 +1,103 @@ +import os +import yaml +import pandas as pd +import requests +from datetime import datetime, timedelta, timezone +from griptape.tools import BaseTool +from attr import define, field +from typing import Any + + +@define +class BitcoinTool(BaseTool): + config: dict = field() + + def read_checkpoint(self, path: str) -> str: + if os.path.exists(path): + with open(path, "r") as f: + return yaml.safe_load(f).get("last_updated") + return None + + def update_checkpoint(self, path: str, date_str: str): + with open(path, "w") as f: + yaml.dump({"last_updated": date_str}, f) + + def fetch_btc_data(self, start_date: datetime, end_date: datetime, api_key: str) -> pd.DataFrame: + # Ensure both datetimes are timezone-aware UTC + if start_date.tzinfo is None: + start_date = start_date.replace(tzinfo=timezone.utc) + if end_date.tzinfo is None: + end_date = end_date.replace(tzinfo=timezone.utc) + + start_timestamp = int(start_date.timestamp()) + end_timestamp = int(end_date.timestamp()) + + print(f"Fetching data from {start_date} → {start_timestamp}") + print(f"To {end_date} → {end_timestamp}") + + url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range" + headers = { + "accept": "application/json", + "x-cg-demo-api-key": api_key # or "x-cg-pro-api-key" if using pro + } + + response = requests.get(url, params={ + "vs_currency": "usd", + "from": start_timestamp, + "to": end_timestamp + }, headers=headers) + + response.raise_for_status() + data = response.json().get("prices", []) + + # Create DataFrame and keep timestamp as normalized pd.Timestamp + df = pd.DataFrame(data, columns=["timestamp_ms", "price_usd"]) + df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms") + df["date"] = df["timestamp"].dt.normalize() # Keep as Timestamp, strip time + df = df[["date", "price_usd"]] + return df + + def run(self, _: dict) -> str: + csv_path = self.config["data_csv_path"] + checkpoint_path = self.config["checkpoint_path"] + default_start_date = self.config["default_start_date"] + api_key = self.config["api_key"] + + # Read checkpoint or default start + last_updated = self.read_checkpoint(checkpoint_path) + if last_updated: + start_date = pd.Timestamp(last_updated, tz=timezone.utc) + timedelta(days=1) + else: + start_date = pd.Timestamp(default_start_date, tz=timezone.utc) + + # End date = now - 24 hours to ensure up-to-date but not future + end_date = pd.to_datetime(datetime.now(timezone.utc) - timedelta(hours=24)) + + # Debug + print("Last update:", last_updated, "| type:", type(last_updated)) + print("Start date:", start_date, "| type:", type(start_date)) + print("End date:", end_date, "| type:", type(end_date)) + + # Stop if no data to fetch + if start_date > end_date: + return "No new data to fetch." + + # Fetch new BTC data + new_df = self.fetch_btc_data(start_date, end_date, api_key) + + # Append to CSV + if os.path.exists(csv_path): + existing_df = pd.read_csv(csv_path, parse_dates=["date"]) + combined_df = pd.concat([existing_df, new_df]) + else: + combined_df = new_df + + # Drop duplicates and sort by date (safe now) + combined_df = combined_df.drop_duplicates(subset="date").sort_values("date") + combined_df.to_csv(csv_path, index=False) + + # Update checkpoint + last_date = combined_df["date"].max().strftime("%Y-%m-%d") + self.update_checkpoint(checkpoint_path, last_date) + + return f"✅ Pipeline complete. Data updated to {last_date}." diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/run_jupyter.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/run_jupyter.sh new file mode 100644 index 0000000000..db73c9bafe --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/run_jupyter.sh @@ -0,0 +1,9 @@ +#!/bin/bash -xe + +jupyter-notebook \ + --port=8888 \ + --no-browser \ + --ip=0.0.0.0 \ + --allow-root \ + --NotebookApp.token='' \ + --NotebookApp.password='' diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/version.sh b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/version.sh new file mode 100644 index 0000000000..5454baf275 --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/version.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +echo "=== Python Version ===" +python3 --version + +echo -e "\n=== Pip Version ===" +pip3 --version + +echo -e "\n=== Jupyter Version ===" +jupyter --version + +echo -e "\n=== PyArrow Version ===" +python3 -c "import pyarrow as pa; print('PyArrow:', pa.__version__)" + +echo -e "\n=== Pandas Version ===" +python3 -c "import pandas as pd; print('Pandas:', pd.__version__)" + +echo -e "\n=== Requests Version ===" +python3 -c "import requests; print('Requests:', requests.__version__)" + +echo -e "\n=== Matplotlib Version ===" +python3 -c "import matplotlib; print('Matplotlib:', matplotlib.__version__)" + +echo -e "\n=== Seaborn Version ===" +python3 -c "import seaborn as sns; print('Seaborn:', sns.__version__)" + +echo -e "\n=== Scikit-learn Version ===" +python3 -c "import sklearn; print('Scikit-learn:', sklearn.__version__)" + +echo -e "\n=== Griptape Version ===" +python3 -c "import griptape; print('Griptape:', griptape.__version__)" diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/warehouse/bitcoin_prices.csv b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/warehouse/bitcoin_prices.csv new file mode 100644 index 0000000000..39714740cb --- /dev/null +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/warehouse/bitcoin_prices.csv @@ -0,0 +1,196 @@ +date,price,price_usd +2024-11-01,70264.99735573279, +2024-11-02,69507.92256395647, +2024-11-03,69299.16036699413, +2024-11-04,68803.50626277467, +2024-11-05,67793.29782704762, +2024-11-06,69335.42553133758, +2024-11-07,75620.88606963192, +2024-11-08,75987.24178320667, +2024-11-09,76550.0213845611, +2024-11-10,76630.22631415377, +2024-11-11,80466.71654833542, +2024-11-12,88637.42000322911, +2024-11-13,88264.59974582895, +2024-11-14,90488.10007497849, +2024-11-15,87407.51273710128, +2024-11-16,90947.97509443705, +2024-11-17,90606.45489380708, +2024-11-18,89841.47194135105, +2024-11-19,90534.62459650292, +2024-11-20,92251.65240738312, +2024-11-21,94217.02229634204, +2024-11-22,98509.11859102432, +2024-11-23,98927.494945534, +2024-11-24,97679.46381643032, +2024-11-25,98015.9355289458, +2024-11-26,93004.70093103289, +2024-11-27,91931.83077291829, +2024-11-28,95981.18057210564, +2024-11-29,95661.59595817851, +2024-11-30,97453.2473451042, +2024-12-01,96513.14234698223, +2024-12-02,97311.70719084324, +2024-12-03,95833.1362300365, +2024-12-04,96031.63097759832, +2024-12-05,98881.4694561823, +2024-12-06,97201.5003640774, +2024-12-07,99973.85150659826, +2024-12-08,99781.82999198174, +2024-12-09,101235.3717025316, +2024-12-10,97353.94700820888, +2024-12-11,96649.71446771533, +2024-12-12,101123.61811033696, +2024-12-13,100000.80836535196, +2024-12-14,101352.22971292294, +2024-12-15,101367.01064553364, +2024-12-16,104721.50151808978, +2024-12-17,106074.10723541168, +2024-12-18,106034.913402645, +2024-12-19,100355.5761481501, +2024-12-20,97851.35377075804, +2024-12-21,97691.43431692652, +2024-12-22,97202.82496847631, +2024-12-23,95094.27394862172, +2024-12-24,94644.9108546432, +2024-12-25,98695.714007826, +2024-12-26,99344.95417367227, +2024-12-27,95678.31244565404, +2024-12-28,94331.94727053076, +2024-12-29,95184.61945288602, +2024-12-30,93663.44751964069, +2024-12-31,92627.28213113276, +2025-01-01,93507.85874741492, +2025-01-02,94384.1761153871, +2025-01-03,96852.14681235077, +2025-01-04,98084.34279280754, +2025-01-05,98256.73876849932, +2025-01-06,98364.58946599838, +2025-01-07,102229.39453189744, +2025-01-08,96952.09886774956, +2025-01-09,95016.71440989176, +2025-01-10,92376.275783461, +2025-01-11,94736.6267420969, +2025-01-12,94559.55167224212, +2025-01-13,94454.77089638809, +2025-01-14,94456.3493746993, +2025-01-15,96574.07768799194, +2025-01-16,100313.15239828422, +2025-01-17,100044.57926776377, +2025-01-18,104160.69358484232, +2025-01-19,104334.61575724394, +2025-01-20,101275.33714814208, +2025-01-21,101764.90860170544, +2025-01-22,106182.2368201815, +2025-01-23,103673.53509537788, +2025-01-24,104067.60991206949, +2025-01-25,104835.19253555956, +2025-01-26,104796.04077738234, +2025-01-27,102552.24874280208, +2025-01-28,101958.46953745594, +2025-01-29,101313.11264498268, +2025-01-30,103718.97939813643, +2025-01-31,104781.5135100103, +2025-02-01,102382.39409722164, +2025-02-02,100674.78762544956, +2025-02-03,97568.31653024876, +2025-02-04,101466.8606657039, +2025-02-05,98118.43921664466, +2025-02-06,96582.88682919052, +2025-02-07,96635.41875332213, +2025-02-08,96558.4563096667, +2025-02-09,96558.23762501092, +2025-02-10,96548.57865379583, +2025-02-11,97399.98161383196, +2025-02-12,95739.9773707924, +2025-02-13,97836.18856127484, +2025-02-14,96561.6639990985, +2025-02-15,97488.48148536457, +2025-02-16,97569.95169430463, +2025-02-17,96149.3484548204, +2025-02-18,95776.15723852724, +2025-02-19,95495.89153299289, +2025-02-20,96554.87156259285, +2025-02-21,98384.31738240314, +2025-02-22,96135.15541305284, +2025-02-23,96564.15414290236, +2025-02-24,96327.46309127836, +2025-02-25,91396.76686930266, +2025-02-26,88755.7693356979, +2025-02-27,83900.11496524839, +2025-02-28,84709.14477847965, +2025-03-01,84441.9012237131, +2025-03-02,86005.25629681018, +2025-03-03,94261.5328653844, +2025-03-04,86124.71418722175, +2025-03-05,87310.80531078295, +2025-03-06,90604.08098523636, +2025-03-07,90001.40087607042, +2025-03-08,86773.33597555371, +2025-03-09,86142.9833586432, +2025-03-10,80751.13893300897, +2025-03-11,78783.94057934263, +2025-03-12,82799.1080292053, +2025-03-13,83884.24578828987, +2025-03-14,81098.90052436228, +2025-03-15,83971.70916064628, +2025-03-16,84391.69087647168, +2025-03-17,82610.61750343916, +2025-03-18,84075.36559694471, +2025-03-19,82780.03048688271, +2025-03-20,86815.44109470697, +2025-03-21,84270.84358966233, +2025-03-22,84009.532917822, +2025-03-23,83793.30854192551, +2025-03-24,85787.70914901773, +2025-03-25,87327.72969669085, +2025-03-26,87520.58391530563, +2025-03-27,86960.8555491039, +2025-03-28,87227.27158032371, +2025-03-29,84359.46915506005, +2025-03-30,82679.17255385047, +2025-03-31,82356.38111183756, +2025-04-01,82514.09365397849, +2025-04-02,85237.5876052852, +2025-04-03,82526.4221526433, +2025-04-04,83163.98757387721, +2025-04-05,83852.00765399594, +2025-04-06,83595.88550177493, +2025-04-07,78211.48358199988, +2025-04-08,79179.29226757778, +2025-04-09,76329.090356324, +2025-04-10,82622.17029546302, +2025-04-11,79596.35732250768, +2025-04-12,83439.28818043756, +2025-04-13,85305.09728392666, +2025-04-14,83600.82010054543, +2025-04-15,84523.4524914945, +2025-04-16,83656.49248858042, +2025-04-17,84105.77942167096, +2025-04-18,84930.90857600998, +2025-04-19,84433.75017215389, +2025-04-20,85126.66244301842, +2025-04-21,85073.16544858096, +2025-04-22,87452.04699068287, +2025-04-23,93576.1658857398, +2025-04-24,93605.45230873208, +2025-04-25,93872.81422867288, +2025-04-26,94773.11200643536, +2025-04-27,94644.06637120109, +2025-04-28,93809.33781968225, +2025-04-29,95030.60645474444, +2025-04-30,94256.35946318062, +2025-05-01,94235.75331048168, +2025-05-02,96426.94522273028, +2025-05-03,96855.5681343944, +2025-05-04,95922.86842404422, +2025-05-05,94326.62048529249, +2025-05-06,94758.8237105546, +2025-05-07,96854.53198815876, +2025-05-08,97026.493769668, +2025-05-09,103076.27555512934, +2025-05-10,102962.540456923, +2025-05-11,104630.8792994166, +2025-05-12,103994.061616746, +2025-05-13,102876.8304286011, +2025-05-14,104184.49039270742, From 8a9a7c192c9d569bd97b93a0ef61b9b00e13fb0d Mon Sep 17 00:00:00 2001 From: mikhail12310 <167066766+mikhail12310@users.noreply.github.com> Date: Sun, 18 May 2025 14:06:15 -0400 Subject: [PATCH 2/3] removing print statements --- .../pipeline/main_orchestation.ipynb | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb index c6416151d1..44898ef9f8 100644 --- a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb +++ b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/main_orchestation.ipynb @@ -26,24 +26,6 @@ " config = yaml.safe_load(f)" ] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "3784ec94-0365-4c3c-b264-1f4bc0fbfa19", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'api_key': 'CG-oF6aKFTXhvY2NZcMGQ9QP5fV', 'data_csv_path': '/workspace/griptape/warehouse/bitcoin_prices.csv', 'checkpoint_path': 'checkpoint.yaml', 'default_start_date': datetime.date(2024, 11, 1)}\n" - ] - } - ], - "source": [ - "print(config)" - ] - }, { "cell_type": "code", "execution_count": 4, From 6b4ec1662c77d58f52f1b063bd83aa39b8b82e49 Mon Sep 17 00:00:00 2001 From: mikhail12310 <167066766+mikhail12310@users.noreply.github.com> Date: Sun, 18 May 2025 14:41:45 -0400 Subject: [PATCH 3/3] Removing configs --- .../pipeline/config.yaml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml diff --git a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml b/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml deleted file mode 100644 index 146d834320..0000000000 --- a/DATA605/Spring2025/projects/TutorTask180_Spring2025_Analyze_Real_time_Bitcoin_Data_with_Griptape/pipeline/config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -api_key: "CG-oF6aKFTXhvY2NZcMGQ9QP5fV" -data_csv_path: /workspace/griptape/warehouse/bitcoin_prices.csv -checkpoint_path: pipeline/checkpoint.yaml -default_start_date: 2024-11-01