docs: Add markdown explanations for key notebook cells

Satvik-Singh192 · Satvik-Singh192 · commit 6b8501d980c9 · 2025-11-08T09:34:44.000+05:30
Added brief markdown descriptions above major code blocks to improve readability.
diff --git a/ecg_preprocessing.ipynb b/ecg_preprocessing.ipynb
@@ -1,5 +1,14 @@
 {
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# ECG Signal Preprocessing\n",
+        "\n",
+        "This notebook performs preprocessing of ECG signals from the MIT-BIH Arrhythmia Database, including signal filtering, R-peak detection, and beat segmentation."
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 2,
@@ -56,6 +65,14 @@
         "!pip install wfdb"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Setup\n",
+        "Installing and importing required libraries for ECG signal processing"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 3,
@@ -199,6 +216,14 @@
         "print(f\"Download complete! Files saved to: {download_path}\")"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Download\n",
+        "Downloading the MIT-BIH Arrhythmia Database, a standard dataset for ECG analysis"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 6,
@@ -220,6 +245,14 @@
         "    return y"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Signal Filtering Functions\n",
+        "Implementing a bandpass filter (0.5-40 Hz) to remove noise from ECG signals"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 7,
@@ -242,9 +275,17 @@
         "        return 'Q'  # Other/Unknown"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Beat Classification\n",
+        "Function to group different beat annotations into main categories (Normal, Supraventricular, Ventricular, Fusion, Unknown)"
+      ]
+    },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -277,25 +318,20 @@
         "all_X = []\n",
         "all_y = []\n",
         "\n",
-        "# Get list of record files\n",
         "record_files = [f.split('.')[0] for f in os.listdir(download_path) if f.endswith('.dat')]\n",
         "print(f\"\\nFound {len(record_files)} records to process\")\n",
         "\n",
-        "# Process each record\n",
         "for rec_name in tqdm(sorted(record_files), desc=\"Processing records\"):\n",
         "    try:\n",
-        "        # Read record and annotations\n",
         "        record = wfdb.rdrecord(os.path.join(download_path, rec_name))\n",
         "        annotation = wfdb.rdann(os.path.join(download_path, rec_name), 'atr')\n",
         "\n",
-        "        # Get signal from first channel and apply filtering\n",
         "        signal = record.p_signal[:, 0]\n",
         "        filtered_signal = bandpass_filter(signal, fs=record.fs)\n",
         "\n",
         "        ann_samples = annotation.sample\n",
         "        ann_symbols = annotation.symbol\n",
         "\n",
-        "        # Extract segments around R-peaks\n",
         "        segments = []\n",
         "        labels = []\n",
         "\n",
@@ -324,6 +360,14 @@
         "        continue"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Processing\n",
+        "Processing ECG records: filtering signals, segmenting beats around R-peaks, and extracting features"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 9,
@@ -379,6 +423,14 @@
         "    print(f\"  {label}: {count} samples ({count/len(y_all)*100:.2f}%)\")\n"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Preparation\n",
+        "Combining processed data, adding channel dimension, and encoding labels for model training"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 10,
@@ -407,7 +459,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 16,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -449,7 +501,6 @@
         "        length: Number of samples to plot\n",
         "        channel: Channel index (0 or 1 for MIT-BIH)\n",
         "    \"\"\"\n",
-        "    # Load record\n",
         "    record = wfdb.rdrecord(os.path.join(download_path, str(record_name)))\n",
         "    annotation = wfdb.rdann(os.path.join(download_path, str(record_name)), 'atr')\n",
         "\n",
@@ -487,9 +538,17 @@
         "visualize_signal('200', start=5000, length=1800)  # Different record"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Visualization Functions\n",
+        "Helper function to visualize raw and filtered ECG signals with R-peak annotations"
+      ]
+    },
     {
       "cell_type": "code",
-      "execution_count": 18,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -520,8 +579,7 @@
       "source": [
         "plt.figure(figsize=(15, 8))\n",
         "for i, label in enumerate(le.classes_):\n",
-        "    idx = np.where(y_all == label)[0][0]  # Get first occurrence\n",
-        "    plt.subplot(len(le.classes_), 1, i+1)\n",
+        "    idx = np.where(y_all == label)[0][0]  \n",
         "    plt.plot(X_all[idx, :, 0])\n",
         "    plt.title(f'Beat Type: {label}')\n",
         "    plt.ylabel('Amplitude')\n",
@@ -533,6 +591,14 @@
         "plt.show()"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Beat Type Visualization\n",
+        "Plotting sample beats from each category to visualize differences between classes"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 19,
diff --git a/lstm_model.ipynb b/lstm_model.ipynb
@@ -1,5 +1,14 @@
 {
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# LSTM Model for ECG Arrhythmia Classification\n",
+        "\n",
+        "This notebook implements an LSTM-based deep learning model to classify ECG signals into different arrhythmia types."
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -9,6 +18,14 @@
         "#### Import Required Libraries"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Processing and Model Libraries\n",
+        "Importing required libraries for data manipulation, visualization, machine learning, and deep learning"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 1,
@@ -33,6 +50,14 @@
         "tf.random.set_seed(42)"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Loading\n",
+        "Loading preprocessed ECG data from NPZ file, containing features (X) and labels (y)"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 2,
@@ -65,6 +90,14 @@
         "print(f\"Classes: {label_names}\")\n"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Data Splitting\n",
+        "Splitting data into training, validation, and test sets using a stratified approach to maintain class distribution"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 3,
@@ -105,6 +138,14 @@
         "print(f\"Test set: {X_test.shape[0]} samples\")\n"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Model Architecture\n",
+        "Defining an LSTM-based neural network model with dropout layers for regularization"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": 4,
@@ -168,8 +209,7 @@
         "\n",
         "    return model\n",
         "\n",
-        "# Create model\n",
-        "input_shape = (X_train.shape[1], X_train.shape[2])  # (250, 1)\n",
+        "input_shape = (X_train.shape[1], X_train.shape[2])\n",
         "num_classes = y_train.shape[1]\n",
         "\n",
         "model = create_lstm_model(input_shape, num_classes)\n"
@@ -403,11 +443,18 @@
         "    validation_data=(X_val, y_val),\n",
         "    epochs=50,\n",
         "    batch_size=128,\n",
-        "    # callbacks=[early_stop, reduce_lr, checkpoint],\n",
         "    verbose=1\n",
         ")\n"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Model Training\n",
+        "Training the LSTM model with the Adam optimizer and monitoring multiple metrics"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -486,6 +533,14 @@
         "plt.show()\n"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Training Visualization\n",
+        "Plotting training metrics (accuracy, loss, precision) to evaluate model performance"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,