|
1 | 1 | { |
2 | 2 | "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# ECG Signal Preprocessing\n", |
| 8 | + "\n", |
| 9 | + "This notebook performs preprocessing of ECG signals from the MIT-BIH Arrhythmia Database, including signal filtering, R-peak detection, and beat segmentation." |
| 10 | + ] |
| 11 | + }, |
3 | 12 | { |
4 | 13 | "cell_type": "code", |
5 | 14 | "execution_count": 2, |
|
56 | 65 | "!pip install wfdb" |
57 | 66 | ] |
58 | 67 | }, |
| 68 | + { |
| 69 | + "cell_type": "markdown", |
| 70 | + "metadata": {}, |
| 71 | + "source": [ |
| 72 | + "### Setup\n", |
| 73 | + "Installing and importing required libraries for ECG signal processing" |
| 74 | + ] |
| 75 | + }, |
59 | 76 | { |
60 | 77 | "cell_type": "code", |
61 | 78 | "execution_count": 3, |
|
199 | 216 | "print(f\"Download complete! Files saved to: {download_path}\")" |
200 | 217 | ] |
201 | 218 | }, |
| 219 | + { |
| 220 | + "cell_type": "markdown", |
| 221 | + "metadata": {}, |
| 222 | + "source": [ |
| 223 | + "### Data Download\n", |
| 224 | + "Downloading the MIT-BIH Arrhythmia Database, a standard dataset for ECG analysis" |
| 225 | + ] |
| 226 | + }, |
202 | 227 | { |
203 | 228 | "cell_type": "code", |
204 | 229 | "execution_count": 6, |
|
220 | 245 | " return y" |
221 | 246 | ] |
222 | 247 | }, |
| 248 | + { |
| 249 | + "cell_type": "markdown", |
| 250 | + "metadata": {}, |
| 251 | + "source": [ |
| 252 | + "### Signal Filtering Functions\n", |
| 253 | + "Implementing a bandpass filter (0.5-40 Hz) to remove noise from ECG signals" |
| 254 | + ] |
| 255 | + }, |
223 | 256 | { |
224 | 257 | "cell_type": "code", |
225 | 258 | "execution_count": 7, |
|
242 | 275 | " return 'Q' # Other/Unknown" |
243 | 276 | ] |
244 | 277 | }, |
| 278 | + { |
| 279 | + "cell_type": "markdown", |
| 280 | + "metadata": {}, |
| 281 | + "source": [ |
| 282 | + "### Beat Classification\n", |
| 283 | + "Function to group individual beat annotation symbols into five main categories (Normal, Supraventricular, Ventricular, Fusion, Unknown)" |
| 284 | + ] |
| 285 | + }, |
245 | 286 | { |
246 | 287 | "cell_type": "code", |
247 | | - "execution_count": 8, |
| 288 | + "execution_count": null, |
248 | 289 | "metadata": { |
249 | 290 | "colab": { |
250 | 291 | "base_uri": "https://localhost:8080/" |
|
277 | 318 | "all_X = []\n", |
278 | 319 | "all_y = []\n", |
279 | 320 | "\n", |
280 | | - "# Get list of record files\n", |
281 | 321 | "record_files = [f.split('.')[0] for f in os.listdir(download_path) if f.endswith('.dat')]\n", |
282 | 322 | "print(f\"\\nFound {len(record_files)} records to process\")\n", |
283 | 323 | "\n", |
284 | | - "# Process each record\n", |
285 | 324 | "for rec_name in tqdm(sorted(record_files), desc=\"Processing records\"):\n", |
286 | 325 | " try:\n", |
287 | | - " # Read record and annotations\n", |
288 | 326 | " record = wfdb.rdrecord(os.path.join(download_path, rec_name))\n", |
289 | 327 | " annotation = wfdb.rdann(os.path.join(download_path, rec_name), 'atr')\n", |
290 | 328 | "\n", |
291 | | - " # Get signal from first channel and apply filtering\n", |
292 | 329 | " signal = record.p_signal[:, 0]\n", |
293 | 330 | " filtered_signal = bandpass_filter(signal, fs=record.fs)\n", |
294 | 331 | "\n", |
295 | 332 | " ann_samples = annotation.sample\n", |
296 | 333 | " ann_symbols = annotation.symbol\n", |
297 | 334 | "\n", |
298 | | - " # Extract segments around R-peaks\n", |
299 | 335 | " segments = []\n", |
300 | 336 | " labels = []\n", |
301 | 337 | "\n", |
|
324 | 360 | " continue" |
325 | 361 | ] |
326 | 362 | }, |
| 363 | + { |
| 364 | + "cell_type": "markdown", |
| 365 | + "metadata": {}, |
| 366 | + "source": [ |
| 367 | + "### Data Processing\n", |
| 368 | + "Processing ECG records: filtering signals, segmenting beats around R-peaks, and extracting features" |
| 369 | + ] |
| 370 | + }, |
327 | 371 | { |
328 | 372 | "cell_type": "code", |
329 | 373 | "execution_count": 9, |
|
379 | 423 | " print(f\" {label}: {count} samples ({count/len(y_all)*100:.2f}%)\")\n" |
380 | 424 | ] |
381 | 425 | }, |
| 426 | + { |
| 427 | + "cell_type": "markdown", |
| 428 | + "metadata": {}, |
| 429 | + "source": [ |
| 430 | + "### Data Preparation\n", |
| 431 | + "Combining the processed data, adding a channel dimension, and encoding labels for model training" |
| 432 | + ] |
| 433 | + }, |
382 | 434 | { |
383 | 435 | "cell_type": "code", |
384 | 436 | "execution_count": 10, |
|
407 | 459 | }, |
408 | 460 | { |
409 | 461 | "cell_type": "code", |
410 | | - "execution_count": 16, |
| 462 | + "execution_count": null, |
411 | 463 | "metadata": { |
412 | 464 | "colab": { |
413 | 465 | "base_uri": "https://localhost:8080/", |
|
449 | 501 | " length: Number of samples to plot\n", |
450 | 502 | " channel: Channel index (0 or 1 for MIT-BIH)\n", |
451 | 503 | " \"\"\"\n", |
452 | | - " # Load record\n", |
453 | 504 | " record = wfdb.rdrecord(os.path.join(download_path, str(record_name)))\n", |
454 | 505 | " annotation = wfdb.rdann(os.path.join(download_path, str(record_name)), 'atr')\n", |
455 | 506 | "\n", |
|
487 | 538 | "visualize_signal('200', start=5000, length=1800) # Different record" |
488 | 539 | ] |
489 | 540 | }, |
| 541 | + { |
| 542 | + "cell_type": "markdown", |
| 543 | + "metadata": {}, |
| 544 | + "source": [ |
| 545 | + "### Visualization Functions\n", |
| 546 | + "Helper function to visualize raw and filtered ECG signals with R-peak annotations" |
| 547 | + ] |
| 548 | + }, |
490 | 549 | { |
491 | 550 | "cell_type": "code", |
492 | | - "execution_count": 18, |
| 551 | + "execution_count": null, |
493 | 552 | "metadata": { |
494 | 553 | "colab": { |
495 | 554 | "base_uri": "https://localhost:8080/", |
|
520 | 579 | "source": [ |
521 | 580 | "plt.figure(figsize=(15, 8))\n", |
522 | 581 | "for i, label in enumerate(le.classes_):\n", |
523 | | - " idx = np.where(y_all == label)[0][0] # Get first occurrence\n", |
524 | | - " plt.subplot(len(le.classes_), 1, i+1)\n", |
| 582 | + " idx = np.where(y_all == label)[0][0] \n", |
525 | 583 | " plt.plot(X_all[idx, :, 0])\n", |
526 | 584 | " plt.title(f'Beat Type: {label}')\n", |
527 | 585 | " plt.ylabel('Amplitude')\n", |
|
533 | 591 | "plt.show()" |
534 | 592 | ] |
535 | 593 | }, |
| 594 | + { |
| 595 | + "cell_type": "markdown", |
| 596 | + "metadata": {}, |
| 597 | + "source": [ |
| 598 | + "### Beat Type Visualization\n", |
| 599 | + "Plotting sample beats from each category to visualize differences between classes" |
| 600 | + ] |
| 601 | + }, |
536 | 602 | { |
537 | 603 | "cell_type": "code", |
538 | 604 | "execution_count": 19, |
|
0 commit comments