From 8589218a63c01d71c08129f354ac8f52bf19a5bc Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Tue, 21 Mar 2023 15:53:10 -0700
Subject: [PATCH 01/33] Update black formatting for a few files.

---
 .pre-commit-config.yaml          | 2 +-
 afqinsight/cnn.py                | 1 -
 afqinsight/tests/test_bagging.py | 1 -
 afqinsight/tests/test_cnn.py     | 1 -
 4 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b17d1dbd..3e6fa4bc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,4 +3,4 @@ repos:
     rev: 22.3.0
     hooks:
       - id: black
-        language_version: python3.8
+        language_version: python
diff --git a/afqinsight/cnn.py b/afqinsight/cnn.py
index c32207af..16f03fd3 100644
--- a/afqinsight/cnn.py
+++ b/afqinsight/cnn.py
@@ -141,7 +141,6 @@ def __init__(
         project_name=None,
         **tuner_kwargs,
     ):
-
         self.tuner_type = tuner_type
         self.layers = layers
         self.input_shape = input_shape
diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py
index a89fdd7a..4612e1db 100644
--- a/afqinsight/tests/test_bagging.py
+++ b/afqinsight/tests/test_bagging.py
@@ -213,7 +213,6 @@ def fit(self, X, y):
         X_train_sparse = sparse_format(X_train)
         X_test_sparse = sparse_format(X_test)
         for params in parameter_sets:
-
             # Trained on sparse format
             sparse_classifier = SerialBaggingRegressor(
                 base_estimator=CustomSVR(), random_state=1, **params
diff --git a/afqinsight/tests/test_cnn.py b/afqinsight/tests/test_cnn.py
index 10898ffd..8899d912 100644
--- a/afqinsight/tests/test_cnn.py
+++ b/afqinsight/tests/test_cnn.py
@@ -150,7 +150,6 @@ def test_random_cnn():
 
 
 def test_fail_cnn():
-
     with pytest.raises(ValueError):
         # passing in wrong shape of X (not 2d):
         model = CNN(100, 6, 5, 64)

From 03173f37c41898ca5274ce8487ea93f76ca76a68 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 12:26:00 -0800
Subject: [PATCH 02/33] Upgrade python version support.

Based on https://scientific-python.org/specs/spec-0000/
---
 .github/workflows/test.yml | 2 +-
 setup.cfg                  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index aff5cdcb..42208a99 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: [3.7, 3.8, 3.9, "3.10"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     steps:
       - name: Checkout repo
diff --git a/setup.cfg b/setup.cfg
index feca5487..1fb0e4e2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ platforms = OS Independent
 [options]
 setup_requires =
     setuptools_scm
-python_requires = >=3.7
+python_requires = >=3.10
 install_requires =
     dipy>=1.0.0
     groupyr>=0.2.7
@@ -39,7 +39,7 @@ install_requires =
     pandas>=1.1.0
     requests
     seaborn
-    scikit-learn>=1.0.0
+    scikit-learn==1.2.1
     sklearn_pandas>=2.0.0
     tables>=3.0.0
     tqdm

From bee0776f9a5f7dd16645ff2f18fd29654ec2d58d Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 12:28:04 -0800
Subject: [PATCH 03/33] Upgrade docbuild Python version as well.

---
 .github/workflows/docbuild.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docbuild.yml b/.github/workflows/docbuild.yml
index 679bd09a..392444d7 100644
--- a/.github/workflows/docbuild.yml
+++ b/.github/workflows/docbuild.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: [3.7, 3.8, 3.9, "3.10"]
+        python-version: ["3.10", "3.11"]
 
     steps:
       - name: Checkout repo

From 6082fea411071a7c8d41737f1c7113c39807f717 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 12:34:27 -0800
Subject: [PATCH 04/33] Support for 3.12 will have to wait for groupyr.

groupyr, in turn, needs to wait for numba, which will happen soon.
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 42208a99..928f10cd 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11"]
 
     steps:
       - name: Checkout repo

From 3391b212af8556cea33f485020b7a9ae3cd14e7f Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 13:49:56 -0800
Subject: [PATCH 05/33] Pin a few of the dependencies to make sure things
 install.

---
 setup.cfg | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 1fb0e4e2..454cb8ff 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,12 +38,13 @@ install_requires =
     numpy
     pandas>=1.1.0
     requests
-    seaborn
+    seaborn==0.13.0
     scikit-learn==1.2.1
     sklearn_pandas>=2.0.0
-    tables>=3.0.0
+    tables==3.9.1
     tqdm
-    statsmodels
+    statsmodels==0.14.0
+    copt==0.9.1
 zip_safe = False
 include_package_data = True
 packages = find:

From cb1c7133b63b47a182b3c19d22c0e05af926cc89 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 13:58:18 -0800
Subject: [PATCH 06/33] Pin a more advanced pandas, don't pin scipy.

---
 setup.cfg | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 454cb8ff..040e7775 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,7 +36,7 @@ install_requires =
     groupyr>=0.2.7
     matplotlib
     numpy
-    pandas>=1.1.0
+    pandas==2.1.4
     requests
     seaborn==0.13.0
     scikit-learn==1.2.1
@@ -67,7 +67,6 @@ dev =
     pytest-xdist[psutil]
     pytest
     s3fs
-    scipy<=1.7.3
     sphinx
     sphinx-gallery
     sphinx-panels

From 28c43686785d551549d4aa02990675a945b56698 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 18 Jan 2024 11:23:42 -0800
Subject: [PATCH 07/33] Be explicit about trip_msg input for optional_pkg.

This is now a requirement of the API in dipy.
---
 afqinsight/cnn.py          | 4 ++--
 afqinsight/datasets.py     | 4 ++--
 afqinsight/nn/tf_models.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/afqinsight/cnn.py b/afqinsight/cnn.py
index 16f03fd3..4027bc0f 100644
--- a/afqinsight/cnn.py
+++ b/afqinsight/cnn.py
@@ -18,8 +18,8 @@
     "with `pip install tensorflow keras-tuner`."
 )
 
-kt, _, _ = optional_package("keras_tuner", keras_msg)
-tf, has_tf, _ = optional_package("tensorflow", keras_msg)
+kt, _, _ = optional_package("keras_tuner", trip_msg=keras_msg)
+tf, has_tf, _ = optional_package("tensorflow", trip_msg=keras_msg)
 
 if has_tf:
     from tensorflow.keras.models import Sequential
diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index d75ba20b..690b2f63 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -22,7 +22,7 @@
     "afqinsight[torch]`, or by separately installing these packages with "
     "`pip install torch`."
 )
-torch, HAS_TORCH, _ = optional_package("torch", torch_msg)
+torch, HAS_TORCH, _ = optional_package("torch", trip_msg=torch_msg)
 
 tf_msg = (
     "To use AFQ-Insight's tensorflow classes, you will need to have tensorflow "
@@ -30,7 +30,7 @@
     "afqinsight[tensorflow]`, or by separately installing these packages with "
     "`pip install tensorflow`."
 )
-tf, _, _ = optional_package("tensorflow", tf_msg)
+tf, _, _ = optional_package("tensorflow", trip_msg=tf_msg)
 
 __all__ = ["AFQDataset", "load_afq_data", "bundles2channels"]
 _DATA_DIR = op.join(op.expanduser("~"), ".cache", "afq-insight")
diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index a01ae4d7..e685170c 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -9,7 +9,7 @@
     "tensorflow`."
 )
 
-tf, has_tf, _ = optional_package("tensorflow", keras_msg)
+tf, has_tf, _ = optional_package("tensorflow", trip_msg=keras_msg)
 
 if has_tf:
     from tensorflow.keras.models import Model

From 6cb6148c73c694a8a3c499ed9bb8d97fe7f1d138 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 9 Feb 2024 08:09:25 -0800
Subject: [PATCH 08/33] Upgrade to newest groupyr.

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 040e7775..8eddacfe 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,7 +33,7 @@ setup_requires =
 python_requires = >=3.10
 install_requires =
     dipy>=1.0.0
-    groupyr>=0.2.7
+    groupyr>=0.3.2
     matplotlib
     numpy
     pandas==2.1.4

From ef6cd1f9cc9bc00558c335ed3158900d70f556ad Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 9 Feb 2024 08:13:48 -0800
Subject: [PATCH 09/33] Pin numpy version under 2.0

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 8eddacfe..7162ca07 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,7 +35,7 @@ install_requires =
     dipy>=1.0.0
     groupyr>=0.3.2
     matplotlib
-    numpy
+    numpy<2
     pandas==2.1.4
     requests
     seaborn==0.13.0

From 87f1e4e3a6234691375360b61ced1acbafab0feb Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 11 Feb 2024 22:21:30 -0800
Subject: [PATCH 10/33] Cast y to float before the NaN check in drop_target_na.

---
 afqinsight/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index 690b2f63..bbf82670 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -763,7 +763,7 @@ def drop_target_na(self):
         This method modifies the ``X``, ``y``, and ``subjects`` attributes in-place.
         """
         if self.y is not None:
-            nan_mask = np.isnan(self.y)
+            nan_mask = np.isnan(self.y.astype(float))
             if len(self.y.shape) > 1:
                 nan_mask = nan_mask.astype(int).sum(axis=1).astype(bool)
 

From 576cdc1b1c96c28cf9b673f880573e54a6889552 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 21:45:53 -0800
Subject: [PATCH 11/33] Upgrade to new groupyr release (0.3.3).

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 7162ca07..83cef3a4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,7 +33,7 @@ setup_requires =
 python_requires = >=3.10
 install_requires =
     dipy>=1.0.0
-    groupyr>=0.3.2
+    groupyr>=0.3.3
     matplotlib
     numpy<2
     pandas==2.1.4

From 7e3996ce8d244a13c0d381421b3c2f1acf0eede2 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 21:56:09 -0800
Subject: [PATCH 12/33] Update black setup.

---
 .pre-commit-config.yaml | 2 +-
 pyproject.toml          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3e6fa4bc..2e49df86 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/python/black
-    rev: 22.3.0
+    rev: 24.2.0
     hooks:
       - id: black
         language_version: python
diff --git a/pyproject.toml b/pyproject.toml
index 24e23566..0e5fdce1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.black]
 line-length = 88
-target-version = ['py38']
+target-version = ['py310']
 extend-exclude = '''
 
 (
@@ -22,6 +22,7 @@ extend-exclude = '''
     | \.venv
     | afqinsight.egg-info
     | doc
+    | examples
     | build
     | dist
   )/

From f47c250c45c138feceedf16239e5a5ea4bcefc78 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 22:03:33 -0800
Subject: [PATCH 13/33] Linting.

---
 afqinsight/transform.py | 1 +
 setup.py                | 1 +
 2 files changed, 2 insertions(+)

diff --git a/afqinsight/transform.py b/afqinsight/transform.py
index d40075a6..689a61eb 100755
--- a/afqinsight/transform.py
+++ b/afqinsight/transform.py
@@ -1,4 +1,5 @@
 """Transform AFQ data."""
+
 import numpy as np
 import pandas as pd
 from collections import OrderedDict
diff --git a/setup.py b/setup.py
index 820a485f..4dc5bfa6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
 """Statistical learning for tractometry data, especially within the AFQ software ecosystem."""
+
 from setuptools import setup
 import string
 import os.path as op

From 02c236ddfe276f302ff2f6235052c5ccdf674839 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 22:08:06 -0800
Subject: [PATCH 14/33] More linting.

---
 afqinsight/__init__.py                  | 1 +
 afqinsight/_serial_bagging.py           | 1 +
 afqinsight/augmentation/__init__.py     | 1 +
 afqinsight/augmentation/augmentation.py | 1 +
 afqinsight/augmentation/dtw.py          | 1 +
 afqinsight/datasets.py                  | 9 ++++++---
 afqinsight/pipeline.py                  | 1 +
 7 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/afqinsight/__init__.py b/afqinsight/__init__.py
index 2a2b1873..c0b40cf4 100755
--- a/afqinsight/__init__.py
+++ b/afqinsight/__init__.py
@@ -1,4 +1,5 @@
 """AFQ-Insight is a Python library for statistical learning of tractometry data."""
+
 from . import datasets  # noqa
 from . import utils  # noqa
 from .cross_validate import *  # noqa
diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index fc46b166..c706e6c4 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -9,6 +9,7 @@
 parallelism when using a dask.distributed backend, I will gladly remove this
 private module. @richford
 """
+
 import itertools
 import numbers
 import numpy as np
diff --git a/afqinsight/augmentation/__init__.py b/afqinsight/augmentation/__init__.py
index b40f6984..69033fef 100644
--- a/afqinsight/augmentation/__init__.py
+++ b/afqinsight/augmentation/__init__.py
@@ -13,4 +13,5 @@
 augmentation for time series classification with neural networks," PLOS ONE
 16(7): e0254841. DOI: https://doi.org/10.1371/journal.pone.0254841
 """
+
 from .augmentation import *  # noqa: F401,F403
diff --git a/afqinsight/augmentation/augmentation.py b/afqinsight/augmentation/augmentation.py
index 57131646..916cbb9e 100644
--- a/afqinsight/augmentation/augmentation.py
+++ b/afqinsight/augmentation/augmentation.py
@@ -13,6 +13,7 @@
 augmentation for time series classification with neural networks," PLOS ONE
 16(7): e0254841. DOI: https://doi.org/10.1371/journal.pone.0254841
 """
+
 import numpy as np
 from tqdm import tqdm
 
diff --git a/afqinsight/augmentation/dtw.py b/afqinsight/augmentation/dtw.py
index 4007c904..4d99519a 100644
--- a/afqinsight/augmentation/dtw.py
+++ b/afqinsight/augmentation/dtw.py
@@ -11,6 +11,7 @@
 augmentation for time series classification with neural networks," PLOS ONE
 16(7): e0254841. DOI: https://doi.org/10.1371/journal.pone.0254841
 """
+
 import numpy as np
 import sys
 
diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index bbf82670..acc7ef33 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -1,4 +1,5 @@
 """Generate samples of synthetic data sets or extract AFQ data."""
+
 import hashlib
 import numpy as np
 import os
@@ -705,9 +706,11 @@ def __getitem__(self, indices):
             target_cols=self.target_cols,
             group_names=self.group_names,
             subjects=np.array(self.subjects)[indices].tolist(),
-            sessions=np.array(self.sessions)[indices].tolist()
-            if self.sessions is not None
-            else None,
+            sessions=(
+                np.array(self.sessions)[indices].tolist()
+                if self.sessions is not None
+                else None
+            ),
             classes=self.classes,
         )
 
diff --git a/afqinsight/pipeline.py b/afqinsight/pipeline.py
index c60179b6..ad5e8a6b 100755
--- a/afqinsight/pipeline.py
+++ b/afqinsight/pipeline.py
@@ -1,4 +1,5 @@
 """sklearn-compatible pipelines for AFQ data."""
+
 import inspect
 import groupyr as gpr
 

From 17ce505a8918eb22529eb6899b3f60451ec6418c Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 22:16:39 -0800
Subject: [PATCH 15/33] Update testing tox.ini environment.

---
 tox.ini | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tox.ini b/tox.ini
index f3118213..fec7d946 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,17 +7,17 @@ isolated_build = True
 usedevelop = True
 deps =
     dipy>=1.0.0
-    groupyr==0.2.7
+    groupyr==0.3.3
     h5py>=3.0.0
     keras-tuner
     matplotlib
-    numpy
+    numpy<2
     pandas>=1.1.0
     pytest
     pytest-cov
     pytest-xdist[psutil]
     requests
-    scikit-learn>=1.0.0
+    scikit-learn>=1.2.1
     scipy<=1.7.3
     seaborn
     setuptools_scm

From 1b88ac6d806d27b1233f1bfaf39afeaed64d52f3 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 22:27:37 -0800
Subject: [PATCH 16/33] Try to get a reasonable scipy by pinning sklearn.

---
 tox.ini | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index fec7d946..5f85f388 100644
--- a/tox.ini
+++ b/tox.ini
@@ -17,8 +17,7 @@ deps =
     pytest-cov
     pytest-xdist[psutil]
     requests
-    scikit-learn>=1.2.1
-    scipy<=1.7.3
+    scikit-learn==1.2.1
     seaborn
     setuptools_scm
     sklearn_pandas>=2.0.0

From 707fabdb3677d591e912ffe04a663c68d55dd505 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 07:11:22 -0800
Subject: [PATCH 17/33] Avoid tox.

---
 .github/workflows/test.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 928f10cd..4b5e2dfd 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,12 +30,11 @@ jobs:
           pydocstyle
       - name: Test
         run: |
-          cp $(python -c 'import site; print(site.getsitepackages()[0])')/afqinsight/_version.py afqinsight/_version.py
-          tox
+          cd && mkdir for_test && cd for_test && pytest --pyargs AFQ --cov-report term-missing --cov=AFQ
       - name: Coveralls
         run: |
           coveralls
-        if: matrix.python-version == 3.8
+        if: matrix.python-version == 3.10
         env:
           COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From b0fe8907c627d7751a928124c2e167984413835b Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 07:20:31 -0800
Subject: [PATCH 18/33] Fix copy-paste artifacts.

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4b5e2dfd..3783f73e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,7 +30,7 @@ jobs:
           pydocstyle
       - name: Test
         run: |
-          cd && mkdir for_test && cd for_test && pytest --pyargs AFQ --cov-report term-missing --cov=AFQ
+          cd && mkdir for_test && cd for_test && pytest --pyargs afqinsight --cov-report term-missing --cov=afqinsight
       - name: Coveralls
         run: |
           coveralls

From 9d0772e4e01bca8c6405a8c4a3db0947b0f8d4dc Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 09:35:04 -0800
Subject: [PATCH 19/33] Replace sklearn testing functions with pytest/numpy.

---
 afqinsight/tests/test_bagging.py | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py
index 4612e1db..1bfe1d9e 100644
--- a/afqinsight/tests/test_bagging.py
+++ b/afqinsight/tests/test_bagging.py
@@ -7,17 +7,16 @@
 
 import numpy as np
 import joblib
+import pytest
 
 from afqinsight._serial_bagging import SerialBaggingClassifier, SerialBaggingRegressor
 
 from sklearn.base import BaseEstimator
 
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_raises
-from sklearn.utils._testing import assert_warns
-from sklearn.utils._testing import assert_warns_message
-from sklearn.utils._testing import assert_raise_message
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_raises
+from numpy.testing import assert_warns
 from sklearn.utils._testing import ignore_warnings
 
 from sklearn.dummy import DummyClassifier, DummyRegressor
@@ -504,15 +503,14 @@ def test_parallel_classification():
     assert_array_almost_equal(decisions1, decisions2)
 
     X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
-    assert_raise_message(
+    with pytest.raises(
         ValueError,
         "Number of features of the model "
         "must match the input. Model n_features is {0} "
         "and input n_features is {1} "
         "".format(X_test.shape[1], X_err.shape[1]),
-        ensemble.decision_function,
-        X_err,
-    )
+    ):
+        ensemble.decision_function(X_err)
 
     ensemble = SerialBaggingClassifier(
         SVC(decision_function_shape="ovr"), n_jobs=1, random_state=0
@@ -689,14 +687,11 @@ def test_warm_start_equal_n_estimators():
     y_pred = clf.predict(X_test)
     # modify X to nonsense values, this should not change anything
     X_train += 1.0
+    with pytest.warns(
+        UserWarning, match="Warm-start fitting without increasing n_estimators does not"
+    ):
+        clf.fit(X_train, y_train)
 
-    assert_warns_message(
-        UserWarning,
-        "Warm-start fitting without increasing n_estimators does not",
-        clf.fit,
-        X_train,
-        y_train,
-    )
     assert_array_equal(y_pred, clf.predict(X_test))
 
 

From 0a7e9ea0abf2d744c6f36304b0ac8f9dbf4aec0e Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 09:57:21 -0800
Subject: [PATCH 20/33] Remove CNN module that is no longer used.

We are using afqinsight/nn/* instead.
---
 afqinsight/cnn.py            | 620 -----------------------------------
 afqinsight/tests/test_cnn.py | 211 ------------
 2 files changed, 831 deletions(-)
 delete mode 100644 afqinsight/cnn.py
 delete mode 100644 afqinsight/tests/test_cnn.py

diff --git a/afqinsight/cnn.py b/afqinsight/cnn.py
deleted file mode 100644
index 4027bc0f..00000000
--- a/afqinsight/cnn.py
+++ /dev/null
@@ -1,620 +0,0 @@
-"""Build, fit, and predict with 1-D convolutional neural networks."""
-
-import functools
-import numpy as np
-import os.path as op
-import tempfile
-
-from dipy.utils.optpkg import optional_package
-from sklearn.impute import SimpleImputer
-from sklearn.metrics import r2_score
-from sklearn.model_selection import train_test_split
-from sklearn.utils.validation import check_X_y, check_is_fitted
-
-keras_msg = (
-    "To use afqinsight's convolutional neural nets for tractometry data, you will need "
-    "to have tensorflow and kerastuner installed. You can do this by installing "
-    "afqinsight with `pip install afqinsight[tf]`, or by separately installing these packages "
-    "with `pip install tensorflow keras-tuner`."
-)
-
-kt, _, _ = optional_package("keras_tuner", trip_msg=keras_msg)
-tf, has_tf, _ = optional_package("tensorflow", trip_msg=keras_msg)
-
-if has_tf:
-    from tensorflow.keras.models import Sequential
-    from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPool1D, Dropout
-    from tensorflow.keras.callbacks import ModelCheckpoint
-
-
-def build_model(hp, conv_layers, input_shape):
-    """Build a keras model.
-
-    Uses keras tuner to build model - can control # layers, # filters in each layer, kernel size,
-    regularization etc
-
-    Parameters
-    ----------
-    hp : tensorflow.keras.HyperParameters()
-            Hyperparameters class from which to sample hyperparameters
-
-    conv_layers : int
-            number of layers (one layer is Conv and MaxPool) in the sequential model.
-
-    input_shape : int
-            input shape of X so the model gets built continuously as you are adding layers
-
-    Returns
-    -------
-    model : tensorflow.keras.Model
-            compiled model that uses hyperparameters defined inline to hypertune the model
-
-    """
-    model = Sequential()
-    model.add(
-        Conv1D(
-            filters=hp.Int("init_conv_filters", min_value=32, max_value=512, step=32),
-            kernel_size=hp.Int("init_conv_kernel", min_value=1, max_value=4, step=1),
-            activation="relu",
-            input_shape=input_shape,
-        )
-    )
-
-    for i in range(conv_layers - 1):
-        model.add(
-            Conv1D(
-                filters=hp.Int(
-                    "conv_filters" + str(i), min_value=32, max_value=512, step=32
-                ),
-                kernel_size=hp.Int(
-                    "conv_kernel" + str(i), min_value=1, max_value=4, step=1
-                ),
-                activation="relu",
-            )
-        )
-
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-
-    model.add(Dropout(0.25))
-    model.add(Flatten())
-
-    dense_filters_2 = hp.Int("dense_filters_2", min_value=32, max_value=512, step=32)
-    model.add(Dense(dense_filters_2, activation="relu"))
-    model.add(Dropout(0.25))
-    model.add(Dense(64, activation="relu"))
-    model.add(Dense(1, activation="linear"))
-
-    model.compile(
-        loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
-    )
-
-    return model
-
-
-class ModelBuilder:
-    """Build a complex model architecture with the specified number of layers.
-
-    Parameters
-    ----------
-    tuner_type : str or class.
-        Tuner to use. One of {"hyperband", "bayesian", "random"}.
-
-    input_shape : tuple
-        Expected shape of the input data.
-
-    layers : int
-        Number of layers in the model.
-
-    max_epochs : int
-        Number of epochs to train the model.
-
-    X_test : numpy.ndarray
-        Test data.
-
-    y_test : numpy.ndarray
-        Test labels or test values.
-
-    batch_size : int
-        Batch size to use when training.
-
-    directory : str
-        Directory to save the model to.
-
-    project_name : str, optional
-        A string, the name to use as prefix for files saved by the tuner object. Defaults to None
-
-    tuner_kwargs : dict, optional
-        Keyword arguments to pass to the tuner class on initialization.
-        Defaults to tuner defaults.
-    """
-
-    def __init__(
-        self,
-        tuner_type,
-        input_shape,
-        layers,
-        max_epochs,
-        X_test,
-        y_test,
-        batch_size,
-        directory=None,
-        project_name=None,
-        **tuner_kwargs,
-    ):
-        self.tuner_type = tuner_type
-        self.layers = layers
-        self.input_shape = input_shape
-        self.max_epochs = max_epochs
-        self.batch_size = batch_size
-        self.X_test = X_test
-        self.y_test = y_test
-        self.directory = directory
-        self.project_name = project_name
-        self.tuner_kwargs = tuner_kwargs
-
-    def _get_tuner(self):
-        """Call build_model and instantiate a Keras Tuner for the returned model depending on user choice of tuner.
-
-        Returns
-        -------
-        tuner : kerastuner.tuners
-                BayesianOptimization, Hyperband, or RandomSearch tuner
-
-        """
-        # setting parameters beforehand
-        hypermodel = functools.partial(
-            build_model, conv_layers=self.layers, input_shape=self.input_shape
-        )
-        if isinstance(self.tuner_type, str):
-            # instantiating tuner based on user's choice
-            if self.tuner_type == "hyperband":
-                tuner = kt.Hyperband(
-                    hypermodel=hypermodel,
-                    objective="mean_squared_error",
-                    max_epochs=10,
-                    overwrite=True,
-                    project_name=self.project_name,
-                    directory=self.directory,
-                    **self.tuner_kwargs,
-                )
-
-            elif self.tuner_type == "bayesian":
-                tuner = kt.BayesianOptimization(
-                    hypermodel=hypermodel,
-                    objective="mean_squared_error",
-                    max_trials=10,
-                    overwrite=True,
-                    project_name=self.project_name,
-                    directory=self.directory,
-                    **self.tuner_kwargs,
-                )
-
-            elif self.tuner_type == "random":
-                tuner = kt.RandomSearch(
-                    hypermodel=hypermodel,
-                    objective="mean_squared_error",
-                    max_trials=10,
-                    overwrite=True,
-                    project_name=self.project_name,
-                    directory=self.directory,
-                    **self.tuner_kwargs,
-                )
-            else:
-                raise ValueError(
-                    f"tuner parameter expects 'hyperband', 'bayesian', or 'random', but you provided {self.tuner_type}"
-                )
-            return tuner
-        # We do not cover the following line, because CNN also handles this
-        # error:
-        else:  # pragma: no cover
-            raise TypeError(
-                f"`tuner` parameter should be a string, but you provided {self.tuner_type}"
-            )
-
-    def _get_best_weights(self, model, X, y):
-        """Fit a CNN and save the best weights.
-
-        Use keras ModelCheckpoint to fit CNN and save the weights from the epoch
-        that produced the lowest validation loss to a temporary file. Uses
-        temporary file to load the best weights into the CNN model and returns
-        this best model.
-
-        Parameters
-        ----------
-        model : tensorflow.keras.Sequential()
-                Hyperparameters class from which to sample hyperparameters
-
-        X : array-like of shape (n_samples, n_features)
-                The feature samples
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        Returns
-        -------
-        model : tensorflow.keras.Model
-                fitted keras model with best weights loaded
-
-        """
-        weights_path = op.join(tempfile.mkdtemp(), "weights.hdf5")
-        # making model checkpoint to save best model (# epochs) to file
-        model_checkpoint_callback = ModelCheckpoint(
-            filepath=weights_path,
-            monitor="val_loss",
-            mode="auto",
-            save_best_only=True,
-            save_weights_only=True,
-            verbose=True,
-        )
-
-        # Fitting model using model checkpoint callback to find best model which is saved to 'weights'
-        model.fit(
-            X,
-            y,
-            epochs=self.max_epochs,
-            batch_size=self.batch_size,
-            callbacks=[model_checkpoint_callback],
-            validation_data=(self.X_test, self.y_test),
-        )
-        # loading in weights
-        model.load_weights(weights_path)
-
-        # return the model
-        return model
-
-    def build_basic_model(self, X, y):
-        """Build a sequential model without hyperparameter tuning.
-
-        Builds a static baseline sequential model with no hyperparameter tuning.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-                The feature samples
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        Returns
-        -------
-        model : tensorflow.keras.Model
-                compiled model using basic Weston Havens architecture
-
-        """
-        model = Sequential()
-        model.add(Dense(128, activation="relu", input_shape=X.shape[1:]))
-        model.add(Conv1D(24, kernel_size=2, activation="relu"))
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-        model.add(Conv1D(32, kernel_size=2, activation="relu"))
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-        model.add(Conv1D(64, kernel_size=3, activation="relu"))
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-        model.add(Conv1D(128, kernel_size=4, activation="relu"))
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-        model.add(Conv1D(256, kernel_size=4, activation="relu"))
-        model.add(MaxPool1D(pool_size=2, padding="same"))
-        model.add(Dropout(0.25))
-        model.add(Flatten())
-        model.add(Dense(128, activation="relu"))
-        model.add(Dropout(0.25))
-        model.add(Dense(64, activation="relu"))
-        model.add(Dense(1, activation="linear"))
-
-        model.compile(
-            loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
-        )
-
-        best_model = self._get_best_weights(model, X, y)
-        return best_model
-
-    def build_tuned_model(self, X, y):
-        """Build a tuned model using Keras tuner.
-
-        Initializes a Keras tuner on user's model, searches for best hyperparameters, and saves them.
-        Then builds "best" model using saved best hyperparameters found during the search and returns model
-        with best weights loaded from _get_best_weights.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-                The feature samples
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        Returns
-        -------
-        model : tensorflow.keras.Model
-                compiled model that uses hyperparameters defined inline to hypertune the model
-
-        """
-        # initialize tuner
-        tuner = self._get_tuner()
-
-        # Find the optimal hyperparameters
-        tuner.search(X, y, epochs=50, validation_split=0.2)
-
-        # Save the optimal hyperparameters
-        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
-
-        # make CNN model using best hyperparameters
-        model = tuner.hypermodel.build(best_hps)
-
-        best_model = self._get_best_weights(model, X, y)
-        return best_model
-
-
-class CNN:
-    """A Convolutional Neural Network model with a fit/predict interface.
-
-    Parameters
-    ----------
-    n_nodes : int
-        Number of nodes in each bundle profile.
-
-    n_channels : int
-        Number of metrics in each bundle profile.
-
-    max_epochs : int
-        Maximum number of epochs to train model.
-
-    batch_size : int
-        Number of samples per batch.
-
-    tuner_type : str
-        Type of hyperparameter tuner to use. One of 'hyperband', 'bayesian', or
-        'random'.
-
-    layers : int
-        Number of convolutional layers to use.
-
-    test_size : float
-        Fraction of data to use as test set.
-
-    impute_strategy : str, optional
-        Imputation strategy to use. One of 'mean', 'median', or 'knn'.
-        Default: "median".
-
-    random_state : int or RandomState instance, optional
-        Default: None.
-
-    directory : str, optional
-        Directory to save model and hyperparameters. Default: "."
-
-    project_name : str, optional
-        A string, the name to use as prefix for files saved by the tuner
-        object. Defaults to None
-
-    tuner_kwargs : dict, optional
-        Keyword arguments to pass to tuner. Default: tuner defaults.
-    """
-
-    def __init__(
-        self,
-        n_nodes,
-        n_channels,
-        max_epochs=50,
-        batch_size=32,
-        tuner_type=None,
-        layers=1,
-        test_size=0.2,
-        impute_strategy="median",
-        random_state=None,
-        directory=None,
-        project_name=None,
-        **tuner_kwargs,
-    ):
-        # checking n_nodes is passed as int
-        if not isinstance(n_nodes, int):
-            raise TypeError("Parameter n_nodes must be an integer.")
-        else:
-            self.n_nodes = n_nodes
-
-        # checking n_channels is passed as int
-        if not isinstance(n_channels, int):
-            raise TypeError("Parameter n_channels must be an integer.")
-        else:
-            self.n_channels = n_channels
-
-        # checking layers is passed as int
-        if not isinstance(layers, int):
-            raise TypeError("Parameter layers must be an integer.")
-        else:
-            self.layers = layers
-
-        # checking max epochs is passed as int
-        if not isinstance(max_epochs, int):
-            raise TypeError("Parameter max_epochs must be an integer.")
-        else:
-            self.max_epochs = max_epochs
-
-        if not isinstance(batch_size, int):
-            raise TypeError("Parameter batch_size must be an integer.")
-        else:
-            self.batch_size = batch_size
-
-        # checking tiner is passed as str or None
-        if not isinstance(tuner_type, str) and tuner_type is not None:
-            raise TypeError("Parameter tuner must be str.")
-        else:
-            # tuner can be None (no tuning) BayesianOptimization, Hyperband, or RandomSearch
-            self.tuner_type = tuner_type
-
-        # checking val split is passed as float
-        if not isinstance(test_size, float):
-            raise TypeError("Parameter test_size must be a float.")
-        else:
-            self.test_size = test_size
-
-        # checking strategy is passed as str and has value of 'median', 'mean', or 'knn'
-        if not isinstance(impute_strategy, str):
-            raise TypeError("Parameter impute_strategy must be a string.")
-        elif impute_strategy not in ["median", "mean", "knn"]:
-            raise ValueError(
-                f"Parameter impute_strategy must be 'median', 'mean', or 'knn' but you provided {impute_strategy}"
-            )
-        else:
-            self.impute_strategy = impute_strategy
-
-        if random_state is not None:
-            if not (isinstance(random_state, int) or isinstance(np.random.RandomState)):
-                raise TypeError(
-                    f"Parameter random_state must be an int or RandomState, but you provided {random_state}"
-                )
-        self.random_state = random_state
-
-        self.directory = directory
-        self.project_name = project_name
-        self.tuner_kwargs = tuner_kwargs
-        self.model_ = None
-        self.best_hps_ = None
-
-    def _preprocess(self, X, y=None):
-        """Convert feature matrix for input into a CNN.
-
-        Masks NAN values for X and y (if y is given), imputes X, and reshapes X
-        to be in proper form for CNN model. In more conventional machine
-        learning, X has shape (n_samples, n_features), where n_features is
-        n_nodes * n_bundles * n_metrics. However, in our CNN approach, we treat
-        each bundle/metric combination as a separate channel, analogous to RGB
-        channels in a 2D image. The remaining one dimension is the nodes
-        dimension. Thus the output has shape (n_samples, n_channels, n_nodes),
-        where n_channels = n_metrics * n_bundles.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_metrics * n_nodes)
-                Diffusion MRI tractometry features (columns) for each subject in the sample (rows).
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        Returns
-        -------
-        X : array-like of shape (n_samples, n_channels, n_nodes)
-                The imputed and reshaped feature samples
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        """
-        # n_nodes * n_channels must = X.shape[1]
-        if self.n_nodes * self.n_channels != X.shape[1]:
-            raise ValueError(
-                "The product n_nodes and n_channels is not the correct shape."
-            )
-
-        # We don't cover the following line, because this case is also handled
-        # in the fall to fit:
-        if len(X.shape) > 2:  # pragma: no cover
-            raise ValueError("Expected X to be a 2D matrix.")
-        if y is not None:
-            nan_mask = np.logical_not(np.isnan(y))
-            X = X[nan_mask, :]
-            y = y[nan_mask]
-
-        imp = SimpleImputer(strategy=self.impute_strategy)
-        X = imp.fit_transform(X)
-
-        if y is not None:
-            X, y = check_X_y(X, y)
-
-        n_subjects = X.shape[0]
-
-        X = np.swapaxes(X.reshape((n_subjects, self.n_channels, self.n_nodes)), 1, 2)
-
-        if y is not None:
-            return X, y
-        else:
-            return X
-
-    def fit(self, X, y):
-        """Fit the model.
-
-        Preprocesses X and y, builds CNN model, tunes model hyperparameters and
-        fits the model to given X and y, using X_test and y_test to validate and
-        find best weights and hyperparameters.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_metrics * n_nodes)
-                Diffusion MRI tractometry features (columns) for each subject (rows).
-
-        y : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Target values
-
-        Returns
-        -------
-        self : CNN
-                updated CNN instantiation
-
-        """
-        X, y = self._preprocess(X, y)
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=self.test_size, random_state=self.random_state
-        )
-        # CNN gets n_nodes, n_channels, max_epochs, tuner=None, layers=None
-        # Model Builder takes tuner_type, input_shape, layers, max_epochs, **kwargs
-        builder = ModelBuilder(
-            self.tuner_type,
-            X_train.shape[1:],
-            self.layers,
-            self.max_epochs,
-            X_test,
-            y_test,
-            self.batch_size,
-            self.directory,
-            self.project_name,
-            **self.tuner_kwargs,
-        )
-        if self.tuner_type is None:
-            self.model_ = builder.build_basic_model(X_train, y_train)
-        else:
-            self.model_ = builder.build_tuned_model(X_train, y_train)
-
-        self.is_fitted_ = True
-
-        return self
-
-    def predict(self, X):
-        """Predict target values.
-
-        Preprocesses X and returns predicted y values for X from fitted CNN model.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_metrics * n_nodes)
-                Tractometry features (columns) for each subject in the sample (rows).
-
-        Returns
-        -------
-        pred : array-like of shape (n_samples,) or (n_samples, n_targets)
-                predicted values
-        """
-        X = self._preprocess(X)
-        check_is_fitted(self, "is_fitted_")
-        pred = self.model_.predict(X).squeeze()
-        return pred
-
-    def score(self, y_test, y_hat):
-        """Score the performance of the model.
-
-        Masks out NaN values from y_test and returns $R^2$ score for the CNN model comparing to y_hat
-
-        Parameters
-        ----------
-        y_test : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Testing target values
-
-        y_hat : array-like of shape (n_samples,) or (n_samples, n_targets)
-                Predicted target values
-
-        Returns
-        -------
-        r2_score : float
-                r-squared score for y_test and y_hat for CNN model
-
-        """
-        nan_mask = np.logical_not(np.isnan(y_test))
-        y_test = y_test[nan_mask]
-        return r2_score(y_test, y_hat)
diff --git a/afqinsight/tests/test_cnn.py b/afqinsight/tests/test_cnn.py
deleted file mode 100644
index 8899d912..00000000
--- a/afqinsight/tests/test_cnn.py
+++ /dev/null
@@ -1,211 +0,0 @@
-import afqinsight as afqi
-import os.path as op
-import pytest
-import tempfile
-
-from afqinsight.cnn import CNN
-from afqinsight.datasets import load_afq_data
-
-data_path = op.join(afqi.__path__[0], "data")
-test_data_path = op.join(data_path, "test_data")
-
-X, y, groups, feature_names, group_names, subjects, _, _ = load_afq_data(
-    fn_nodes=op.join(test_data_path, "nodes.csv"),
-    fn_subjects=op.join(test_data_path, "subjects.csv"),
-    target_cols=["test_class"],
-    label_encode_cols=["test_class"],
-)
-
-
-def test_basic_cnn():
-    with tempfile.TemporaryDirectory() as tdir:
-        model = CNN(100, 6, 5, project_name="test-project", directory=tdir)
-        model.fit(X, y)
-        assert model.is_fitted_ is True
-        y_hat = model.predict(X)
-        _ = model.score(y, y_hat)
-
-
-def test_hyperband_cnn():
-    with tempfile.TemporaryDirectory() as tdir:
-        model = CNN(
-            100, 6, 5, 64, "hyperband", project_name="test-project", directory=tdir
-        )
-        model.fit(X, y)
-        assert model.is_fitted_ is True
-        y_hat = model.predict(X)
-        _ = model.score(y, y_hat)
-
-        model2 = CNN(
-            100, 6, 5, 64, "hyperband", 4, project_name="test-project", directory=tdir
-        )
-        model2.fit(X, y)
-        assert model2.is_fitted_ is True
-        y_hat2 = model2.predict(X)
-        _ = model2.score(y, y_hat2)
-
-        model3 = CNN(
-            100,
-            6,
-            5,
-            64,
-            "hyperband",
-            4,
-            0.3,
-            project_name="test-project",
-            directory=tdir,
-        )
-        model3.fit(X, y)
-        assert model3.is_fitted_ is True
-        y_hat3 = model3.predict(X)
-        _ = model3.score(y, y_hat3)
-
-        model4 = CNN(
-            100,
-            6,
-            5,
-            64,
-            "hyperband",
-            4,
-            0.3,
-            factor=2,
-            hyperband_iterations=2,
-            seed=2,
-            project_name="test-project",
-            directory=tdir,
-        )
-        model4.fit(X, y)
-        assert model4.is_fitted_ is True
-        y_hat4 = model4.predict(X)
-        _ = model4.score(y, y_hat4)
-
-
-def test_bayesian_cnn():
-    with tempfile.TemporaryDirectory() as tdir:
-        model = CNN(100, 6, 5, 64, "bayesian", directory=tdir)
-        model.fit(X, y)
-        assert model.is_fitted_ is True
-        y_hat = model.predict(X)
-        _ = model.score(y, y_hat)
-
-        model2 = CNN(100, 6, 5, 64, "bayesian", 4, directory=tdir)
-        model2.fit(X, y)
-        assert model2.is_fitted_ is True
-        y_hat2 = model2.predict(X)
-        _ = model2.score(y, y_hat2)
-
-        model3 = CNN(100, 6, 5, 64, "bayesian", 4, 0.3, directory=tdir)
-        model3.fit(X, y)
-        assert model3.is_fitted_ is True
-        y_hat3 = model3.predict(X)
-        _ = model3.score(y, y_hat3)
-
-        model4 = CNN(
-            100,
-            6,
-            5,
-            64,
-            "bayesian",
-            4,
-            0.3,
-            num_initial_points=2,
-            alpha=0.02,
-            beta=0.5,
-            seed=5,
-            directory=tdir,
-        )
-        model4.fit(X, y)
-        assert model4.is_fitted_ is True
-        y_hat4 = model4.predict(X)
-        _ = model4.score(y, y_hat4)
-
-
-def test_random_cnn():
-    with tempfile.TemporaryDirectory() as tdir:
-        model = CNN(100, 6, 5, 64, "random", directory=tdir)
-        model.fit(X, y)
-        assert model.is_fitted_ is True
-        y_hat = model.predict(X)
-        _ = model.score(y, y_hat)
-
-        model2 = CNN(100, 6, 5, 64, "random", 4, directory=tdir)
-        model2.fit(X, y)
-        assert model2.is_fitted_ is True
-        y_hat2 = model2.predict(X)
-        _ = model2.score(y, y_hat2)
-
-        model3 = CNN(100, 6, 5, 64, "random", 4, 0.3, directory=tdir)
-        model3.fit(X, y)
-        assert model3.is_fitted_ is True
-        y_hat3 = model3.predict(X)
-        _ = model3.score(y, y_hat3)
-
-        model4 = CNN(
-            100, 6, 5, 64, "random", 4, 0.3, impute_strategy="mean", directory=tdir
-        )
-        model4.fit(X, y)
-        assert model4.is_fitted_ is True
-        y_hat4 = model4.predict(X)
-        _ = model4.score(y, y_hat4)
-
-
-def test_fail_cnn():
-    with pytest.raises(ValueError):
-        # passing in wrong shape of X (not 2d):
-        model = CNN(100, 6, 5, 64)
-        model.fit(X.reshape((7, 100, -1)), y)
-
-    with pytest.raises(ValueError):
-        # passing in wrong tuner value
-        model = CNN(100, 6, 5, 64, "wrong")
-        model.fit(X, y)
-
-    with pytest.raises(TypeError):
-        # passing in int for tuner
-        model = CNN(100, 6, 5, 64, 0)
-
-    with pytest.raises(ValueError):
-        # passing in n_nodes and n_channels that multiply to equal
-        # proper dimension for given x
-        model = CNN(78, 6, 5, 64, "random")
-        model.fit(X, y)
-
-    with pytest.raises(TypeError):
-        # passing in float for tuner_type
-        model = CNN(100, 6, 5, 64, 0.0)
-
-    with pytest.raises(TypeError):
-        # passing in float for n_nodes
-        model = CNN(1.1, 6, 5, 64, "random")
-
-    with pytest.raises(TypeError):
-        # passing in float for n_channels
-        model = CNN(100, 6.0, 5, 64, "random")
-
-    with pytest.raises(TypeError):
-        # passing in float for layers
-        model = CNN(100, 6, layers=5.0)
-
-    with pytest.raises(TypeError):
-        # passing in float for batch size
-        model = CNN(100, 6, 5, 6.4, "random")
-
-    with pytest.raises(TypeError):
-        # passing in string for batch size
-        model = CNN(100, 6, 5, "64", "random")
-
-    with pytest.raises(TypeError):
-        # passing in an integer for test_size
-        model = CNN(100, 6, test_size=20)
-
-    with pytest.raises(TypeError):
-        # passing in an integer for impute_strategy (this should be a string).
-        model = CNN(100, 6, impute_strategy=20)
-
-    with pytest.raises(ValueError):
-        # passing in the wrong string for impute_strategy:
-        model = CNN(100, 6, impute_strategy="foo")
-
-    with pytest.raises(TypeError):
-        # passing in a string for random_state (should be int or RandomState).
-        model = CNN(100, 6, random_state="foo")

From 6c99748b5cda9dae050f49405caeea626fefe963 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 15:50:26 -0800
Subject: [PATCH 21/33] Replace deprecated np.float with builtin float.

---
 afqinsight/_serial_bagging.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index c706e6c4..fe06d6ee 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -384,7 +384,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         # Validate max_features
         if isinstance(self.max_features, numbers.Integral):
             max_features = self.max_features
-        elif isinstance(self.max_features, np.float):
+        elif isinstance(self.max_features, float):
             max_features = self.max_features * self.n_features_in_
         else:
             raise ValueError("max_features must be int or float")
@@ -898,7 +898,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         # Validate max_features
         if isinstance(self.max_features, numbers.Integral):
             max_features = self.max_features
-        elif isinstance(self.max_features, np.float):  # pragma: no cover
+        elif isinstance(self.max_features, float):  # pragma: no cover
             max_features = self.max_features * self.n_features_in_
         else:  # pragma: no cover
             raise ValueError("max_features must be int or float")

From 3d14cce7619dc278b8a249a92fe5d7828ba90abd Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 15:54:14 -0800
Subject: [PATCH 22/33] Pin numpy 1.23.5

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 83cef3a4..74d704a1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,7 +35,7 @@ install_requires =
     dipy>=1.0.0
     groupyr>=0.3.3
     matplotlib
-    numpy<2
+    numpy==1.23.5
     pandas==2.1.4
     requests
     seaborn==0.13.0

From 2ee4e8e6958928ebff51a7440d48454bd44df6f5 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 16:08:08 -0800
Subject: [PATCH 23/33] Implement the other required attribute of this dummy
 class.

---
 afqinsight/tests/test_bagging.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py
index 1bfe1d9e..2a12f40b 100644
--- a/afqinsight/tests/test_bagging.py
+++ b/afqinsight/tests/test_bagging.py
@@ -240,6 +240,9 @@ def fit(self, X, y):
         self.training_size_ = X.shape[0]
         self.training_hash_ = joblib.hash(X)
 
+    def predict(self, X):
+        return np.zeros(X.shape[0])
+
 
 def test_bootstrap_samples():
     # Test that bootstrapping samples generate non-perfect base estimators.

From fdcecda1a6675677263223cb49e99f27625dcb3c Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Fri, 23 Feb 2024 16:31:59 -0800
Subject: [PATCH 24/33] Coerce a float type for y.

---
 afqinsight/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index acc7ef33..09918e01 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -311,7 +311,7 @@ def load_afq_data(
         else:
             classes = None
 
-        y = np.squeeze(y.to_numpy())
+        y = np.squeeze(y.to_numpy()).astype(float)
 
     return AFQData(
         X=X,

From e18b6fcc3ee24a1613c947de3f828ed863ca14dc Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 12:27:07 -0800
Subject: [PATCH 25/33] Initial draft implementation of an autoencoder model.

---
 afqinsight/nn/tf_models.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index e685170c..df9916d0 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -307,3 +307,27 @@ def cnn_resnet(input_shape, n_classes, output_activation="softmax", verbose=Fals
         model.summary()
 
     return model
+
+
+def autoencoder(input_shape, n_hidden=None, verbose=False):
+    """
+    Fully connected autoencoder
+    """
+    ip = Input(shape=input_shape)
+    if n_hidden is None:
+        n_hidden = input_shape[0] // 8
+
+    fc = Flatten()(ip)
+    fc = Dense(input_shape, activation="relu")(fc)
+    fc = Dense(input_shape // 2, activation="relu")(fc)
+    fc = Dense(input_shape // 4, activation="relu")(fc)
+    fc = Dense(n_hidden, activation="relu")(fc)
+    fc = Dense(input_shape // 4, activation="relu")(fc)
+    fc = Dense(input_shape // 2, activation="relu")(fc)
+    out = Dense(input_shape)(fc)
+
+    model = Model([ip], [out])
+    if verbose:
+        model.summary()
+
+    return model

From 7e924391b1e6c9558ab4f4f7c3cf7cab3ef498b8 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 14:56:25 -0800
Subject: [PATCH 26/33] Fixes this test, which was failing because of
 non-numeric values in sessionID.

---
 afqinsight/tests/test_datasets.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/afqinsight/tests/test_datasets.py b/afqinsight/tests/test_datasets.py
index 7a807a96..b6ac9e5f 100644
--- a/afqinsight/tests/test_datasets.py
+++ b/afqinsight/tests/test_datasets.py
@@ -536,9 +536,9 @@ def test_load_afq_data(dwi_metrics):
     )
 
     means_ref = (
-        nodes.groupby(["subjectID", "tractID"])
+        nodes.drop(["nodeID", "sessionID"], axis="columns")
+        .groupby(["subjectID", "tractID"])
         .agg("mean")
-        .drop("nodeID", axis="columns")
         .unstack("tractID")
     )
     assert np.allclose(X, means_ref.to_numpy(), equal_nan=True)  # nosec

From 573c29d1529de8ab5253fe646e774f0752854b36 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 15:47:26 -0800
Subject: [PATCH 27/33] Be a little more liberal with dtype here.

This is so that we can deal with NaNs in y.
---
 afqinsight/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index 09918e01..08159046 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -311,7 +311,7 @@ def load_afq_data(
         else:
             classes = None
 
-        y = np.squeeze(y.to_numpy()).astype(float)
+        y = np.squeeze(y.to_numpy().astype(float))
 
     return AFQData(
         X=X,

From 8777f9d661599fbfc574fc27c76e4866f8db326a Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Sun, 14 Jan 2024 16:08:45 -0800
Subject: [PATCH 28/33] Include dl_qc_score when loading the hbn data.

---
 afqinsight/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/datasets.py b/afqinsight/datasets.py
index 08159046..0925ba8b 100755
--- a/afqinsight/datasets.py
+++ b/afqinsight/datasets.py
@@ -664,7 +664,7 @@ def from_study(study, verbose=None):
             "weston-havens": dict(dwi_metrics=["md", "fa"], target_cols=["Age"]),
             "hbn": dict(
                 dwi_metrics=["dki_md", "dki_fa"],
-                target_cols=["age", "sex", "scan_site_id"],
+                target_cols=["age", "sex", "scan_site_id", "dl_qc_score"],
                 label_encode_cols=["sex", "scan_site_id"],
                 index_col="subject_id",
             ),

From 6d2da2647f2bde7e91b0fdbe98860e95a2343bf5 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 18 Jan 2024 11:18:32 -0800
Subject: [PATCH 29/33] A bit more progress on autoencoder.

---
 afqinsight/nn/tf_models.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index df9916d0..be74d8bb 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -13,7 +13,7 @@
 
 if has_tf:
     from tensorflow.keras.models import Model
-    from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
+    from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, Reshape
     from tensorflow.keras.layers import MaxPooling1D, Conv1D
     from tensorflow.keras.layers import LSTM, Bidirectional
     from tensorflow.keras.layers import (
@@ -309,22 +309,23 @@ def cnn_resnet(input_shape, n_classes, output_activation="softmax", verbose=Fals
     return model
 
 
-def autoencoder(input_shape, n_hidden=None, verbose=False):
+def autoencoder(input_shape, encoding_dim=None, verbose=False):
     """
     Fully connected autoencoder
     """
     ip = Input(shape=input_shape)
-    if n_hidden is None:
-        n_hidden = input_shape[0] // 8
+    if encoding_dim is None:
+        encoding_dim = (input_shape[0] * input_shape[1]) // 8
 
     fc = Flatten()(ip)
-    fc = Dense(input_shape, activation="relu")(fc)
-    fc = Dense(input_shape // 2, activation="relu")(fc)
-    fc = Dense(input_shape // 4, activation="relu")(fc)
-    fc = Dense(n_hidden, activation="relu")(fc)
-    fc = Dense(input_shape // 4, activation="relu")(fc)
-    fc = Dense(input_shape // 2, activation="relu")(fc)
-    out = Dense(input_shape)(fc)
+    fc = Dense(input_shape[0] * input_shape[1], activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
+    fc = Dense(encoding_dim, activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
+    pre_out = Dense((input_shape[0] * input_shape[1]))(fc)
+    out = Reshape(input_shape)(pre_out)
 
     model = Model([ip], [out])
     if verbose:

From dfb102ddf79f69fd98886a4cde8419d2c84cf74a Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Mon, 12 Feb 2024 13:48:15 -0800
Subject: [PATCH 30/33] Adds a conv autoencoder.

---
 afqinsight/nn/tf_models.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index be74d8bb..dc382a83 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -14,7 +14,7 @@
 if has_tf:
     from tensorflow.keras.models import Model
     from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, Reshape
-    from tensorflow.keras.layers import MaxPooling1D, Conv1D
+    from tensorflow.keras.layers import MaxPooling1D, Conv1D, Conv1DTranspose
     from tensorflow.keras.layers import LSTM, Bidirectional
     from tensorflow.keras.layers import (
         BatchNormalization,
@@ -23,6 +23,7 @@
         concatenate,
         Activation,
         add,
+        Layer,
     )
 else:
     # Since all model building functions start with Input, we make Input the
@@ -309,7 +310,7 @@ def cnn_resnet(input_shape, n_classes, output_activation="softmax", verbose=Fals
     return model
 
 
-def autoencoder(input_shape, encoding_dim=None, verbose=False):
+def fc_autoencoder(input_shape, encoding_dim=None, verbose=False):
     """
     Fully connected autoencoder
     """
@@ -328,6 +329,28 @@ def autoencoder(input_shape, encoding_dim=None, verbose=False):
     out = Reshape(input_shape)(pre_out)
 
     model = Model([ip], [out])
+    if verbose:
+        model.summary()
+    return model
+
+
+def cnn_autoencoder(input_shape, verbose=False):
+    """
+    Convolutional autoencoder
+    """
+    ip = Input(shape=input_shape)
+    # Encoder
+    x = Conv1D(32, (3), activation="relu", padding="same")(ip)
+    x = MaxPooling1D((2), padding="same")(x)
+    x = Conv1D(32, (3), activation="relu", padding="same")(x)
+    x = MaxPooling1D((2), padding="same")(x)
+
+    # Decoder
+    x = Conv1DTranspose(32, (3), strides=2, activation="relu", padding="same")(x)
+    x = Conv1DTranspose(32, (3), strides=2, activation="relu", padding="same")(x)
+    x = Conv1D(1, (3), activation="sigmoid", padding="same")(x)
+
+    model = Model([ip], [x])
     if verbose:
         model.summary()
 

From 951b3f666eb4fe78192c0138218404703a9e64ae Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Mon, 12 Feb 2024 13:51:54 -0800
Subject: [PATCH 31/33] Adds initial implementation of a VAE.

---
 afqinsight/nn/tf_models.py | 99 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index dc382a83..c1da05e6 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -25,6 +25,8 @@
         add,
         Layer,
     )
+    from tensorflow.keras.losses import binary_crossentropy
+
 else:
     # Since all model building functions start with Input, we make Input the
     # tripwire instance for cases where tensorflow is not installed.
@@ -355,3 +357,100 @@ def cnn_autoencoder(input_shape, verbose=False):
         model.summary()
 
     return model
+
+
+class _Sampling(Layer):
+    """
+    Sample the latent layer of a VAE
+    """
+
+    def call(self, inputs):
+        z_mean, z_log_var = inputs
+        batch = tf.shape(z_mean)[0]
+        dim = tf.shape(z_mean)[1]
+        epsilon = tf.random.normal(shape=(batch, dim))
+        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
+
+
+def _fc_vae_encoder(input_shape, encoding_dim=None, verbose=False):
+    """
+    Encoder section for a fully connected variational autoencoder
+    """
+    ip = Input(shape=input_shape)
+    if encoding_dim is None:
+        encoding_dim = (input_shape[0] * input_shape[1]) // 8
+
+    fc = Flatten()(ip)
+    fc = Dense(input_shape[0] * input_shape[1], activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
+
+    z_mean = Dense(encoding_dim, activation="relu")(fc)
+    z_log_var = Dense(encoding_dim, name="z_mean")(fc)
+    z = _Sampling()([z_mean, z_log_var])
+    return Model(ip, [z_mean, z_log_var, z], name="encoder")
+
+
+def _fc_vae_decoder(input_shape, encoding_dim=None, verbose=False):
+    """
+    Decoder section for a fully connected variational autoencoder
+    """
+
+    fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
+    fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
+    pre_out = Dense((input_shape[0] * input_shape[1]))(fc)
+    out = Reshape(input_shape)(pre_out)
+
+
+def _VAE(Model):
+    """
+    A variational autoencoder class
+    """
+
+    def __init__(self, encoder, decoder, **kwargs):
+        super().__init__(**kwargs)
+        self.encoder = encoder
+        self.decoder = decoder
+        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
+        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
+            name="reconstruction_loss"
+        )
+        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")
+
+    @property
+    def metrics(self):
+        return [
+            self.total_loss_tracker,
+            self.reconstruction_loss_tracker,
+            self.kl_loss_tracker,
+        ]
+
+    def train_step(self, data):
+        with tf.GradientTape() as tape:
+            z_mean, z_log_var, z = self.encoder(data)
+            reconstruction = self.decoder(z)
+            reconstruction_loss = tf.reduce_mean(
+                tf.reduce_sum(binary_crossentropy(data, reconstruction), axis=1)
+            )
+            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
+            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
+            total_loss = reconstruction_loss + kl_loss
+        grads = tape.gradient(total_loss, self.trainable_weights)
+        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
+        self.total_loss_tracker.update_state(total_loss)
+        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
+        self.kl_loss_tracker.update_state(kl_loss)
+        return {
+            "loss": self.total_loss_tracker.result(),
+            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
+            "kl_loss": self.kl_loss_tracker.result(),
+        }
+
+
+def fc_vae(input_shape, encoding_dim=None, verbose=False):
+    """
+    Fully connected variational autoencoder.
+    """
+    encoder = _fc_vae_encoder(input_shape, encoding_dim, verbose)
+    decoder = _fc_vae_decoder(input_shape, encoding_dim, verbose)
+    return _VAE(encoder, decoder)

From e31f680cdea2e70b2708aad5238bf2a19e377508 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Mon, 12 Feb 2024 15:58:26 -0800
Subject: [PATCH 32/33] Make sure that encoding dimension takes a non-None
 value.

And that it's consistent between the encoder and decoder.
---
 afqinsight/nn/tf_models.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index c1da05e6..eba0e9b8 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -377,6 +377,7 @@ def _fc_vae_encoder(input_shape, encoding_dim=None, verbose=False):
     Encoder section for a fully connected variational autoencoder
     """
     ip = Input(shape=input_shape)
+
     if encoding_dim is None:
         encoding_dim = (input_shape[0] * input_shape[1]) // 8
 
@@ -388,21 +389,22 @@ def _fc_vae_encoder(input_shape, encoding_dim=None, verbose=False):
     z_mean = Dense(encoding_dim, activation="relu")(fc)
     z_log_var = Dense(encoding_dim, name="z_mean")(fc)
     z = _Sampling()([z_mean, z_log_var])
-    return Model(ip, [z_mean, z_log_var, z], name="encoder")
+    return Model([ip], [z_mean, z_log_var, z], name="encoder")
 
 
 def _fc_vae_decoder(input_shape, encoding_dim=None, verbose=False):
     """
     Decoder section for a fully connected variational autoencoder
     """
-
+    ip = Input(shape=(encoding_dim,))
+    fc = Flatten()(ip)
     fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
     fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
     pre_out = Dense((input_shape[0] * input_shape[1]))(fc)
-    out = Reshape(input_shape)(pre_out)
+    return Reshape(input_shape)(pre_out)
 
 
-def _VAE(Model):
+class _VAE(Model):
     """
     A variational autoencoder class
     """
@@ -451,6 +453,9 @@ def fc_vae(input_shape, encoding_dim=None, verbose=False):
     """
     Fully connected variational autoencoder.
     """
+    if encoding_dim is None:
+        encoding_dim = (input_shape[0] * input_shape[1]) // 8
+
     encoder = _fc_vae_encoder(input_shape, encoding_dim, verbose)
     decoder = _fc_vae_decoder(input_shape, encoding_dim, verbose)
     return _VAE(encoder, decoder)

From 68f03d51577c36738940768aa9c9d2d1876b28c4 Mon Sep 17 00:00:00 2001
From: Ariel Rokem <arokem@gmail.com>
Date: Thu, 22 Feb 2024 21:41:47 -0800
Subject: [PATCH 33/33] More work on autoencoder.

Also, use groupyr 0.3.3

Review fixes folded in:
- cnn_autoencoder: reshape to the per-sample feature-map shape (without
  the batch axis) and project the latent code back to that size first,
  because Reshape cannot turn `encoding_dim` values into a (length, 16)
  feature map.
- _fc_vae_encoder: make the z_mean head linear and give both heads their
  own layer names (z_log_var was labelled "z_mean").
---
Note: the hunks below were edited by hand during review; the blob ids on
the "index" line were not regenerated.

 afqinsight/nn/tf_models.py | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/afqinsight/nn/tf_models.py b/afqinsight/nn/tf_models.py
index eba0e9b8..61637c0b 100644
--- a/afqinsight/nn/tf_models.py
+++ b/afqinsight/nn/tf_models.py
@@ -336,21 +336,28 @@ def fc_autoencoder(input_shape, encoding_dim=None, verbose=False):
     return model
 
 
-def cnn_autoencoder(input_shape, verbose=False):
+def cnn_autoencoder(input_shape, encoding_dim=8, verbose=False):
     """
     Convolutional autoencoder
     """
     ip = Input(shape=input_shape)
     # Encoder
-    x = Conv1D(32, (3), activation="relu", padding="same")(ip)
-    x = MaxPooling1D((2), padding="same")(x)
-    x = Conv1D(32, (3), activation="relu", padding="same")(x)
-    x = MaxPooling1D((2), padding="same")(x)
-
+    x = Conv1D(32, 3, activation="relu", padding="same")(ip)
+    x = MaxPooling1D(2, padding="same")(x)
+    x = Conv1D(16, 3, activation="relu", padding="same")(x)
+    x = MaxPooling1D(2, padding="same")(x)
+    # Per-sample feature-map shape; the batch axis must not reach Reshape.
+    shape = tuple(x.shape[1:])
+    # Latent
+    x = Flatten()(x)
+    x = Dense(encoding_dim, activation="relu")(x)
     # Decoder
-    x = Conv1DTranspose(32, (3), strides=2, activation="relu", padding="same")(x)
-    x = Conv1DTranspose(32, (3), strides=2, activation="relu", padding="same")(x)
-    x = Conv1D(1, (3), activation="sigmoid", padding="same")(x)
+    # Project the latent code back to the feature-map size before reshaping.
+    x = Dense(shape[0] * shape[1], activation="relu")(x)
+    x = Reshape(shape)(x)
+    x = Conv1DTranspose(32, 3, strides=2, activation="relu", padding="same")(x)
+    x = Conv1DTranspose(16, 3, strides=2, activation="relu", padding="same")(x)
+    x = Conv1DTranspose(1, 3, activation="sigmoid", padding="same")(x)
 
     model = Model([ip], [x])
     if verbose:
@@ -386,8 +393,8 @@ def _fc_vae_encoder(input_shape, encoding_dim=None, verbose=False):
     fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
     fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
 
-    z_mean = Dense(encoding_dim, activation="relu")(fc)
-    z_log_var = Dense(encoding_dim, name="z_mean")(fc)
+    z_mean = Dense(encoding_dim, name="z_mean")(fc)
+    z_log_var = Dense(encoding_dim, name="z_log_var")(fc)
     z = _Sampling()([z_mean, z_log_var])
     return Model([ip], [z_mean, z_log_var, z], name="encoder")
 
@@ -401,7 +408,8 @@ def _fc_vae_decoder(input_shape, encoding_dim=None, verbose=False):
     fc = Dense((input_shape[0] * input_shape[1]) // 4, activation="relu")(fc)
     fc = Dense((input_shape[0] * input_shape[1]) // 2, activation="relu")(fc)
     pre_out = Dense((input_shape[0] * input_shape[1]))(fc)
-    return Reshape(input_shape)(pre_out)
+    out = Reshape(input_shape)(pre_out)
+    return Model([ip], [out], name="decoder")
 
 
 class _VAE(Model):
@@ -427,6 +435,11 @@ def metrics(self):
             self.kl_loss_tracker,
         ]
 
+    def call(self, inputs):
+        z_mean, z_log_var, z = self.encoder(inputs)
+        reconstructed = self.decoder(z)
+        return reconstructed
+
     def train_step(self, data):
         with tf.GradientTape() as tape:
             z_mean, z_log_var, z = self.encoder(data)