From 5d39a976d726b6746a3f56de527800201807bfe8 Mon Sep 17 00:00:00 2001
From: Rafael <rao9787@gmail.com>
Date: Tue, 20 Jun 2023 14:03:14 -0400
Subject: [PATCH 1/6] take off actnorm from conditional glow, can be easily
 composed with summarizednet. Add a bit of docstring

---
 .../invertible_network_conditional_glow.jl    | 23 ++++++++-----------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/networks/invertible_network_conditional_glow.jl b/src/networks/invertible_network_conditional_glow.jl
index 7bd57c69..65df9214 100644
--- a/src/networks/invertible_network_conditional_glow.jl
+++ b/src/networks/invertible_network_conditional_glow.jl
@@ -6,14 +6,14 @@
 export NetworkConditionalGlow, NetworkConditionalGlow3D
 
 """
-    G = NetworkGlow(n_in, n_cond, n_hidden, L, K; k1=3, k2=1, p1=1, p2=0, s1=1, s2=1)
+    G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=false)
 
-    G = NetworkGlow3D(n_in, n_cond, n_hidden, L, K; k1=3, k2=1, p1=1, p2=0, s1=1, s2=1)
+    G = NetworkConditionalGlow3D(n_in, n_cond, n_hidden, L, K; split_scales=false)
 
  Create a conditional invertible network based on the Glow architecture. Each flow step in the inner loop 
- consists of an activation normalization layer, followed by an invertible coupling layer with
- 1x1 convolutions and a residual block. The outer loop performs a squeezing operation prior 
- to the inner loop, and a splitting operation afterwards.
+ consists of an activation normalization layer, followed by an invertible glow conditional coupling layer with
+ 1x1 convolutions and a residual block that takes the condition as an input. 
+ The outer loop performs a squeezing operation prior to the inner loop, and a splitting operation afterwards.
 
  *Input*: 
 
@@ -44,7 +44,7 @@ export NetworkConditionalGlow, NetworkConditionalGlow3D
 
  *Output*:
  
- - `G`: invertible Glow network.
+ - `G`: invertible conditional Glow network.
 
  *Usage:*
 
@@ -56,14 +56,13 @@ export NetworkConditionalGlow, NetworkConditionalGlow3D
 
  - None in `G` itself
 
- - Trainable parameters in activation normalizations `G.AN[i,j]` and coupling layers `G.C[i,j]`,
+ - Trainable parameters in activation normalizations `G.AN[i,j]` and coupling layers `G.CL[i,j]`,
    where `i` and `j` range from `1` to `L` and `K` respectively.
 
- See also: [`ActNorm`](@ref), [`CouplingLayerGlow!`](@ref), [`get_params`](@ref), [`clear_grad!`](@ref)
+ See also: [`ActNorm`](@ref), [`ConditionalLayerGlow`](@ref), [`get_params`](@ref), [`clear_grad!`](@ref)
 """
 struct NetworkConditionalGlow <: InvertibleNetwork
     AN::AbstractArray{ActNorm, 2}
-    AN_C::ActNorm
     CL::AbstractArray{ConditionalLayerGlow, 2}
     Z_dims::Union{Array{Array, 1}, Nothing}
     L::Int64
@@ -77,7 +76,6 @@ end
 # Constructor
 function NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; freeze_conv=false,  split_scales=false,  rb_activation::ActivationFunction=ReLUlayer(), k1=3, k2=1, p1=1, p2=0, s1=1, s2=1, ndims=2, squeezer::Squeezer=ShuffleLayer(), activation::ActivationFunction=SigmoidLayer())
     AN = Array{ActNorm}(undef, L, K)    # activation normalization
-    AN_C = ActNorm(n_cond; logdet=false)    # activation normalization for condition
     CL = Array{ConditionalLayerGlow}(undef, L, K)  # coupling layers w/ 1x1 convolution and residual block
  
     if split_scales
@@ -108,8 +106,6 @@ function forward(X::AbstractArray{T, N}, C::AbstractArray{T, N}, G::NetworkCondi
     G.split_scales && (Z_save = array_of_array(X, G.L-1))
     orig_shape = size(X)
 
-    C = G.AN_C.forward(C)
-
     logdet = 0
     for i=1:G.L
         (G.split_scales) && (X = G.squeezer.forward(X))
@@ -176,6 +172,5 @@ function backward(ΔX::AbstractArray{T, N}, X::AbstractArray{T, N}, C::AbstractA
         end
     end
 
-    ΔC, C = G.AN_C.backward(ΔC, C)
     return ΔX, X, ΔC
-end
+end
\ No newline at end of file

From b5010bdaff9224db0988b71034a8cc5cbf529347 Mon Sep 17 00:00:00 2001
From: Rafael Orozco <rao9787@gmail.com>
Date: Tue, 20 Jun 2023 14:05:33 -0400
Subject: [PATCH 2/6] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 9a0b3b9b..fc9fb0d6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "InvertibleNetworks"
 uuid = "b7115f24-5f92-4794-81e8-23b0ddb121d3"
 authors = ["Philipp Witte <p.witte@ymail.com>", "Ali Siahkoohi <alisk@gatech.edu>", "Mathias Louboutin <mlouboutin3@gatech.edu>", "Gabrio Rizzuti <g.rizzuti@umcutrecht.nl>", "Rafael Orozco <rorozco@gatech.edu>", "Felix J. herrmann <fherrmann@gatech.edu>"]
-version = "2.2.5"
+version = "2.2.6"
 
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"

From 23d2fb4aef2ecba140b3d38772aa43e159abe31a Mon Sep 17 00:00:00 2001
From: Rafael <rao9787@gmail.com>
Date: Tue, 20 Jun 2023 14:27:37 -0400
Subject: [PATCH 3/6] fix cl not defined

---
 src/networks/invertible_network_conditional_glow.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/networks/invertible_network_conditional_glow.jl b/src/networks/invertible_network_conditional_glow.jl
index 65df9214..d700a869 100644
--- a/src/networks/invertible_network_conditional_glow.jl
+++ b/src/networks/invertible_network_conditional_glow.jl
@@ -96,7 +96,7 @@ function NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; freeze_conv=false,
         (i < L && split_scales) && (n_in = Int64(n_in/2)) # split
     end
 
-    return NetworkConditionalGlow(AN, AN_C, CL, Z_dims, L, K, squeezer, split_scales)
+    return NetworkConditionalGlow(AN, CL, Z_dims, L, K, squeezer, split_scales)
 end
 
 NetworkConditionalGlow3D(args; kw...) = NetworkConditionalGlow(args...; kw..., ndims=3)

From ef4c011bc303c3df68c55390a4d6ad2ce8970861 Mon Sep 17 00:00:00 2001
From: Rafael <rao9787@gmail.com>
Date: Thu, 22 Jun 2023 10:01:44 -0400
Subject: [PATCH 4/6] cond glow handle odd size, cleaner test

---
 .../conditional_layer_glow.jl                 |  14 +-
 .../test_conditional_glow_network.jl          | 248 ++++++++++++++++--
 2 files changed, 235 insertions(+), 27 deletions(-)

diff --git a/src/conditional_layers/conditional_layer_glow.jl b/src/conditional_layers/conditional_layer_glow.jl
index ab36e064..9be591ae 100644
--- a/src/conditional_layers/conditional_layer_glow.jl
+++ b/src/conditional_layers/conditional_layer_glow.jl
@@ -75,10 +75,13 @@ end
 
 # Constructor from input dimensions
 function ConditionalLayerGlow(n_in::Int64, n_cond::Int64, n_hidden::Int64;freeze_conv=false, k1=3, k2=1, p1=1, p2=0, s1=1, s2=1, logdet=false, activation::ActivationFunction=SigmoidLayer(), rb_activation::ActivationFunction=RELUlayer(), ndims=2)
-
-    # 1x1 Convolution and residual block for invertible layers
     C  = Conv1x1(n_in; freeze=freeze_conv)
-    RB = ResidualBlock(Int(n_in/2)+n_cond, n_hidden; n_out=n_in, activation=rb_activation, k1=k1, k2=k2, p1=p1, p2=p2, s1=s1, s2=s2, fan=true, ndims=ndims)
+
+    split_num = Int(round(n_in/2))
+    in_chan   = n_in-split_num
+    out_chan  = 2*split_num
+
+    RB = ResidualBlock(in_chan+n_cond, n_hidden; n_out=out_chan, activation=rb_activation, k1=k1, k2=k2, p1=p1, p2=p2, s1=s1, s2=s2, fan=true, ndims=ndims)
 
     return ConditionalLayerGlow(C, RB, logdet, activation)
 end
@@ -143,7 +146,10 @@ function backward(ΔY::AbstractArray{T, N}, Y::AbstractArray{T, N}, C::AbstractA
 
     # Backpropagate RB
     ΔX2_ΔC = L.RB.backward(tensor_cat(L.activation.backward(ΔS, S), ΔT), (tensor_cat(X2, C)))
-    ΔX2, ΔC = tensor_split(ΔX2_ΔC; split_index=Int(size(ΔY)[N-1]/2))
+
+    n_in = size(ΔY)[N-1]
+    split_num = Int(round(n_in/2))
+    ΔX2, ΔC = tensor_split(ΔX2_ΔC; split_index=n_in-split_num)
     ΔX2 += ΔY2
 
     # Backpropagate 1x1 conv
diff --git a/test/test_networks/test_conditional_glow_network.jl b/test/test_networks/test_conditional_glow_network.jl
index 642f76f2..b7875197 100644
--- a/test/test_networks/test_conditional_glow_network.jl
+++ b/test/test_networks/test_conditional_glow_network.jl
@@ -3,23 +3,231 @@
 # Date: January 2020
 
 using InvertibleNetworks, LinearAlgebra, Test, Random
-using Flux 
+using Statistics 
 
 # Random seed
-Random.seed!(3);
+Random.seed!(10);
+
+function loss(G, X, Cond;summarized=false)
+    Y, ZC, logdet = G.forward(X, Cond)
+    f = -log_likelihood(Y) - logdet
+    ΔY = -∇log_likelihood(Y)
+    if summarized
+        ΔX = G.backward(ΔY, Y, ZC; Y_save=Cond)[1]
+        return f, ΔX, G.cond_net.CL[1,1].RB.W1.grad
+    else 
+        ΔX =  G.backward(ΔY, Y, ZC)[1]
+        return f, ΔX, G.CL[1,1].RB.W1.grad
+    end
+end
+
+
+
+function gradients_set(G, n_in,n_cond,N; summarized=false)
+    X = rand(Float32, N..., n_in, batchsize)
+    Cond = rand(Float32, N..., n_cond, batchsize)
+
+    XZ, CondZ = G.forward(X,Cond)
+
+    # Set gradients 
+
+    summarized ? G.backward(XZ, XZ, CondZ; Y_save=Cond) : G.backward(XZ, XZ, CondZ)
+
+    P = get_params(G)
+    gsum = 0
+    for p in P
+        ~isnothing(p.grad) && (gsum += 1)
+    end
+    summarized ?  (@test isequal(gsum, L*K*10+12)) : (@test isequal(gsum, L*K*10))
+   
+    clear_grad!(G)
+    gsum = 0
+    for p in P
+        ~isnothing(p.grad) && (gsum += 1)
+    end
+    @test isequal(gsum, 0)
+end
 
 # Define network
-nx = 32
-ny = 32
-nz = 32
-n_in = 2
+nx = 16
+ny = 16
+nz = 16
+n_in = 4
 n_cond = 2
 n_hidden = 4
-batchsize = 2
+batchsize = 4
 L = 2
 K = 2
-split_scales = true
-N = (nx,ny)
+
+stol = 1.5f0
+for split_scales in [false,true]
+    for N in [(16*nx),(nx,ny),(nx,ny,nz)]
+        println("Test with split_scales = $(split_scales) N = $(N) and summarized=$(summary)")
+        
+        # Network and inputs
+        G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
+
+        X = randn(Float32, N..., n_in, batchsize)
+        Cond = rand(Float32, N..., n_cond, batchsize)
+
+        # Invertibility
+        XZ, CondZ = G.forward(X,Cond)
+        X_ = G.inverse(XZ, CondZ) # saving the cond output is important in split scales because of reshapes
+        @test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
+
+        ###################################################################################################
+        # Test gradients are set and cleared
+        gradients_set(G, n_in, n_cond,N;)
+
+        ###################################################################################################
+        # Gradient test w.r.t. input
+        X0 = rand(Float32, N..., n_in, batchsize)
+        Cond0 = rand(Float32, N..., n_cond, batchsize)
+
+        dX = X - X0
+
+        f0, ΔX = loss(G, X0, Cond0)[1:2]
+        h = 0.1f0
+        maxiter = 4
+        err1 = zeros(Float32, maxiter)
+        err2 = zeros(Float32, maxiter)
+
+        print("\nGradient test glow: input\n")
+        for j=1:maxiter
+            f = loss(G, X0 + h*dX, Cond0)[1]
+            err1[j] = abs(f - f0)
+            err2[j] = abs(f - f0 - h*dot(dX, ΔX))
+            print(err1[j], "; ", err2[j], "\n")
+            h = h/2f0
+        end
+
+        rate1 = err1[1:end-1]./err1[2:end]
+        rate2 = err2[1:end-1]./err2[2:end]
+
+        @test isapprox(mean(rate1),2f0; atol=stol)
+        @test isapprox(mean(rate2), 4f0; atol=stol)
+
+        # Gradient test w.r.t. parameters
+        G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
+        G0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
+        Gini = deepcopy(G0)
+
+        # Test one parameter from residual block
+        dW = G.CL[1,1].RB.W1.data - G0.CL[1,1].RB.W1.data
+
+        f0, ΔX, ΔW = loss(G0, X, Cond)
+        h = 0.1f0
+        maxiter = 4
+        err1 = zeros(Float32, maxiter)
+        err2 = zeros(Float32, maxiter)
+
+        print("\nGradient test glow: parameter\n")
+        for j=1:maxiter
+            G0.CL[1,1].RB.W1.data = Gini.CL[1,1].RB.W1.data + h*dW
+
+            f = loss(G0, X, Cond)[1]
+            err1[j] = abs(f - f0)
+            err2[j] = abs(f - f0 - h*dot(dW, ΔW))
+            print(err1[j], "; ", err2[j], "\n")
+            h = h/2f0
+        end
+
+        rate1 = err1[1:end-1]./err1[2:end]
+        rate2 = err2[1:end-1]./err2[2:end]
+
+        @test isapprox(mean(rate1),2f0; atol=stol)
+        @test isapprox(mean(rate2), 4f0; atol=stol)
+    end
+end
+
+for split_scales in [false,true]
+    for N in [(16*nx),(nx,ny),(nx,ny,nz)]
+        println("Test with split_scales = $(split_scales) N = $(N) and summarized=$(true)")
+        
+        # Network and inputs
+        G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
+        sum_net = ResNet(n_cond, 16, 3; norm=nothing,ndims=length(N)) # make sure it doesnt have any weird normalizations
+        G = SummarizedNet(G, sum_net)
+
+        X = randn(Float32, N..., n_in, batchsize)
+        Cond = rand(Float32, N..., n_cond, batchsize)
+
+        # Invertibility
+        XZ, CondZ = G.forward(X,Cond)
+        X_ = G.inverse(XZ, CondZ) # saving the cond output is important in split scales because of reshapes
+        @test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
+
+        ###################################################################################################
+        # Test gradients are set and cleared
+        gradients_set(G, n_in, n_cond,N; summarized=true)
+
+        ###################################################################################################
+        # Gradient test w.r.t. input
+        X0 = rand(Float32, N..., n_in, batchsize)
+        Cond0 = rand(Float32, N..., n_cond, batchsize)
+
+        dX = X - X0
+
+        f0, ΔX = loss(G, X0, Cond0; summarized=true)[1:2]
+        h = 0.1f0
+        maxiter = 4
+        err1 = zeros(Float32, maxiter)
+        err2 = zeros(Float32, maxiter)
+
+        print("\nGradient test glow: input\n")
+        for j=1:maxiter
+            f = loss(G, X0 + h*dX, Cond0; summarized=true)[1]
+            err1[j] = abs(f - f0)
+            err2[j] = abs(f - f0 - h*dot(dX, ΔX))
+            print(err1[j], "; ", err2[j], "\n")
+            h = h/2f0
+        end
+
+        rate1 = err1[1:end-1]./err1[2:end]
+        rate2 = err2[1:end-1]./err2[2:end]
+
+        @test isapprox(mean(rate1),2f0; atol=stol)
+        @test isapprox(mean(rate2), 4f0; atol=stol)
+
+        # Gradient test w.r.t. parameters
+        G0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
+        sum_net = ResNet(n_cond, 16, 3; norm=nothing,ndims=length(N)) # make sure it doesnt have any weird normalizations
+        G0 = SummarizedNet(G0, sum_net)
+        Gini = deepcopy(G0)
+
+        # Test one parameter from residual block
+        dW = G.cond_net.CL[1,1].RB.W1.data - G0.cond_net.CL[1,1].RB.W1.data
+
+        f0, ΔX, ΔW = loss(G0, X, Cond; summarized=true)
+        h = 0.1f0
+        maxiter = 4
+        err1 = zeros(Float32, maxiter)
+        err2 = zeros(Float32, maxiter)
+
+        print("\nGradient test glow: parameter\n")
+        for j=1:maxiter
+            G0.cond_net.CL[1,1].RB.W1.data = Gini.cond_net.CL[1,1].RB.W1.data + h*dW
+
+            f = loss(G0, X, Cond; summarized=true)[1]
+            err1[j] = abs(f - f0)
+            err2[j] = abs(f - f0 - h*dot(dW, ΔW))
+            print(err1[j], "; ", err2[j], "\n")
+            h = h/2f0
+        end
+
+        rate1 = err1[1:end-1]./err1[2:end]
+        rate2 = err2[1:end-1]./err2[2:end]
+
+        @test isapprox(mean(rate1),2f0; atol=stol)
+        @test isapprox(mean(rate2), 4f0; atol=stol)
+    end
+end
+
+
+
+
+
+
 
 ########################################### Test with split_scales = true N = (nx,ny) #########################
 # Invertibility
@@ -42,7 +250,7 @@ gsum = 0
 for p in P
     ~isnothing(p.grad) && (global gsum += 1)
 end
-@test isequal(gsum, L*K*10+2)
+@test isequal(gsum, L*K*10)
 
 clear_grad!(G)
 gsum = 0
@@ -58,13 +266,13 @@ function loss(G, X, Cond)
     Y, ZC, logdet = G.forward(X, Cond)
     f = -log_likelihood(Y) - logdet
     ΔY = -∇log_likelihood(Y)
-    ΔX, X_ = G.backward(ΔY, Y, ZC)
+    ΔX = G.backward(ΔY, Y, ZC)[1]
     return f, ΔX, G.CL[1,1].RB.W1.grad, G.CL[1,1].C.v1.grad
 end
 
 
 # Gradient test w.r.t. input
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
+#G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
 X = rand(Float32, N..., n_in, batchsize)
 Cond = rand(Float32, N..., n_cond, batchsize)
 X0 = rand(Float32, N..., n_in, batchsize)
@@ -93,7 +301,7 @@ end
 
 # Gradient test w.r.t. parameters
 X = rand(Float32, N..., n_in, batchsize)
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
+#G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
 G0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
 Gini = deepcopy(G0)
 
@@ -148,7 +356,7 @@ gsum = 0
 for p in P
     ~isnothing(p.grad) && (global gsum += 1)
 end
-@test isequal(gsum, L*K*10+2+12) # depends on summary net you use
+@test isequal(gsum, L*K*10+12) # depends on summary net you use
 
 clear_grad!(G)
 gsum = 0
@@ -159,13 +367,7 @@ end
 
 
 # Gradient test
-function loss_sum(G, X, Cond)
-    Y, ZC, logdet = G.forward(X, Cond)
-    f = -log_likelihood(Y) - logdet
-    ΔY = -∇log_likelihood(Y)
-    ΔX, X_ = G.backward(ΔY, Y, ZC; Y_save=Cond)
-    return f, ΔX, G.cond_net.CL[1,1].RB.W1.grad, G.cond_net.CL[1,1].C.v1.grad
-end
+
 
 # Gradient test w.r.t. input
 X = rand(Float32, N..., n_in, batchsize);
@@ -248,7 +450,7 @@ gsum = 0
 for p in P
     ~isnothing(p.grad) && (global gsum += 1)
 end
-@test isequal(gsum, L*K*10+2)
+@test isequal(gsum, L*K*10)
 
 clear_grad!(G)
 gsum = 0
@@ -345,7 +547,7 @@ gsum = 0
 for p in P
     ~isnothing(p.grad) && (global gsum += 1)
 end
-@test isequal(gsum, L*K*10+2+12)
+@test isequal(gsum, L*K*10+12)
 
 clear_grad!(G)
 gsum = 0

From 60dbb04f261cfb0d6c60818469a5c85d24157176 Mon Sep 17 00:00:00 2001
From: Rafael <rao9787@gmail.com>
Date: Thu, 22 Jun 2023 11:41:27 -0400
Subject: [PATCH 5/6] clean test cond glow

---
 .../test_conditional_glow_network.jl          | 405 +-----------------
 1 file changed, 4 insertions(+), 401 deletions(-)

diff --git a/test/test_networks/test_conditional_glow_network.jl b/test/test_networks/test_conditional_glow_network.jl
index b7875197..cf69e965 100644
--- a/test/test_networks/test_conditional_glow_network.jl
+++ b/test/test_networks/test_conditional_glow_network.jl
@@ -21,8 +21,6 @@ function loss(G, X, Cond;summarized=false)
     end
 end
 
-
-
 function gradients_set(G, n_in,n_cond,N; summarized=false)
     X = rand(Float32, N..., n_in, batchsize)
     Cond = rand(Float32, N..., n_cond, batchsize)
@@ -30,7 +28,6 @@ function gradients_set(G, n_in,n_cond,N; summarized=false)
     XZ, CondZ = G.forward(X,Cond)
 
     # Set gradients 
-
     summarized ? G.backward(XZ, XZ, CondZ; Y_save=Cond) : G.backward(XZ, XZ, CondZ)
 
     P = get_params(G)
@@ -62,7 +59,7 @@ K = 2
 stol = 1.5f0
 for split_scales in [false,true]
     for N in [(16*nx),(nx,ny),(nx,ny,nz)]
-        println("Test with split_scales = $(split_scales) N = $(N) and summarized=$(summary)")
+        println("Test with split_scales = $(split_scales) N = $(N)")
         
         # Network and inputs
         G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales, ndims=length(N))
@@ -140,6 +137,7 @@ for split_scales in [false,true]
     end
 end
 
+# with summary network
 for split_scales in [false,true]
     for N in [(16*nx),(nx,ny),(nx,ny,nz)]
         println("Test with split_scales = $(split_scales) N = $(N) and summarized=$(true)")
@@ -219,401 +217,6 @@ for split_scales in [false,true]
         rate2 = err2[1:end-1]./err2[2:end]
 
         @test isapprox(mean(rate1),2f0; atol=stol)
-        @test isapprox(mean(rate2), 4f0; atol=stol)
+        @test isapprox(mean(rate2),4f0; atol=stol)
     end
-end
-
-
-
-
-
-
-
-########################################### Test with split_scales = true N = (nx,ny) #########################
-# Invertibility
-
-# Network and input
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-X = rand(Float32, N..., n_in, batchsize)
-Cond = rand(Float32, N..., n_cond, batchsize)
-
-Y, Cond = G.forward(X,Cond)
-X_ = G.inverse(Y,Cond) # saving the cond is important in split scales because of reshapes
-
-@test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
-
-# Test gradients are set and cleared
-G.backward(Y, Y, Cond)
-
-P = get_params(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, L*K*10)
-
-clear_grad!(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, 0)
-
-###################################################################################################
-# Gradient test
-
-function loss(G, X, Cond)
-    Y, ZC, logdet = G.forward(X, Cond)
-    f = -log_likelihood(Y) - logdet
-    ΔY = -∇log_likelihood(Y)
-    ΔX = G.backward(ΔY, Y, ZC)[1]
-    return f, ΔX, G.CL[1,1].RB.W1.grad, G.CL[1,1].C.v1.grad
-end
-
-
-# Gradient test w.r.t. input
-#G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-X = rand(Float32, N..., n_in, batchsize)
-Cond = rand(Float32, N..., n_cond, batchsize)
-X0 = rand(Float32, N..., n_in, batchsize)
-Cond0 = rand(Float32, N..., n_cond, batchsize)
-
-dX = X - X0
-
-f0, ΔX = loss(G, X0, Cond0)[1:2]
-h = 0.1f0
-maxiter = 4
-err1 = zeros(Float32, maxiter)
-err2 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    f = loss(G, X0 + h*dX, Cond0)[1]
-    err1[j] = abs(f - f0)
-    err2[j] = abs(f - f0 - h*dot(dX, ΔX))
-    print(err1[j], "; ", err2[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err1[end] / (err1[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err2[end] / (err2[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-# Gradient test w.r.t. parameters
-X = rand(Float32, N..., n_in, batchsize)
-#G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-G0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-Gini = deepcopy(G0)
-
-# Test one parameter from residual block and 1x1 conv
-dW = G.CL[1,1].RB.W1.data - G0.CL[1,1].RB.W1.data
-dv = G.CL[1,1].C.v1.data - G0.CL[1,1].C.v1.data
-
-f0, ΔX, ΔW, Δv = loss(G0, X, Cond)
-h = 0.1f0
-maxiter = 4
-err3 = zeros(Float32, maxiter)
-err4 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    G0.CL[1,1].RB.W1.data = Gini.CL[1,1].RB.W1.data + h*dW
-    G0.CL[1,1].C.v1.data = Gini.CL[1,1].C.v1.data + h*dv
-
-    f = loss(G0, X, Cond)[1]
-    err3[j] = abs(f - f0)
-    err4[j] = abs(f - f0 - h*dot(dW, ΔW) - h*dot(dv, Δv))
-    print(err3[j], "; ", err4[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err3[end] / (err3[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err4[end] / (err4[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-
-########################################### Test with split_scales = true N = (nx,ny) and summary network #########################
-# Invertibility
-sum_net = ResNet(n_cond, 16, 3; norm=nothing) # make sure it doesnt have any weird normalizations
-
-# Network and input
-flow = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales,ndims=length(N))
-G = SummarizedNet(flow, sum_net)
-
-X = rand(Float32, N..., n_in, batchsize);
-Cond = rand(Float32, N..., n_cond, batchsize);
-
-Y, ZCond = G.forward(X,Cond)
-X_ = G.inverse(Y,ZCond) # saving the cond is important in split scales because of reshapes
-
-@test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
-
-# Test gradients are set and cleared
-G.backward(Y, Y, ZCond; Y_save = Cond)
-
-P = get_params(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, L*K*10+12) # depends on summary net you use
-
-clear_grad!(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, 0)
-
-
-# Gradient test
-
-
-# Gradient test w.r.t. input
-X = rand(Float32, N..., n_in, batchsize);
-Cond = rand(Float32, N..., n_cond, batchsize);
-X0 = rand(Float32, N..., n_in, batchsize);
-Cond0 = rand(Float32, N..., n_cond, batchsize);
-
-dX = X - X0
-
-f0, ΔX = loss_sum(G, X0, Cond0)[1:2]
-h = 0.1f0
-maxiter = 4
-err1 = zeros(Float32, maxiter)
-err2 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    f = loss_sum(G, X0 + h*dX, Cond0)[1]
-    err1[j] = abs(f - f0)
-    err2[j] = abs(f - f0 - h*dot(dX, ΔX))
-    print(err1[j], "; ", err2[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err1[end] / (err1[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err2[end] / (err2[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-# Gradient test w.r.t. parameters
-X = rand(Float32, N..., n_in, batchsize)
-flow0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales,ndims=length(N))
-G0 = SummarizedNet(flow0, sum_net)
-Gini = deepcopy(G0)
-
-# Test one parameter from residual block and 1x1 conv
-dW = G.cond_net.CL[1,1].RB.W1.data - G0.cond_net.CL[1,1].RB.W1.data
-dv = G.cond_net.CL[1,1].C.v1.data - G0.cond_net.CL[1,1].C.v1.data
-
-f0, ΔX, ΔW, Δv = loss_sum(G0, X, Cond)
-h = 0.1f0
-maxiter = 4
-err3 = zeros(Float32, maxiter)
-err4 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    G0.cond_net.CL[1,1].RB.W1.data = Gini.cond_net.CL[1,1].RB.W1.data + h*dW
-    G0.cond_net.CL[1,1].C.v1.data = Gini.cond_net.CL[1,1].C.v1.data + h*dv
-
-    f = loss_sum(G0, X, Cond)[1]
-    err3[j] = abs(f - f0)
-    err4[j] = abs(f - f0 - h*dot(dW, ΔW) - h*dot(dv, Δv))
-    print(err3[j], "; ", err4[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err3[end] / (err3[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err4[end] / (err4[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-N = (nx,ny,nz)
-########################################### Test with split_scales = true N = (nx,ny,nz) #########################
-# Invertibility
-
-# Network and input
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-X = rand(Float32, N..., n_in, batchsize)
-Cond = rand(Float32, N..., n_cond, batchsize)
-
-Y, Cond = G.forward(X,Cond)
-X_ = G.inverse(Y,Cond) # saving the cond is important in split scales because of reshapes
-
-@test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
-
-# Test gradients are set and cleared
-G.backward(Y, Y, Cond)
-
-P = get_params(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, L*K*10)
-
-clear_grad!(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, 0)
-
-
-# Gradient test
-
-
-# Gradient test w.r.t. input
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-X = rand(Float32, N..., n_in, batchsize)
-Cond = rand(Float32, N..., n_cond, batchsize)
-X0 = rand(Float32, N..., n_in, batchsize)
-Cond0 = rand(Float32, N..., n_cond, batchsize)
-
-dX = X - X0
-
-f0, ΔX = loss(G, X0, Cond0)[1:2]
-h = 0.1f0
-maxiter = 4
-err1 = zeros(Float32, maxiter)
-err2 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    f = loss(G, X0 + h*dX, Cond0)[1]
-    err1[j] = abs(f - f0)
-    err2[j] = abs(f - f0 - h*dot(dX, ΔX))
-    print(err1[j], "; ", err2[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err1[end] / (err1[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err2[end] / (err2[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-# Gradient test w.r.t. parameters
-X = rand(Float32, N..., n_in, batchsize)
-G = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-G0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K;split_scales=split_scales,ndims=length(N))
-Gini = deepcopy(G0)
-
-# Test one parameter from residual block and 1x1 conv
-dW = G.CL[1,1].RB.W1.data - G0.CL[1,1].RB.W1.data
-dv = G.CL[1,1].C.v1.data - G0.CL[1,1].C.v1.data
-
-f0, ΔX, ΔW, Δv = loss(G0, X, Cond)
-h = 0.1f0
-maxiter = 4
-err3 = zeros(Float32, maxiter)
-err4 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    G0.CL[1,1].RB.W1.data = Gini.CL[1,1].RB.W1.data + h*dW
-    G0.CL[1,1].C.v1.data = Gini.CL[1,1].C.v1.data + h*dv
-
-    f = loss(G0, X, Cond)[1]
-    err3[j] = abs(f - f0)
-    err4[j] = abs(f - f0 - h*dot(dW, ΔW) - h*dot(dv, Δv))
-    print(err3[j], "; ", err4[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err3[end] / (err3[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err4[end] / (err4[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-
-########################################### Test with split_scales = true N = (nx,ny,nz) and Summary network #########################
-# Invertibility
-sum_net_3d = ResNet(n_cond, 16, 3; ndims=3, norm=nothing) # make sure it doesnt have any weird normalizati8ons
-
-# Network and input
-flow = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales,ndims=length(N));
-G = SummarizedNet(flow, sum_net_3d)
-
-X = rand(Float32, N..., n_in, batchsize);
-Cond = rand(Float32, N..., n_cond, batchsize);
-
-Y, ZCond = G.forward(X,Cond);
-X_ = G.inverse(Y,ZCond); # saving the cond is important in split scales because of reshapes
-
-@test isapprox(norm(X - X_)/norm(X), 0f0; atol=1f-5)
-
-# Test gradients are set and cleared
-G.backward(Y, Y, ZCond; Y_save=Cond)
-
-P = get_params(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, L*K*10+12)
-
-clear_grad!(G)
-gsum = 0
-for p in P
-    ~isnothing(p.grad) && (global gsum += 1)
-end
-@test isequal(gsum, 0)
-
-
-# Gradient test
-
-
-# Gradient test w.r.t. input
-X = rand(Float32, N..., n_in, batchsize);
-Cond = rand(Float32, N..., n_cond, batchsize);
-X0 = rand(Float32, N..., n_in, batchsize);
-Cond0 = rand(Float32, N..., n_cond, batchsize);
-
-dX = X - X0;
-
-f0, ΔX = loss_sum(G, X0, Cond0)[1:2];
-h = 0.1f0
-maxiter = 4
-err1 = zeros(Float32, maxiter)
-err2 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    f = loss_sum(G, X0 + h*dX, Cond0)[1]
-    err1[j] = abs(f - f0)
-    err2[j] = abs(f - f0 - h*dot(dX, ΔX))
-    print(err1[j], "; ", err2[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err1[end] / (err1[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err2[end] / (err2[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
-# Gradient test w.r.t. parameters
-X = rand(Float32, N..., n_in, batchsize)
-flow0 = NetworkConditionalGlow(n_in, n_cond, n_hidden, L, K; split_scales=split_scales,ndims=length(N))
-G0 = SummarizedNet(flow0, sum_net_3d)
-Gini = deepcopy(G0)
-
-# Test one parameter from residual block and 1x1 conv
-dW = G.cond_net.CL[1,1].RB.W1.data - G0.cond_net.CL[1,1].RB.W1.data
-dv = G.cond_net.CL[1,1].C.v1.data - G0.cond_net.CL[1,1].C.v1.data
-
-f0, ΔX, ΔW, Δv = loss_sum(G0, X, Cond);
-h = 0.1f0
-maxiter = 4
-err3 = zeros(Float32, maxiter)
-err4 = zeros(Float32, maxiter)
-
-print("\nGradient test glow: input\n")
-for j=1:maxiter
-    G0.cond_net.CL[1,1].RB.W1.data = Gini.cond_net.CL[1,1].RB.W1.data + h*dW
-    G0.cond_net.CL[1,1].C.v1.data = Gini.cond_net.CL[1,1].C.v1.data + h*dv
-
-    f = loss_sum(G0, X, Cond)[1]
-    err3[j] = abs(f - f0)
-    err4[j] = abs(f - f0 - h*dot(dW, ΔW) - h*dot(dv, Δv))
-    print(err3[j], "; ", err4[j], "\n")
-    global h = h/2f0
-end
-
-@test isapprox(err3[end] / (err3[1]/2^(maxiter-1)), 1f0; atol=1f0)
-@test isapprox(err4[end] / (err4[1]/4^(maxiter-1)), 1f0; atol=1f0)
-
+end
\ No newline at end of file

From 0e6ebd106acbdcf686e42bce5f3da6d37086adaa Mon Sep 17 00:00:00 2001
From: Rafael <rao9787@gmail.com>
Date: Thu, 22 Jun 2023 13:41:47 -0400
Subject: [PATCH 6/6] test passing in julia1.6

---
 .../test_conditional_glow_network.jl              | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/test/test_networks/test_conditional_glow_network.jl b/test/test_networks/test_conditional_glow_network.jl
index cf69e965..77e3d6df 100644
--- a/test/test_networks/test_conditional_glow_network.jl
+++ b/test/test_networks/test_conditional_glow_network.jl
@@ -5,8 +5,9 @@
 using InvertibleNetworks, LinearAlgebra, Test, Random
 using Statistics 
 
+
 # Random seed
-Random.seed!(10);
+Random.seed!(36);
 
 function loss(G, X, Cond;summarized=false)
     Y, ZC, logdet = G.forward(X, Cond)
@@ -78,8 +79,8 @@ for split_scales in [false,true]
 
         ###################################################################################################
         # Gradient test w.r.t. input
-        X0 = rand(Float32, N..., n_in, batchsize)
-        Cond0 = rand(Float32, N..., n_cond, batchsize)
+        X0 = randn(Float32, N..., n_in, batchsize)
+        Cond0 = randn(Float32, N..., n_cond, batchsize)
 
         dX = X - X0
 
@@ -101,7 +102,7 @@ for split_scales in [false,true]
         rate1 = err1[1:end-1]./err1[2:end]
         rate2 = err2[1:end-1]./err2[2:end]
 
-        @test isapprox(mean(rate1),2f0; atol=stol)
+        @test isapprox(mean(rate1), 2f0; atol=stol)
         @test isapprox(mean(rate2), 4f0; atol=stol)
 
         # Gradient test w.r.t. parameters
@@ -148,7 +149,7 @@ for split_scales in [false,true]
         G = SummarizedNet(G, sum_net)
 
         X = randn(Float32, N..., n_in, batchsize)
-        Cond = rand(Float32, N..., n_cond, batchsize)
+        Cond = randn(Float32, N..., n_cond, batchsize)
 
         # Invertibility
         XZ, CondZ = G.forward(X,Cond)
@@ -161,8 +162,8 @@ for split_scales in [false,true]
 
         ###################################################################################################
         # Gradient test w.r.t. input
-        X0 = rand(Float32, N..., n_in, batchsize)
-        Cond0 = rand(Float32, N..., n_cond, batchsize)
+        X0 = randn(Float32, N..., n_in, batchsize)
+        Cond0 = randn(Float32, N..., n_cond, batchsize)
 
         dX = X - X0