Merge branch 'MCCA'

Nabarb · Nabarb · commit 1c4af72f1224 · 2025-10-22T11:29:40.000+02:00
diff --git a/+datareader/+is/Struct.m b/+datareader/+is/Struct.m
@@ -85,7 +85,7 @@
     fnames = fieldnames(Din);
     fnames = string(fnames);
     fields2check = ["data","time","condition","area"];
-    fieldType    = ["numeric","cell","string","string"];
+    fieldType    = ["numeric","double","string","string"];
 
     Din_ = repmat(struct(),size(Din));
 
@@ -151,11 +151,11 @@
                 'Provided time and input data dimension mismatch. Please provide,for each trial, a time struct field matching data second dimension.');
         end
     else
-        assert(iscell(opts.time),...
-            'Struct input detected. Please provide a cell array of time vectors, one per trial.');
+        % assert(iscell(opts.time),...
+        %     'Struct input detected. Please provide a cell array of time vectors, one per trial.');
 
-        assert(length(opts.time{1}) == unique(Time),...
-                'Provided time and input data dimension mismatch. Please provide a cell array of time vectors matching trial lenghts.');
+        % assert(length(opts.time{1}) == unique(Time),...
+        %         'Provided time and input data dimension mismatch. Please provide a cell array of time vectors matching trial lenghts.');
 
         dishomogeneous = false;
     end
diff --git a/+embedding/+CCA/mcca.m b/+embedding/+CCA/mcca.m
@@ -0,0 +1,57 @@
+function [V,rho,A,rhotest]=mcca(X,d,Xtest,k)
+% [V,rho,A,rhotest]=mcca(X,d,Xtest,k) Multiset Canonical Correlation
+% Analysis. X is the data arranged as samples by dimension, whereby all
+% sets are concatenated along the dimensions. d is a vector with the
+% dimensions of each set. V are the component vectors and rho the resulting
+% inter-set correlations. A are the corresponding forward models, which
+% are returned as a list of length N. If Xtest is given, it will also
+% compute rho for the test data with the optimal V. If k is given then the
+% within-set correlation will be reduced in dimension from d to k prior to
+% inversion using PCA. This is useful for rank deficient data or for
+% regularization. If k is not given, dimension is reduced to the rank of
+% the data prior to inversion.
+% rhotest is returned empty when Xtest is omitted or empty.
+%
+% See https://arxiv.org/abs/1802.03759, https://arxiv.org/abs/1801.08881
+
+% Apr 30, 2018, Lucas Parra (c)
+% Sep 11, 2018, removed hack for forward model computation that broke the code sometimes
+% Sep 14, 2018, make forward model robust to ill conditioned data
+% Sep 15, 2018, keep simpler code in case that there is no regularization or rank problem
+
+if ~exist('k','var') || isempty(k), k=d; end
+
+N=length(d);
+R=cov(X);
+% Build D, the block-diagonal (within-set) part of R. Iterating from the
+% last set down sizes D on the first assignment.
+for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1));
+    D(j,j)=R(j,j);
+    k(i)=min(k(i),rank(D(j,j)));  % check rank for oblivious users
+end
+if sum(d)==sum(k) % simple case
+    [V,lambda]=eig(R,D);
+else  % if rank deficient, or if regularization requested
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1)); Dinv(j,j)=embedding.CCA.regInv(D(j,j),k(i)); end
+    [V,lambda]=eigs(Dinv*R,sum(k));
+end
+rho = (diag(lambda)-1)/(N-1);
+[~,indx]=sort(rho,'descend'); rho=rho(indx); V=V(:,indx);
+
+% compute forward models
+if nargout>2
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1));
+        W=V(j,1:k(i)); Rw=R(j,j);
+        if k(i)==d(i), A{i}=Rw*W/(W'*Rw*W);        % original formula, but wont work for rank deficient Rw
+        else A{i}=Rw*W*diag(1./diag(W'*Rw*W)); end % ignores correlation of components but robust to ill conditioned Rw
+    end
+end
+
+% compute rho for test data
+rhotest = [];  % stays empty unless test data is supplied
+if exist('Xtest','var') && ~isempty(Xtest)  % 'var' keeps exist from matching files/functions named Xtest
+    R=cov(Xtest);
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1)); D(j,j)=R(j,j); end
+    lambda = diag(V'*R*V)./diag(V'*D*V);
+    rhotest = (lambda-1)/(N-1);
+end
diff --git a/+embedding/+CCA/project.m b/+embedding/+CCA/project.m
@@ -1,14 +1,27 @@
 function E = project(D,C)
+% PROJECT Apply per-area projection matrices to trial data.
+%   E = project(D,C) left-multiplies the horizontally concatenated trials of
+%   each column of D by the matching matrix in C, then cuts the result back
+%   into one block per trial.
+%     D  nTrial-by-nArea cell array; the trials within a column must have
+%        equal row counts so they can be concatenated along dimension 2.
+%     C  1-by-nArea cell array of matrices; C{jj} must have as many
+%        columns as the trials of column jj have rows.
+%     E  nTrial-by-nArea cell array; E{ii,jj} has size(C{jj},1) rows and
+%        as many columns as D{ii,jj}.
 
-E = cell(size(D));
-alldata = [D{:}];
+[nTrial,nArea] = size(D);
+E = cell(nTrial,nArea);
+alldata = arrayfun(@(Aidx)cat(2,D{:,Aidx}),1:nArea,'UniformOutput',false);
 
-sc = (alldata - mean(alldata,2))'*C;
+E_ = cellfun(@(d,c) c * d,alldata,C,'UniformOutput',false);
 
 % For each condition, store the reduced version of each data vector
-index = 0;
-for ii = 1:length(D)
-    E{ii} = sc(index + (1:size(D(ii).data,2)),:)';
-    index = index + size(D(ii).data,2);
-end %ii
-end
+for jj = 1:nArea
+    index = 0;   % column offset of the current trial within E_{jj}
+    for ii = 1:nTrial
+        nSamp = size(D{ii,jj},2);
+        E{ii,jj} = E_{jj}(:,index + (1:nSamp));
+        index = index + nSamp;
+    end
+end
diff --git a/+embedding/+CCA/reduce.m b/+embedding/+CCA/reduce.m
@@ -1,7 +1,7 @@
 function [E,C,Corr] = reduce(D,pars)
         % CCAREDUCE Internal function for CCA
 
-        % Agglomerate all of the conditions, and perform PCA
+        % Agglomerate all of the conditions, and perform mCCA
         E_ = cell(1,pars.nArea);
         C = cell(size(D));
         Corr = cell(size(D));
@@ -19,18 +19,19 @@
             [A,B,Corr,E_{1},E_{2}] = canoncorr(D{:});
             C = {A(:,1:dims),B(:,1:dims)};
         else
-            data = cat(1,D{:})';
+            data = arrayfun(@(Aidx)cat(2,D{:,Aidx}),1:size(D,2),'UniformOutput',false);
             d = cellfun(@(x)size(x,1),D);
-            [~,Corr,C] = embedding.CCA.mcca(data,d);
+            [~,Corr,C] = embedding.CCA.mcca(cat(1,data{:})',d);
+            E_ = cellfun(@(d,c) c * d,data,C,'UniformOutput',false);
         end
 
         % [U,V] = checkFlip(D{:},C{:},endLeg_range, interest_range);
 
         % For each condition, store the reduced version of each data vector
         E = cell(pars.nTrial,pars.nArea);
-        for ii = 1:pars.nTrial
+        for jj = 1:pars.nArea
             index = 0;
-            for jj = 1:pars.nArea
+            for ii = 1:pars.nTrial
                 E{ii,jj} = E_{jj}(index + (1:pars.TrialL),1:dims)';
                 index = index + pars.TrialL;
             end
diff --git a/+embedding/+CCA/regInv.m b/+embedding/+CCA/regInv.m
@@ -0,0 +1,18 @@
+function invR = regInv(R, K)
+%REGINV PCA regularized inverse of a square matrix.
+%   invR = regInv(R, K) builds the inverse of R from its K leading singular
+%   triplets only: invR = U(:,1:K) * diag(1./s(1:K)) * V(:,1:K).'.
+%   R is described by the original author as symmetric positive definite;
+%   that property is not checked here.
+%   K defaults to size(R,1) (all components) when omitted or empty.
+if nargin<2 || isempty(K), K=size(R,1); end
+if ~ismatrix(R), error('JD: R must have two dimensions'); end
+if size(R,1)~=size(R,2), error('JD: R must be a square matrix'); end
+if K>size(R,1), error('JD: K must not exceed the size of R'); end
+
+[U,S,V]=svd(R,0);
+diagS=diag(S);
+keep=1:K;   % svd returns singular values in decreasing order
+invR=U(:,keep)*diag(1./diagS(keep))*V(:,keep).';
+
+end
diff --git a/+embedding/+GPFA/project.m b/+embedding/+GPFA/project.m
@@ -4,12 +4,12 @@
 alldata = [D{:}];
 projMatrix = C{:};
 
-sc = alldata'*projMatrix;
+sc = projMatrix * alldata;
 
 % For each condition, store the reduced version of each data vector
 index = 0;
 for ii = 1:length(D)
-    E{ii} = sc(index + (1:size(D{ii},2)),:)';
+    E{ii} = sc(:,index + (1:size(D{ii},2)));
     index = index + size(D{ii},2);
 end %ii
 end
diff --git a/+embedding/+GPFA/reduce.m b/+embedding/+GPFA/reduce.m
@@ -100,5 +100,5 @@
   [E{:}] = deal(AllSeq.xsm);
   [~, C{1}] = embedding.GPFA.util.orthogonalize([AllSeq.xsm], estParams.C);
   [~,lat] = pcacov(estParams.C * estParams.C');
-  VarExp{1} = cumsum(lat(1:xDim))./sum(lat);
+  VarExp{1} = cumsum(lat(xDim))./sum(lat);
 end
diff --git a/+embedding/+MCCA/loadParams.m b/+embedding/+MCCA/loadParams.m
@@ -0,0 +1,14 @@
+function pars = loadParams()
+%LOADPARAMS Default parameter struct for the MCCA embedding.
+%   pars = loadParams() returns a scalar struct with the single field
+%   mcca_k set to 0.9.
+%   NOTE(review): mcca_k is not read anywhere in this file; presumably it
+%   configures the MCCA reduction - confirm against the caller.
+
+% Options kept disabled for now:
+% pars.endLeg_range = @(t)getNormRange(t,fraction);
+% pars.interest_range = @(t)getInterestRange(t,fraction,alignment);
+% pars.ccaRefSig = [];
+pars = struct('mcca_k',0.9);
+
+end
diff --git a/+embedding/+MCCA/mcca.m b/+embedding/+MCCA/mcca.m
@@ -0,0 +1,57 @@
+function [V,rho,A,rhotest]=mcca(X,d,Xtest,k)
+% [V,rho,A,rhotest]=mcca(X,d,Xtest,k) Multiset Canonical Correlation
+% Analysis. X is the data arranged as samples by dimension, whereby all
+% sets are concatenated along the dimensions. d is a vector with the
+% dimensions of each set. V are the component vectors and rho the resulting
+% inter-set correlations. A are the corresponding forward models, which
+% are returned as a list of length N. If Xtest is given, it will also
+% compute rho for the test data with the optimal V. If k is given then the
+% within-set correlation will be reduced in dimension from d to k prior to
+% inversion using PCA. This is useful for rank deficient data or for
+% regularization. If k is not given, dimension is reduced to the rank of
+% the data prior to inversion.
+% rhotest is returned empty when Xtest is omitted or empty.
+%
+% See https://arxiv.org/abs/1802.03759, https://arxiv.org/abs/1801.08881
+
+% Apr 30, 2018, Lucas Parra (c)
+% Sep 11, 2018, removed hack for forward model computation that broke the code sometimes
+% Sep 14, 2018, make forward model robust to ill conditioned data
+% Sep 15, 2018, keep simpler code in case that there is no regularization or rank problem
+
+if ~exist('k','var') || isempty(k), k=d; end
+
+N=length(d);
+R=cov(X);
+% Build D, the block-diagonal (within-set) part of R. Iterating from the
+% last set down sizes D on the first assignment.
+for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1));
+    D(j,j)=R(j,j);
+    k(i)=min(k(i),rank(D(j,j)));  % check rank for oblivious users
+end
+if sum(d)==sum(k) % simple case
+    [V,lambda]=eig(R,D);
+else  % if rank deficient, or if regularization requested
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1)); Dinv(j,j)=embedding.MCCA.regInv(D(j,j),k(i)); end
+    [V,lambda]=eigs(Dinv*R,sum(k));
+end
+rho = (diag(lambda)-1)/(N-1);
+[~,indx]=sort(rho,'descend'); rho=rho(indx); V=V(:,indx);
+
+% compute forward models
+if nargout>2
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1));
+        W=V(j,1:k(i)); Rw=R(j,j);
+        if k(i)==d(i), A{i}=Rw*W/(W'*Rw*W);        % original formula, but wont work for rank deficient Rw
+        else A{i}=Rw*W*diag(1./diag(W'*Rw*W)); end % ignores correlation of components but robust to ill conditioned Rw
+    end
+end
+
+% compute rho for test data
+rhotest = [];  % stays empty unless test data is supplied
+if exist('Xtest','var') && ~isempty(Xtest)  % 'var' keeps exist from matching files/functions named Xtest
+    R=cov(Xtest);
+    for i=N:-1:1, j=(1:d(i))+sum(d(1:i-1)); D(j,j)=R(j,j); end
+    lambda = diag(V'*R*V)./diag(V'*D*V);
+    rhotest = (lambda-1)/(N-1);
+end
diff --git a/+embedding/+MCCA/project.m b/+embedding/+MCCA/project.m
@@ -0,0 +1,27 @@
+function E = project(D,C)
+% PROJECT Apply per-area projection matrices to trial data.
+%   E = project(D,C) left-multiplies the horizontally concatenated trials of
+%   each column of D by the matching matrix in C, then cuts the result back
+%   into one block per trial.
+%     D  nTrial-by-nArea cell array; the trials within a column must have
+%        equal row counts so they can be concatenated along dimension 2.
+%     C  1-by-nArea cell array of matrices; C{jj} must have as many
+%        columns as the trials of column jj have rows.
+%     E  nTrial-by-nArea cell array; E{ii,jj} has size(C{jj},1) rows and
+%        as many columns as D{ii,jj}.
+
+[nTrial,nArea] = size(D);
+E = cell(nTrial,nArea);
+alldata = arrayfun(@(Aidx)cat(2,D{:,Aidx}),1:nArea,'UniformOutput',false);
+
+E_ = cellfun(@(d,c) c * d,alldata,C,'UniformOutput',false);
+
+% For each condition, store the reduced version of each data vector
+for jj = 1:nArea
+    index = 0;   % column offset of the current trial within E_{jj}
+    for ii = 1:nTrial
+        nSamp = size(D{ii,jj},2);
+        E{ii,jj} = E_{jj}(:,index + (1:nSamp));
+        index = index + nSamp;
+    end
+end
diff --git a/+embedding/+MCCA/reduce.m b/+embedding/+MCCA/reduce.m
@@ -0,0 +1,20 @@
+function [C,Corr] = reduce(D_all,pars)
+%REDUCE Fit multiset CCA across data sets (internal MCCA helper).
+%   [C,Corr] = reduce(D_all,pars) concatenates the cells of each set in
+%   D_all along dimension 2, stacks the sets along dimension 1 and hands the
+%   transposed matrix to embedding.MCCA.mcca.
+%     D_all  cell array with one entry per set; each entry is itself a cell
+%            array whose contents are concatenated with cat(2,...). After
+%            concatenation all sets must have the same number of columns,
+%            otherwise the cat(1,...) below fails.
+%     pars   accepted for interface compatibility; not used here.
+%     C      third output of mcca (the forward models, one cell per set).
+%     Corr   second output of mcca (the inter-set correlations rho).
+
+    % One matrix per set
+    data = cellfun(@(eall)cat(2,eall{:}),D_all,'UniformOutput',false);
+    % Row count of every set tells mcca where each set starts and ends
+    d = cellfun(@(x)size(x,1),data);
+    [~,Corr,C] = embedding.MCCA.mcca(cat(1,data{:})',d);
+
+end
diff --git a/+embedding/+MCCA/regInv.m b/+embedding/+MCCA/regInv.m
@@ -0,0 +1,18 @@
+function invR = regInv(R, K)
+%REGINV PCA regularized inverse of a square matrix.
+%   invR = regInv(R, K) builds the inverse of R from its K leading singular
+%   triplets only: invR = U(:,1:K) * diag(1./s(1:K)) * V(:,1:K).'.
+%   R is described by the original author as symmetric positive definite;
+%   that property is not checked here.
+%   K defaults to size(R,1) (all components) when omitted or empty.
+if nargin<2 || isempty(K), K=size(R,1); end
+if ~ismatrix(R), error('JD: R must have two dimensions'); end
+if size(R,1)~=size(R,2), error('JD: R must be a square matrix'); end
+if K>size(R,1), error('JD: K must not exceed the size of R'); end
+
+[U,S,V]=svd(R,0);
+diagS=diag(S);
+keep=1:K;   % svd returns singular values in decreasing order
+invR=U(:,keep)*diag(1./diagS(keep))*V(:,keep).';
+
+end
diff --git a/+embedding/+PCA/project.m b/+embedding/+PCA/project.m
@@ -4,12 +4,12 @@
 alldata = [D{:}];
 projMatrix = C{:};
 
-sc = (alldata - mean(alldata,2))'*projMatrix;
+sc = projMatrix * (alldata - mean(alldata,2));
 
 % For each condition, store the reduced version of each data vector
 index = 0;
 for ii = 1:length(D)
-    E{ii} = sc(index + (1:size(D{ii},2)),:)';
+    E{ii} = sc(:,index + (1:size(D{ii},2)));
     index = index + size(D{ii},2);
 end %ii
 end
diff --git a/+embedding/+PCA/reduce.m b/+embedding/+PCA/reduce.m
@@ -16,11 +16,11 @@
         % For each condition, store the reduced version of each data vector
         index = 0;
         for i=1:length(D)
-            D(i).data = sc(index + (1:size(D(i).data,2)),1:dims)';
+            D(i).data = sc(index + (1:size(D(i).data,2)),:)';
             index = index + size(D(i).data,2);
         end
         [E{:}] = deal(D.data);
-        C{1} = u(:,1:dims);
-        VarExplained{1} = cumsum(lat) ./ sum(lat);  % eigenvalues
+        C{1} = u;
+        VarExplained{1} = cumsum(lat(dims)) ./ sum(lat);  % eigenvalues
 
     end
diff --git a/+embedding/GPFA.m b/+embedding/GPFA.m
@@ -1,4 +1,7 @@
-function [E,ProjMatrix,VarExplained]= GPFA(D,pars)
+function [E,ProjMatrix,VarExplained]= GPFA(D,pars,W, VarExplained)
+if nargin < 3
+    [W, VarExplained] = deal([]);
+end
 
 seqTest = pars.seqTest;
 nTrial = length(D);
@@ -21,8 +24,16 @@
 end
 [D_.T] = deal(pars.TrialL{:});
 otherArgs = [fieldnames(pars) struct2cell(pars)]';
-[E,ProjMatrix,VarExplained] = embedding.GPFA.reduce(D_(~seqTest),D_(seqTest),...
-    'xDim',pars.numPC,'verbose',false,'binWidth',pars.subsampling,...
-    otherArgs{:});
+
+if pars.projectOnly
+    E = embedding.GPFA.project(D,W);
+    ProjMatrix = W;
+else
+    [E,ProjMatrix,VarExplained] = embedding.GPFA.reduce(D_(~seqTest),D_(seqTest),...
+        'xDim',pars.numPC,'verbose',false,'binWidth',pars.subsampling,...
+        otherArgs{:});
+    ProjMatrix{1} = ProjMatrix{1}';
+end
+% VarExplained = VarExplained;
     
 end
diff --git a/+embedding/MCCA.m b/+embedding/MCCA.m
@@ -0,0 +1,40 @@
+function [E,ProjMatrix,ProjMatrixInv,Corr] = MCCA(Dall,pars)
+%MCCA Co-register several data sets with multiset CCA.
+%   [E,ProjMatrix,ProjMatrixInv,Corr] = MCCA(Dall,pars)
+%     Dall           cell array with one entry per set; each entry is a cell
+%                    array whose contents are concatenated along dimension 2.
+%     pars           struct array, one element per set, read for the fields
+%                    nTrial and TrialL (cell array of per-trial lengths).
+%     E              same layout as Dall, each trial replaced by its
+%                    projected version.
+%     ProjMatrix     per-set matrices returned by embedding.MCCA.reduce.
+%     ProjMatrixInv  pseudo-inverse (pinv) of each ProjMatrix entry.
+%     Corr           second output of embedding.MCCA.reduce.
+    nSets = numel(Dall);
+
+    % Draw minTrials random indices per set, where minTrials is the smallest
+    % first dimension among the sets, and fit on that subset only.
+    % NOTE(review): randi can repeat an index; confirm that sampling with
+    % replacement is intended (randperm would avoid repeats).
+    minTrials = min(cellfun(@(x)size(x,1),Dall));
+    randIdx = arrayfun(@(idx)randi(pars(idx).nTrial,1,minTrials),1:nSets, ...
+        'UniformOutput',false);
+    Dall_ = cellfun(@(d,idx)d(idx),Dall,randIdx,'UniformOutput',false);
+
+    [ProjMatrix,Corr] = embedding.MCCA.reduce(Dall_,pars);
+    ProjMatrixInv = cellfun(@(w)pinv(w),ProjMatrix,'UniformOutput',false);
+
+    % Project every cell of every set (not only the drawn subset).
+    E_ = cellfun(@(d,c) c * cat(2,d{:}),Dall,ProjMatrix,'UniformOutput',false);
+
+    % For each dataset, store the coregistered version
+    E = Dall;
+    for ii = 1:nSets
+        index = 0;   % column offset of trial jj inside E_{ii}
+        for jj = 1:pars(ii).nTrial
+            nSamp = pars(ii).TrialL{jj};
+            E{ii}{jj} = E_{ii}(:,index + (1:nSamp));
+            index = index + nSamp;
+        end
+    end
+end
diff --git a/+embedding/PCA.m b/+embedding/PCA.m
diff --git a/+metrics/Alignment.m b/+metrics/Alignment.m
diff --git a/+metrics/Tangling.m b/+metrics/Tangling.m
diff --git a/@NeuralEmbedding/NeuralEmbedding.m b/@NeuralEmbedding/NeuralEmbedding.m
diff --git a/@NeuralEmbedding/computeMetrics.m b/@NeuralEmbedding/computeMetrics.m
diff --git a/@NeuralEmbedding/findEmbedding.m b/@NeuralEmbedding/findEmbedding.m