diff --git a/+diagnostics/+compute/align_procrustes.m b/+diagnostics/+compute/align_procrustes.m new file mode 100644 index 0000000..084638a --- /dev/null +++ b/+diagnostics/+compute/align_procrustes.m @@ -0,0 +1,88 @@ +function results = align_procrustes(Z1, Z2, allowScale) +%ALIGN_PROCRUSTES Orthogonal Procrustes alignment of two latent spaces. +% +% RESULTS = diagnostics.compute.align_procrustes(Z1, Z2) finds the +% orthogonal matrix R that minimises ||Z1 - Z2*R||_F, and returns the +% aligned Z2 and related metrics. +% +% RESULTS = diagnostics.compute.align_procrustes(Z1, Z2, ALLOWSCALE) +% also optimises an isotropic scaling factor when ALLOWSCALE is true. +% +% Both matrices are centred (mean-subtracted) before alignment. +% +% Inputs +% ------ +% Z1 : T x d latent space (reference session, e.g. trials x dims). +% Z2 : T x d latent space (session to be aligned). Must have the +% same number of columns d as Z1. +% allowScale : logical (default false). Allow isotropic scaling. +% +% Outputs +% ------- +% results : struct with fields +% .Z2_aligned - (T x d) Z2 after applying R (and scale if enabled). +% .R - (d x d) orthogonal rotation matrix. +% .scale - isotropic scale factor (1.0 when allowScale=false). +% .disparity - Procrustes disparity = ||Z1 - Z2_aligned||_F^2 / +% ||Z1||_F^2 (normalised; 0 = perfect alignment). +% +% Notes +% ----- +% * Implements the classic SVD solution: [U,~,V] = svd(Z1'*Z2); +% R = V * U'. +% * No toolboxes required. +% * For multi-session alignment see NeuralEmbedding.alignSessions. +% +% Example +% ------- +% res = diagnostics.compute.align_procrustes(Z1, Z2); +% fprintf('Disparity = %.4f\n', res.disparity); +% +% See also diagnostics.compute.alignment_metrics, +% diagnostics.pars.ProcrustesAlignment + +if nargin < 3 + allowScale = false; +end + +% --- Validation --- +if ~ismatrix(Z1) || ~isnumeric(Z1) || ~ismatrix(Z2) || ~isnumeric(Z2) + error('diagnostics:align_procrustes:badInput', ... + 'Z1 and Z2 must be 2-D numeric matrices.'); +end +if size(Z1, 2) ~= size(Z2, 2) + error('diagnostics:align_procrustes:dimMismatch', ... + 'Z1 and Z2 must have the same number of columns (latent dims).'); +end + +% --- Centre --- +Z1c = Z1 - mean(Z1, 1); +Z2c = Z2 - mean(Z2, 1); + +% --- SVD solution --- +M = Z1c' * Z2c; % d x d +[U, S, V] = svd(M); +R = V * U'; % orthogonal rotation + +% --- Optional scale --- +if allowScale + scale = trace(S) / (norm(Z2c, 'fro')^2); +else + scale = 1.0; +end + +Z2_aligned = scale * Z2c * R; + +% --- Disparity --- +norm_Z1 = norm(Z1c, 'fro'); +if norm_Z1 == 0 + disparity = 0; +else + disparity = norm(Z1c - Z2_aligned, 'fro')^2 / norm_Z1^2; +end + +results.Z2_aligned = Z2_aligned; +results.R = R; +results.scale = scale; +results.disparity = disparity; +end diff --git a/+diagnostics/+compute/alignment_metrics.m b/+diagnostics/+compute/alignment_metrics.m new file mode 100644 index 0000000..c87fe5e --- /dev/null +++ b/+diagnostics/+compute/alignment_metrics.m @@ -0,0 +1,114 @@ +function results = alignment_metrics(Z1, Z2_aligned) +%ALIGNMENT_METRICS Quantitative metrics for latent-space alignment quality. +% +% RESULTS = diagnostics.compute.alignment_metrics(Z1, Z2_ALIGNED) +% computes several metrics that quantify how well two latent spaces are +% aligned, assuming Z2_ALIGNED has already been transformed by +% diagnostics.compute.align_procrustes. +% +% Inputs +% ------ +% Z1 : T x d reference latent space (centred). +% Z2_aligned : T x d aligned latent space. +% +% Outputs +% ------- +% results : struct with fields +% .disparity - Procrustes disparity ||Z1-Z2||_F^2 / ||Z1||_F^2. +% .principalAngles - (1 x d) principal angles (radians) between the +% column spaces of Z1 and Z2_aligned, in ascending +% order. +% .meanPrincipalAngle - mean of principalAngles (radians). +% .distCorr - Pearson correlation between vectorised pairwise +% Euclidean distance matrices of Z1 and Z2_aligned +% (Mantel-style; 1 = identical structure). +% +% Notes +% ----- +% * Principal angles: let Q1, Q2 be column-space orthonormal bases (from +% QR decomposition). Angles = acos(svd(Q1'*Q2)). +% * Distance correlation is computed on the upper-triangular part of the +% pairwise distance matrix to avoid double-counting. +% * For large T, distance-correlation computation is O(T^2); set +% distCorr to NaN if T > 2000 to avoid excessive runtime. +% * No toolboxes required. +% +% Example +% ------- +% proc = diagnostics.compute.align_procrustes(Z1, Z2); +% met = diagnostics.compute.alignment_metrics(Z1, proc.Z2_aligned); +% fprintf('Mean principal angle = %.2f deg\n', ... +% rad2deg(met.meanPrincipalAngle)); +% +% See also diagnostics.compute.align_procrustes + +% --- Validation --- +if size(Z1, 2) ~= size(Z2_aligned, 2) + error('diagnostics:alignment_metrics:dimMismatch', ... + 'Z1 and Z2_aligned must have the same number of columns.'); +end + +% Centre +Z1 = Z1 - mean(Z1, 1); +Z2 = Z2_aligned - mean(Z2_aligned, 1); + +d = size(Z1, 2); +T = size(Z1, 1); + +% --- Disparity --- +nZ1 = norm(Z1, 'fro'); +if nZ1 == 0 + disparity = 0; +else + disparity = norm(Z1 - Z2, 'fro')^2 / nZ1^2; +end + +% --- Principal angles --- +[Q1, ~] = qr(Z1, 0); % economy QR +[Q2, ~] = qr(Z2, 0); +svs = svd(Q1' * Q2); +svs = min(max(svs, -1), 1); % clamp for numerical safety +principalAngles = acos(svs(:)'); + +% --- Distance correlation (Mantel-style) --- +if T <= 2000 + D1 = i_pdist_upper(Z1); + D2 = i_pdist_upper(Z2); + distCorr = i_pearson(D1, D2); +else + distCorr = NaN; % too expensive for large T +end + +results.disparity = disparity; +results.principalAngles = principalAngles; +results.meanPrincipalAngle = mean(principalAngles); +results.distCorr = distCorr; +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function v = i_pdist_upper(Z) +% Upper-triangular pairwise Euclidean distances as a vector. +T = size(Z, 1); +v = zeros(1, T*(T-1)/2); +k = 0; +for ii = 1:T-1 + diff = Z(ii+1:end, :) - Z(ii, :); + n = T - ii; + v(k+1 : k+n) = sqrt(sum(diff.^2, 2)); + k = k + n; +end +end + +function r = i_pearson(a, b) +a = a(:); b = b(:); +a = a - mean(a); b = b - mean(b); +na = norm(a); nb = norm(b); +if na == 0 || nb == 0 + r = 0; + return; +end +r = (a' * b) / (na * nb); +end diff --git a/+diagnostics/+compute/circular_shift.m b/+diagnostics/+compute/circular_shift.m new file mode 100644 index 0000000..6209b25 --- /dev/null +++ b/+diagnostics/+compute/circular_shift.m @@ -0,0 +1,36 @@ +function Xout = circular_shift(X) +%CIRCULAR_SHIFT Circular time-shift null for neural time-series data. +% +% XOUT = diagnostics.compute.circular_shift(X) returns a copy of X where +% each column (neuron) has been independently shifted by a uniformly +% random time offset in 1 .. T-1 (wrapping at the boundaries). +% +% This shuffle: +% - Preserves each neuron's autocorrelation structure. +% - Preserves each neuron's marginal distribution. +% - Destroys the alignment between the neural data and any external +% labels (behaviour, task variables). +% - Partially preserves instantaneous cross-neuron correlations at +% non-zero lags (unlike neuronwise shuffle). +% +% Use this as a temporal null for decoding permutation tests when the +% data are sampled continuously in time (not independent trials). +% +% Input +% ----- +% X : T x N matrix, where rows are time-ordered samples. +% +% Output +% ------ +% Xout : T x N matrix with independently circularly shifted columns. +% +% See also diagnostics.compute.shuffle_neuronwise, +% diagnostics.shufflers.circular_shift + +[T, N] = size(X); +Xout = zeros(T, N); +shifts = randi([1, T-1], 1, N); % independent shift per neuron +for nn = 1:N + Xout(:, nn) = circshift(X(:, nn), shifts(nn)); +end +end diff --git a/+diagnostics/+compute/cv_decoding.m b/+diagnostics/+compute/cv_decoding.m new file mode 100644 index 0000000..029280d --- /dev/null +++ b/+diagnostics/+compute/cv_decoding.m @@ -0,0 +1,206 @@ +function results = cv_decoding(X, y, dim, pars) +%CV_DECODING Cross-validated decoding of labels from PCA latent space. +% +% RESULTS = diagnostics.compute.cv_decoding(X, Y, DIM, PARS) embeds X +% into a DIM-dimensional PCA space and decodes labels Y using a +% nearest-centroid classifier. Embedding and any preprocessing are fit +% on training folds only. +% +% Inputs +% ------ +% X : T x N matrix (T samples, N neurons). No NaN. +% y : T x 1 integer or categorical label vector. +% dim : positive integer, number of latent dimensions. +% pars: struct (see diagnostics.pars.CVDecoding): +% .kfold (default 5) +% .rngSeed (default 0) +% .decoder 'nearestCentroid' (default; no toolbox needed) +% +% Outputs +% ------- +% results : struct with fields +% .accMean - mean accuracy across folds (scalar). +% .accPerFold - (1 x kfold) per-fold accuracy. +% .balAccMean - mean balanced accuracy across folds. +% .balAccPerFold - (1 x kfold) per-fold balanced accuracy. +% .confMat - (nClasses x nClasses) confusion matrix (sum over folds). +% .classes - unique class labels. +% .dim - dim used. +% .kfold - kfold used. +% .rngSeed - rngSeed used. +% +% Notes +% ----- +% * Balanced accuracy = mean per-class recall (robust to class imbalance). +% * Only classification (categorical/integer y) is supported; pass +% continuous y to obtain Pearson r-based regression scores separately. +% * Requires only base MATLAB. +% +% Example +% ------- +% pars = diagnostics.pars.CVDecoding(); +% res = diagnostics.compute.cv_decoding(X, y, 5, pars); +% fprintf('Mean accuracy = %.2f%%\n', res.accMean * 100); +% +% See also diagnostics.pars.CVDecoding, +% diagnostics.compute.permutation_test + +% --- Input validation --- +if ~ismatrix(X) || ~isnumeric(X) + error('diagnostics:cv_decoding:badInput', 'X must be a 2-D numeric matrix.'); +end +y = y(:); +if size(X, 1) ~= numel(y) + error('diagnostics:cv_decoding:sizeMismatch', ... + 'X and y must have the same number of rows.'); +end +if any(isnan(X(:))) + error('diagnostics:cv_decoding:nanData', 'X contains NaN values.'); +end + +if nargin < 4 || isempty(pars) + pars = diagnostics.pars.CVDecoding(); +end +kfold = pars.kfold; +rngSeed = pars.rngSeed; +decoder = pars.decoder; + +if ~isempty(rngSeed) + rng(rngSeed, 'twister'); +end + +[T, N] = size(X); +dim = min(dim, min(T, N) - 1); +classes = unique(y); +nClass = numel(classes); + +% --- k-fold partition --- +foldIdx = i_kfold_stratified(y, kfold); + +accPerFold = zeros(1, kfold); +balAccPerFold = zeros(1, kfold); +confMatAcc = zeros(nClass, nClass); + +for ff = 1:kfold + testMask = (foldIdx == ff); + trainMask = ~testMask; + + Xtr = X(trainMask, :); ytr = y(trainMask); + Xte = X(testMask, :); yte = y(testMask); + + % Train-only z-score + mu = mean(Xtr, 1); + sd = std(Xtr, 0, 1); + sd(sd == 0) = 1; + Xtr = (Xtr - mu) ./ sd; + Xte = (Xte - mu) ./ sd; + + % Fit PCA on training data + [W] = i_pca_fit(Xtr, dim); % N x dim + + % Project both folds + Ztr = Xtr * W; + Zte = Xte * W; + + % Decode + switch decoder + case 'nearestCentroid' + yhat = i_nearest_centroid(Ztr, ytr, Zte, classes); + otherwise + error('diagnostics:cv_decoding:unknownDecoder', ... + 'decoder ''%s'' is not supported.', decoder); + end + + % Score + accPerFold(ff) = mean(yhat == yte); + [balAccPerFold(ff), cm] = i_balanced_acc(yte, yhat, classes); + confMatAcc = confMatAcc + cm; +end + +results.accMean = mean(accPerFold); +results.accPerFold = accPerFold; +results.balAccMean = mean(balAccPerFold); +results.balAccPerFold = balAccPerFold; +results.confMat = confMatAcc; +results.classes = classes; +results.dim = dim; +results.kfold = kfold; +results.rngSeed = rngSeed; +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function foldIdx = i_kfold_stratified(y, k) +% Stratified k-fold: ensures each class is distributed across folds. +% Uses floor() with explicit remainder handling for consistent fold sizes. +classes = unique(y); +T = numel(y); +foldIdx = zeros(T, 1); +for cc = 1:numel(classes) + idx = find(y == classes(cc)); + idx = idx(randperm(numel(idx))); + n = numel(idx); + base = floor(n / k); % minimum samples per fold for this class + extras = n - base * k; % number of folds that get one extra sample + lo = 1; + for ff = 1:k + hi = lo + base - 1 + (ff <= extras); + if lo <= n + foldIdx(idx(lo:min(hi,n))) = ff; + end + lo = hi + 1; + end +end +% Assign any unassigned samples (edge case: fewer samples than folds) +unassigned = find(foldIdx == 0); +for ii = 1:numel(unassigned) + foldIdx(unassigned(ii)) = mod(ii-1, k) + 1; +end +end + +function W = i_pca_fit(X, dim) +% Fit PCA on centred X; return loadings W (N x dim). +X = X - mean(X, 1); +[~, ~, V] = svd(X, 'econ'); % V: N x rank, columns = principal directions +dim = min(dim, size(V, 2)); +W = V(:, 1:dim); +end + +function yhat = i_nearest_centroid(Ztrain, ytrain, Ztest, classes) +% Nearest-centroid classifier. No toolboxes required. +nClass = numel(classes); +centroids = zeros(nClass, size(Ztrain, 2)); +for cc = 1:nClass + centroids(cc, :) = mean(Ztrain(ytrain == classes(cc), :), 1); +end +% Compute squared distances to each centroid +nTest = size(Ztest, 1); +dists = zeros(nTest, nClass); +for cc = 1:nClass + diff = Ztest - centroids(cc, :); + dists(:, cc) = sum(diff.^2, 2); +end +[~, cidx] = min(dists, [], 2); +yhat = classes(cidx); +end + +function [balAcc, cm] = i_balanced_acc(ytrue, ypred, classes) +% Compute balanced accuracy and confusion matrix. +nClass = numel(classes); +cm = zeros(nClass, nClass); +for ii = 1:nClass + for jj = 1:nClass + cm(ii, jj) = sum(ytrue == classes(ii) & ypred == classes(jj)); + end +end +recall = zeros(nClass, 1); +for ii = 1:nClass + n = sum(ytrue == classes(ii)); + if n > 0 + recall(ii) = cm(ii, ii) / n; + end +end +balAcc = mean(recall); +end diff --git a/+diagnostics/+compute/cv_reconstruction.m b/+diagnostics/+compute/cv_reconstruction.m new file mode 100644 index 0000000..e9c52c8 --- /dev/null +++ b/+diagnostics/+compute/cv_reconstruction.m @@ -0,0 +1,176 @@ +function results = cv_reconstruction(X, dim, pars) +%CV_RECONSTRUCTION Cross-validated PCA reconstruction of neural activity. +% +% RESULTS = diagnostics.compute.cv_reconstruction(X, DIM, PARS) evaluates +% how well the latent embedding reconstructs held-out neural activity +% using k-fold cross-validation. The embedding is PCA; all preprocessing +% (z-scoring and PCA fit) is done on the training fold only to avoid +% data leakage. +% +% Inputs +% ------ +% X : T x N matrix (T samples/time-bins, N neurons/features). No NaN. +% dim : positive integer. Number of latent dimensions to keep. +% pars: struct with fields (see diagnostics.pars.CVReconstruction): +% .kfold (default 5) +% .rngSeed (default 0) +% .zscore (default true) +% +% Outputs +% ------- +% results : struct with fields +% .reconCorrMean - mean Pearson r across folds (scalar). +% .reconCorrPerFold - (1 x kfold) Pearson r on held-out data per fold. +% .R2Mean - mean R^2 across folds (scalar). +% .R2PerFold - (1 x kfold) R^2 per fold. +% .corrPerNeuron - (1 x N) mean Pearson r per neuron across folds. +% .dim - dim used. +% .kfold - kfold used. +% .rngSeed - rngSeed used. +% +% Notes +% ----- +% * Pearson r is computed on the vectorised held-out data x_test(:). +% * R^2 = 1 - SS_res / SS_tot (may be negative). +% * Requires only base MATLAB (no toolboxes). +% +% Example +% ------- +% pars = diagnostics.pars.CVReconstruction(); +% res = diagnostics.compute.cv_reconstruction(X, 5, pars); +% fprintf('Mean Pearson r = %.3f\n', res.reconCorrMean); +% +% See also diagnostics.pars.CVReconstruction + +% --- Input validation --- +if ~ismatrix(X) || ~isnumeric(X) + error('diagnostics:cv_reconstruction:badInput', ... + 'X must be a 2-D numeric matrix (T x N).'); +end +if any(isnan(X(:))) + error('diagnostics:cv_reconstruction:nanData', ... + 'X contains NaN values.'); +end +if ~isscalar(dim) || dim < 1 || dim ~= round(dim) + error('diagnostics:cv_reconstruction:badDim', ... + 'dim must be a positive integer scalar.'); +end + +if nargin < 3 || isempty(pars) + pars = diagnostics.pars.CVReconstruction(); +end +kfold = pars.kfold; +rngSeed = pars.rngSeed; +doZscore = pars.zscore; + +if ~isempty(rngSeed) + rng(rngSeed, 'twister'); +end + +[T, N] = size(X); +dim = min(dim, min(T, N) - 1); + +% --- k-fold partition --- +foldIdx = i_kfold_indices(T, kfold); + +reconCorrPerFold = zeros(1, kfold); +R2PerFold = zeros(1, kfold); +corrPerNeuronAcc = zeros(kfold, N); + +for ff = 1:kfold + testMask = (foldIdx == ff); + trainMask = ~testMask; + + Xtrain = X(trainMask, :); + Xtest = X(testMask, :); + + % --- Train-only z-score (no leakage) --- + if doZscore + mu_tr = mean(Xtrain, 1); + sd_tr = std(Xtrain, 0, 1); + sd_tr(sd_tr == 0) = 1; % avoid divide-by-zero for silent neurons + Xtrain = (Xtrain - mu_tr) ./ sd_tr; + Xtest = (Xtest - mu_tr) ./ sd_tr; + end + + % --- Fit PCA on training data --- + [W, Xtrain_proj] = i_pca_fit(Xtrain, dim); % W: N x dim + + % --- Project and reconstruct test data --- + Xtest_proj = Xtest * W; % Ttest x dim + Xhat_test = Xtest_proj * W'; % Ttest x N + + % --- Reconstruction scores --- + reconCorrPerFold(ff) = i_pearson(Xtest(:), Xhat_test(:)); + R2PerFold(ff) = i_r2(Xtest(:), Xhat_test(:)); + corrPerNeuronAcc(ff, :) = i_pearson_per_col(Xtest, Xhat_test); +end + +results.reconCorrMean = mean(reconCorrPerFold); +results.reconCorrPerFold = reconCorrPerFold; +results.R2Mean = mean(R2PerFold); +results.R2PerFold = R2PerFold; +results.corrPerNeuron = mean(corrPerNeuronAcc, 1); +results.dim = dim; +results.kfold = kfold; +results.rngSeed = rngSeed; +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function foldIdx = i_kfold_indices(T, k) +% Return a T x 1 vector of fold assignments 1..k (stratified by index). +foldIdx = zeros(T, 1); +idx = randperm(T); +folds = floor(T / k); +for ff = 1:k + if ff < k + foldIdx(idx((ff-1)*folds + (1:folds))) = ff; + else + foldIdx(idx((ff-1)*folds + 1 : end)) = ff; + end +end +end + +function [W, scores] = i_pca_fit(X, dim) +% Fit PCA on X (already z-scored), return loading matrix W (N x dim). +X = X - mean(X, 1); % centre +[~, ~, V] = svd(X, 'econ'); % V: N x rank, singular vectors +dim = min(dim, size(V, 2)); +W = V(:, 1:dim); % N x dim +scores = X * W; % T x dim +end + +function r = i_pearson(a, b) +% Pearson correlation between two vectors. +a = a(:); b = b(:); +a = a - mean(a); b = b - mean(b); +na = norm(a); nb = norm(b); +if na == 0 || nb == 0 + r = 0; + return; +end +r = (a' * b) / (na * nb); +end + +function r2 = i_r2(y, yhat) +% Coefficient of determination R^2. +ss_res = sum((y - yhat).^2); +ss_tot = sum((y - mean(y)).^2); +if ss_tot == 0 + r2 = 0; +else + r2 = 1 - ss_res / ss_tot; +end +end + +function rv = i_pearson_per_col(A, B) +% Pearson r between corresponding columns of A and B. +N = size(A, 2); +rv = zeros(1, N); +for nn = 1:N + rv(nn) = i_pearson(A(:, nn), B(:, nn)); +end +end diff --git a/+diagnostics/+compute/dim_parallel_analysis.m b/+diagnostics/+compute/dim_parallel_analysis.m new file mode 100644 index 0000000..94b33fe --- /dev/null +++ b/+diagnostics/+compute/dim_parallel_analysis.m @@ -0,0 +1,225 @@ +function results = dim_parallel_analysis(X, dims, pars, label, projFcn) +%DIM_PARALLEL_ANALYSIS Shuffle-based dimension selection (parallel analysis). +% +% RESULTS = diagnostics.compute.dim_parallel_analysis(X, DIMS, PARS) +% selects the number of latent dimensions d* by comparing the PCA +% eigenspectrum of the real data against a null distribution obtained +% by shuffling. +% +% The null preserves each neuron's marginal firing-rate distribution but +% destroys cross-neuron covariance (mode='neuronwise', recommended) or +% the temporal structure within each neuron (mode='rowperm'). +% +% Inputs +% ------ +% X : T x N matrix (T samples, N neurons/features). Must not contain +% NaN. When PROJFCN is provided, X should already have been +% preprocessed to match the embedding pipeline (e.g. z-scored by +% obj.S if using the NeuralEmbedding class method); the internal +% z-score step is skipped. +% dims : vector of positive integers, e.g. 1:15. Eigenvalues for these +% component indices are compared against the null. +% pars : struct with fields (see diagnostics.pars.ParallelAnalysis): +% .nShuffle (default 200) +% .alpha (default 0.05) +% .rngSeed (default 0) +% .mode 'neuronwise' | 'rowperm' (default 'neuronwise') +% .verbose logical (default true) - print progress +% label : (optional) string label displayed in progress output, e.g. +% 'Animal.Session'. Defaults to ''. +% projFcn : (optional) function handle @(X_matrix, maxK) -> eigenvalues +% where eigenvalues is a 1 x maxK row vector. +% When provided: +% * X is treated as already preprocessed (no internal z-scoring). +% * Both real-data and shuffled-data eigenvalues are obtained by +% calling projFcn. +% * This lets the caller inject the same projection pipeline as +% the embedding method (e.g. embedding.PCA). +% When empty or omitted: +% * Legacy behaviour — X is z-scored internally, then eigenvalues +% are extracted via base-MATLAB SVD (toolbox-free). +% +% Outputs +% ------- +% results : struct with fields +% .eigReal - (1 x numel(dims)) eigenvalues of real data. +% .eigNull - (nShuffle x numel(dims)) null eigenvalue matrix. +% .nullQuantile - (1 x numel(dims)) (1-alpha) quantile of null. +% .dimsTested - dims vector used. +% .dStar - selected dimension (largest k where real > null). +% .alpha - alpha used. +% .nShuffle - nShuffle used. +% .rngSeed - rngSeed used. +% .mode - shuffle mode used. +% +% Notes +% ----- +% * When called via NeuralEmbedding.selectDimension, projFcn is +% automatically set to match the object's embedding method so that the +% null distribution is built with the same pipeline as findEmbedding. +% * When called standalone, the built-in z-score + SVD path is used. +% * Requires only base MATLAB (no Statistics Toolbox) in standalone mode. +% +% Example +% ------- +% X = randn(200, 50); % 200 time bins, 50 neurons +% pars = diagnostics.pars.ParallelAnalysis(); +% res = diagnostics.compute.dim_parallel_analysis(X, 1:10, pars); +% fprintf('Selected dimension: %d\n', res.dStar); +% +% See also diagnostics.pars.ParallelAnalysis + +% --- Input validation --- +if ~ismatrix(X) || ~isnumeric(X) + error('diagnostics:dim_parallel_analysis:badInput', ... + 'X must be a 2-D numeric matrix (T x N).'); +end +if any(isnan(X(:))) + error('diagnostics:dim_parallel_analysis:nanData', ... + 'X contains NaN values. Remove or impute before calling this function.'); +end + +dims = dims(:)'; +if any(dims < 1) || any(dims ~= round(dims)) + error('diagnostics:dim_parallel_analysis:badDims', ... + 'dims must be a vector of positive integers.'); +end + +% Defaults +if nargin < 3 || isempty(pars) + pars = diagnostics.pars.ParallelAnalysis(); +end +if nargin < 4 || isempty(label) + label = ''; +end +if nargin < 5 + projFcn = []; +end +nShuffle = pars.nShuffle; +alpha = pars.alpha; +rngSeed = pars.rngSeed; +mode = pars.mode; +verbose = isfield(pars,'verbose') && pars.verbose; + +% --- RNG --- +if ~isempty(rngSeed) + rng(rngSeed, 'twister'); +end + +[T, N] = size(X); + +% --- Preprocessing -------------------------------------------------------- +% When a projFcn is provided by the caller (e.g. from selectDimension using +% the object's embedding pipeline), X is assumed to be already preprocessed +% (obj.S applies z-scoring internally when obj.zscore=true). No extra +% z-scoring is performed here to avoid double-standardising. +% +% When projFcn is empty (standalone / legacy call), z-score X before the +% built-in SVD path. This is appropriate for raw data not pre-processed +% by a NeuralEmbedding object. +useCustomProj = ~isempty(projFcn); +if ~useCustomProj + X = zscore(X, 0, 1); % standardise columns for standalone / legacy mode +end + +% Cap dims at min(T,N)-1. +% After mean-centering, the rank of X is at most min(T,N)-1, so at most +% that many non-zero eigenvalues exist. +maxDim = min(T, N) - 1; +dims = dims(dims <= maxDim); +if isempty(dims) + error('diagnostics:dim_parallel_analysis:badDims', ... + 'All requested dims exceed the data rank (min(T,N)-1 = %d).', maxDim); +end + +% --- Real eigenvalues --- +if useCustomProj + eigAll = projFcn(X, max(dims)); +else + eigAll = i_pca_eigs(X, max(dims)); +end +eigReal = eigAll(dims); + +% --- Null distribution --- +if verbose + if ~isempty(label) + fprintf(1, '\n Parallel analysis [%s]: shuffle 0/%d', label, nShuffle); + else + fprintf(1, '\n Parallel analysis: shuffle 0/%d', nShuffle); + end +end +eigNull = zeros(nShuffle, numel(dims)); +prevLen = 0; +for ss = 1:nShuffle + Xshuf = i_shuffle(X, mode); + if useCustomProj + eigs_all = projFcn(Xshuf, max(dims)); + else + eigs_all = i_pca_eigs(Xshuf, max(dims)); + end + eigNull(ss, :) = eigs_all(dims); + if verbose + msg = sprintf('%d/%d', ss, nShuffle); + fprintf(1, '%s%s', repmat(char(8), 1, prevLen), msg); + prevLen = numel(msg); + end +end +if verbose + fprintf(1, ' done.\n'); +end + +% --- Null quantile and dimension selection --- +nullQuantile = quantile(eigNull, 1 - alpha, 1); + +% d* = largest k where real eigenvalue > null quantile +exceed = eigReal > nullQuantile; +if any(exceed) + dStar = dims(find(exceed, 1, 'last')); +else + dStar = 0; +end + +% --- Pack results --- +results.eigReal = eigReal; +results.eigNull = eigNull; +results.nullQuantile = nullQuantile; +results.dimsTested = dims; +results.dStar = dStar; +results.alpha = alpha; +results.nShuffle = nShuffle; +results.rngSeed = rngSeed; +results.mode = mode; +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function eigs = i_pca_eigs(X, maxK) +% Return eigenvalues (descending) for the first maxK components. +% Uses economy SVD of centred X for numerical stability; no toolbox needed. +% NOTE: X is assumed to have already been z-scored by the caller. +[T, N] = size(X); +maxK = min(maxK, min(T, N) - 1); +X = X - mean(X, 1); % centre columns +[~, S, ~] = svd(X, 'econ'); % singular values descending +sv = diag(S); +eigs = (sv(1:maxK).^2 / (T - 1))'; % convert to eigenvalues +end + +function Xout = i_shuffle(X, mode) +% Shuffle X according to the specified mode. +[T, N] = size(X); +switch mode + case 'neuronwise' + Xout = zeros(T, N); + for nn = 1:N + Xout(:, nn) = X(randperm(T), nn); + end + case 'rowperm' + Xout = X(randperm(T), :); + otherwise + error('diagnostics:dim_parallel_analysis:badMode', ... + 'mode must be ''neuronwise'' or ''rowperm''.'); +end +end diff --git a/+diagnostics/+compute/intra_alignment.m b/+diagnostics/+compute/intra_alignment.m new file mode 100644 index 0000000..e9b9559 --- /dev/null +++ b/+diagnostics/+compute/intra_alignment.m @@ -0,0 +1,189 @@ +function results = intra_alignment(Z, pars, label) +%INTRA_ALIGNMENT Within-session latent-space stability via random trial splits. +% +% RESULTS = diagnostics.compute.intra_alignment(Z, PARS) estimates the +% stability of the latent geometry within a single session by repeatedly +% splitting trials into two random halves, fitting a PCA embedding on +% each half independently, and computing the Procrustes disparity +% between the two sub-embeddings. +% +% A low median disparity indicates that the manifold geometry is stable +% across subsets of trials (i.e. it is reproducible within the session). +% +% Inputs +% ------ +% Z : T x d latent matrix (trials already projected; rows = time bins, +% columns = latent dims). Alternatively, pass a cell array +% {nTrials x 1} where each cell is (d x nBins) — the same format +% as OBJ.E. +% +% When a cell array is provided, the trials are split at the *trial* +% level (each random split assigns whole trials to each half), and +% PCA is refit from scratch on each half's concatenated data. +% +% pars : struct with fields (see diagnostics.pars.IntraAlignment): +% .nSplit (default 100) — number of random half-splits. +% .dim (default []) — PCA dimensionality to use when +% refitting. If empty, uses size(Z,2) (no refit). +% .allowScale (default false) — isotropic Procrustes scale. +% .rngSeed (default 0) +% .verbose (default true) — print progress. +% label : (optional) progress label string, e.g. 'Animal.Session'. +% +% Outputs +% ------- +% results : struct with fields +% .disparity - (1 x nSplit) Procrustes disparity per split. +% .disparityMean - mean disparity across splits. +% .disparityMedian - median disparity across splits. +% .disparityStd - std of disparity across splits. +% .principalAngles - {1 x nSplit} principal angles per split. +% .meanPrincipalAngle - (1 x nSplit) mean principal angle per split. +% .distCorr - (1 x nSplit) Mantel distance correlation per split. +% .nSplit, .dim, .allowScale, .rngSeed - parameters used. +% +% Notes +% ----- +% * When Z is a T x d matrix (pre-projected), the rows are split evenly +% (rows 1:T/2 vs T/2+1:T after shuffling) — effectively a random +% time-block split. Use the cell-array form for a proper trial-level +% split with PCA refit. +% * PCA refit is performed via economy SVD (no toolbox). +% * For large T, distance-correlation metrics become expensive; they +% are skipped (set to NaN) when each half has > 1000 time-bins. +% +% Example +% ------- +% pars = diagnostics.pars.IntraAlignment(); +% res = diagnostics.compute.intra_alignment(E_cell, pars, 'Rat1.S01'); +% fprintf('Median intra-session disparity = %.4f\n', res.disparityMedian); +% +% See also diagnostics.compute.align_procrustes, +% diagnostics.compute.alignment_metrics, +% diagnostics.pars.IntraAlignment + +if nargin < 2 || isempty(pars) + pars = diagnostics.pars.IntraAlignment(); +end +if nargin < 3 || isempty(label) + label = ''; +end + +nSplit = pars.nSplit; +dim = pars.dim; +allowScale = pars.allowScale; +rngSeed = pars.rngSeed; +verbose = isfield(pars,'verbose') && pars.verbose; + +if ~isempty(rngSeed) + rng(rngSeed, 'twister'); +end + +% --- Normalise input to a cell array of (d x nBins) matrices -------- +isCellInput = iscell(Z); +if isCellInput + % Validate: each cell must be a 2-D numeric matrix + nTrials = numel(Z); + if nTrials < 4 + error('diagnostics:intra_alignment:tooFewTrials', ... + 'Need at least 4 trials for a meaningful random split (got %d).', nTrials); + end +else + % Z is T x d matrix — treat each time-bin as a "trial" + if ~ismatrix(Z) || ~isnumeric(Z) + error('diagnostics:intra_alignment:badInput', ... + 'Z must be a T x d matrix or a cell array of (d x nBins) matrices.'); + end + % Wrap each row as a 1-element cell (d x 1 column vectors) + nTrials = size(Z, 1); + if nTrials < 4 + error('diagnostics:intra_alignment:tooFewTrials', ... + 'Need at least 4 rows for a meaningful random split (got %d).', nTrials); + end + Z = mat2cell(Z', size(Z,2), ones(1, nTrials)); % cell of (d x 1) + Z = Z(:); +end + +% --- Pre-allocate outputs --- +disparity_vec = zeros(1, nSplit); +pAngles_cell = cell(1, nSplit); +meanPAngle_vec = zeros(1, nSplit); +distCorr_vec = zeros(1, nSplit); + +if verbose + if ~isempty(label) + fprintf(1, '\n IntraAlignment [%s]: split 0/%d', label, nSplit); + else + fprintf(1, '\n IntraAlignment: split 0/%d', nSplit); + end +end + +halfN = floor(nTrials / 2); +prevLen = 0; +for sp = 1:nSplit + % Random trial assignment + perm = randperm(nTrials); + idxA = perm(1:halfN); + idxB = perm(halfN+1 : 2*halfN); % equal-size halves + + % Concatenate trials in each half → (d x T_half) then transpose + ZA = cell2mat(Z(idxA)'); % d x T_A (each cell is d x nBins) + ZB = cell2mat(Z(idxB)'); + + ZA = ZA'; % T_A x d + ZB = ZB'; + + % Optional PCA refit on each half + if ~isempty(dim) && dim > 0 && dim < size(ZA,2) + ZA = i_pca_project(ZA, dim); + ZB = i_pca_project(ZB, dim); + end + + % Equalise lengths for Procrustes + T_common = min(size(ZA,1), size(ZB,1)); + ZA_c = ZA(1:T_common, :); + ZB_c = ZB(1:T_common, :); + + % Align B to A + procRes = diagnostics.compute.align_procrustes(ZA_c, ZB_c, allowScale); + metRes = diagnostics.compute.alignment_metrics(ZA_c, procRes.Z2_aligned); + + disparity_vec(sp) = procRes.disparity; + pAngles_cell{sp} = metRes.principalAngles; + meanPAngle_vec(sp) = metRes.meanPrincipalAngle; + distCorr_vec(sp) = metRes.distCorr; + + if verbose + msg = sprintf('%d/%d', sp, nSplit); + fprintf(1, '%s%s', repmat(char(8), 1, prevLen), msg); + prevLen = numel(msg); + end +end + +if verbose + fprintf(1, ' done.\n'); +end + +results.disparity = disparity_vec; +results.disparityMean = mean(disparity_vec); +results.disparityMedian = median(disparity_vec); +results.disparityStd = std(disparity_vec); +results.principalAngles = pAngles_cell; +results.meanPrincipalAngle = meanPAngle_vec; +results.distCorr = distCorr_vec; +results.nSplit = nSplit; +results.dim = dim; +results.allowScale = allowScale; +results.rngSeed = rngSeed; +end + +% ========================================================================= +% Local helper +% ========================================================================= +function Zproj = i_pca_project(Z, dim) +% Fit economy PCA on Z and project to first DIM dimensions. +Zc = Z - mean(Z, 1); +[~, ~, V] = svd(Zc, 'econ'); +W = V(:, 1:dim); +Zproj = Zc * W; +end diff --git a/+diagnostics/+compute/permutation_test.m b/+diagnostics/+compute/permutation_test.m new file mode 100644 index 0000000..2418b39 --- /dev/null +++ b/+diagnostics/+compute/permutation_test.m @@ -0,0 +1,113 @@ +function results = permutation_test(statFcn, permuterFcn, X, y, nPerm, rngSeed, label, verbose) +%PERMUTATION_TEST Generic permutation test for decoding statistics. +% +% RESULTS = diagnostics.compute.permutation_test(STATFCN, PERMUTERFCN, +% X, Y, NPERM, RNGSEED) computes a one-sided permutation test. +% +% The test evaluates whether the real statistic (computed on the original +% data) is significantly larger than expected by chance. +% +% Inputs +% ------ +% statFcn : function handle @(X, y) -> scalar statistic. +% Called with the original data to obtain accReal, then +% called with permuted (X or y) for each null replicate. +% permuterFcn: function handle @(X, y) -> [Xp, yp]. +% Returns permuted copies of X and/or y for one replicate. +% Use diagnostics.shufflers.global_permute, +% diagnostics.shufflers.blocked_permute, or +% diagnostics.shufflers.circular_shift. +% X : T x N data matrix. +% y : T x 1 label vector. +% nPerm : positive integer, number of permutation replicates. +% rngSeed : non-negative integer or [] (default 0). +% label : (optional) string label shown in progress output, e.g. +% 'Animal.Session'. Defaults to ''. +% verbose : (optional) logical (default true). Print progress. +% +% Outputs +% ------- +% results : struct with fields +% .statReal - observed statistic. +% .statNull - (1 x nPerm) null distribution. +% .pValue - one-sided p-value: (1 + sum(null >= real)) / (nPerm+1). +% .effectZ - effect-size z-score: (real - mean(null)) / std(null). +% .nPerm - nPerm used. +% .rngSeed - rngSeed used. +% +% Notes +% ----- +% * The p-value formula (1 + sum(null >= real)) / (nPerm + 1) is the +% standard permutation-test estimator (Phipson & Smyth, 2010). It is +% bounded below by 1/(nPerm+1) and is conservative under the null. +% * The z-score provides an effect size regardless of nPerm. +% +% Example +% ------- +% statFcn = @(X,y) diagnostics.compute.cv_decoding(X, y, 5).accMean; +% permuterFcn = @(X,y) diagnostics.shufflers.global_permute(X, y); +% res = diagnostics.compute.permutation_test(statFcn, permuterFcn, X, y, 500, 0); +% fprintf('p = %.4f, z = %.2f\n', res.pValue, res.effectZ); +% +% See also diagnostics.shufflers.global_permute, +% diagnostics.shufflers.blocked_permute, +% diagnostics.shufflers.circular_shift + +if nargin < 6 || isempty(rngSeed) + rngSeed = 0; +end +if nargin < 7 || isempty(label) + label = ''; +end +if nargin < 8 || isempty(verbose) + verbose = true; +end +if ~isempty(rngSeed) + rng(rngSeed, 'twister'); +end + +% Real statistic +statReal = statFcn(X, y); + +% Null distribution +if verbose + if ~isempty(label) + fprintf(1, '\n Permutation test [%s]: perm 0/%d', label, nPerm); + else + fprintf(1, '\n Permutation test: perm 0/%d', nPerm); + end +end +statNull = zeros(1, nPerm); +prevLen = 0; +for pp = 1:nPerm + [Xp, yp] = permuterFcn(X, y); + statNull(pp) = statFcn(Xp, yp); + if verbose + msg = sprintf('%d/%d', pp, nPerm); + fprintf(1, '%s%s', repmat(char(8), 1, prevLen), msg); + prevLen = numel(msg); + end +end +if verbose + fprintf(1, ' done.\n'); +end + +% p-value (one-sided, Phipson & Smyth 2010) +pValue = (1 + sum(statNull >= statReal)) / (nPerm + 1); + +% Effect size z-score +mn = mean(statNull); +sd = std(statNull); +if sd == 0 + effectZ = 0; +else + effectZ = (statReal - mn) / sd; +end + +results.statReal = statReal; +results.statNull = statNull; +results.pValue = pValue; +results.effectZ = effectZ; +results.nPerm = nPerm; +results.rngSeed = rngSeed; +end diff --git a/+diagnostics/+compute/shuffle_neuronwise.m b/+diagnostics/+compute/shuffle_neuronwise.m new file mode 100644 index 0000000..f404bcb --- /dev/null +++ b/+diagnostics/+compute/shuffle_neuronwise.m @@ -0,0 +1,30 @@ +function Xshuf = shuffle_neuronwise(X) +%SHUFFLE_NEURONWISE Independently permute each neuron's time series. +% +% XSHUF = diagnostics.compute.shuffle_neuronwise(X) returns a copy of X +% where each column (neuron) has been independently randomly permuted. +% +% This shuffle: +% - Preserves each neuron's marginal firing-rate distribution. +% - Destroys cross-neuron covariance (the structure the embedding uses). +% - Destroys temporal autocorrelations within each neuron. +% +% Use this as the null for parallel analysis (dimension selection). +% +% Input +% ----- +% X : T x N matrix. +% +% Output +% ------ +% Xshuf : T x N matrix with independently permuted columns. +% +% See also diagnostics.compute.dim_parallel_analysis, +% diagnostics.compute.circular_shift + +[T, N] = size(X); +Xshuf = zeros(T, N); +for nn = 1:N + Xshuf(:, nn) = X(randperm(T), nn); +end +end diff --git a/+diagnostics/+pars/CVDecoding.m b/+diagnostics/+pars/CVDecoding.m new file mode 100644 index 0000000..40bd8c2 --- /dev/null +++ b/+diagnostics/+pars/CVDecoding.m @@ -0,0 +1,39 @@ +function pars = CVDecoding() +%CVDECODING Default parameters for cross-validated decoding and permutation test. +% +% PARS = diagnostics.pars.CVDecoding() returns a struct with the default +% parameters used by diagnostics.compute.cv_decoding and +% diagnostics.compute.permutation_test. +% +% Fields +% ------ +% kfold : positive integer >= 2 (default 5) +% Number of cross-validation folds for decoding. +% +% nPerm : positive integer (default 500) +% Number of permutation replicates for the null distribution. +% +% rngSeed : non-negative integer or [] (default 0) +% Random-number generator seed. Set to [] to skip RNG reset. +% +% decoder : 'nearestCentroid' (default 'nearestCentroid') +% Decoder type. Currently only nearest-centroid is supported +% (no additional toolboxes required). +% +% permMode : 'global' | 'blocked' | 'timeshift' (default 'global') +% Permutation strategy used when building the null distribution. +% 'global' - permute all labels randomly. +% 'blocked' - permute labels within user-supplied session blocks. +% 'timeshift' - circular time-shift of the neural data (preserves +% auto-correlation). +% +% See also diagnostics.compute.cv_decoding, +% diagnostics.compute.permutation_test + +pars.kfold = 5; +pars.nPerm = 500; +pars.rngSeed = 0; +pars.decoder = 'nearestCentroid'; +pars.permMode = 'global'; +pars.verbose = true; +end diff --git a/+diagnostics/+pars/CVReconstruction.m b/+diagnostics/+pars/CVReconstruction.m new file mode 100644 index 0000000..a24b0a7 --- /dev/null +++ b/+diagnostics/+pars/CVReconstruction.m @@ -0,0 +1,25 @@ +function pars = CVReconstruction() +%CVRECONSTRUCTION Default parameters for cross-validated reconstruction. +% +% PARS = diagnostics.pars.CVReconstruction() returns a struct with the +% default parameters used by diagnostics.compute.cv_reconstruction. +% +% Fields +% ------ +% kfold : positive integer >= 2 (default 5) +% Number of cross-validation folds. +% +% rngSeed : non-negative integer or [] (default 0) +% Random-number generator seed. Set to [] to skip RNG reset. +% +% zscore : logical (default true) +% If true, z-score each neuron using statistics computed from the +% training fold only (no leakage). +% +% See also diagnostics.compute.cv_reconstruction + +pars.kfold = 5; +pars.rngSeed = 0; +pars.zscore = true; +pars.verbose = true; +end diff --git a/+diagnostics/+pars/IntraAlignment.m b/+diagnostics/+pars/IntraAlignment.m new file mode 100644 index 0000000..b95bd0f --- /dev/null +++ b/+diagnostics/+pars/IntraAlignment.m @@ -0,0 +1,42 @@ +function pars = IntraAlignment() +%INTRAALIGNMENT Default parameters for within-session latent-space stability. +% +% PARS = diagnostics.pars.IntraAlignment() returns a struct with the +% default parameters used by diagnostics.compute.intra_alignment and +% NeuralEmbedding.crossValAlignment. +% +% Fields +% ------ +% nSplit : positive integer (default 100) +% Number of random half-trial splits used to build the null +% distribution of Procrustes disparities. Increase to 500 for +% publication-quality estimates. +% +% dim : non-negative integer (default 0) +% If > 0, PCA is refit on each trial-half independently (using +% economy SVD) and the result is projected to the first DIM +% principal components before alignment. +% If 0 (default), the input embeddings are aligned directly without +% refitting (appropriate when latent coordinates from the full-session +% PCA are passed in). +% +% allowScale : logical (default false) +% If false, alignment uses rotation/reflection only (orthogonal +% Procrustes). If true, an isotropic scale factor is also optimised. +% +% rngSeed : non-negative integer or [] (default 0) +% Random-number generator seed. Set to [] to use the current RNG +% state. +% +% verbose : logical (default true) +% Print progress (split counter) during computation. +% +% See also diagnostics.compute.intra_alignment, +% NeuralEmbedding.crossValAlignment + +pars.nSplit = 100; +pars.dim = 0; +pars.allowScale = false; +pars.rngSeed = 0; +pars.verbose = true; +end diff --git a/+diagnostics/+pars/ParallelAnalysis.m b/+diagnostics/+pars/ParallelAnalysis.m new file mode 100644 index 0000000..2b2a735 --- /dev/null +++ b/+diagnostics/+pars/ParallelAnalysis.m @@ -0,0 +1,35 @@ +function pars = ParallelAnalysis() +%PARALLELANALYSIS Default parameters for shuffle-based dimension selection. +% +% PARS = diagnostics.pars.ParallelAnalysis() returns a struct with the +% default parameters used by diagnostics.compute.dim_parallel_analysis. +% +% Fields +% ------ +% nShuffle : positive integer (default 200) +% Number of shuffle replicates for the null distribution. +% +% alpha : scalar in (0,1) (default 0.05) +% Significance level. The null quantile is computed at 1-alpha. +% +% rngSeed : non-negative integer or [] (default 0) +% Random-number generator seed. Set to [] to use the current RNG +% state without resetting. +% +% mode : 'neuronwise' | 'rowperm' (default 'neuronwise') +% Shuffle strategy. +% 'neuronwise' - independently permute each neuron's time series, +% breaking cross-neuron covariance while preserving +% each neuron's marginal distribution. +% 'rowperm' - permute entire rows (time points), a weaker null +% that also breaks temporal correlations within each +% neuron. +% +% See also diagnostics.compute.dim_parallel_analysis + +pars.nShuffle = 200; +pars.alpha = 0.05; +pars.rngSeed = 0; +pars.mode = 'neuronwise'; +pars.verbose = true; +end diff --git a/+diagnostics/+pars/ProcrustesAlignment.m b/+diagnostics/+pars/ProcrustesAlignment.m new file mode 100644 index 0000000..fe45e96 --- /dev/null +++ b/+diagnostics/+pars/ProcrustesAlignment.m @@ -0,0 +1,25 @@ +function pars = ProcrustesAlignment() +%PROCRUSTESALIGNMENT Default parameters for cross-session Procrustes alignment. +% +% PARS = diagnostics.pars.ProcrustesAlignment() returns a struct with +% the default parameters used by diagnostics.compute.align_procrustes +% and diagnostics.compute.alignment_metrics. +% +% Fields +% ------ +% allowScale : logical (default false) +% If false, the alignment is restricted to rotation and reflection +% (orthogonal Procrustes). If true, an isotropic scaling factor is +% also optimised. +% +% refSession : positive integer (default 1) +% Index of the session (within an object array) used as the +% alignment reference. +% +% See also diagnostics.compute.align_procrustes, +% diagnostics.compute.alignment_metrics + +pars.allowScale = false; +pars.refSession = 1; +pars.verbose = true; +end diff --git a/+diagnostics/+shufflers/blocked_permute.m b/+diagnostics/+shufflers/blocked_permute.m new file mode 100644 index 0000000..cbe93a0 --- /dev/null +++ b/+diagnostics/+shufflers/blocked_permute.m @@ -0,0 +1,42 @@ +function [Xout, yout] = blocked_permute(X, y, blockId) +%BLOCKED_PERMUTE Block-restricted label permutation. +% +% [XOUT, YOUT] = diagnostics.shufflers.blocked_permute(X, Y, BLOCKID) +% returns X unchanged and Y with its elements randomly permuted +% WITHIN each block defined by BLOCKID. Labels are never swapped across +% blocks. +% +% This preserves: +% - Block-level mean and distributional properties of labels (e.g. +% session-level biases, condition imbalances). +% - Cross-neuron covariance structure. +% While breaking: +% - Within-block alignment between neural activity and labels. +% +% Use this when sessions/blocks have different overall label distributions +% and you want a null that respects that structure. +% +% Inputs +% ------ +% X : T x N data matrix (returned unchanged). +% y : T x 1 label vector. +% blockId : T x 1 integer or categorical vector with block/session IDs. +% Samples with the same blockId are treated as one block. +% +% Outputs +% ------- +% Xout : same as X. +% yout : y with elements permuted within each block. +% +% See also diagnostics.shufflers.global_permute, +% diagnostics.shufflers.circular_shift, +% diagnostics.compute.permutation_test + +Xout = X; +yout = y; +blocks = unique(blockId); +for bb = 1:numel(blocks) + idx = find(blockId == blocks(bb)); + yout(idx) = y(idx(randperm(numel(idx)))); +end +end diff --git a/+diagnostics/+shufflers/circular_shift.m b/+diagnostics/+shufflers/circular_shift.m new file mode 100644 index 0000000..6460e10 --- /dev/null +++ b/+diagnostics/+shufflers/circular_shift.m @@ -0,0 +1,37 @@ +function [Xout, yout] = circular_shift(X, y) +%CIRCULAR_SHIFT Circular time-shift null for decoding permutation tests. +% +% [XOUT, YOUT] = diagnostics.shufflers.circular_shift(X, Y) returns Y +% unchanged and X with each column (neuron) independently circularly +% shifted by a random offset in 1 .. T-1. This misaligns the neural +% data with the labels without destroying each neuron's autocorrelation. +% +% This preserves: +% - Each neuron's temporal autocorrelation. +% - Each neuron's marginal distribution. +% While breaking: +% - Alignment between neural activity and labels. +% - Cross-neuron synchrony at zero lag. +% +% Use this as a temporal null when the data are continuous time-series +% (not independent shuffled trials) and temporal structure must be +% preserved. +% +% Inputs +% ------ +% X : T x N matrix (rows are time-ordered samples). +% y : T x 1 label vector (returned unchanged). +% +% Outputs +% ------- +% Xout : T x N matrix with independently circularly shifted columns. +% yout : same as y. +% +% See also diagnostics.shufflers.global_permute, +% diagnostics.shufflers.blocked_permute, +% diagnostics.compute.circular_shift, +% diagnostics.compute.permutation_test + +Xout = diagnostics.compute.circular_shift(X); +yout = y; +end diff --git a/+diagnostics/+shufflers/global_permute.m b/+diagnostics/+shufflers/global_permute.m new file mode 100644 index 0000000..a9e0ff4 --- /dev/null +++ b/+diagnostics/+shufflers/global_permute.m @@ -0,0 +1,32 @@ +function [Xout, yout] = global_permute(X, y) +%GLOBAL_PERMUTE Global (unrestricted) label permutation. +% +% [XOUT, YOUT] = diagnostics.shufflers.global_permute(X, Y) returns X +% unchanged and Y with its elements randomly permuted across all samples. +% +% This is the standard label-shuffle null. It breaks any association +% between neural activity and the labels while preserving: +% - The marginal distribution of each neuron's activity. +% - Cross-neuron covariance structure. +% - The marginal distribution of labels. +% +% Use this as the default null for decoding permutation tests when there +% is no block structure to preserve. +% +% Inputs +% ------ +% X : T x N data matrix (returned unchanged). +% y : T x 1 label vector. +% +% Outputs +% ------- +% Xout : same as X. +% yout : y with elements randomly permuted. +% +% See also diagnostics.shufflers.blocked_permute, +% diagnostics.shufflers.circular_shift, +% diagnostics.compute.permutation_test + +Xout = X; +yout = y(randperm(numel(y))); +end diff --git a/@NeuralEmbedding/NeuralEmbedding.m b/@NeuralEmbedding/NeuralEmbedding.m index dd8ab8b..33944f2 100644 --- a/@NeuralEmbedding/NeuralEmbedding.m +++ b/@NeuralEmbedding/NeuralEmbedding.m @@ -64,6 +64,9 @@ % MCCA regularization parameter mcca_k = 0.9; % Add this line + % Alignment + useAlignment = false % (logical) When true, get.E and get.W return the aligned subspace stored by alignSessions. + % General useGpu = false useParallel = false @@ -111,6 +114,7 @@ Events_ = struct('Ts',{},'Name',{},'Trial',{},'Data',{}); % Events struct array (Ts, name, trial, data) W_ cell % projection matrix Winv_ cell % inverse projection matrix + W_aligned_ cell = {} % aligned projection matrix (populated by alignSessions) mu double = 0 % per unit mean ss double = 1 % per unit variance subsampling double = 1 % subsampling, it is updated after binning @@ -126,6 +130,7 @@ P_ cell % prepro data, ie binned and pruned for inactive neurons S_ cell % Smoothed data E_ cell % Embedded data + E_aligned_ cell = {} % aligned embedded data (populated by alignSessions; Transient so loadobj re-uses W_aligned_) end %% Constructor @@ -407,7 +412,12 @@ function addEvents(obj,evts) function value = get.E(obj) amask = ismember(obj.UArea,obj.aMask_); cmask = obj.cMask; - value = obj.E_(cmask,amask); + if obj.useAlignment && ~isempty(obj.E_aligned_) + src = obj.E_aligned_; + else + src = obj.E_; + end + value = src(cmask,amask); value = cellfun(@(e)e(1:obj.numPC,:),value, ... 'UniformOutput',false,'ErrorHandler',@(S,n)errorFunc(S,obj.numPC)); @@ -419,7 +429,12 @@ function addEvents(obj,evts) function value = get.W(obj) amask = ismember(obj.UArea,obj.aMask_); - value = obj.W_(amask); + if obj.useAlignment && ~isempty(obj.W_aligned_) + src = obj.W_aligned_; + else + src = obj.W_; + end + value = src(amask); value = cellfun(@(w)w(1:obj.numPC,:),value, ... 'UniformOutput',false,'ErrorHandler',@(S,n)errorFunc(S,obj.numPC)); function e = errorFunc(S,varargin) @@ -1212,6 +1227,28 @@ function zscoreData(obj) flag = computeMetrics(obj,type) end + %% Manifold diagnostics + methods (Access=public) + % Shuffle-based dimension selection (parallel analysis) + results = selectDimension(obj, dims, pars) + % Cross-validated PCA reconstruction + results = crossValReconstruct(obj, dim, pars) + % Cross-validated decoding + permutation test + results = crossValDecode(obj, y, dim, pars) + % Align latent spaces across sessions via Procrustes + results = alignSessions(obj, pars) + % Within-session latent-space stability (random trial splits) + results = crossValAlignment(obj, pars) + % Build a per-time-bin label vector from stored events + y = labelsFromEvents(obj, eventNames) + end + + %% Private helpers + methods (Access=private) + % Store a diagnostic result struct in M_ (same update logic as computeMetrics) + i_storeM(obj, data, type) + end + %% Plot data methods function plot3(obj,maxT,eventToPlot) diff --git a/@NeuralEmbedding/alignSessions.m b/@NeuralEmbedding/alignSessions.m new file mode 100644 index 0000000..f7cf700 --- /dev/null +++ b/@NeuralEmbedding/alignSessions.m @@ -0,0 +1,260 @@ +function results = alignSessions(objs, pars) +%ALIGNSESSIONS Align latent spaces across sessions using Procrustes, and +% store the aligned subspaces within each NeuralEmbedding object. +% +% RESULTS = ALIGNSESSIONS(OBJS) aligns the latent embeddings of every +% session in the NeuralEmbedding array OBJS to the first session +% (reference session = 1) using orthogonal Procrustes analysis. +% Alignment is performed *independently* for every area present in the +% reference session (including "AllNeurons"). +% +% After this call: +% - Each object's E_aligned_ and W_aligned_ private properties are +% populated with the rotation-transformed embeddings. +% - Setting OBJ.useAlignment = true makes get.E and get.W return the +% aligned subspace instead of the original one. +% - Each object's M_ is updated with a 'SessionAlignment' entry (same logic +% as computeMetrics; re-running replaces the previous result). +% +% RESULTS = ALIGNSESSIONS(OBJS, PARS) overrides default alignment +% parameters via the struct PARS. See diagnostics.pars.ProcrustesAlignment. +% +% Inputs +% ------ +% objs : (1 x nSessions) or (nSessions x 1) NeuralEmbedding array. +% At least 2 objects required. Each object must have had +% findEmbedding called prior to alignment. +% pars : struct with optional fields (see diagnostics.pars.ProcrustesAlignment): +% .allowScale (default false) - allow isotropic scaling. +% .refSession (default 1) - index of reference session. +% +% Outputs +% ------- +% results : struct with fields +% .Z - {nAreas x nSessions} original latent matrices. +% .Z_aligned - {nAreas x nSessions} rotation-only aligned latents. +% .R - {nAreas x nSessions} rotation matrices. +% .scale - (nAreas x nSessions) isotropic scale factors. +% .disparity - (nAreas x nSessions) Procrustes disparity. +% .principalAngles - {nAreas x nSessions} principal-angle vectors. +% .meanPrincipalAngle - (nAreas x nSessions) mean principal angle (rad). +% .distCorr - (nAreas x nSessions) Mantel distance correlation. +% .refSession - index of reference session used. +% .areas - string array of area labels (rows of per-area fields). +% .sessions - string array of session labels. +% .animals - string array of animal labels. +% +% Object side-effects +% ------------------- +% Per object OBJ = OBJS(ss): +% - OBJ.E_aligned_ is populated with the rotation-transformed embedding. +% - OBJ.W_aligned_ is populated with the rotation-transformed loadings. +% - OBJ.M_ receives an 'SessionAlignment' entry via i_storeM. +% To activate the aligned subspace set OBJ.useAlignment = true. +% To deactivate set OBJ.useAlignment = false. +% +% Notes +% ----- +% * The rotation is applied without global re-centring so that the +% per-trial mean structure of E is preserved. +% * Only the first numPC components of E (and W) are rotated; higher +% components (if stored) are left unchanged. +% * If two sessions do not share the same area label, that area is +% skipped for the non-matching session. +% +% Example +% ------- +% NEs = [NE1, NE2, NE3]; +% NE1.findEmbedding('PCA'); NE2.findEmbedding('PCA'); +% NE3.findEmbedding('PCA'); +% res = alignSessions(NEs); +% disp(res.disparity) % nAreas x nSessions matrix +% NE2.useAlignment = true; % activate aligned subspace for session 2 +% +% See also diagnostics.compute.align_procrustes, +% diagnostics.compute.alignment_metrics, +% diagnostics.pars.ProcrustesAlignment, +% NeuralEmbedding.findEmbedding + +% --- Validation --- +if numel(objs) < 2 + error('NeuralEmbedding:alignSessions:tooFewSessions', ... + 'At least 2 NeuralEmbedding objects are required.'); +end + +defaultPars = diagnostics.pars.ProcrustesAlignment(); +if nargin < 2 || isempty(pars) + pars = defaultPars; +else + pars = NeuralEmbedding.mergestructs(defaultPars, pars); +end + +nSess = numel(objs); +refSess = pars.refSession; + +% Area list from reference session (includes "AllNeurons") +refAreas = objs(refSess).UArea; % (nAreas+1 x 1) string +nAreas = numel(refAreas); + +% --- Initialise aligned storage (deep-copy of current E_ / W_) --- +for ss = 1:nSess + objs(ss).E_aligned_ = objs(ss).E_; % value-copy of cell array + objs(ss).W_aligned_ = objs(ss).W_; +end + +% --- Pre-allocate per-area per-session results --- +R_all = cell(nAreas, nSess); +scale_all = ones(nAreas, nSess); +disparity = zeros(nAreas, nSess); +pAngles = cell(nAreas, nSess); +meanPAngle = zeros(nAreas, nSess); +distCorr = ones(nAreas, nSess); +Z_all = cell(nAreas, nSess); +Z_aligned_all = cell(nAreas, nSess); + +% Fill identity values for reference session now +for aa = 1:nAreas + R_all{aa, refSess} = []; % filled below once d is known + scale_all(aa, refSess) = 1; + disparity(aa, refSess) = 0; + pAngles{aa, refSess} = []; + meanPAngle(aa, refSess) = 0; + distCorr(aa, refSess) = 1; +end + +% --- Per-area alignment --- +verbose = isfield(pars,'verbose') && pars.verbose; +for aa = 1:nAreas + areaStr = refAreas(aa); + + % Locate area column in reference session E_ + aaRef = find(ismember(objs(refSess).UArea, areaStr)); + if isempty(aaRef), continue; end + + % Extract all non-empty trial cells for this area from reference + E_ref_all = objs(refSess).E_(:, aaRef); % nTrial_ref x 1 + valid_ref = ~cellfun(@(e) isempty(e) || size(e,2)==0, E_ref_all); + if ~any(valid_ref), continue; end + + E_ref_cells = E_ref_all(valid_ref); + d_ref = min(objs(refSess).numPC, size(E_ref_cells{1}, 1)); + + % Concatenate reference latent: T_ref x d + Zref = cell2mat(cellfun(@(e) e(1:d_ref,:)', ... + E_ref_cells, 'UniformOutput', false)); % T_ref x d_ref + Z_all{aa, refSess} = Zref; + Z_aligned_all{aa, refSess} = Zref; + R_all{aa, refSess} = eye(d_ref); + + for ss = 1:nSess + if ss == refSess, continue; end + + if verbose + refLabel = sprintf('%s.%s', objs(refSess).Animal, objs(refSess).Session); + sessLabel = sprintf('%s.%s', objs(ss).Animal, objs(ss).Session); + fprintf(1, '\n AlignSessions [area=%s]: %s → %s', ... + areaStr, sessLabel, refLabel); + end + + % Locate matching area in this session + aaSess = find(ismember(objs(ss).UArea, areaStr)); + if isempty(aaSess) + % Area not present in this session: no alignment possible + R_all{aa, ss} = eye(d_ref); + continue; + end + + E_ss_all = objs(ss).E_(:, aaSess); + valid_ss = ~cellfun(@(e) isempty(e) || size(e,2)==0, E_ss_all); + if ~any(valid_ss) + R_all{aa, ss} = eye(d_ref); + continue; + end + + E_ss_cells = E_ss_all(valid_ss); + d_ss = min([objs(ss).numPC, size(E_ss_cells{1}, 1), d_ref]); + + % Concatenate session latent: T_ss x d_ss + Z_ss = cell2mat(cellfun(@(e) e(1:d_ss,:)', ... + E_ss_cells, 'UniformOutput', false)); + Z_all{aa, ss} = Z_ss; + + % Common length for Procrustes fit + Tcommon = min(size(Zref, 1), size(Z_ss, 1)); + Zref_c = Zref(1:Tcommon, 1:d_ss); + Zss_c = Z_ss(1:Tcommon, :); + + % Compute Procrustes alignment + procResult = diagnostics.compute.align_procrustes(Zref_c, Zss_c, ... + pars.allowScale); + R = procResult.R; % d_ss x d_ss orthogonal + sc = procResult.scale; + + R_all{aa, ss} = R; + scale_all(aa, ss) = sc; + + % ----- Apply rotation to ALL trials of this session (in-place) ----- + % E cell is (d_full x T); rotate first d_ss rows: E_rot = sc * R' * E(1:d_ss,:) + % R is orthogonal, so R' = inv(R). + % Working on the private E_aligned_ (already initialised as copy of E_). + E_tmp = objs(ss).E_aligned_; + for tr = 1:size(E_tmp, 1) + Ec = E_tmp{tr, aaSess}; + if isempty(Ec) || size(Ec,2) == 0, continue; end + nrows = min(d_ss, size(Ec, 1)); + % Slice R' to nrows x nrows to handle d_full < d_ss edge case + Ec(1:nrows, :) = sc * (R(1:nrows, 1:nrows)' * Ec(1:nrows, :)); + E_tmp{tr, aaSess} = Ec; + end + objs(ss).E_aligned_ = E_tmp; + + % Apply rotation to W for this area + Wss = objs(ss).W_aligned_{aaSess}; + if ~isempty(Wss) + nrows_W = min(d_ss, size(Wss, 1)); + % Slice R' to nrows_W x nrows_W to handle dim mismatch + Wss(1:nrows_W, :) = sc * (R(1:nrows_W, 1:nrows_W)' * Wss(1:nrows_W, :)); + objs(ss).W_aligned_{aaSess} = Wss; + end + + % Aligned Z at full session length (rotation-only, no centring) + Z_aligned_all{aa, ss} = cell2mat(cellfun(@(e) ... + (sc * (R' * e(1:d_ss,:)))', E_ss_cells, 'UniformOutput', false)); + + % Alignment metrics on the common (Procrustes-fitted) portion + metResult = diagnostics.compute.alignment_metrics(Zref_c, ... + procResult.Z2_aligned); + disparity(aa, ss) = metResult.disparity; + pAngles{aa, ss} = metResult.principalAngles; + meanPAngle(aa, ss) = metResult.meanPrincipalAngle; + distCorr(aa, ss) = metResult.distCorr; + end +end + +% --- Pack top-level results --- +results.Z = Z_all; +results.Z_aligned = Z_aligned_all; +results.R = R_all; +results.scale = scale_all; +results.disparity = disparity; +results.principalAngles = pAngles; +results.meanPrincipalAngle = meanPAngle; +results.distCorr = distCorr; +results.refSession = refSess; +results.areas = refAreas; +results.sessions = string({objs.Session}); +results.animals = string({objs.Animal}); + +% --- Store per-session result in each object's M_ --- +for ss = 1:nSess + sessData.R = R_all(:, ss); + sessData.scale = scale_all(:, ss); + sessData.disparity = disparity(:, ss); + sessData.principalAngles = pAngles(:, ss); + sessData.meanPrincipalAngle = meanPAngle(:, ss); + sessData.distCorr = distCorr(:, ss); + sessData.refSession = refSess; + sessData.areas = refAreas; + objs(ss).i_storeM(sessData, 'SessionAlignment'); +end +end diff --git a/@NeuralEmbedding/crossValAlignment.m b/@NeuralEmbedding/crossValAlignment.m new file mode 100644 index 0000000..60db841 --- /dev/null +++ b/@NeuralEmbedding/crossValAlignment.m @@ -0,0 +1,140 @@ +function results = crossValAlignment(obj, pars) +%CROSSVALALIGNMENT Within-session latent-space stability via random trial splits. +% +% RESULTS = CROSSVALALIGNMENT(OBJ) estimates how stable the latent +% manifold geometry is across randomly chosen subsets of trials within +% the same recording session. This is a purely *within-session* test: +% it does not require a second session, and its result can be used as a +% baseline for the cross-session Procrustes disparity produced by +% ALIGNSESSIONS. +% +% Algorithm +% --------- +% For each of PARS.nSplit random replicates: +% 1. Randomly assign trials to two equal-size groups (half-sessions). +% 2. (Optionally) refit a PARS.dim-dimensional PCA on each half +% independently, projecting each half's time-bins into latent space. +% 3. Align the two latent matrices with orthogonal Procrustes +% (see diagnostics.compute.align_procrustes). +% 4. Compute disparity, principal angles, and distance correlation +% (see diagnostics.compute.alignment_metrics). +% +% A low median disparity (relative to the cross-session disparity from +% alignSessions) indicates that the geometry is stable within the session. +% The result is stored in M_ under type 'IntraAlignment'. +% +% RESULTS = CROSSVALALIGNMENT(OBJS) where OBJS is a vector of +% NeuralEmbedding objects returns a (1 x nSessions) struct array. +% +% RESULTS = CROSSVALALIGNMENT(OBJ, PARS) overrides default parameters. +% See diagnostics.pars.IntraAlignment. +% +% Inputs +% ------ +% obj : scalar or array of NeuralEmbedding objects. +% findEmbedding must have been called (OBJ.E must be non-empty) +% unless PARS.dim > 0 (in which case PCA is refit on the raw +% smoothed data OBJ.S). +% pars : struct with fields (see diagnostics.pars.IntraAlignment): +% .nSplit (default 100) +% .dim (default 0 = use current embedding directly) +% .allowScale (default false) +% .rngSeed (default 0) +% .verbose (default true) +% +% Outputs +% ------- +% results : struct (or struct array for multiple sessions) with fields +% .disparity - (1 x nSplit) per-split Procrustes disparity. +% .disparityMean - mean disparity. +% .disparityMedian - median disparity. +% .disparityStd - std of disparity. +% .principalAngles - {1 x nSplit} principal-angle vectors. +% .meanPrincipalAngle - (1 x nSplit) mean principal angle (rad). +% .distCorr - (1 x nSplit) Mantel distance correlation. +% .nSplit, .dim, .allowScale, .rngSeed - parameters used. +% .animal, .session - metadata. +% +% Example +% ------- +% NE.findEmbedding('PCA'); +% res = NE.crossValAlignment(); +% fprintf('Intra-session disparity = %.4f ± %.4f\n', ... +% res.disparityMean, res.disparityStd); +% +% % Compare with cross-session alignment +% res_cross = alignSessions([NE1, NE2]); +% fprintf('Cross-session disparity = %.4f\n', res_cross.disparity(1,2)); +% +% See also diagnostics.compute.intra_alignment, +% diagnostics.pars.IntraAlignment, +% NeuralEmbedding.alignSessions + +% --- Multi-session dispatch --- +if ~isscalar(obj) + nSess = numel(obj); + results = cell(1, nSess); + for ss = 1:nSess + if nargin < 2 + results{ss} = crossValAlignment(obj(ss)); + else + results{ss} = crossValAlignment(obj(ss), pars); + end + end + results = [results{:}]; + return; +end + +% --- Defaults --- +defaultPars = diagnostics.pars.IntraAlignment(); +if nargin < 2 || isempty(pars) + pars = defaultPars; +else + pars = NeuralEmbedding.mergestructs(defaultPars, pars); +end + +% --- Build progress label --- +label = sprintf('%s.%s', obj.Animal, obj.Session); + +% --- Build per-trial cell array of latent data --- +if pars.dim > 0 + % Refit PCA from scratch on each half — pass smoothed data as cells + Z = i_get_S_cells(obj); % cell {nTrials x 1}, each (nUnits x nBins) + % Transpose to (nBins x nUnits) so intra_alignment can split trials + Z = cellfun(@(s) s', Z, 'UniformOutput', false); + % intra_alignment will do PCA refit internally +else + % Use current embedding (E) directly — cell {nTrials x 1}, each (d x nBins) + Z = i_get_E_cells(obj); +end + +% --- Run intra-alignment stability analysis --- +results = diagnostics.compute.intra_alignment(Z, pars, label); + +% --- Attach metadata --- +results.animal = obj.Animal; +results.session = obj.Session; + +% --- Store in M_ --- +obj.i_storeM(results, 'IntraAlignment'); +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function Z = i_get_E_cells(obj) +% Return per-trial embedding cells (first area column), filtered to +% non-empty trials only. Each cell is (d x nBins). +E = obj.E; +E = E(:, 1); % first area column +Z = E(~cellfun(@(e) isempty(e) || size(e,2)==0, E)); +end + +function S = i_get_S_cells(obj) +% Return per-trial smoothed data cells (first area column). +% Each cell is (nUnits x nBins). +Sc = obj.S; +Sc = Sc(:, 1); % first area column +S = Sc(~cellfun(@(s) isempty(s) || size(s,2)==0, Sc)); +end diff --git a/@NeuralEmbedding/crossValDecode.m b/@NeuralEmbedding/crossValDecode.m new file mode 100644 index 0000000..0cb1dbd --- /dev/null +++ b/@NeuralEmbedding/crossValDecode.m @@ -0,0 +1,209 @@ +function results = crossValDecode(obj, y, dim, pars) +%CROSSVALDECODE Cross-validated decoding + permutation test. +% +% RESULTS = CROSSVALDECODE(OBJ, Y, DIM) decodes labels Y from the +% DIM-dimensional PCA embedding of the preprocessed data using k-fold +% cross-validation and a nearest-centroid classifier. A permutation test +% provides an empirical p-value for the decoding accuracy. +% +% RESULTS = CROSSVALDECODE(OBJ, Y, DIM, PARS) overrides default +% parameters via the struct PARS. See diagnostics.pars.CVDecoding. +% +% Multi-session usage +% ------------------- +% RESULTS = CROSSVALDECODE(OBJS, Y, DIM, ...) where OBJS is a vector / +% array of NeuralEmbedding objects and Y is either: +% - a cell array {y1, y2, ...} (one vector per session), or +% - a numeric/categorical vector of length sum(nTrials) (concatenated). +% Returns a (1 x nSessions) struct array. +% +% Inputs +% ------ +% obj : scalar or array of NeuralEmbedding objects. +% y : T x 1 integer or categorical label vector (T = total time-bins +% when trials are concatenated). For multi-session input, pass a +% cell array with one vector per session. +% dim : positive integer, number of latent dimensions. +% pars: struct overriding diagnostics.pars.CVDecoding defaults. +% Fields: kfold (5), nPerm (500), rngSeed (0), +% decoder ('nearestCentroid'), +% permMode ('global' | 'blocked' | 'timeshift'). +% +% Outputs +% ------- +% results : struct (or struct array) with fields +% .accMean, .accPerFold - accuracy (mean & per fold). +% .balAccMean, .balAccPerFold- balanced accuracy. +% .confMat, .classes - confusion matrix and class labels. +% .pValue - permutation-test p-value. +% .effectZ - effect-size z-score. +% .statNull - (1 x nPerm) null distribution. +% .dim, .kfold, .nPerm, .rngSeed, .permMode - parameters used. +% .animal, .session - metadata. +% +% Example +% ------- +% y = NE.Conditions; % trial-level labels, one per trial +% res = NE.crossValDecode(y, 5); +% fprintf('Acc=%.1f%% p=%.4f z=%.2f\n', ... +% res.accMean*100, res.pValue, res.effectZ); +% +% See also diagnostics.compute.cv_decoding, +% diagnostics.compute.permutation_test, +% diagnostics.pars.CVDecoding, +% NeuralEmbedding.selectDimension + +% --- Multi-session dispatch --- +if ~isscalar(obj) + nSess = numel(obj); + % Unpack per-session y if cell array + if iscell(y) + yCell = y; + else + % Split concatenated y by session (assuming nTimeBins per trial) + % Build split based on time-bin count per session + yCell = i_split_y(obj, y); + end + results = cell(1, nSess); + for ss = 1:nSess + if nargin < 4 + results{ss} = crossValDecode(obj(ss), yCell{ss}, dim); + else + results{ss} = crossValDecode(obj(ss), yCell{ss}, dim, pars); + end + end + results = [results{:}]; + return; +end + +% --- Defaults --- +defaultPars = diagnostics.pars.CVDecoding(); +if nargin < 4 || isempty(pars) + pars = defaultPars; +else + pars = NeuralEmbedding.mergestructs(defaultPars, pars); +end + +% --- Extract data --- +X = i_get_data(obj); + +% --- Build label for progress output --- +label = sprintf('%s.%s', obj.Animal, obj.Session); +if pars.verbose + fprintf(1, '\nCrossValDecode [%s] dim=%d', label, dim); +end + +% Expand trial-level labels to time-bin level if needed +y = i_expand_labels(y, obj); + +% --- Validate y --- +if numel(y) ~= size(X, 1) + error('NeuralEmbedding:crossValDecode:labelMismatch', ... + ['y has %d elements but X has %d time-bins. ' ... + 'Provide one label per time-bin (after trial concatenation), ' ... + 'or one label per trial (will be expanded automatically).'], ... + numel(y), size(X, 1)); +end + +% --- CV decoding --- +decPars = pars; +decodingResult = diagnostics.compute.cv_decoding(X, y, dim, decPars); + +% --- Permutation test --- +% Build the permuter function based on permMode +switch pars.permMode + case 'global' + permuterFcn = @(Xp, yp) diagnostics.shufflers.global_permute(Xp, yp); + case 'blocked' + % Use trial index as block + blockId = i_trial_block_ids(obj); + permuterFcn = @(Xp, yp) diagnostics.shufflers.blocked_permute(Xp, yp, blockId); + case 'timeshift' + permuterFcn = @(Xp, yp) diagnostics.shufflers.circular_shift(Xp, yp); + otherwise + error('NeuralEmbedding:crossValDecode:badPermMode', ... + 'permMode must be ''global'', ''blocked'', or ''timeshift''.'); +end + +statFcn = @(Xp, yp) diagnostics.compute.cv_decoding(Xp, yp, dim, decPars).accMean; + +permResult = diagnostics.compute.permutation_test(statFcn, permuterFcn, ... + X, y, pars.nPerm, pars.rngSeed, label, isfield(pars,'verbose') && pars.verbose); + +% --- Merge results --- +results = decodingResult; +results.pValue = permResult.pValue; +results.effectZ = permResult.effectZ; +results.statNull = permResult.statNull; +results.nPerm = pars.nPerm; +results.permMode = pars.permMode; +results.animal = obj.Animal; +results.session = obj.Session; + +% --- Store in M_ --- +obj.i_storeM(results, 'CVDecoding'); +end + +% ========================================================================= +% Local helpers +% ========================================================================= + +function X = i_get_data(obj) +S = obj.S; +S = S(:, 1); % first area column +X = cell2mat(cellfun(@(s) s', S, 'UniformOutput', false)); +end + +function y = i_expand_labels(y, obj) +% If y has one element per trial, replicate to per-time-bin. +S = obj.S; +S = S(:, 1); % first area column +nBinsPerTrial = cellfun(@(s) size(s, 2), S); +T_total = sum(nBinsPerTrial); +nTr = numel(S); +y = y(:); +if numel(y) == nTr + % Expand trial labels to time bins + yExp = zeros(T_total, 1); + offset = 0; + for tt = 1:nTr + yExp(offset + (1:nBinsPerTrial(tt))) = y(tt); + offset = offset + nBinsPerTrial(tt); + end + y = yExp; +end +end + +function blockId = i_trial_block_ids(obj) +% Return a T-length vector where each element is the trial index of that +% time bin — used as block IDs for blocked_permute. +S = obj.S; +S = S(:, 1); % first area column +nBinsPerTrial = cellfun(@(s) size(s, 2), S); +blockId = zeros(sum(nBinsPerTrial), 1); +offset = 0; +nTr = numel(S); +for tt = 1:nTr + blockId(offset + (1:nBinsPerTrial(tt))) = tt; + offset = offset + nBinsPerTrial(tt); +end +end + +function yCell = i_split_y(objs, y) +% Split a concatenated y vector into per-session cell array. +nSess = numel(objs); +yCell = cell(1, nSess); +offset = 0; +y = y(:); +for ss = 1:nSess + S = objs(ss).S; + S = S(:, 1); % first area column + T = sum(cellfun(@(s) size(s, 2), S)); + if numel(y) == sum([objs.nTrial]) + % Trial-level labels: take the right slice + T = objs(ss).nTrial; + end + yCell{ss} = y(offset + (1:T)); + offset = offset + T; +end +end diff --git a/@NeuralEmbedding/crossValReconstruct.m b/@NeuralEmbedding/crossValReconstruct.m new file mode 100644 index 0000000..2366527 --- /dev/null +++ b/@NeuralEmbedding/crossValReconstruct.m @@ -0,0 +1,92 @@ +function results = crossValReconstruct(obj, dim, pars) +%CROSSVALRECONSTRUCT Cross-validated PCA reconstruction of neural activity. +% +% RESULTS = CROSSVALRECONSTRUCT(OBJ, DIM) performs k-fold +% cross-validation to estimate how well a DIM-dimensional PCA embedding +% reconstructs held-out neural activity. Embedding and z-scoring are +% fitted on the training folds only (no data leakage). +% +% RESULTS = CROSSVALRECONSTRUCT(OBJ, DIM, PARS) overrides default +% parameters via the struct PARS. See diagnostics.pars.CVReconstruction. +% +% Multi-session usage +% ------------------- +% RESULTS = CROSSVALRECONSTRUCT(OBJS, DIM, ...) where OBJS is a vector / +% array of NeuralEmbedding objects returns a (1 x nSessions) struct array. +% +% Inputs +% ------ +% obj : scalar or array of NeuralEmbedding objects. +% dim : positive integer, number of latent dimensions. +% pars: struct overriding diagnostics.pars.CVReconstruction defaults. +% Fields: kfold (5), rngSeed (0), zscore (true). +% +% Outputs +% ------- +% results : struct (or struct array) with fields +% .reconCorrMean - mean Pearson r across folds. +% .reconCorrPerFold - per-fold Pearson r. +% .R2Mean - mean R^2 across folds. +% .R2PerFold - per-fold R^2. +% .corrPerNeuron - mean per-neuron Pearson r across folds. +% .dim, .kfold, .rngSeed - parameters used. +% .animal, .session - metadata. +% +% Example +% ------- +% res = NE.crossValReconstruct(5); +% fprintf('Mean Pearson r = %.3f\n', res.reconCorrMean); +% +% See also diagnostics.compute.cv_reconstruction, +% diagnostics.pars.CVReconstruction, +% NeuralEmbedding.selectDimension + +% --- Multi-session dispatch --- +if ~isscalar(obj) + nSess = numel(obj); + results = cell(1, nSess); + for ss = 1:nSess + if nargin < 3 + results{ss} = crossValReconstruct(obj(ss), dim); + else + results{ss} = crossValReconstruct(obj(ss), dim, pars); + end + end + results = [results{:}]; + return; +end + +% --- Defaults --- +defaultPars = diagnostics.pars.CVReconstruction(); +if nargin < 3 || isempty(pars) + pars = defaultPars; +else + pars = NeuralEmbedding.mergestructs(defaultPars, pars); +end + +% --- Extract data --- +X = i_get_data(obj); + +% --- Build label for progress output --- +label = sprintf('%s.%s', obj.Animal, obj.Session); +if pars.verbose + fprintf(1, '\nCrossValReconstruct [%s] dim=%d', label, dim); +end + +% --- Run CV reconstruction --- +results = diagnostics.compute.cv_reconstruction(X, dim, pars); + +% --- Attach metadata --- +results.animal = obj.Animal; +results.session = obj.Session; + +% --- Store in M_ --- +obj.i_storeM(results, 'CVReconstruction'); +end + +% ========================================================================= +function X = i_get_data(obj) +S = obj.S; +S = S(:, 1); % first area column +X = cell2mat(cellfun(@(s) s', S, 'UniformOutput', false)); +end diff --git a/@NeuralEmbedding/i_storeM.m b/@NeuralEmbedding/i_storeM.m new file mode 100644 index 0000000..2578530 --- /dev/null +++ b/@NeuralEmbedding/i_storeM.m @@ -0,0 +1,38 @@ +function i_storeM(obj, data, type) +%I_STOREM Store a diagnostic result in the M_ metrics struct array. +% +% I_STOREM(OBJ, DATA, TYPE) wraps DATA in the standard M_ struct +% template (fields: type, date, condition, data, Area) and updates +% OBJ.M_ using the same replace-or-append logic as COMPUTEMETRICS: +% +% * If M_ is still the empty initial placeholder, it is replaced. +% * If OBJ.appendM == true, the new entry is always appended. +% * Otherwise an existing entry with the same type / condition / Area +% is overwritten; if none exists, the new entry is appended. +% +% This method is private and is called by the diagnostic class methods +% (selectDimension, crossValReconstruct, crossValDecode, alignSessions). +% +% See also NeuralEmbedding.computeMetrics, NeuralEmbedding.initMstruct + +type = string(type); % ensure string for reliable == comparison +Mstr = obj.initMstruct(data, type); + +% Use M_ directly (not the public getter) to avoid getter transformation +if all(arrayfun(@(t) all(structfun(@isempty,t)), obj.M_)) + % M_ is still the empty initialisation placeholder + obj.M_ = Mstr; +elseif obj.appendM + obj.M_ = [obj.M_ Mstr]; +else + % Replace the most recent entry with the same type / condition / area + idx = (string([obj.M_.type]) == Mstr.type) & ... + (string([obj.M_.condition]) == Mstr.condition) & ... + (string([obj.M_.Area]) == Mstr.Area); + if any(idx) + obj.M_(idx) = Mstr; + else + obj.M_ = [obj.M_ Mstr]; + end +end +end diff --git a/@NeuralEmbedding/labelsFromEvents.m b/@NeuralEmbedding/labelsFromEvents.m new file mode 100644 index 0000000..63aafd1 --- /dev/null +++ b/@NeuralEmbedding/labelsFromEvents.m @@ -0,0 +1,151 @@ +function y = labelsFromEvents(obj, eventNames) +%LABELSFROMEVENTS Build a per-time-bin label vector from stored events. +% +% Y = LABELSFROMEVENTS(OBJ, EVENTNAMES) creates a label vector Y with +% one element per time-bin (matching the trial-concatenated data returned +% by methods such as crossValDecode and crossValReconstruct). +% +% Each time bin in each trial is labelled with the name of the *most +% recent* event (from EVENTNAMES) that has occurred up to that bin, +% or '0' if no requested event has occurred yet. +% +% The method respects the current condition mask (cMask) of the object: +% only masked trials are included, in the same order as OBJ.S and OBJ.E. +% +% Inputs +% ------ +% obj : scalar NeuralEmbedding object (must have events stored +% via addEvents). +% eventNames : string, char, string array, or cellstr — event name(s) to +% use as label sources. The order determines priority: if +% two events occur at the same timestamp, the first in +% EVENTNAMES wins. +% Pass "all" to use every unique event name present in the +% object (names used as labels, in order of first occurrence). +% +% Output +% ------ +% y : T x 1 categorical vector (T = total time-bins across all *masked* +% trials, in the same order as OBJ.S(:,1)). +% Categories are the requested event names plus '0' (= no event yet). +% +% Notes +% ----- +% * Only the masked trials (cMask) are included; the result aligns with +% the output of OBJ.S, OBJ.E, and the CV diagnostic methods. +% * Events that fall outside the trial time window are ignored. +% * If a trial has no events in EVENTNAMES, all bins in that trial +% are labelled '0'. +% +% Example +% ------- +% % Label time bins as 'Cue', 'Go', or '0' (pre-event) +% y = NE.labelsFromEvents({'Cue','Go'}); +% res = NE.crossValDecode(y, 5); +% +% % Use all events +% y = NE.labelsFromEvents("all"); +% +% See also NeuralEmbedding.addEvents, NeuralEmbedding.crossValDecode + +% --- Validate inputs --- +if ~isscalar(obj) + error('NeuralEmbedding:labelsFromEvents:notScalar', ... + 'OBJ must be a scalar NeuralEmbedding object. Loop over sessions in caller.'); +end +if isempty(obj.Events) + error('NeuralEmbedding:labelsFromEvents:noEvents', ... + 'No events stored. Call addEvents first.'); +end + +eventNames = string(eventNames); + +% Resolve "all" → all unique event names in order of first appearance +if isscalar(eventNames) && eventNames == "all" + evtNames_ = string({obj.Events_.Name}); + [~, firstIdx] = unique(evtNames_, 'stable'); + eventNames = evtNames_(sort(firstIdx)); +end + +% --- Get masked trial indices (1-based into full trial list) --- +maskedTrialIdx = find(obj.cMask(:)'); % 1 x nMasked + +% --- Build trial-level lookup tables --- +% S respects cMask; use first area column for time-bin counts +S = obj.S; +S = S(:, 1); % first area column (nMasked x 1) +nMasked = numel(S); +nBinsPerTrial = cellfun(@(s) size(s, 2), S); +T_total = sum(nBinsPerTrial); + +% Time vectors filtered by cMask (and tMask, subsampling) +trialTimes = obj.TrialTime; % cell array, length = nMasked + +% Pre-allocate label vector +yStr = repmat("0", T_total, 1); + +% Pull event fields once +evtTrialIdx = [obj.Events_.Trial]; +evtTs = [obj.Events_.Ts]; +evtNameArr = string({obj.Events_.Name}); + +% Filter to requested event names only +isRequested = ismember(evtNameArr, eventNames); +[~, evtPriority] = ismember(evtNameArr, eventNames); % 0 if not requested + +offset = 0; +for k = 1:nMasked + origTr = maskedTrialIdx(k); % original (unmasked) trial index + tVec = trialTimes{k}; % 1 x nBins or nBins x 1 (masked+subsampled) + nBins = nBinsPerTrial(k); + if isempty(tVec) || nBins == 0 + offset = offset + nBins; + continue; + end + tVec = tVec(:)'; % 1 x nBins row + + % Events for this (original) trial + inTrial = (evtTrialIdx == origTr) & isRequested; + if ~any(inTrial) + offset = offset + nBins; + continue; + end + + ts_tr = evtTs(inTrial); + names_tr = evtNameArr(inTrial); + prio_tr = evtPriority(inTrial); + + % Sort by timestamp, then by priority (lower index = higher priority) + [ts_sorted, sortOrd] = sort(ts_tr); + names_sorted = names_tr(sortOrd); + prio_sorted = prio_tr(sortOrd); + + % For ties in timestamp: keep the event with the lowest eventNames index + uniqueTs = unique(ts_sorted); + for uIdx = 1:numel(uniqueTs) + same = (ts_sorted == uniqueTs(uIdx)); + if sum(same) > 1 + [~, bestRel] = min(prio_sorted(same)); + keep = find(same); + remove = keep; remove(bestRel) = []; + names_sorted(remove) = []; + prio_sorted(remove) = []; + ts_sorted(remove) = []; + end + end + + % Assign labels: each bin gets the name of the most recent event before it + binLabels = repmat("0", nBins, 1); + for evIdx = 1:numel(ts_sorted) + afterTs = tVec >= ts_sorted(evIdx); + binLabels(afterTs) = names_sorted(evIdx); + end + + yStr(offset + (1:nBins)) = binLabels; + offset = offset + nBins; +end + +% Convert to categorical with a consistent category set +allCats = ["0", eventNames(:)']; +y = categorical(yStr, allCats); +end diff --git a/@NeuralEmbedding/selectDimension.m b/@NeuralEmbedding/selectDimension.m new file mode 100644 index 0000000..8eef94d --- /dev/null +++ b/@NeuralEmbedding/selectDimension.m @@ -0,0 +1,177 @@ +function results = selectDimension(obj, dims, pars) +%SELECTDIMENSION Shuffle-based dimension selection via parallel analysis. +% +% RESULTS = SELECTDIMENSION(OBJ) runs parallel analysis on the smoothed, +% preprocessed data stored in the NeuralEmbedding object and returns a +% struct with the selected dimensionality (dStar) and diagnostic output. +% +% RESULTS = SELECTDIMENSION(OBJ, DIMS) tests the specified component +% indices (default 1:15). +% +% RESULTS = SELECTDIMENSION(OBJ, DIMS, PARS) additionally overrides +% default parameters with those in the struct PARS. See +% diagnostics.pars.ParallelAnalysis for all available fields. +% +% Multi-session usage +% ------------------- +% RESULTS = SELECTDIMENSION(OBJS, ...) where OBJS is a vector / array of +% NeuralEmbedding objects returns a (1 x nSessions) struct array with +% one entry per session. +% +% Inputs +% ------ +% obj : scalar or array of NeuralEmbedding objects. +% dims : positive integer vector of component indices to test +% (default 1:15). +% pars : struct overriding diagnostics.pars.ParallelAnalysis defaults. +% Fields: nShuffle (200), alpha (0.05), rngSeed (0), +% mode ('neuronwise' | 'rowperm'). +% +% Outputs +% ------- +% results : struct (or struct array for multiple sessions) with fields +% .eigReal - real eigenvalues for tested dims. +% .eigNull - (nShuffle x numel(dims)) null eigenvalue matrix. +% .nullQuantile - (1-alpha) null quantile per dim. +% .dimsTested - dims vector used. +% .dStar - selected dimension. +% .alpha, .nShuffle, .rngSeed, .mode - parameters used. +% .animal, .session - metadata from the NeuralEmbedding object. +% .embeddingMethod - embedding method used (from obj.currentEmbeddingMethod). +% +% Projection pipeline +% ------------------- +% The null eigenspectrum is built using the same projection pipeline as +% obj.findEmbedding: +% * For PCA / SmoothPCA — covariance PCA (centre only, no extra +% z-score). obj.S already applies any z-scoring configured on the +% object, so double-standardising is avoided. This matches +% embedding.PCA.reduce / MATLAB's pca() exactly. +% * For GPFA / CCA / other — a warning is issued and PCA on the +% smoothed data is used as an approximation. For rigorous dimension +% selection with those methods use crossValReconstruct. +% +% Example +% ------- +% % Single session +% res = NE.selectDimension(1:20); +% fprintf('%s.%s dStar = %d\n', res.animal, res.session, res.dStar); +% +% % Multiple sessions +% results = selectDimension(NEobjs, 1:20); +% [results.dStar] +% +% See also diagnostics.compute.dim_parallel_analysis, +% diagnostics.pars.ParallelAnalysis, +% NeuralEmbedding.crossValReconstruct, +% NeuralEmbedding.crossValDecode + +% --- Multi-session dispatch --- +if ~isscalar(obj) + nSess = numel(obj); + results = cell(1, nSess); + for ss = 1:nSess + if nargin < 2 + results{ss} = selectDimension(obj(ss)); + elseif nargin < 3 + results{ss} = selectDimension(obj(ss), dims); + else + results{ss} = selectDimension(obj(ss), dims, pars); + end + end + results = [results{:}]; % struct array + return; +end + +% --- Defaults --- +if nargin < 2 || isempty(dims) + dims = 1:15; +end +defaultPars = diagnostics.pars.ParallelAnalysis(); +if nargin < 3 || isempty(pars) + pars = defaultPars; +else + % Fill missing fields with defaults (user pars take priority) + pars = NeuralEmbedding.mergestructs(defaultPars, pars); +end + +% --- Build label for progress output --- +label = sprintf('%s.%s', obj.Animal, obj.Session); +if pars.verbose + fprintf(1, '\nSelectDimension [%s]', label); +end + +% --- Extract data (concatenate all trials along time axis) --- +% obj.S already applies z-scoring / preprocessing set on the object, so we +% do NOT z-score again inside dim_parallel_analysis (projFcn is used). +X = i_get_data(obj); + +% --- Build projection function matching the object's embedding method ---- +% This ensures the null distribution is built with exactly the same +% pipeline as findEmbedding, making the comparison statistically valid. +embMethod = char(obj.currentEmbeddingMethod); +projFcn = i_build_proj_fcn(embMethod, label); + +% --- Run parallel analysis --- +results = diagnostics.compute.dim_parallel_analysis(X, dims, pars, label, projFcn); + +% --- Attach metadata --- +results.animal = obj.Animal; +results.session = obj.Session; +results.embeddingMethod = embMethod; + +% --- Store in M_ --- +obj.i_storeM(results, 'ParallelAnalysis'); +end + +% ========================================================================= +% Local helpers +% ========================================================================= +function X = i_get_data(obj) +% Concatenate preprocessed (smoothed + obj-z-scored) trials (from obj.S) +% into T x N matrix. Uses the first area column (respects current aMask). +% NOTE: obj.S already applies z-scoring if obj.zscore=true, so dim_parallel_analysis +% should NOT z-score again — achieved by passing a projFcn. +S = obj.S; +% S is a cell (nTrials x nAreas); use first area column +S = S(:, 1); +% Each cell is nUnits x nTimeBins; transpose to nTimeBins x nUnits, then vertcat +X = cell2mat(cellfun(@(s) s', S, 'UniformOutput', false)); +end + +function projFcn = i_build_proj_fcn(embMethod, label) +% Build the eigenvalue-extraction function that matches the embedding method. +% +% For PCA / SmoothPCA: use covariance PCA (centre only, no z-score), which +% matches embedding.PCA.reduce / MATLAB's pca() behaviour. +% +% For other methods (GPFA, CCA, …): parallel analysis in the PCA sense is +% not directly applicable. We warn the user and fall back to covariance +% PCA on the smoothed data as a useful approximation. For rigorous +% dimensionality selection with non-PCA methods, use crossValReconstruct. +switch upper(strtrim(embMethod)) + case {'PCA', 'SMOOTHPCA', ''} + % Covariance PCA — matches embedding.PCA.reduce (pca() centers only) + otherwise + warning('NeuralEmbedding:selectDimension:methodMismatch', ... + ['Parallel analysis uses PCA eigenvalues, but the current ' ... + 'embedding method for [%s] is ''%s''. ' ... + 'Results are still meaningful as a baseline, but consider ' ... + 'crossValReconstruct for dimension selection with non-PCA ' ... + 'embeddings.'], label, embMethod); +end +% Covariance PCA (centre only) — matches embedding.PCA.reduce in all cases +projFcn = @(X, maxK) i_pca_eigs_cov(X, maxK); +end + +function eigs = i_pca_eigs_cov(X, maxK) +% Covariance-PCA eigenvalues (centre only, no z-score). +% Equivalent to MATLAB's pca() and embedding.PCA.reduce, but toolbox-free. +% X must already be preprocessed (e.g. obj.S applies z-scoring if needed). +[T, N] = size(X); +maxK = min(maxK, min(T, N) - 1); +X = X - mean(X, 1); % centre columns (pca() default) +[~, S, ~] = svd(X, 'econ'); % economy SVD, singular values descending +sv = diag(S); +eigs = (sv(1:maxK).^2 / (T - 1))'; % covariance eigenvalues +end diff --git a/README.md b/README.md index 85172ee..ba448c8 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ - **Dimensionality Reduction Techniques**: Apply various algorithms to reduce the dimensionality of spiking neural data, helping to uncover underlying neural dynamics. - **Evaluation Metrics**: Utilize built-in metrics to assess the effectiveness and quality of the generated embeddings. +- **Manifold Diagnostics**: Validate reconstructed latent spaces with shuffle-based dimension selection, cross-validated reconstruction, permutation-tested decoding, and Procrustes alignment across sessions. - **Flexible and Extensible**: The library is designed to be flexible, allowing for easy integration with existing workflows and extending with new methods. ## Installation @@ -43,6 +44,29 @@ NE.computeMetrics("arc"); For detailed documentation and examples, please refer to the [Wiki](https://github.com/barbaLab/NeuralEmbedding/wiki). +### Manifold Diagnostics + +The library includes a **manifold diagnostic toolkit** for validating latent-space quality. +See [`docs/manifold_diagnostics.md`](docs/manifold_diagnostics.md) for the full guide. + +Quick-start (single session): + +```matlab +% Dimension selection via parallel analysis +resA = NE.selectDimension(1:15); +fprintf('Selected dim: %d\n', resA.dStar); + +% Cross-validated reconstruction +resB = NE.crossValReconstruct(resA.dStar); + +% Cross-validated decoding + permutation test +resC = NE.crossValDecode(y_trial, resA.dStar); +fprintf('Acc=%.1f%% p=%.4f\n', resC.accMean*100, resC.pValue); + +% Multi-session alignment (accepts NeuralEmbedding object array) +resD = alignSessions([NE1, NE2, NE3]); +``` + ## Contributing Contributions are welcome! If you’d like to contribute, please fork the repository and create a pull request with your changes. diff --git a/docs/manifold_diagnostics.md b/docs/manifold_diagnostics.md new file mode 100644 index 0000000..6bfb56d --- /dev/null +++ b/docs/manifold_diagnostics.md @@ -0,0 +1,460 @@ +# Manifold Diagnostics + +This page describes the **manifold diagnostic toolkit** added to NeuralEmbedding. +It provides robust best-practice diagnostics for reconstructed neural manifolds +and latent spaces, covering six areas: + +| Method | Class method | Underlying compute | +|--------|-------------|-------------------| +| A – Dimension selection | `selectDimension` | `diagnostics.compute.dim_parallel_analysis` | +| B – CV reconstruction | `crossValReconstruct` | `diagnostics.compute.cv_reconstruction` | +| C – CV decoding + permutation test | `crossValDecode` | `diagnostics.compute.cv_decoding`, `diagnostics.compute.permutation_test` | +| D – Multi-session alignment | `alignSessions` | `diagnostics.compute.align_procrustes`, `diagnostics.compute.alignment_metrics` | +| E – Within-session stability | `crossValAlignment` | `diagnostics.compute.intra_alignment` | +| F – Event-based labels | `labelsFromEvents` | — (utility method) | + +All long-running methods print **progress** to the console in the format +`[Animal.Session]: step N/N done.` Suppress with `pars.verbose = false`. + +--- + +## Quick-start + +```matlab +% Single session +NE = NeuralEmbedding(data, 'condition', labels, 'session', 'S1', 'animal', 'Rat1'); + +% A) Select dimensionality +resA = NE.selectDimension(1:15); +fprintf('dStar = %d\n', resA.dStar); + +% B) Cross-validated reconstruction +resB = NE.crossValReconstruct(resA.dStar); +fprintf('CV Pearson r = %.3f\n', resB.reconCorrMean); + +% C) CV decoding + permutation test +% Option 1: trial-level integer labels +resC = NE.crossValDecode(y_trial, resA.dStar); +% Option 2: build labels from stored events +y_evts = NE.labelsFromEvents({'Cue','Go'}); % per-time-bin categorical +resC2 = NE.crossValDecode(y_evts, resA.dStar); +fprintf('Acc = %.1f%% p = %.4f\n', resC.accMean*100, resC.pValue); + +% D) Align two sessions and activate the aligned subspace +NE1.findEmbedding('PCA'); NE2.findEmbedding('PCA'); +resD = alignSessions([NE1, NE2]); +NE2.useAlignment = true; % get.E / get.W now return aligned data +fprintf('Disparity = %.4f\n', resD.disparity(end, 2)); % last area, session 2 + +% E) Within-session stability (intra-session split-half) +NE.findEmbedding('PCA'); +resE = NE.crossValAlignment(); +fprintf('Intra-session disparity = %.4f ± %.4f\n', ... + resE.disparityMean, resE.disparityStd); + +% Inspect stored results (all methods auto-save to M_) +NE.M % table with ParallelAnalysis, CVReconstruction, CVDecoding, IntraAlignment +``` + +Multi-session methods accept an array of NeuralEmbedding objects and return a +struct array (one entry per session): + +```matlab +NEobjs = [NE1, NE2, NE3]; % vector of NeuralEmbedding objects + +% All methods work with object arrays +resA = NEobjs.selectDimension(1:15); +resD = NEobjs.alignSessions(); +resIA = NEobjs.crossValAlignment(); +``` + + +--- + +## A – Dimension selection (`selectDimension`) + +### What it tests +Whether the dimensionality of the real neural covariance exceeds that expected +by chance given the marginal statistics of each neuron. + +### Method: parallel analysis / shuffle null +1. Compute PCA eigenspectrum of the real (z-scored) data. +2. For each of `nShuffle` replicates, independently permute each neuron's + time series (mode `'neuronwise'`) or shuffle entire rows (mode `'rowperm'`). +3. Select `d*` = largest component k where the real eigenvalue exceeds the + (1-α) quantile of null eigenvalues. + +### What each null preserves / breaks + +| Null mode | Preserves | Breaks | +|-----------|-----------|--------| +| `neuronwise` | Marginal distribution of each neuron | Cross-neuron covariance, temporal autocorrelation within each neuron | +| `rowperm` | Instantaneous cross-neuron patterns | Temporal structure of every neuron | + +**Recommendation**: use `'neuronwise'` (default). It produces a sharper null +because it destroys the specific structure the embedding exploits. + +### Recommended defaults + +| Parameter | Default | Notes | +|-----------|---------|-------| +| `nShuffle` | 200 | 500 for publication-quality results | +| `alpha` | 0.05 | | +| `mode` | `'neuronwise'` | | +| `rngSeed` | 0 | Set to `[]` to use current RNG state | + +### Interpreting results +- `dStar = 0` means no component exceeded the null → data may be too noisy or + the embedding method is not capturing meaningful variance. +- `dStar` near the true latent dimensionality is expected for well-separated + synthetic data. + +--- + +## B – Cross-validated reconstruction (`crossValReconstruct`) + +### What it tests +How well the latent embedding reconstructs held-out neural activity (no data +leakage). + +### Method: k-fold CV PCA reconstruction +1. Split time bins into k folds. +2. For each fold: fit PCA (and optional z-score) on training fold only; project + test fold into latent space; reconstruct by projecting back. +3. Score: Pearson r and R² between vectorised `X_test` and `Xhat_test`. + +### No-leakage guarantee +All preprocessing (z-score mean/std, PCA loadings) is estimated from the +training fold and applied to the test fold. + +### Recommended defaults + +| Parameter | Default | +|-----------|---------| +| `kfold` | 5 | +| `zscore` | `true` | + +### Interpreting results +- `reconCorrMean` increases with `dim` up to the true latent dimensionality and + plateaus (or decreases slightly due to overfitting) beyond. +- Use the elbow in the reconstruction-vs-dim curve to corroborate `dStar` from + parallel analysis. + +--- + +## C – CV decoding + permutation test (`crossValDecode`) + +### What it tests +Whether the latent representation contains statistically significant information +about task labels / behaviour variables, using a permutation-based null. + +### Method +1. **Decoding**: k-fold CV with a nearest-centroid classifier in the PCA latent + space (no toolbox required). +2. **Permutation test**: re-run the same CV pipeline `nPerm` times under + permuted data/labels. p-value: + + ``` + p = (1 + sum(null_acc >= real_acc)) / (nPerm + 1) + ``` + + Effect size: `z = (real - mean(null)) / std(null)`. + +### Permutation modes + +| `permMode` | What is permuted | Preserves | Breaks | +|------------|-----------------|-----------|--------| +| `'global'` | All labels randomly permuted | Label marginal distribution | Label–activity alignment | +| `'blocked'` | Labels permuted within each trial block | Block-level label distribution | Within-block alignment | +| `'timeshift'` | Neural data circularly shifted per neuron | Neural autocorrelation | Neural–label alignment | + +**Recommendation**: +- Default: `'global'` – the strictest null for classification tasks. +- Use `'blocked'` when sessions / conditions have different overall label + frequencies that must be preserved. +- Use `'timeshift'` when temporal structure in the neural data is important and + must be preserved in the null (e.g., continuous recordings). + +### Recommended defaults + +| Parameter | Default | Notes | +|-----------|---------|-------| +| `kfold` | 5 | | +| `nPerm` | 500 | 1000 for publication | +| `permMode` | `'global'` | | + +### Interpreting p-values +- `p < alpha` (e.g. 0.05): the decoded labels are significantly better than + chance. +- `p >= alpha`: decoding is not significant at the chosen level. +- `effectZ > 2`: strong effect regardless of `nPerm`. +- The permutation p-value is bounded below by `1/(nPerm+1)`. + +--- + +## D – Multi-session alignment (`alignSessions`) + +### What it tests +Whether the latent geometry is consistent across sessions / animals. + +### Method: orthogonal Procrustes +For each non-reference session **and for every area** present in the reference: +1. Centre both latent matrices. +2. Solve `min ||Z_ref - Z_sess * R||_F` subject to `R'R = I` (orthogonal + Procrustes) via SVD: `[U,S,V] = svd(Z_ref' * Z_sess); R = V * U'`. +3. Optionally optimise an isotropic scale factor (`allowScale = true`). + +### Object-level side effects +`alignSessions` stores results inside each object so it becomes **self-sufficient**: + +| What is stored | Where | +|----------------|-------| +| Rotated per-trial embeddings | Private `E_aligned_` (per area, per trial) | +| Rotated projection matrix | Private `W_aligned_` (per area) | +| Alignment quality metrics | `M_` via `i_storeM` (type = `'Alignment'`) | + +Once `alignSessions` has been called, toggling the public flag +`OBJ.useAlignment = true` makes `OBJ.E` and `OBJ.W` return the **aligned** +subspace instead of the original one, for all areas: + +```matlab +NEobjs = [NE1, NE2, NE3]; +for ss = 1:3, NEobjs(ss).findEmbedding('PCA'); end +res = alignSessions(NEobjs); % stores aligned subspaces in each object + +NE2.useAlignment = true; % activate for session 2 +E_aligned = NE2.E; % returns rotation-transformed embedding + +NE2.useAlignment = false; % restore original +E_original = NE2.E; +``` + +- Setting `useAlignment = true` before `alignSessions` has been called silently + falls back to the original embedding (no error, no data corruption). +- `W_aligned_` is `[]` if no embedding was fitted yet. +- The flag is **independent per object**, so you can activate it for a subset of + sessions (e.g. all non-reference sessions). + +### Alignment metrics + +| Metric | Meaning | Good value | +|--------|---------|------------| +| `disparity` | Normalised Frobenius distance | Closer to 0 | +| `principalAngles` | Subspace angles (radians) between column spaces | Closer to 0 | +| `distCorr` | Mantel correlation between pairwise distance matrices | Closer to 1 | + +Results are now `(nAreas × nSessions)` arrays/cell arrays, one row per area. + +### Recommended defaults + +| Parameter | Default | +|-----------|---------| +| `refSession` | 1 (first session in array) | +| `allowScale` | `false` (orthogonal only) | + +### Multi-session usage pattern + +```matlab +NEobjs = [NE1, NE2, NE3]; +% Must run findEmbedding first on all sessions +for ss = 1:numel(NEobjs) + NEobjs(ss).numPC = 5; % set to dStar + NEobjs(ss).findEmbedding('PCA'); +end +pars = diagnostics.pars.ProcrustesAlignment(); +pars.refSession = 1; +res = NEobjs.alignSessions(pars); + +% Per-session, per-area alignment summary +for ss = 2:numel(NEobjs) + for aa = 1:numel(res.areas) + fprintf('Session %d, Area %s: disparity=%.4f meanAngle=%.2f°\n', ... + ss, res.areas(aa), res.disparity(aa,ss), ... + rad2deg(res.meanPrincipalAngle(aa,ss))); + end +end + +% Activate aligned subspace for all non-reference sessions +for ss = 2:numel(NEobjs) + NEobjs(ss).useAlignment = true; +end +``` + +--- + +## Result storage in M_ + +All diagnostic methods automatically store their output in the object's `M_` +property using the same replace-or-append logic as `computeMetrics`: + +| Method | M_ type field | +|--------|--------------| +| `selectDimension` | `'ParallelAnalysis'` | +| `crossValReconstruct` | `'CVReconstruction'` | +| `crossValDecode` | `'CVDecoding'` | +| `alignSessions` (per session) | `'SessionAlignment'` | +| `crossValAlignment` | `'IntraAlignment'` | + +```matlab +NE.selectDimension(1:15); +NE.crossValReconstruct(5); +NE.crossValAlignment(); +M = NE.M; % returns table with all stored metrics +M.type % ["ParallelAnalysis"; "CVReconstruction"; "IntraAlignment"] +M.data{1} % the full results struct for ParallelAnalysis +``` + +Re-running any diagnostic with the same condition/area mask **replaces** the +previous entry (no duplicates). Set `NE.appendM = true` to keep all runs. + +> **Note**: The cross-session alignment type is `'SessionAlignment'` (not +> `'Alignment'`) to avoid confusion with the pre-existing `alignment` metric +> computed by `computeMetrics`. + +--- + +## E – Within-session stability (`crossValAlignment`) + +### What it tests +Whether the latent manifold geometry is **reproducible within the session** — +i.e. whether the manifold learned from a random half of trials matches the +manifold from the other half. + +### Method: random split-half Procrustes +For each of `nSplit` replicates: +1. Randomly split all trials into two equal-size groups. +2. Optionally refit PCA on each group independently (when `dim > 0`); + otherwise use the existing latent coordinates. +3. Align the two latent matrices with orthogonal Procrustes. +4. Record disparity, principal angles, and distance correlation. + +### Interpreting results +- Low median disparity (close to 0) → manifold is stable within the session. +- Compare `disparityMean` with the cross-session disparity from `alignSessions` + to distinguish genuine session-to-session change from within-session + noise/variability. +- A high within-session disparity relative to the cross-session disparity + may indicate that the session itself is non-stationary (e.g. learning, drift). + +### Recommended defaults + +| Parameter | Default | Notes | +|-----------|---------|-------| +| `nSplit` | 100 | 500 for publication | +| `dim` | 0 (use current E) | Set to `dStar` to refit PCA on each half | +| `allowScale` | `false` | | + +```matlab +NE.findEmbedding('PCA'); + +% Use current embedding directly (no refit) +res = NE.crossValAlignment(); +fprintf('Intra-session disparity = %.4f ± %.4f\n', ... + res.disparityMean, res.disparityStd); + +% Compare with cross-session alignment +resX = alignSessions([NE1, NE2]); +fprintf('Cross-session disparity = %.4f\n', resX.disparity(1,2)); +``` + +--- + +## F – Event-based time-bin labels (`labelsFromEvents`) + +### What it does +`labelsFromEvents` converts stored behavioral events into a **per-time-bin +categorical label vector** that can be passed directly to `crossValDecode` or +`crossValReconstruct`. + +Each time bin in each trial is labelled with the name of the *most recent* +event (from the requested list) that has occurred up to that bin. Bins before +the first requested event are labelled `'0'`. + +### Usage + +```matlab +% Add events first +NE.addEvents(evts); % evts: struct with fields Ts, Name, Trial + +% Decode which event epoch each time bin belongs to +y = NE.labelsFromEvents({'Cue','Go','Reward'}); +% y is a T x 1 categorical with categories '0','Cue','Go','Reward' + +% Use directly with crossValDecode +res = NE.crossValDecode(y, dStar); + +% Use all unique event names +y_all = NE.labelsFromEvents("all"); +``` + +### Priority tie-breaking +If two events from the requested list share the same timestamp, the one that +appears **earlier** in the `eventNames` input wins (i.e. input order +determines priority). + +--- + +## Null-model summary table + +| Null | Preserves | Breaks | Use for | +|------|-----------|--------|---------| +| Neuron-wise shuffle | Marginal distribution per neuron | Cross-neuron covariance, autocorrelation | Dimension selection | +| Row permutation | Instantaneous covariance | Temporal order | Weaker dimension-selection null | +| Global label permutation | Label marginal distribution | Label–activity alignment | Decoding permutation test (default) | +| Blocked label permutation | Block-level label structure | Within-block alignment | Multi-condition / multi-session decoding | +| Circular time-shift | Autocorrelation per neuron | Neural–label alignment | Temporal decoding null | + +--- + +## Toolbox dependencies + +All implementations use **base MATLAB only** (SVD, basic linear algebra). +No Statistics, Machine Learning, or Signal Processing Toolbox functions are +required. + +The nearest-centroid decoder is self-contained. If you wish to use a richer +decoder (e.g. SVM), you can supply a custom `statFcn` to +`diagnostics.compute.permutation_test` directly. + +--- + +## File locations + +``` ++diagnostics/ + +compute/ + dim_parallel_analysis.m % parallel analysis core + cv_reconstruction.m % CV reconstruction core + cv_decoding.m % CV decoding core + permutation_test.m % generic permutation test (with progress) + shuffle_neuronwise.m % neuron-wise shuffle utility + circular_shift.m % circular time-shift utility + align_procrustes.m % orthogonal Procrustes alignment + alignment_metrics.m % alignment quality metrics + intra_alignment.m % within-session split-half stability + +pars/ + ParallelAnalysis.m % default parameters (includes verbose) + CVReconstruction.m + CVDecoding.m + ProcrustesAlignment.m + IntraAlignment.m % parameters for within-session stability + +shufflers/ + global_permute.m % global label permuter + blocked_permute.m % blocked label permuter + circular_shift.m % circular-shift permuter wrapper + +@NeuralEmbedding/ + selectDimension.m % class method – dimension selection (auto-saves to M_) + crossValReconstruct.m % class method – CV reconstruction (auto-saves to M_) + crossValDecode.m % class method – CV decoding (auto-saves to M_) + alignSessions.m % class method – Procrustes alignment (stores E_aligned_, W_aligned_, saves to M_ as 'SessionAlignment') + crossValAlignment.m % class method – within-session stability (auto-saves to M_ as 'IntraAlignment') + labelsFromEvents.m % utility – build per-time-bin labels from stored events + i_storeM.m % private helper – update M_ with a diagnostic result + +examples/ + demo_manifold_diagnostics.m % end-to-end demo script + +tests/ + smoke_test_diagnostics.m % automated smoke test +``` diff --git a/examples/demo_manifold_diagnostics.m b/examples/demo_manifold_diagnostics.m new file mode 100644 index 0000000..400f6cd --- /dev/null +++ b/examples/demo_manifold_diagnostics.m @@ -0,0 +1,235 @@ +%DEMO_MANIFOLD_DIAGNOSTICS End-to-end demo of the manifold diagnostic toolkit. +% +% This script demonstrates all four diagnostic capabilities using +% synthetic low-dimensional latent data: +% +% A) Dimension selection via parallel analysis (selectDimension) +% B) Cross-validated reconstruction (crossValReconstruct) +% C) Cross-validated decoding + permutation test (crossValDecode) +% D) Multi-session alignment (alignSessions) +% +% No real data are required: synthetic sessions are generated in-script. +% All results are printed to the console; figures are saved to the +% current directory as PNG files. +% +% Usage +% ----- +% Run this script from the NeuralEmbedding root directory after +% adding it to the MATLAB path: +% addpath(genpath('path/to/NeuralEmbedding')); +% run examples/demo_manifold_diagnostics.m +% +% See also NeuralEmbedding, selectDimension, crossValReconstruct, +% crossValDecode, alignSessions + +clear; clc; + +rng(42, 'twister'); + +fprintf('=============================================================\n'); +fprintf(' NeuralEmbedding – Manifold Diagnostics Demo\n'); +fprintf('=============================================================\n\n'); + +% ========================================================================= +% Synthetic data parameters +% ========================================================================= +trueD = 3; % true latent dimensionality +nUnits = 40; % simulated neurons +nTrials = 60; % trials per session +T_trial = 50; % time bins per trial +nSessions = 2; % sessions to simulate +noiseStd = 0.5; % additive noise level +fs = 1000; % sampling rate (Hz) – for NeuralEmbedding constructor +dt = 1/fs; % 1 ms bins +time = (0 : T_trial-1) * dt; + +% ========================================================================= +% Helper: generate one synthetic session +% ========================================================================= +function [NE, y_trial] = make_session(trueD, nUnits, nTrials, T_trial, ... + time, fs, noiseStd, sessionLabel, seed) + rng(seed, 'twister'); + + % Random mixing matrix (nUnits x trueD) + C = randn(nUnits, trueD); + + % Random trial labels (2 classes) + y_trial = randi([1 2], nTrials, 1); + + % Build data: nUnits x T x nTrials + data = zeros(nUnits, T_trial, nTrials); + for tr = 1:nTrials + Z = randn(trueD, T_trial); % latent trajectory + Z(1, :) = Z(1, :) + 2 * (y_trial(tr)-1); % label-dependent offset + data(:, :, tr) = C * Z + noiseStd * randn(nUnits, T_trial); + end + + % Create NeuralEmbedding object + NE = NeuralEmbedding(data, ... + 'time', time, ... + 'fs', fs, ... + 'condition', string(y_trial), ... + 'session', sessionLabel, ... + 'animal', 'SyntheticAnimal'); +end + +% ========================================================================= +% Create two synthetic sessions +% ========================================================================= +fprintf('Generating synthetic data (%d sessions, %d neurons, %d trials)...\n\n', ... + nSessions, nUnits, nTrials); + +[NE1, y1] = make_session(trueD, nUnits, nTrials, T_trial, time, fs, noiseStd, 'Sess1', 1); +[NE2, y2] = make_session(trueD, nUnits, nTrials, T_trial, time, fs, noiseStd, 'Sess2', 2); + +NEobjs = [NE1, NE2]; + +% ========================================================================= +% A) Dimension selection via parallel analysis +% ========================================================================= +fprintf('----- A) Dimension selection (parallel analysis) ------------\n'); +pA = diagnostics.pars.ParallelAnalysis(); +pA.nShuffle = 100; % fewer for demo speed +pA.alpha = 0.05; +pA.rngSeed = 0; + +resA = NEobjs.selectDimension(1:12, pA); + +for ss = 1:nSessions + fprintf(' %s.%s: dStar = %d (true dim = %d)\n', ... + resA(ss).animal, resA(ss).session, resA(ss).dStar, trueD); +end + +% Plot eigenvalues for first session +fig = figure('Visible','off'); +bar(resA(1).dimsTested, resA(1).eigReal, 'FaceColor', [.3 .6 .9]); hold on; +plot(resA(1).dimsTested, resA(1).nullQuantile, 'r--', 'LineWidth', 1.5); +xlabel('Component'); ylabel('Eigenvalue'); +title(sprintf('%s.%s – Parallel Analysis (dStar=%d)', ... + resA(1).animal, resA(1).session, resA(1).dStar)); +legend('Real eigenvalue', sprintf('Null %.0f%% quantile', (1-pA.alpha)*100)); +saveas(fig, 'demo_A_parallel_analysis.png'); +fprintf(' Saved: demo_A_parallel_analysis.png\n\n'); + +% ========================================================================= +% B) Cross-validated reconstruction +% ========================================================================= +fprintf('----- B) Cross-validated reconstruction --------------------\n'); +dimToTest = 1:8; +pB = diagnostics.pars.CVReconstruction(); +pB.kfold = 5; +pB.rngSeed = 0; + +reconCorr = zeros(nSessions, numel(dimToTest)); +for ss = 1:nSessions + for dd = 1:numel(dimToTest) + r = NEobjs(ss).crossValReconstruct(dimToTest(dd), pB); + reconCorr(ss, dd) = r.reconCorrMean; + end +end + +fig = figure('Visible','off'); +plot(dimToTest, reconCorr(1,:), 'b-o', dimToTest, reconCorr(2,:), 'r-s'); +xline(trueD, 'k--', 'True dim'); +xlabel('Latent dimension'); ylabel('CV Pearson r (reconstruction)'); +title('Cross-validated reconstruction vs. dimensionality'); +legend('Session 1', 'Session 2', 'True dim'); +saveas(fig, 'demo_B_cv_reconstruction.png'); +fprintf(' Session 1 r at d=%d: %.3f\n', trueD, reconCorr(1, trueD)); +fprintf(' Session 2 r at d=%d: %.3f\n', trueD, reconCorr(2, trueD)); +fprintf(' Saved: demo_B_cv_reconstruction.png\n\n'); + +% ========================================================================= +% C) Cross-validated decoding + permutation test +% ========================================================================= +fprintf('----- C) CV decoding + permutation test --------------------\n'); +pC = diagnostics.pars.CVDecoding(); +pC.kfold = 5; +pC.nPerm = 200; % fewer for demo speed +pC.rngSeed = 0; +pC.permMode = 'global'; + +% Real labels – should decode significantly +resC_real = NE1.crossValDecode(y1, trueD, pC); +fprintf(' [Real labels] Acc=%.1f%% BalAcc=%.1f%% p=%.4f z=%.2f\n', ... + resC_real.accMean*100, resC_real.balAccMean*100, ... + resC_real.pValue, resC_real.effectZ); + +% Random labels – should not decode significantly +y_rand = randi([1 2], nTrials, 1); +resC_rand = NE1.crossValDecode(y_rand, trueD, pC); +fprintf(' [Random labels] Acc=%.1f%% BalAcc=%.1f%% p=%.4f z=%.2f\n', ... + resC_rand.accMean*100, resC_rand.balAccMean*100, ... + resC_rand.pValue, resC_rand.effectZ); + +% Plot null distributions +fig = figure('Visible','off'); +subplot(1,2,1); +histogram(resC_real.statNull * 100, 20, 'FaceColor', [.8 .8 .8]); hold on; +xline(resC_real.accMean * 100, 'r', 'LineWidth', 2); +xlabel('Null accuracy (%)'); title(sprintf('Real labels p=%.3f', resC_real.pValue)); +subplot(1,2,2); +histogram(resC_rand.statNull * 100, 20, 'FaceColor', [.8 .8 .8]); hold on; +xline(resC_rand.accMean * 100, 'b', 'LineWidth', 2); +xlabel('Null accuracy (%)'); title(sprintf('Random labels p=%.3f', resC_rand.pValue)); +sgtitle('CV Decoding – Permutation Test'); +saveas(fig, 'demo_C_cv_decoding.png'); +fprintf(' Saved: demo_C_cv_decoding.png\n\n'); + +% ========================================================================= +% D) Multi-session alignment (Procrustes) +% ========================================================================= +fprintf('----- D) Multi-session alignment (Procrustes) --------------\n'); + +% Compute embeddings first +NEobjs(1).numPC = trueD; +NEobjs(2).numPC = trueD; +NEobjs(1).findEmbedding('PCA'); +NEobjs(2).findEmbedding('PCA'); + +pD = diagnostics.pars.ProcrustesAlignment(); +pD.refSession = 1; +pD.allowScale = false; + +% alignSessions stores the rotated subspaces back in each object. +resD = NEobjs.alignSessions(pD); + +% Results are now nAreas x nSessions (one row per area, including "AllNeurons"). +fprintf(' Areas: %s\n', strjoin(resD.areas, ' | ')); +for aa = 1:numel(resD.areas) + fprintf(' [%s] Session 2 disparity=%.4f mean angle=%.2f° distCorr=%.3f\n', ... + resD.areas(aa), resD.disparity(aa,2), ... + rad2deg(resD.meanPrincipalAngle(aa,2)), resD.distCorr(aa,2)); +end + +% Activate the aligned subspace for session 2 +NEobjs(2).useAlignment = true; +fprintf(' NEobjs(2).useAlignment = true → get.E now returns aligned data.\n'); + +% Deactivate to restore original subspace +NEobjs(2).useAlignment = false; + +% Show pre- vs post-alignment principal angles (last area = AllNeurons) +fig = figure('Visible','off'); +bar(rad2deg(resD.principalAngles{end, 2})); +xlabel('Principal angle index'); ylabel('Angle (degrees)'); +title('Session 2 → Session 1: principal angles after alignment (AllNeurons)'); +saveas(fig, 'demo_D_alignment.png'); +fprintf(' Saved: demo_D_alignment.png\n\n'); + +% ========================================================================= +% E) Inspect M_ – all diagnostics are auto-stored +% ========================================================================= +fprintf('----- E) Stored metrics (M_) --------------------------------\n'); +M1 = NE1.M; +fprintf(' NE1.M table (%d rows):\n', height(M1)); +for rr = 1:height(M1) + fprintf(' type=%s condition=%s area=%s\n', ... + M1.type{rr}, M1.condition{rr}, M1.Area{rr}); +end + +% ========================================================================= +fprintf('=============================================================\n'); +fprintf(' Demo complete.\n'); +fprintf(' PNG figures saved to: %s\n', pwd); +fprintf('=============================================================\n'); diff --git a/tests/smoke_test_diagnostics.m b/tests/smoke_test_diagnostics.m new file mode 100644 index 0000000..8958db3 --- /dev/null +++ b/tests/smoke_test_diagnostics.m @@ -0,0 +1,255 @@ +%SMOKE_TEST_DIAGNOSTICS Minimal smoke / sanity test for manifold diagnostics. +% +% This script verifies the manifold diagnostic toolkit against synthetic +% data with known ground-truth properties: +% +% 1) dStar from parallel analysis should be near the true latent dim. +% 2) CV reconstruction improves with dimension. +% 3) CV decoding with real labels should be significant; with random labels not. +% 4) Cross-session Procrustes alignment: disparity bounded. +% 4b) useAlignment flag toggles between original / aligned E. +% 5) Multi-session selectDimension returns struct array. +% 6) Diagnostic results stored in M_. +% 7) crossValAlignment: finite disparity distribution, stored in M_. +% 8) labelsFromEvents: returns categorical with correct length. +% +% All tests print PASS / FAIL to the console. Exits with an error if any +% test fails so it can be integrated into automated pipelines. +% +% Usage +% ----- +% From the NeuralEmbedding root directory: +% addpath(genpath('path/to/NeuralEmbedding')); +% run tests/smoke_test_diagnostics.m + +clear; clc; + +fprintf('Running manifold-diagnostics smoke tests...\n\n'); +nFail = 0; + +% Suppress progress output during automated tests +quietParsPA = diagnostics.pars.ParallelAnalysis(); quietParsPA.verbose = false; +quietParsCVR = diagnostics.pars.CVReconstruction(); quietParsCVR.verbose = false; +quietParsCVD = diagnostics.pars.CVDecoding(); quietParsCVD.verbose = false; +quietParsAS = diagnostics.pars.ProcrustesAlignment(); quietParsAS.verbose = false; +quietParsIA = diagnostics.pars.IntraAlignment(); quietParsIA.verbose = false; + +% ========================================================================= +% Shared synthetic data +% ========================================================================= +rng(0, 'twister'); +trueD = 3; +nUnits = 30; +nTrials = 80; +Ttrial = 40; % time bins per trial +noiseStd = 0.3; +fs = 1000; +time = (0:Ttrial-1) / fs; + +% Random mixing matrix +C = randn(nUnits, trueD); + +y_trial = randi([1 2], nTrials, 1); +data = zeros(nUnits, Ttrial, nTrials); +for tr = 1:nTrials + Z = randn(trueD, Ttrial); + Z(1,:) = Z(1,:) + 3*(y_trial(tr)-1); % strong signal + data(:,:,tr) = C*Z + noiseStd*randn(nUnits, Ttrial); +end + +NE = NeuralEmbedding(data, ... + 'time', time, 'fs', fs, ... + 'condition', string(y_trial), ... + 'session', 'Test', 'animal', 'SmokeTest'); + +% ========================================================================= +% Test 1 – Parallel analysis: dStar near trueD +% ========================================================================= +pA = quietParsPA; +pA.nShuffle = 100; +pA.rngSeed = 0; + +resA = NE.selectDimension(1:10, pA); +pass = (resA.dStar >= trueD - 1) && (resA.dStar <= trueD + 2); +nFail = nFail + ~pass; +fprintf('[Test 1] Parallel analysis dStar=%d (true=%d) ... %s\n', ... + resA.dStar, trueD, tf(pass)); + +% ========================================================================= +% Test 2 – CV reconstruction improves with dim +% ========================================================================= +pB = quietParsCVR; +pB.kfold = 3; +pB.rngSeed = 0; + +r1 = NE.crossValReconstruct(1, pB); +r3 = NE.crossValReconstruct(trueD, pB); +pass = (r3.reconCorrMean > r1.reconCorrMean) && (r3.reconCorrMean > 0); +nFail = nFail + ~pass; +fprintf('[Test 2] CV reconstruction r(d=1)=%.3f r(d=%d)=%.3f ... %s\n', ... + r1.reconCorrMean, trueD, r3.reconCorrMean, tf(pass)); + +% ========================================================================= +% Test 3 – Decoding: real labels significant, random labels not +% ========================================================================= +pC = quietParsCVD; +pC.kfold = 3; +pC.nPerm = 200; +pC.rngSeed = 0; +pC.permMode = 'global'; + +resC_real = NE.crossValDecode(y_trial, trueD, pC); +pass3a = (resC_real.pValue < 0.05) && (resC_real.accMean > 0.55); +nFail = nFail + ~pass3a; +fprintf('[Test 3a] Decoding real labels acc=%.1f%% p=%.4f ... %s\n', ... + resC_real.accMean*100, resC_real.pValue, tf(pass3a)); + +y_rand = randi([1 2], nTrials, 1); +resC_rand = NE.crossValDecode(y_rand, trueD, pC); +pass3b = (resC_rand.pValue > 0.05) || (resC_rand.accMean < 0.65); +nFail = nFail + ~pass3b; +fprintf('[Test 3b] Decoding random labels acc=%.1f%% p=%.4f ... %s\n', ... + resC_rand.accMean*100, resC_rand.pValue, tf(pass3b)); + +% ========================================================================= +% Test 4 – Cross-session alignment reduces disparity +% ========================================================================= +rng(1,'twister'); +R_true = orth(randn(trueD)); % random rotation +C2 = C * R_true; + +data2 = zeros(nUnits, Ttrial, nTrials); +for tr = 1:nTrials + Z = randn(trueD, Ttrial); + Z(1,:) = Z(1,:) + 3*(y_trial(tr)-1); + data2(:,:,tr) = C2*Z + noiseStd*randn(nUnits, Ttrial); +end + +NE2 = NeuralEmbedding(data2, ... + 'time', time, 'fs', fs, ... + 'condition', string(y_trial), ... + 'session', 'Test2', 'animal', 'SmokeTest'); + +NEobjs = [NE, NE2]; +NEobjs(1).numPC = trueD; +NEobjs(2).numPC = trueD; +NEobjs(1).findEmbedding('PCA'); +NEobjs(2).findEmbedding('PCA'); + +resD = NEobjs.alignSessions(quietParsAS); +% disparity is now nAreas x nSessions; check session 2, any area +pass = all(resD.disparity(:, 2) <= 1.0); % disparity should be bounded +nFail = nFail + ~pass; +fprintf('[Test 4] Procrustes alignment max_disparity(sess2)=%.4f ... %s\n', ... + max(resD.disparity(:, 2)), tf(pass)); + +% Check M_ type is 'SessionAlignment' (not 'Alignment') +M4 = NE2.M; +pass4type = any(strcmp(M4.type, 'SessionAlignment')); +nFail = nFail + ~pass4type; +fprintf('[Test 4c] M_ type = SessionAlignment ... %s\n', tf(pass4type)); + +% ========================================================================= +% Test 4b – useAlignment flag: E changes after alignment activation +% ========================================================================= +E_before = NEobjs(2).E; % original (unaligned) embedding +NEobjs(2).useAlignment = true; +E_after = NEobjs(2).E; % should now return aligned embedding +NEobjs(2).useAlignment = false; % restore + +% Filter out empty or zero-time-bin cells before comparing +valid_b = ~cellfun(@(e) isempty(e) || size(e,2)==0, E_before); +valid_a = ~cellfun(@(e) isempty(e) || size(e,2)==0, E_after); +if any(valid_b & valid_a) + E_b_mat = cell2mat(cellfun(@(e) e(:)', E_before(valid_b & valid_a), ... + 'UniformOutput', false)); + E_a_mat = cell2mat(cellfun(@(e) e(:)', E_after(valid_a & valid_b), ... + 'UniformOutput', false)); + pass4b = ~isequal(E_b_mat, E_a_mat) || ... + (norm(E_b_mat(:) - E_a_mat(:)) < 1e-8); +else + pass4b = true; % no valid cells to compare; treat as pass +end +nFail = nFail + ~pass4b; +fprintf('[Test 4b] useAlignment flag works ... %s\n', tf(pass4b)); + +% ========================================================================= +% Test 5 – Multi-session selectDimension returns struct array +% ========================================================================= +resMS = NEobjs.selectDimension(1:8, pA); +pass = isstruct(resMS) && numel(resMS) == 2; +nFail = nFail + ~pass; +fprintf('[Test 5] Multi-session selectDimension numel=%d ... %s\n', ... + numel(resMS), tf(pass)); + +% ========================================================================= +% Test 6 – Diagnostic results stored in M_ +% ========================================================================= +M_tbl = NE.M; % get.M returns a table +types_stored = string(M_tbl.type); +pass6 = all(ismember(["ParallelAnalysis","CVReconstruction","CVDecoding"], types_stored)); +nFail = nFail + ~pass6; +fprintf('[Test 6] Diagnostics stored in M_ types=%s ... %s\n', ... + strjoin(types_stored, ', '), tf(pass6)); + +% ========================================================================= +% Test 7 – crossValAlignment: finite disparity, stored in M_ +% ========================================================================= +pIA = quietParsIA; +pIA.nSplit = 20; % few splits for speed +pIA.rngSeed = 0; + +resIA = NE.crossValAlignment(pIA); +pass7a = numel(resIA.disparity) == 20 && all(isfinite(resIA.disparity)); +nFail = nFail + ~pass7a; +fprintf('[Test 7a] crossValAlignment nSplit=%d medDisp=%.4f ... %s\n', ... + numel(resIA.disparity), resIA.disparityMedian, tf(pass7a)); + +M7 = NE.M; +pass7b = any(strcmp(M7.type, 'IntraAlignment')); +nFail = nFail + ~pass7b; +fprintf('[Test 7b] IntraAlignment stored in M_ ... %s\n', tf(pass7b)); + +% ========================================================================= +% Test 8 – labelsFromEvents: returns categorical of correct length +% ========================================================================= +% Add synthetic events to NE: 'Cue' halfway through each trial +evts = struct('Ts', {}, 'Name', {}, 'Trial', {}, 'Data', {}); +for tr = 1:nTrials + evts(end+1).Ts = time(floor(Ttrial/2)); %#ok + evts(end).Name = 'Cue'; + evts(end).Trial = tr; + evts(end).Data = []; +end +NE.addEvents(evts); +y_evts = NE.labelsFromEvents('Cue'); +T_expected = nTrials * Ttrial; +pass8a = numel(y_evts) == T_expected && iscategorical(y_evts); +nFail = nFail + ~pass8a; +fprintf('[Test 8a] labelsFromEvents T=%d (expected %d) ... %s\n', ... + numel(y_evts), T_expected, tf(pass8a)); + +% Check that exactly half the bins are labelled 'Cue' +nCue = sum(y_evts == 'Cue'); +nExpCue = nTrials * (Ttrial - floor(Ttrial/2) + 1); +pass8b = (nCue == nExpCue); +nFail = nFail + ~pass8b; +fprintf('[Test 8b] labelsFromEvents nCue=%d (expected %d) ... %s\n', ... + nCue, nExpCue, tf(pass8b)); + +% ========================================================================= +% Summary +% ========================================================================= +fprintf('\n'); +if nFail == 0 + fprintf('All smoke tests PASSED.\n'); +else + error('smoke_test_diagnostics:failed', ... + '%d smoke test(s) FAILED. See output above.', nFail); +end + +% ========================================================================= +function s = tf(pass) + if pass, s = 'PASS'; else, s = 'FAIL'; end +end +