From 155262f300d2f1994ace78b291eb54f651f72fc8 Mon Sep 17 00:00:00 2001 From: Ryan Harvey Date: Tue, 5 Nov 2024 21:56:27 -0500 Subject: [PATCH] fix formatting and some warnings --- preProcessing/CleanRez.m | 215 ++++++++++++++++++++++----------------- 1 file changed, 122 insertions(+), 93 deletions(-) diff --git a/preProcessing/CleanRez.m b/preProcessing/CleanRez.m index f466e82..18248ab 100644 --- a/preProcessing/CleanRez.m +++ b/preProcessing/CleanRez.m @@ -1,4 +1,4 @@ -function rez = CleanRez(rez,varargin) +function rez = CleanRez(rez, varargin) % Remove noisy clusters from the rez file. This is an optional preprocessing % step the user may choose to perform before exporting the Kilosort results to @@ -12,7 +12,7 @@ % cleanRez = CleanRez(rez, ) % % INPUTS -% +% % rez rez file from kilosort (note post cleaning after sorting % % optional list of property-value pairs (see table below) @@ -27,9 +27,9 @@ % 'mahalThreshold' spikes exceeding this threshold will be considered % outliers and removed (set to Inf to not impose threshold, % default = 12) -% 'minNumberOfSpikes' clusters with nSpikesCleanRez'' for details).']); + error(['Parameter ', num2str(i+2), ' is not a property (type ''help CleanRez'' for details).']); end - switch(lower(varargin{i})) + switch (lower(varargin{i})) case 'savepath' savepath = varargin{i+1}; if ~isfolder(savepath) @@ -76,7 +76,7 @@ end case 'noiseperiods' badIntervals = varargin{i+1}; - if ~isdmatrix(badIntervals) || size(badIntervals,2) ~= 2 || any(rem(badIntervals(:),1)) + if ~isdmatrix(badIntervals) || size(badIntervals, 2) ~= 2 || any(rem(badIntervals(:), 1)) error('Incorrect value for property ''noisePeriods'' (type ''help CleanRez'' for details).'); end case 'multitrough' @@ -110,54 +110,61 @@ error('Incorrect value for property ''verbose'' (type ''help CleanRez'' for details).'); end otherwise - error(['Unknown property ''' num2str(varargin{i}) ''' (type ''help CleanRez'' for details).']); + error(['Unknown property ''', num2str(varargin{i}), ''' (type ''help CleanRez'' for details).']); end end - %% Impose a Mahalanobis distance threshold on each cluster -if mahalThreshold100 - d = mahal(PCA(indices,:),PCA(indices,:)); - bad = sqrt(d)>mahalThreshold; - nSpikesRemoved = nSpikesRemoved+sum(bad); + indices = find(clu == i); + if length(indices) > 100 + d = mahal(PCA(indices, :), PCA(indices, :)); + bad = sqrt(d) > mahalThreshold; + nSpikesRemoved = nSpikesRemoved + sum(bad); clu(indices(bad)) = badCluster; end end - if verbose, disp(['Removed a total of ' num2str(sum(nSpikesRemoved)) ' bad spikes (Mahalanobis threshold) to unused cluster ' num2str(badCluster) ' in phy.']); end + if verbose + disp(['Removed a total of ', num2str(sum(nSpikesRemoved)), ' bad spikes (Mahalanobis threshold) to unused cluster ', num2str(badCluster), ' in phy.']); + end writeNPY(uint32(clu), fullfile(savepath, 'spike_clusters.npy')); -elseif mahalThreshold 0); + if verbose + disp([num2str(sum(singleBin)), '/', num2str(nClusters), ' clusters are a single-bin spike']); end - singleBin = ~ (d>0); - if verbose, disp([num2str(sum(singleBin)) '/' num2str(nClusters) ' clusters are a single-bin spike']); end end -multiplePeaksAndTroughs = false(nClusters,1); +multiplePeaksAndTroughs = false(nClusters, 1); if removeMultiTrough % Some waveforms look like a noisy oscillation: no single peak/trough: - nPeaksAndTroughs = nan(nClusters,1); - for i=1:nClusters - thismax = mWaveformRange(:,i)==max(mWaveformRange(:,i)); % logical - groupID = find(thismax,1); % index - waveform = nanzscore(meanWaveform(groupID,:,i)); + nPeaksAndTroughs = nan(nClusters, 1); + for i = 1:nClusters + thismax = mWaveformRange(:, i) == max(mWaveformRange(:, i)); % logical + groupID = find(thismax, 1); % index + waveform = nanzscore(meanWaveform(groupID, :, i)); if any(~isnan(waveform)) - troughs = strfind([nan;diff(Smooth(waveform(:),1))>0]',[0 1])'; + troughs = strfind([nan; diff(Smooth(waveform(:), 1)) > 0]', [0, 1])'; maxTrough = min(waveform(troughs)); - troughs(waveform(troughs)>maxTrough/2) = []; % ignore local minima that are negligible fluctuations compared to the real trough + troughs(waveform(troughs) > maxTrough/2) = []; % ignore local minima that are negligible fluctuations compared to the real trough nTroughs = length(troughs); - peaks = strfind([nan;diff(Smooth(waveform(:),1))>0]',[1 0])'; + peaks = strfind([nan; diff(Smooth(waveform(:), 1)) > 0]', [1, 0])'; maxPeak = max(waveform(peaks)); - peaks(waveform(peaks)1; - if verbose, disp([num2str(sum(multiplePeaksAndTroughs)) '/' num2str(nClusters) ' have too many troughs/peaks']); end + multiplePeaksAndTroughs = nPeaksAndTroughs > 1; + if verbose + disp([num2str(sum(multiplePeaksAndTroughs)), '/', num2str(nClusters), ' have too many troughs/peaks']); + end end -isiViolation = false(nClusters,1); % if the most spikes are in the middle bin in the ACG +isiViolation = false(nClusters, 1); % if the most spikes are in the middle bin in the ACG if removeISIpeak0 - for i=1:nClusters - spikes = rez.st3(rez.st3(:,2)==i)/rez.ops.fs; - if length(spikes)>1 - [acg,acg_t] = CCG(spikes,ones(size(spikes)),'binSize',0.003,'duration',0.031); - [~,idxMax] = max(acg); - [~,idx0] = min(abs(acg_t)); % bin t=0 - if idxMax==idx0 + for i = 1:nClusters + spikes = rez.st3(rez.st3(:, 2) == i) / rez.ops.fs; + if length(spikes) > 1 + [acg, acg_t] = CCG(spikes, ones(size(spikes)), 'binSize', 0.003, 'duration', 0.031); + [~, idxMax] = max(acg); + [~, idx0] = min(abs(acg_t)); % bin t=0 + if idxMax == idx0 isiViolation(i) = true; end end end - if verbose, disp([num2str(sum(isiViolation)) '/' num2str(nClusters) ' have a strong peak at t=0 in the ACG']); end + if verbose + disp([num2str(sum(isiViolation)), '/', num2str(nClusters), ' have a strong peak at t=0 in the ACG']); + end end % We can also delete clusters with less than a certain number of spikes, e.g. 5 clu = double(readNPY(fullfile(savepath, 'spike_clusters.npy'))); -nSpikes = accumarray(clu+1,1); -tooFew = nSpikesnClusters, tooFew = tooFew(1:nClusters); end -if verbose, disp([num2str(sum(tooFew)) '/' num2str(nClusters) ' have too few assigned spikes']); end +nSpikes = accumarray(clu+1, 1); +tooFew = nSpikes < minNumberOfSpikes; +if length(tooFew) > nClusters, tooFew = tooFew(1:nClusters); +end +if verbose + disp([num2str(sum(tooFew)), '/', num2str(nClusters), ' have too few assigned spikes']); +end detectedOnAllElectrodes(end+1:nClusters) = false; singleBin(end+1:nClusters) = false; @@ -281,30 +308,32 @@ % Clusters selected by any of the above criteria are marked as "noisy" noisy = detectedOnAllElectrodes | singleBin | multiplePeaksAndTroughs | isiViolation | tooFew; -if verbose, disp([num2str(sum(noisy)) '/' num2str(nClusters) ' clusters removed in total']); end +if verbose + disp([num2str(sum(noisy)), '/', num2str(nClusters), ' clusters removed in total']); +end % delete noisy clusters to clean the rez file -rez.cProj(ismember(rez.st3(:,2),find(noisy)),:) = []; -rez.cProjPC(ismember(rez.st3(:,2),find(noisy)),:) = []; -rez.st3(ismember(rez.st3(:,2),find(noisy)),:) = []; +rez.cProj(ismember(rez.st3(:, 2), find(noisy)), :) = []; +rez.cProjPC(ismember(rez.st3(:, 2), find(noisy)), :) = []; +rez.st3(ismember(rez.st3(:, 2), find(noisy)), :) = []; % Export labels for phy -if exist('savepath','var') && exist(savepath,'dir') % if the user has already exported to phy and wants added labels to noise clusters - display(['Exporting results to ' fullfile(savepath,'cluster_group.tsv')]); - if ~exist(fullfile(savepath,'cluster_group.tsv')) - fid = fopen(fullfile(savepath,'cluster_group.tsv'),'w'); +if exist('savepath', 'var') && exist(savepath, 'dir') % if the user has already exported to phy and wants added labels to noise clusters + display(['Exporting results to ', fullfile(savepath, 'cluster_group.tsv')]); + if ~exist(fullfile(savepath, 'cluster_group.tsv'), "file") + fid = fopen(fullfile(savepath, 'cluster_group.tsv'), 'w'); fwrite(fid, sprintf('cluster_id\t%s\r\n', 'group')); else - fid = fopen(fullfile(savepath,'cluster_group.tsv'),'a'); % the file already exists. Add labels to the end + fid = fopen(fullfile(savepath, 'cluster_group.tsv'), 'a'); % the file already exists. Add labels to the end end - indices = find(noisy)-1; % phy notation starts from 0 - for i=1:length(indices) + indices = find(noisy) - 1; % phy notation starts from 0 + for i = 1:length(indices) fwrite(fid, sprintf('%d\t%s\r\n', indices(i), 'noise')); end - if exist('badCluster','var') % if we performed Mahalanobis distance cleaning, mark this new outlier cluster (absent from the rez file) as bad as well + if exist('badCluster', 'var') % if we performed Mahalanobis distance cleaning, mark this new outlier cluster (absent from the rez file) as bad as well fwrite(fid, sprintf('%d\t%s\r\n', badCluster, 'noise')); end - if exist('badCluster2','var') % if we performed noisy periods cleaning, mark this new outlier cluster (absent from the rez file) as bad as well + if exist('badCluster2', 'var') % if we performed noisy periods cleaning, mark this new outlier cluster (absent from the rez file) as bad as well fwrite(fid, sprintf('%d\t%s\r\n', badCluster2, 'noise')); end fclose(fid);