Skip to content

Commit

Permalink
workflow: preprocess after constraining
Browse files Browse the repository at this point in the history
  • Loading branch information
chaitrasarathy committed Sep 8, 2021
2 parents 2367a9c + b6412b9 commit cdb1c54
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 63 deletions.
Binary file modified ComMet.mlx
Binary file not shown.
16 changes: 12 additions & 4 deletions src/ICA_N_optimization.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,22 @@
% symmetric estimation approach)

numIter = 100;
numComps = [2:1:101];
numComps = [2:1:90, 95, 100];

for uu = 1:length(numComps)
tic;
boot_ica = icassoEst('both', pcs_all_rem, numIter, 'g', 'pow3', 'approach', 'symm', 'lastEig', numComps(uu));
end_time_boot = toc;
failed = 'true';
while strcmp(failed, 'true')
try
tic;
boot_ica = icassoEst('both', pcs_all_rem, numIter, 'g', 'pow3', 'approach', 'symm', 'lastEig', numComps(uu));
end_time_boot = toc;
failed = 'false';
break;
end
end
sR = icassoExp(boot_ica);
end_time_sR = toc;
save(['Results_nComps_',num2str(numComps(uu)),'.mat'],'boot_ica','sR',...
'end_time_boot','end_time_sR');
clear('boot_ica','sR','end_time_boot','end_time_sR');
end
15 changes: 7 additions & 8 deletions src/ICA_iters.m
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
function [ica_iters, end_time] = ICA_iters(pcs_all, optN, numIters)
%UNTITLED2 Summary of this function goes here
% Detailed explanation goes here

% This function runs ICA with optimised N for 'numIters' iterations

nn = 1:numIters;
tic;
for uu = 1:length(nn)

fprintf("Iteration: %d\n", uu);
% ICA with fixed random variable
rng(uu);
ica_iters{uu} = fastICA(pcs_all, optN);

end
rng(uu, 'twister');
ica_iters{uu} = fastica(pcs_all, 'numOfIC', optN, 'g', 'pow3', 'approach', 'symm');
if any(uu == [0:500:numIters])
save(['Results_iter_',num2str(uu),'.mat'],'ica_iters');
end
end_time=toc;

end
Expand Down
10 changes: 9 additions & 1 deletion src/ICA_optNPlot.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@

% This script plots the stability of all bootstrapped independent
% components to identify the optimum N for the dataset
% Adapted from:
% Kairov U, Cantini L, Greco A, Molkenov A, Czerwinska U, Barillot E, et al.
% Determining the optimal number of independent components for reproducible
% transcriptomic data analysis. BMC Genomics. 2017;18(1).
% doi:10.1186/s12864-017-4112-9.
% Kairov U, Zinovyev A, Molkenov A. BIODICA GitHub page
% (https://github.com/LabBandSB/BIODICA/); 2017.
figure
mat = dir('R*.mat');
for q = 1:length(mat)
Expand Down
11 changes: 3 additions & 8 deletions src/filterLowLoadingsReactions.m
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
function filtered_data = filterLowLoadingsReactions(rotatedComp)
%UNTITLED Summary of this function goes here
% Detailed explanation goes here
transposedData = rotatedComp';
mval = max(abs(transposedData));
% This function filters the low loading reactions

transposedData = rotatedComp';
mval = max(abs(transposedData),[],1);

% subplot(1,2,1);
% plot(mval, 'o');
cutoff = mad(mval, 1) + median(mval);

keep = mval > cutoff;
filtered_data = transposedData(:, keep)';
% subplot(1,2,2);
% plot(max(abs(filtered_data)),'o');


end
Expand Down
9 changes: 2 additions & 7 deletions src/getGlobalModules.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,7 @@
inds = find(contains(model.rxns, modelEP.rxns(module_uniqRxns)));

% extract submodel of module rxns
% subModel = efmSubmodelExtractionAsSBML_raven(iAdipo_ex, inds);
% metaboliteList = subModel.mets(find(ismember(subModel.metNames, ubiquitousMets)));
% subModel_woUb = removeMetabolites(subModel, metaboliteList, 0);

subModel = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns);
% subModel2 = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns, true, ubiquitousMets);

% remove reactions that contain only ubiquitous metabolites
subModel_woUb = removeMets(subModel, ubiquitousMets, true, true);
Expand All @@ -30,8 +25,8 @@
% create attribute file
attTab = createAttribFile(modelEP, subModel_woUb, singleModuleRxnInds, singlesIDs);

% writetable(graphTab, graphFileName, 'Delimiter', ';');
% writetable(attTab, attFileName, 'Delimiter', ';');
writetable(graphTab, graphFileName, 'Delimiter', ';');
writetable(attTab, attFileName, 'Delimiter', ';');

end

32 changes: 10 additions & 22 deletions src/getPCs.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [rotatedComp, numRot_perVar, sortedVariance, cols] = getPCs(covMatrix, var)
function [rotatedComp, numRot_perVar, sortedVariance, cols, rem] = getPCs(covMatrix, var)
% This function takes a covariance matrix, computes its principal components and rotates
% the components that explain certain (ex. 99.9%) variation in the sampled space
%
Expand All @@ -25,22 +25,13 @@
% ..Author:
% Chaitra Sarathy, 31 Aug 2020 (initial commit of ComMet)


% mean centring the sampled data points
% for ii=1:size(data,1)
% meanCentred(ii,:) = data(ii,:)-mean(data(ii,:));
% end


% basis rotation (PCA) with cols=variables(rxns), rows=sampled datapoints
% [coeff,newdata,latend,tsd,variance] = pca(meanCentred');
% "coeff" are the principal component vectors. These are the eigenvectors of the covariance matrix.

tic
[coeff,D] = eig(covMatrix);

% calculate variance explained by normalizing eigen values
variance = D / sum(D(:));
% aa=sort(diag(D),'descend');

% sort the variances
eigVals = diag(variance)*100;
Expand All @@ -50,27 +41,24 @@
numRot = length(find(cumsum(sortedVariance)<=var));
cols = cell2mat(arrayfun(@(x) find(eigVals == x), sortedVariance(1:numRot),'uni', 0)) ;


% numRot = length(find(cumsum(variance)<=95));
% Code for generating plot of variance vs numPCs
% cumulative variance
numRot_perVar=[];
num = 0:0.1:99.9;
for tt=1:length(num)
numRot_perVar(tt) = length(find(cumsum(sortedVariance)<=num(tt)));
end
plot(numRot_perVar,num, '*')
% prepare the coefficients for rotation, select the components explaining
% 99.9% variation and then rotate
% EROOR FIX: 'svd infinity' error was due to zeros in the coeff matrix, so remove zeros
[a,~] = find(coeff(:,cols)==0);
forRotation = coeff(:,cols);
forRotation(unique(a),:)=[];

% varimax rotation
% ERROR FIX: Passing 'Maxit', 1500 fixes the Error: Iteration limit exceeded for factor rotation.
[rem,~] = find(coeff(:,cols)==0);
forRotation = coeff(:,cols);
forRotation(unique(rem),:)=[];
rotatedComp = rotatefactors(forRotation, 'Maxit', 1000, 'Normalize', 'off');

toc
tPCA_Rot=toc;

% re-insert the removed rows to maintain consistency in indices
rotatedComp = insertrows(rotatedComp,zeros(1,size(rotatedComp,2)),unique(rem));

end

1 change: 1 addition & 0 deletions src/pre_processing.m
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
sol = linprog(ei,[],[],model0.S,model0.b,model0.lb, model0.ub, [], options);
lb(i) = max(model0.lb(i), sol(i));
sol = linprog(-ei,[],[],model0.S,model0.b,model0.lb, model0.ub, [], options);
% fprintf('%d\n',i)
ub(i) = min(model0.ub(i), sol(i));
ei(i) = 0;
end
Expand Down
21 changes: 8 additions & 13 deletions src/selectPCs.m
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
function [pcs_selected_cond1, pcs_selected_cond2] = selectPCs(selectedCols, cutoff, numIters, numComps_cond1)
function [pcs_selected_cond1, pcs_selected_cond2, cutoff, features_n_freq] = selectPCs(selectedCols, numIters, numComps_cond1)
% This function selects PCs based on the frequency they were estimated by
% ICA as a unique feature

features_n = cat(1, selectedCols.all{:});
features_n_freq = sortrows(tabulate(features_n), 2, 'descend');
features_n_freq(:,4) = features_n_freq(:,2)/numIters*100;
% plot(features_n_freq(:,4), 'black','LineWidth',2)
% xlim([-10 1040])
% ylim([-5 100])
% set(gca, 'FontSize', 18, 'FontWeight','bold')
% xlabel("Index of features")
% ylabel("Frequency of estimation (%)")
% axis square

% Criteria: top 20% of the features
pcs_selected = features_n_freq(1:round(length(features_n_freq)*cutoff), 1);

% Criteria: cut off is the knee point of the frequency curve
[~, cutoff] = knee_pt(features_n_freq(:,4),1:length(features_n_freq));
pcs_selected = features_n_freq(1:cutoff, 1);

% convert serial index to condition specific index
% pcs_selected = sort([colp;coln]);
inds = pcs_selected > numComps_cond1;
pcs_selected_cond1 = pcs_selected(pcs_selected < numComps_cond1); %pcs_selected(1:(min(inds)-1));
pcs_selected_cond1 = pcs_selected(pcs_selected <= numComps_cond1);
pcs_selected_cond2 = pcs_selected(inds)-numComps_cond1;


Expand Down

0 comments on commit cdb1c54

Please sign in to comment.