Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/unbalanced_rxns #222

Merged
merged 16 commits into from
Jun 12, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 283 additions & 0 deletions ComplementaryData/modelCuration/modMetsandSmatrix.tsv

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions ComplementaryScripts/modelCuration/changerxn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
function model = changerxn(model,rxnID,rxnformula,grRule)
% changerxn
% Change the metabolites in the reaction
%
% model COBRA model structure
% rxnID ID of the reaction i.e. identifier in model.rxns
% rxnFormula Reaction formula in string format
% (A [comp] + B [comp] -> C [comp])
% For metabolites with space in model.metNames e.g.
% 1-pyrroline-3-hydroxy-5-carboxylic acid, use symbol '&'
% to substitute space ' ' i.e. 1-pyrroline-3-hydroxy-5-carboxylic&acid
% grRule Gene-reaction rule in boolean format (and/or allowed)
% (opt, default model.grRule from RAVEN model structure)
%
% Note: model.mets is generated for new metabolites with prefix 's_'
%
% Usage: model = changerxn(model,rxnID,rxnformula,grRule)
%
% Feiran Li, 2020-05-24
% Cheng Wei Quan (Eiden), added documentation, 2020-05-24

[~,idx] = ismember(rxnID,model.rxns);
if nargin < 4
if isfield(model,'grRules')
grRule = model.grRules{idx};
else
model1 = ravenCobraWrapper(model);
grRule = model1.grRules{idx};
end
end

rxnformula = strrep(rxnformula,' [','[');
[metaboliteList, stoichCoeffList, revFlag] = parseRxnFormula(rxnformula);
metaboliteList = strrep(metaboliteList,'[',' [');
metaboliteList = strrep(metaboliteList,'&',' ');
comps = split(metaboliteList', ' [');
comps = comps(:,2);
comps = strrep(comps,']','');
CONValldata = cat(2,model.compNames,model.comps);
[~,b] = ismember(comps,CONValldata(:,1));
comps = CONValldata(b,2);

%mapping mets to model.metnames, get s_ index for new mets
for j = 1:length(metaboliteList)
[~,metindex] = ismember(metaboliteList(j),model.metNames);
if metindex ~= 0
mets(j) = model.mets(metindex);
elseif metindex == 0
newID = getNewIndex(model.mets);
mets(j) = strcat('s_',newID,'[',comps(j),']');
model = addMetabolite(model,char(mets(j)), ...
'metName',metaboliteList{j});
end
end


[model, rxnIDexists] = addReaction(model,...
rxnID,...
'reactionName', model.rxnNames{idx},...
'metaboliteList',mets,...
'stoichCoeffList',stoichCoeffList,...
'reversible',revFlag,...
'geneRule',grRule,...
'checkDuplicate',1);

end
84 changes: 84 additions & 0 deletions ComplementaryScripts/modelCuration/checkMassChargeBalance.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
% This function is to identify charge and mass balance for reactions
%
% Input: model and reaction identifier(s) in model.rxns (e.g. 'r_0001')
% Output: result with pass or error
% NOTE: getElementalBalance.m is a function from RAVEN
%
% modified from Feiran Li's script 'checkBalanceforSce.m'
%
% Cheng Wei Quan (Eiden), 2020-05-06
eiden309 marked this conversation as resolved.
Show resolved Hide resolved

function MassChargeresults = checkMassChargeBalance(model,rxn)
exchangeRxns = findExcRxns(model);
MassChargeresults{length(rxn),5} = [];
for i = 1:length(rxn)
[~,rxnID] = ismember(rxn(i),model.rxns);
if rxnID ~= 0
if exchangeRxns(rxnID) == 1
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'exchange'};
else
%check mass balance
balanceStructure=getElementalBalance(model,rxn(i));
unbalancedidx = find(balanceStructure.balanceStatus==0);
missingidx = find(balanceStructure.balanceStatus==-1);
idx = union(unbalancedidx,missingidx);
for j=1:length(idx)
dif = balanceStructure.leftComp(idx(j),:) - balanceStructure.rightComp(idx(j),:);
mets = find(~dif==0);
difference = '';
for k=1:length(mets)
difference = strcat(difference,balanceStructure.elements.abbrevs(mets(k)));
difference = strcat(difference,num2str(dif(mets(k))));
end
out = difference;
end
%check charge balance
mets=find(model.S(:,rxnID));
coef = model.metCharges(mets);
if length(mets) == length(coef)
indvCharge = model.S(mets,rxnID).*coef;
balanceCharge = sum(model.S(mets,rxnID).*coef);
else
balanceCharge = -1;
end
if balanceStructure.balanceStatus == 1 && balanceCharge == 0
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'pass'};
elseif balanceStructure.balanceStatus == 1 && balanceCharge ~= 0
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'checkMetCharge'};
MassChargeresults(arrayidx,3) = {indvCharge};
MassChargeresults(arrayidx,4) = {balanceCharge};
elseif balanceStructure.balanceStatus == 0 && balanceCharge == 0
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'checkMetFormula'};
MassChargeresults(arrayidx,5) = out;
elseif balanceStructure.balanceStatus == 0 && balanceCharge ~= 0
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'checkMetChargeAndFormula'};
MassChargeresults(arrayidx,3) = {indvCharge};
MassChargeresults(arrayidx,4) = {balanceCharge};
MassChargeresults(arrayidx,5) = out;
elseif balanceStructure.balanceStatus == -1
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = model.rxns(rxnID);
MassChargeresults(arrayidx,2) = {'unbalanced due to missing information'};
MassChargeresults(arrayidx,3) = {indvCharge};
MassChargeresults(arrayidx,4) = {balanceCharge};
MassChargeresults(arrayidx,5) = {'manual check required'};
end
end
else
arrayidx = find(cellfun('isempty', MassChargeresults),1);
MassChargeresults(arrayidx,1) = {'unbalanced due to error'};
MassChargeresults(arrayidx,2) = {'manual check required'};
end
end

end
169 changes: 169 additions & 0 deletions ComplementaryScripts/modelCuration/modMetsandSmatrix.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
% This script fixes unbalanced reaction in the model based on data from modMetsandSmatrix.tsv
%
% modMetsandSmatrix.tsv includes details on changes to current metFormula
% and metCharges, after manual curation of selected unbalanced reactions in the model
%
% Inputs: model and modMetsandSmatrix.tsv
% Note: functions checkMassChargeBalance.m and changerxn.m are required
%
% Cheng Wei Quan (Eiden), 2020-05-20

%Load model
cd ..
model = loadYeastModel;

%Load modMetsandSmatrix.tsv
fid = fopen('../ComplementaryData/modelCuration/modMetsandSmatrix.tsv');
format = repmat('%s ',1,14);
format = strtrim(format);
temp = textscan(fid,format,'Delimiter','\t','HeaderLines',0);
for i = 1:length(temp)
curationfile(:,i) = temp{i}; %use {} instead of () for cell array
end
commentLines = startsWith(curationfile(:,1),'#');
curationfile(commentLines,:) = [];
fclose(fid);

%Different data sets are 'bracketed' i.e. separated by % - use for indexing
brackets = startsWith(curationfile(:,1),'%');
idx = find(brackets);
idx_mets = idx(1)+1:idx(2)-1;

%Separate data into various cell arrays
updatemets = curationfile(idx_mets,1:11);

%Add 2 new metabolites to model
metList = {'trans-4-hydroxy-L-proline [cytoplasm]';'2,3-dihydroxy-3-methylbutanoate [cytoplasm]'};
metFormula = {'C5H9NO3';'C5H9O4'};
metCharge = {0;-1};
metChEBIID = {'CHEBI:18072';'CHEBI:11424'};
metKEGGID = {'C01157';'C04039'};
metMetaNetXID = {'MNXM87584';'MNXM734'};

comps = split(metList, ' [');
comps = comps(:,2);
comps = strrep(comps,']','');
CONValldata = cat(2,model.compNames,model.comps);
[~,b] = ismember(comps,CONValldata(:,1));
comps = CONValldata(b,2);

for i = 1:length(metList)
newID = getNewIndex(model.mets);
mets(i) = strcat('s_',newID,'[',comps(i),']');
model = addMetabolite(model,char(mets(i)), ...
'metName',metList{i},'metFormula', ...
metFormula{i},'Charge',metCharge{i}, ...
'ChEBIID',metChEBIID{i},'KEGGId',metKEGGID{i});
%Manually add MetaNetXID as addMetabolite does not include it
met_idx = find(ismember(model.mets,mets(i)));
model.metMetaNetXID(met_idx) = metMetaNetXID(i);
end

cd modelCuration

%Modify the following rxns:
%r_0687: L-proline is replaced by trans-4-hydroxy-L-proline
model = changerxn(model, 'r_0687', '1-pyrroline-3-hydroxy-5-carboxylic&acid [cytoplasm] + 2 H+ [cytoplasm] + NADPH [cytoplasm] -> trans-4-hydroxy-L-proline [cytoplasm] + NADP(+) [cytoplasm]');
%r_4577: 3-hydroxy-3-methyl-2-oxobutanoate is replaced by 2,3-dihydroxy-3-methylbutanoate
model = changerxn(model, 'r_4577', '3-methyl-2-oxobutanoate [cytoplasm] + H2O [cytoplasm] -> 2,3-dihydroxy-3-methylbutanoate [cytoplasm]');
model = addSBOterms(model); %Add SBO terms
model = rmfield(model,'grRules'); %remove field 'grRules' in model

%Correction of metFormula and metCharges to balance equation
met = updatemets(:,1);
[~,idx_met] = ismember(met,model.mets);
modelR = ravenCobraWrapper(model);
metNames = modelR.metNames(idx_met);
currentFormula = updatemets(:,2);
newFormula = updatemets(:,3);
currentCharge = str2double(updatemets(:,4));
newCharge = str2double(updatemets(:,5));
MNXNotes = updatemets(:,11);
metResults{500,6} = [];

for i = 1:length(metNames)
idx_met = find(ismember(modelR.metNames,metNames(i)));
currentmetFormula = model.metFormulas(idx_met);
currentmetCharges = model.metCharges(idx_met);

for j = 1:length(idx_met)
%Preliminary check of metBalance/metCharges before change
idx_rxn = find(model.S(idx_met(j),:));
rxn = model.rxns(idx_rxn);
MassChargeresults = checkMassChargeBalance(model,rxn);

if isequal(currentmetFormula(j),newFormula(i))
%no action required
elseif ~isequal(currentmetFormula(j),currentFormula(i)) && ismember(currentFormula(i),'[]') || isequal(currentmetFormula(j),currentFormula(i)) && ~ismember(newFormula(i),'[]')
model.metFormulas(idx_met(j)) = newFormula(i);
if ~contains(model.metNotes(idx_met(j)),'| metFormula curated (PR #222)')
model.metNotes(idx_met(j)) = join([model.metNotes(idx_met(j)),'| metFormula curated (PR #222)']);
end
elseif ~isequal(currentmetFormula(j),currentFormula(i))
warning('error with metFormula matching in modMetsandSmatrix.tsv for %s, idx_met: %d', string(metNames(i)), idx_met(j));
end

if isequal(currentmetCharges(j),newCharge(i))
%no action required
elseif (isequal(currentmetCharges(j),currentCharge(i)) && ~isnan(newCharge(i))) || (ismissing(currentmetCharges(j)) && isnan(currentCharge(i)) && ~isnan(newCharge(i)))
model.metCharges(idx_met(j)) = newCharge(i);
if ~contains(model.metNotes(idx_met(j)),'| metCharge curated (PR #222)')
model.metNotes(idx_met(j)) = join([model.metNotes(idx_met(j)),'| metCharge curated (PR #222)']);
end
elseif ~isequal(currentmetCharges(j),currentCharge(i)) && ~ismissing(currentmetCharges(j))
warning('error with metCharges matching in modMetsandSmatrix.tsv for %s, idx_met: %d', string(metNames(i)), idx_met(j));
end

%Find rxns which are unbalanced before change
temp_rxn = MassChargeresults(:,1);
temp_notes = MassChargeresults(:,2);
temp_idx = find(~ismember(temp_notes,'pass'));
unbalancedrxnBefore = temp_rxn(temp_idx);
unbalancedNotes = temp_notes(temp_idx);
unbalancedBefore = length(find(~ismember(temp_notes,'pass')));

arrayidx = find(cellfun('isempty', metResults),1);
metResults(arrayidx,1) = cellstr(metNames(i));
eiden309 marked this conversation as resolved.
Show resolved Hide resolved
metResults(arrayidx,2) = {[unbalancedrxnBefore,unbalancedNotes]};
metResults(arrayidx,4) = num2cell(unbalancedBefore);
end
end

%remove whitespace(s) when adding notes into metNotes
model.metNotes(:) = strtrim(model.metNotes(:));

%Check of metFormula/metCharges after change (for updatemets)
for i = 1:length(metNames)
idx_met = find(ismember(modelR.metNames,metNames(i)));
for j = 1:length(idx_met)
idx_rxn = find(model.S(idx_met(j),:));
rxn = model.rxns(idx_rxn);
MassChargeresults2 = checkMassChargeBalance(model,rxn);

%Find rxns which are unbalanced after change
temp_rxn2 = MassChargeresults2(:,1);
temp_notes2 = MassChargeresults2(:,2);
temp_idx2 = find(~ismember(temp_notes2,'pass'));
unbalancedrxnAfter = temp_rxn2(temp_idx2);
unbalancedNotes2 = temp_notes2(temp_idx2);
unbalancedAfter = length(find(~ismember(temp_notes2,'pass')));

if isempty(unbalancedrxnAfter)
arrayidx = find(cellfun('isempty', metResults(:,3)),1);
metResults(arrayidx,3) = {'NIL'};
metResults(arrayidx,5) = {0};
netChange = str2double(string((metResults(arrayidx,4)))) - unbalancedAfter;
metResults(arrayidx,6) = num2cell(netChange);
else
arrayidx = find(cellfun('isempty', metResults(:,3)),1);
metResults(arrayidx,3) = {[unbalancedrxnAfter,unbalancedNotes2]};
metResults(arrayidx,5) = num2cell(unbalancedAfter);
netChange = str2double(string((metResults(arrayidx,4)))) - unbalancedAfter;
metResults(arrayidx,6) = num2cell(netChange);
end
end
end

%Save model
cd ..
saveYeastModel(model);
4 changes: 2 additions & 2 deletions ModelFiles/txt/yeastGEM.txt
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ r_0680 s_0805[e] + s_0970[e] -> s_0420[e] + s_0974[e] ( YLR155C or YLR157C or
r_0681 s_0180[c] + s_0677[c] -> s_0282[c] + s_0991[c] YLR027C 0.00 1000.00 0.00
r_0682 s_0182[m] + s_0678[m] -> s_0283[m] + s_0993[m] YKL106W 0.00 1000.00 0.00
r_0683 s_0184[p] + s_0679[p] -> s_0284[p] + s_0995[p] YLR027C 0.00 1000.00 0.00
r_0687 s_0116[c] + 2 s_0794[c] + s_1212[c] -> s_1035[c] + s_1207[c] YER023W 0.00 1000.00 0.00
r_0687 s_0116[c] + 2 s_0794[c] + s_1212[c] -> s_1207[c] + s_4207[c] YER023W 0.00 1000.00 0.00
r_0688 s_0794[c] + s_1151[c] + s_1212[c] -> s_0062[c] + s_1207[c] ( YHR104W or YOL151W ) 0.00 1000.00 0.00
r_0689 s_1039[c] -> s_0419[c] + s_1399[c] YCL064C 0.00 1000.00 0.00
r_0690 s_1039[c] + s_1207[c] -> s_0794[c] + s_0960[c] + s_1212[c] YMR226C 0.00 1000.00 0.00
Expand Down Expand Up @@ -3938,7 +3938,7 @@ r_4573 s_0529[c] + s_1198[c] + s_4184[c] -> s_0373[c] + s_0456[c] + s_1203[c]
r_4574 s_0529[c] + s_1207[c] + s_4184[c] -> s_0373[c] + s_0456[c] + s_1212[c] 0.00 1000.00 0.00
r_4575 s_0794[c] + s_4184[c] -> s_0359[c] + s_0456[c] 0.00 1000.00 0.00
r_4576 s_1207[c] + s_4185[c] <=> s_0794[c] + s_1212[c] + s_4186[c] -1000.00 1000.00 0.00
r_4577 s_0232[c] -> s_4186[c] 0.00 1000.00 0.00
r_4577 s_0232[c] + s_0803[c] -> s_4208[c] 0.00 1000.00 0.00
r_4578 s_1212[c] + s_4187[c] -> s_0794[c] + s_1207[c] + s_4186[c] 0.00 1000.00 0.00
r_4579 s_0794[c] + 2 s_1399[c] -> s_0456[c] + s_4188[c] 0.00 1000.00 0.00
r_4580 s_1207[c] + s_4185[c] <=> s_0794[c] + s_1212[c] + s_4188[c] -1000.00 1000.00 0.00
Expand Down
Loading