From acd52ff9828ccae4dbfeeb4f20040af3b8d374e7 Mon Sep 17 00:00:00 2001 From: HossFir Date: Fri, 19 Apr 2024 00:01:52 +0200 Subject: [PATCH 1/9] Update 'reportEnzymeUsage' to modify the total protein pool by summing up protein pool exchange and proteomics integrated, adjusting 'percUsage' accordingly. --- src/geckomat/utilities/reportEnzymeUsage.m | 24 +++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/geckomat/utilities/reportEnzymeUsage.m b/src/geckomat/utilities/reportEnzymeUsage.m index 85d5a8acd..d3ec223b3 100644 --- a/src/geckomat/utilities/reportEnzymeUsage.m +++ b/src/geckomat/utilities/reportEnzymeUsage.m @@ -61,7 +61,29 @@ topUsage.rxnNames = {}; topUsage.grRules = {}; -protPool = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange'))/100; +% Calculate the protein pool flux from the 'prot_pool_exchange' reaction +protPoolExchangeFlux = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); + +% Obtain the fluxes +sol = solveLP(ecModel); +if isempty(sol.x) + error('No solution found.'); +end +fluxValues = sol.x; + +% Sum fluxes for all 'usage_prot_' reactions, excluding the 'usage_prot_standard' +usageProtIndices = startsWith(ecModel.rxns, 'usage_prot_') & ... + ~contains(ecModel.rxns, 'standard'); + +% Sum the absolute values of the usage fluxes +totalUsageProtFlux = sum(abs(fluxValues(usageProtIndices))); + +% Define the new protein pool as the sum of prot_pool_exchange flux and total usage_prot fluxes +protPool = (protPoolExchangeFlux + totalUsageProtFlux)/100; + +fprintf('Total Protein Pool Flux: %f\n', protPoolExchangeFlux); +fprintf('total UsageProt Flux: %f\n', totalUsageProtFlux); + for i=1:numel(topEnzyme) [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); From 978519ed5a2eb0eb204b210697904d8c6fff8cbf Mon Sep 17 00:00:00 2001 From: HossFir Date: Fri, 19 Apr 2024 00:05:17 +0200 Subject: [PATCH 2/9] Update 'reportEnzymeUsage' to modify the total protein pool by summing up protein pool exchange and proteomics integrated, adjusting 'percUsage' accordingly. --- git | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 git diff --git a/git b/git new file mode 100644 index 000000000..e69de29bb From 2ab84b239da48266d0ee5e08696447ae42fd975f Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Sun, 28 Apr 2024 14:41:10 +0200 Subject: [PATCH 3/9] feat: enzymeUsage parses fluxes to output --- src/geckomat/utilities/enzymeUsage.m | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/geckomat/utilities/enzymeUsage.m b/src/geckomat/utilities/enzymeUsage.m index 2af9f6d83..8f622ccd5 100644 --- a/src/geckomat/utilities/enzymeUsage.m +++ b/src/geckomat/utilities/enzymeUsage.m @@ -26,6 +26,7 @@ % absUsage vector of absolute enzyme usages % UB vector of enzyme exchange reaction upper bounds % protID string array of matching protein IDs +% fluxes vector of fluxes, copy of input fluxes % % Usage: % usageData = enzymeUsage(ecModel,fluxes,zero) @@ -42,6 +43,7 @@ usageData.LB = ecModel.lb(rxnIdx); usageData.absUsage = abs(fluxes(rxnIdx)); usageData.capUsage = abs(usageData.absUsage./usageData.LB); +usageData.fluxes = fluxes; if ~zero nonzero = usageData.absUsage<0; From 1b45edd21f6910ffb26feabf79e34e6afbbc7c79 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Sun, 28 Apr 2024 14:44:25 +0200 Subject: [PATCH 4/9] feat: reportEnzymeUsage input and output - use fluxes from usageData - give protein pool in usageReport instead of printing to command window --- src/geckomat/utilities/reportEnzymeUsage.m | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/geckomat/utilities/reportEnzymeUsage.m b/src/geckomat/utilities/reportEnzymeUsage.m index d3ec223b3..5d793990d 100644 --- a/src/geckomat/utilities/reportEnzymeUsage.m +++ b/src/geckomat/utilities/reportEnzymeUsage.m @@ -1,10 +1,10 @@ -function usageReport = topEnzymeUsage(ecModel, usageData, highCapUsage, topAbsUsage) +function usageReport = reportEnzymeUsage(ecModel, usageData, highCapUsage, topAbsUsage) % reportEnzymeUsage % Summarizes the results from enzymeUsage. % % Input: % ecModel a GECKO3 ecModel -% usageData output from reportEnzymeUsage +% usageData output from enzymeUsage % highCapUsage minimum ratio of enzyme capacity usage to be considered % as high usage (Optional, default 0.9, refering to a % minimum of 90% capacity usage) @@ -64,12 +64,7 @@ % Calculate the protein pool flux from the 'prot_pool_exchange' reaction protPoolExchangeFlux = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); -% Obtain the fluxes -sol = solveLP(ecModel); -if isempty(sol.x) - error('No solution found.'); -end -fluxValues = sol.x; +fluxValues = usageData.fluxes; % Sum fluxes for all 'usage_prot_' reactions, excluding the 'usage_prot_standard' usageProtIndices = startsWith(ecModel.rxns, 'usage_prot_') & ... @@ -81,10 +76,6 @@ % Define the new protein pool as the sum of prot_pool_exchange flux and total usage_prot fluxes protPool = (protPoolExchangeFlux + totalUsageProtFlux)/100; -fprintf('Total Protein Pool Flux: %f\n', protPoolExchangeFlux); -fprintf('total UsageProt Flux: %f\n', totalUsageProtFlux); - - for i=1:numel(topEnzyme) [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); rxnNumber = numel(rxns); @@ -98,5 +89,7 @@ topUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames; topUsage.grRules(end+1:end+rxnNumber,1) = grRules; end -usageReport.topAbsUsage = struct2table(topUsage); +usageReport.topAbsUsage = struct2table(topUsage); +usageReport.totalProtPool = protPoolExchangeFlux; +usageReport.totalUsageFlux = totalUsageProtFlux; end From ef711db86d6a942f92c5707344c5c762996cd427 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Sun, 28 Apr 2024 17:11:21 +0200 Subject: [PATCH 5/9] delete empty git file --- git | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 git diff --git a/git b/git deleted file mode 100644 index e69de29bb..000000000 From 86b8f3cd5657ef9033f98b4feddfa7043f94d366 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Sun, 28 Apr 2024 17:11:40 +0200 Subject: [PATCH 6/9] Delete version.txt --- version.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 version.txt diff --git a/version.txt b/version.txt deleted file mode 100644 index ff365e06b..000000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -3.1.3 From 5e9ca54b5a0d5de114911461ff24e4a8b369a08a Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Mon, 27 May 2024 00:45:20 +0200 Subject: [PATCH 7/9] fix: minor patches (#374) * fix: loadDatabases error when uniprot dl fails * chore: updateDocumentation * chore: update Actions dependency and trying to fix an issue: https://stackoverflow.com/questions/70435286/resource-not-accessible-by-integration-on-github-post-repos-owner-repo-ac * fix: calculateFfactor read header in paxDB.tsv sometimes, the first # in the file is missed * chore: updateGECKOdoc * fix: runDLKcat parse full paths * fix: applyKcatConstraints if all kcat are zero * fix: getStandardKcat also if enzyme is missing * feat: removeStandardKcat new function * chore: RAVEN release 2.9.1 as dependency --- .github/workflows/tests.yml | 9 +- GECKOInstaller.m | 34 +- doc/index.html | 33 +- .../change_model/applyKcatConstraints.html | 101 +-- .../gather_kcats/getStandardKcat.html | 596 +++++++++--------- doc/src/geckomat/gather_kcats/index.html | 2 +- .../gather_kcats/removeStandardKcat.html | 127 ++++ doc/src/geckomat/gather_kcats/runDLKcat.html | 51 +- .../get_enzyme_data/loadDatabases.html | 303 ++++----- .../sensitivityTuning.html | 8 +- .../limit_proteins/calculateFfactor.html | 2 +- .../limit_proteins/flexibilizeEnzConcs.html | 10 +- .../limit_proteins/getConcControlCoeffs.html | 4 +- doc/src/geckomat/utilities/enzymeUsage.html | 53 +- .../geckomat/utilities/reportEnzymeUsage.html | 57 +- doc/src/geckomat/utilities/saveEcModel.html | 6 +- .../change_model/applyKcatConstraints.m | 49 +- src/geckomat/gather_kcats/getStandardKcat.m | 45 +- .../gather_kcats/removeStandardKcat.m | 63 ++ src/geckomat/gather_kcats/runDLKcat.m | 3 + src/geckomat/get_enzyme_data/loadDatabases.m | 5 +- .../sensitivityTuning.m | 8 +- .../limit_proteins/calculateFfactor.m | 2 +- .../limit_proteins/flexibilizeEnzConcs.m | 10 +- .../limit_proteins/getConcControlCoeffs.m | 4 +- .../manual_tests/TestYeastGEMSimpleWorkflow.m | 8 +- tutorials/full_ecModel/code/plotlightVSfull.m | 6 +- tutorials/full_ecModel/protocol.m | 12 +- tutorials/light_ecModel/protocol.m | 12 +- 29 files changed, 947 insertions(+), 676 deletions(-) create mode 100644 doc/src/geckomat/gather_kcats/removeStandardKcat.html create mode 100644 src/geckomat/gather_kcats/removeStandardKcat.m diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cea835e8f..86c33b99f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,5 +1,10 @@ name: Tests +permissions: + contents: write + pull-requests: write + repository-projects: write + on: [pull_request] jobs: @@ -8,10 +13,10 @@ jobs: steps: - name: Fetch GECKO - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Fetch RAVEN - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: "SysBioChalmers/RAVEN" path: "RAVEN" diff --git a/GECKOInstaller.m b/GECKOInstaller.m index 2811abc84..9a088f465 100644 --- a/GECKOInstaller.m +++ b/GECKOInstaller.m @@ -10,7 +10,7 @@ sourceDir = fileparts(which(mfilename)); paths = GECKOInstaller.GetFilteredSubPaths(sourceDir, GECKOInstaller.FILE_FILTER); - GECKOInstaller.checkRAVENversion('2.8.3'); % Minimum RAVEN version + GECKOInstaller.checkRAVENversion('2.9.1'); % Minimum RAVEN version % Check unique function names if ~exist("checkFunctionUniqueness.m") @@ -62,26 +62,42 @@ end function checkRAVENversion(minmVer) + wrongVersion = false; try - currVer = checkInstallation('versionOnly'); + [currVer, installType] = checkInstallation('versionOnly'); if strcmp(currVer,'develop') printOrange('WARNING: Cannot determine your RAVEN version as it is in a development branch.\n') - else + else currVerNum = str2double(strsplit(currVer,'.')); minmVerNum = str2double(strsplit(minmVer,'.')); for i=1:3 if currVerNum(i)here, '... + 'including running ''checkInstallation()''.']) + end + if wrongVersion + switch installType + case 0 + installType = 'advanced'; + case 1 + installType = 'easy'; + case 2 + installType = 'medium'; + end + error(['Installed RAVEN version is %s, while the minimum is %s. '... + 'Upgrade RAVEN by following the instructions available ' ... + 'here. Do not attempt to run GECKO before upgrading.'], ... + currVer,minmVer) end - end function checkGECKOversion @@ -113,7 +129,7 @@ function checkRAVENversion(minmVer) fprintf('\n'); end else - fprintf('GECKO installed, unknown version (cannot find version.txt)\n') + fprintf('GECKO installed, unknown version (cannot find version.txt).\n') end end end diff --git a/doc/index.html b/doc/index.html index 8a709fdb6..f332bb806 100644 --- a/doc/index.html +++ b/doc/index.html @@ -19,22 +19,23 @@

Matlab Directories

Matlab Files found in these Directories

- - - - - - - - - - - - - - - -
ModelAdapter copyECtoGEM getKcatAcrossIsozymes readDLKcatOutput
ModelAdapterManager ecFSEOF getReactionsFromEnzyme reportEnzymeUsage
abc_max ecFVA getStandardKcat runDLKcat
adapterTemplate enzymeUsage getSubsetEcModel saveEcModel
addCarbonNum fillEnzConcs getcarbonnum selectKcatValue
addNewRxnsToEC findECInDB getrSample sensitivityTuning
anaerobicModel findGECKOroot loadBRENDAdata setKcatForReactions
applyComplexData findMaxValue loadConventionalGEM setProtPoolSize
applyCustomKcats findMetSmiles loadDatabases sigmaFitter
applyKcatConstraints flexibilizeEnzConcs loadEcModel startGECKOproject
bayesianSensitivityTuning fuzzyKcatMatching loadFluxData truncateValues
calculateFfactor getComplexData loadProtData updateGECKOdoc
calculateMW getConcControlCoeffs makeEcModel updateProtPool
changeMedia getECfromDatabase mapRxnsToConv updateprior
constrainEnzConcs getECfromGEM mergeDLKcatAndFuzzyKcats writeDLKcatInput
constrainFluxData getECstring plotEcFVA
+ ModelAdapter ecFSEOF getStandardKcat runDLKcat + ModelAdapterManager ecFVA getSubsetEcModel saveEcModel + abc_max enzymeUsage getcarbonnum selectKcatValue + adapterTemplate fillEnzConcs getrSample sensitivityTuning + addCarbonNum findECInDB loadBRENDAdata setKcatForReactions + addNewRxnsToEC findGECKOroot loadConventionalGEM setProtPoolSize + anaerobicModel findMaxValue loadDatabases sigmaFitter + applyComplexData findMetSmiles loadEcModel startGECKOproject + applyCustomKcats flexibilizeEnzConcs loadFluxData truncateValues + applyKcatConstraints fuzzyKcatMatching loadProtData updateGECKOdoc + bayesianSensitivityTuning getComplexData makeEcModel updateProtPool + calculateFfactor getConcControlCoeffs mapRxnsToConv updateprior + calculateMW getECfromDatabase mergeDLKcatAndFuzzyKcats writeDLKcatInput + changeMedia getECfromGEM plotEcFVA + constrainEnzConcs getECstring readDLKcatOutput + constrainFluxData getKcatAcrossIsozymes removeStandardKcat + copyECtoGEM getReactionsFromEnzyme reportEnzymeUsage
Generated by m2html © 2005
diff --git a/doc/src/geckomat/change_model/applyKcatConstraints.html b/doc/src/geckomat/change_model/applyKcatConstraints.html index b854c2483..5f6114c79 100644 --- a/doc/src/geckomat/change_model/applyKcatConstraints.html +++ b/doc/src/geckomat/change_model/applyKcatConstraints.html @@ -127,55 +127,58 @@

SOURCE CODE ^for i=1:numel(updateRxns) 0067 j=updateRxns(i); -0068 enzymes = find(model.ec.rxnEnzMat(j,:)); -0069 kcatLast = kcatFirst+numel(enzymes); -0070 kcatFirst = kcatFirst+1; -0071 newKcats(kcatFirst:kcatLast,1) = j; -0072 newKcats(kcatFirst:kcatLast,2) = enzymes; -0073 newKcats(kcatFirst:kcatLast,3) = model.ec.rxnEnzMat(j,enzymes); -0074 newKcats(kcatFirst:kcatLast,4) = model.ec.kcat(j); -0075 newKcats(kcatFirst:kcatLast,5) = model.ec.mw(enzymes); -0076 kcatFirst = kcatLast; -0077 end -0078 newKcats(kcatLast+1:end,:)=[]; -0079 -0080 sel = newKcats(:,4) > 0; %Only apply to non-zero kcat -0081 newKcats(sel,4) = newKcats(sel,4) * 3600; %per second -> per hour -0082 newKcats(sel,4) = newKcats(sel,5) ./ newKcats(sel,4); %MW/kcat -0083 newKcats(sel,4) = newKcats(sel,3) .* newKcats(sel,4); %Multicopy subunits. -0084 newKcats(~sel,4) = 0; %Results in zero-cost -0085 -0086 %Replace rxns and enzymes with their location in model -0087 [~,newKcats(:,1)] = ismember(model.ec.rxns(newKcats(:,1)),model.rxns); -0088 [~,newKcats(:,2)] = ismember(strcat('prot_',model.ec.enzymes(newKcats(:,2))),model.mets); -0089 linearIndices = sub2ind(size(model.S),newKcats(:,2),newKcats(:,1)); -0090 model.S(linearIndices) = -newKcats(:,4); %Substrate = negative -0091 -0092 else %GECKO light formulation, where prot_pool represents all usages -0093 prot_pool_idx = find(ismember(model.mets,'prot_pool')); -0094 %first remove the prefix of all rxns -0095 modRxns = extractAfter(model.ec.rxns,4); -0096 % Map ec-reactions to model.rxns -0097 [hasEc,~] = ismember(model.rxns,modRxns); -0098 hasEc = find(hasEc & updateRxns); -0099 [~,rxnIdx] = ismember(modRxns,model.rxns); -0100 for i = 1:numel(hasEc) -0101 % Get all isozymes per reaction -0102 ecIdx = find(rxnIdx == hasEc(i)); -0103 % ecIdx = strcmp(model.rxns(hasEc(i)),modRxns); -0104 % Multiply enzymes with their MW (they are then automatically -0105 % summed per reaction), and divide by their kcat, to get a vector -0106 % of MW/kcat values. -0107 MWkcat = (model.ec.rxnEnzMat(ecIdx,:) * model.ec.mw) ./ model.ec.kcat(ecIdx); -0108 % If kcat was zero, MWkcat is Inf. If no enzyme info was found, -0109 % MWkcat is NaN. Correct both back to zero -0110 MWkcat(isinf(MWkcat) | isnan(MWkcat)) = 0; -0111 % Select the lowest MW/kcat (= most efficient), and convert to /hour -0112 model.S(prot_pool_idx, hasEc(i)) = -min(MWkcat/3600); -0113 end -0114 end -0115 end -0116 +0068 if model.ec.kcat(j) ~= 0 +0069 enzymes = find(model.ec.rxnEnzMat(j,:)); +0070 kcatLast = kcatFirst+numel(enzymes); +0071 kcatFirst = kcatFirst+1; +0072 newKcats(kcatFirst:kcatLast,1) = j; +0073 newKcats(kcatFirst:kcatLast,2) = enzymes; +0074 newKcats(kcatFirst:kcatLast,3) = model.ec.rxnEnzMat(j,enzymes); +0075 newKcats(kcatFirst:kcatLast,4) = model.ec.kcat(j); +0076 newKcats(kcatFirst:kcatLast,5) = model.ec.mw(enzymes); +0077 kcatFirst = kcatLast; +0078 end +0079 end +0080 if exist('kcatLast','var') % If it does not, then no kcats are found +0081 newKcats(kcatLast+1:end,:)=[]; +0082 +0083 sel = newKcats(:,4) > 0; %Only apply to non-zero kcat +0084 newKcats(sel,4) = newKcats(sel,4) * 3600; %per second -> per hour +0085 newKcats(sel,4) = newKcats(sel,5) ./ newKcats(sel,4); %MW/kcat +0086 newKcats(sel,4) = newKcats(sel,3) .* newKcats(sel,4); %Multicopy subunits. +0087 newKcats(~sel,4) = 0; %Results in zero-cost +0088 +0089 %Replace rxns and enzymes with their location in model +0090 [~,newKcats(:,1)] = ismember(model.ec.rxns(newKcats(:,1)),model.rxns); +0091 [~,newKcats(:,2)] = ismember(strcat('prot_',model.ec.enzymes(newKcats(:,2))),model.mets); +0092 linearIndices = sub2ind(size(model.S),newKcats(:,2),newKcats(:,1)); +0093 model.S(linearIndices) = -newKcats(:,4); %Substrate = negative +0094 end +0095 else %GECKO light formulation, where prot_pool represents all usages +0096 prot_pool_idx = find(ismember(model.mets,'prot_pool')); +0097 %first remove the prefix of all rxns +0098 modRxns = extractAfter(model.ec.rxns,4); +0099 % Map ec-reactions to model.rxns +0100 [hasEc,~] = ismember(model.rxns,modRxns); +0101 hasEc = find(hasEc & updateRxns); +0102 [~,rxnIdx] = ismember(modRxns,model.rxns); +0103 for i = 1:numel(hasEc) +0104 % Get all isozymes per reaction +0105 ecIdx = find(rxnIdx == hasEc(i)); +0106 % ecIdx = strcmp(model.rxns(hasEc(i)),modRxns); +0107 % Multiply enzymes with their MW (they are then automatically +0108 % summed per reaction), and divide by their kcat, to get a vector +0109 % of MW/kcat values. +0110 MWkcat = (model.ec.rxnEnzMat(ecIdx,:) * model.ec.mw) ./ model.ec.kcat(ecIdx); +0111 % If kcat was zero, MWkcat is Inf. If no enzyme info was found, +0112 % MWkcat is NaN. Correct both back to zero +0113 MWkcat(isinf(MWkcat) | isnan(MWkcat)) = 0; +0114 % Select the lowest MW/kcat (= most efficient), and convert to /hour +0115 model.S(prot_pool_idx, hasEc(i)) = -min(MWkcat/3600); +0116 end +0117 end +0118 end +0119
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/getStandardKcat.html b/doc/src/geckomat/gather_kcats/getStandardKcat.html index 32cd120ef..603624868 100644 --- a/doc/src/geckomat/gather_kcats/getStandardKcat.html +++ b/doc/src/geckomat/gather_kcats/getStandardKcat.html @@ -28,11 +28,15 @@

SYNOPSIS ^DESCRIPTION ^

 getStandardKcat
-   Calculate an standard kcat and standard molecular weight (MW) that can be
-   used to apply enzyme constraints to reactions without any associated genes.
-   This is done by adding those reactions to model.ec, assign a "standard"
-   pseudoenzyme with the standard MW (median of all proteins in the organism)
-   and standard kcat (median from all kcat, or subsystem specific kcat).
+   Calculate an standard kcat and standard molecular weight (MW) that can
+   be used to apply enzyme constraints to reactions without any associated
+   enzymes. Such reactions have either an empty model.grRules field, or
+   they have no match in model.ec.rxns (which can be the case if the genes
+   in the model.grRules field could not be mapped to enzymes). This is
+   done by adding those reactions to model.ec, assign a "standard"
+   pseudoenzyme with the standard MW (median of all proteins in the
+   organism) and standard kcat (median from all kcat, or subsystem
+   specific kcat).
 
    A reaction is assigned a subSystem specific kcat values if the model
    has a subSystems field and the reaction is annotated with a subSystem.
@@ -45,25 +49,26 @@ 

DESCRIPTION ^DESCRIPTION ^SUBFUNCTIONS ^SOURCE CODE ^

0001 function [model, rxnsMissingGPR, standardMW, standardKcat, rxnsNoKcat] = getStandardKcat(model, modelAdapter, threshold, fillZeroKcat)
 0002 % getStandardKcat
-0003 %   Calculate an standard kcat and standard molecular weight (MW) that can be
-0004 %   used to apply enzyme constraints to reactions without any associated genes.
-0005 %   This is done by adding those reactions to model.ec, assign a "standard"
-0006 %   pseudoenzyme with the standard MW (median of all proteins in the organism)
-0007 %   and standard kcat (median from all kcat, or subsystem specific kcat).
-0008 %
-0009 %   A reaction is assigned a subSystem specific kcat values if the model
-0010 %   has a subSystems field and the reaction is annotated with a subSystem.
-0011 %   Only the first subSystem will be considered if multiple are annotated
-0012 %   to the same reaction.
-0013 %
-0014 %   Exchange, transport and pseudoreactions are filtered out, plus any
-0015 %   reaction identifiers specified in /data/pseudoRxns.tsv in the model
-0016 %   adapter folder.
+0003 %   Calculate an standard kcat and standard molecular weight (MW) that can
+0004 %   be used to apply enzyme constraints to reactions without any associated
+0005 %   enzymes. Such reactions have either an empty model.grRules field, or
+0006 %   they have no match in model.ec.rxns (which can be the case if the genes
+0007 %   in the model.grRules field could not be mapped to enzymes). This is
+0008 %   done by adding those reactions to model.ec, assign a "standard"
+0009 %   pseudoenzyme with the standard MW (median of all proteins in the
+0010 %   organism) and standard kcat (median from all kcat, or subsystem
+0011 %   specific kcat).
+0012 %
+0013 %   A reaction is assigned a subSystem specific kcat values if the model
+0014 %   has a subSystems field and the reaction is annotated with a subSystem.
+0015 %   Only the first subSystem will be considered if multiple are annotated
+0016 %   to the same reaction.
 0017 %
-0018 %   In addition, reactions that are annotated with an enzyme (and therefore
-0019 %   already in model.ec), but not assigned any reaction-specific kcat value
-0020 %   (their model.ec.kcat entry is either 0 or NaN), can be assigned standard
-0021 %   kcat values by a similar approach. However, those reactions will not be
-0022 %   linked to the "standard" pseudoenzyme, but will use the enzyme that they had
-0023 %   already been associated with.
-0024 %
-0025 %   Any pre-existing standard kcat assignments (identified by 'standard'
-0026 %   entires in model.ec.source) are removed when applying this function.
-0027 %
-0028 % Input:
-0029 %   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
-0030 %   modelAdapter    a loaded model adapter (Optional, will otherwise use the
-0031 %                   default model adapter).
-0032 %   threshold       a threshold to determine when use a kcat value based on
-0033 %                   the mean kcat of the reactions in the same subSystem or
-0034 %                   based on the median value of all the kcat in the model.
-0035 %                   Second option is used when the number of reactions in a
-0036 %                   determined subSystem is < threshold. (Optional, default = 10)
-0037 %   fillZeroKcat    logical whether zero kcat values should be replaced with
-0038 %                   standard kcat values. (Optional, default = true).
-0039 %
-0040 % Output:
-0041 %   model           ecModel where model.ec is expanded with a standard
-0042 %                   protein with standard kcat and standard MW, assigned to
-0043 %                   reactions without gene associations.
-0044 %   rxnsMissingGPR  a list of updated rxns identifiers with a standard value
-0045 %   standardMW      the standard MW value calculated
-0046 %   standardKcat    the standard Kcat value calculated
-0047 %   rxnsNoKcat      a list of rxns identifiers whose zero kcat has been replaced
-0048 %
-0049 %   While model.ec.kcat is populated, applyKcatConstraints would still need to
-0050 %   be run to apply the new constraints to the S-matrix.
-0051 %
-0052 % Usage:
-0053 %    [model, rxnsMissingGPR, standardMW, standardKcat, rxnsNoKcat] = getStandardKcat(model, modelAdapter, threshold, fillZeroKcat);
-0054 
-0055 if nargin < 2 || isempty(modelAdapter)
-0056     modelAdapter = ModelAdapterManager.getDefault();
-0057     if isempty(modelAdapter)
-0058         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
-0059     end
-0060 end
-0061 params = modelAdapter.getParameters();
-0062 
-0063 if nargin < 3 || isempty(threshold)
-0064     threshold = 10;
+0018 %   Exchange, transport and pseudoreactions are filtered out, plus any
+0019 %   reaction identifiers specified in /data/pseudoRxns.tsv in the model
+0020 %   adapter folder.
+0021 %
+0022 %   In addition, reactions that are annotated with an enzyme (and therefore
+0023 %   already in model.ec), but not assigned any reaction-specific kcat value
+0024 %   (their model.ec.kcat entry is either 0 or NaN), can be assigned
+0025 %   standard kcat values by a similar approach. However, those reactions
+0026 %   will not be linked to the "standard" pseudoenzyme, but will use the
+0027 %   enzyme that they had already been associated with.
+0028 %
+0029 %   Any pre-existing standard kcat assignments (identified by 'standard'
+0030 %   entires in model.ec.source) are removed when applying this function.
+0031 %
+0032 % Input:
+0033 %   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
+0034 %   modelAdapter    a loaded model adapter (Optional, will otherwise use
+0035 %                   the default model adapter).
+0036 %   threshold       a threshold to determine when use a kcat value based on
+0037 %                   the mean kcat of the reactions in the same subSystem or
+0038 %                   based on the median value of all the kcat in the model.
+0039 %                   Second option is used when the number of reactions in a
+0040 %                   determined subSystem is < threshold. (Optional, default
+0041 %                   = 10)
+0042 %   fillZeroKcat    logical whether zero kcat values should be replaced
+0043 %                   with standard kcat values. (Optional, default = true).
+0044 %
+0045 % Output:
+0046 %   model           ecModel where model.ec is expanded with a standard
+0047 %                   protein with standard kcat and standard MW, assigned to
+0048 %                   reactions without gene associations.
+0049 %   rxnsMissingGPR  a list of updated rxns identifiers with a standard value
+0050 %   standardMW      the standard MW value calculated
+0051 %   standardKcat    the standard Kcat value calculated
+0052 %   rxnsNoKcat      a list of rxns identifiers whose zero kcat has been replaced
+0053 %
+0054 %   While model.ec.kcat is populated, applyKcatConstraints would still need
+0055 %   to be run to apply the new constraints to the S-matrix.
+0056 %
+0057 % Usage:
+0058 %    [model, rxnsMissingGPR, standardMW, standardKcat, rxnsNoKcat] = getStandardKcat(model, modelAdapter, threshold, fillZeroKcat);
+0059 
+0060 if nargin < 2 || isempty(modelAdapter)
+0061     modelAdapter = ModelAdapterManager.getDefault();
+0062     if isempty(modelAdapter)
+0063         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
+0064     end
 0065 end
-0066 
-0067 if nargin < 4 || isempty(fillZeroKcat)
-0068     fillZeroKcat = true;
-0069 end
-0070 
-0071 databases = loadDatabases('uniprot', modelAdapter);
-0072 
-0073 % An standard MW is defined for all the rxns which does not have a GPR
-0074 % rule defined. This is based in all the proteins reported for the specific
-0075 % organism in uniprot
-0076 standardMW = median(databases.uniprot.MW, 'omitnan');
+0066 params = modelAdapter.getParameters();
+0067 
+0068 if nargin < 3 || isempty(threshold)
+0069     threshold = 10;
+0070 end
+0071 
+0072 if nargin < 4 || isempty(fillZeroKcat)
+0073     fillZeroKcat = true;
+0074 end
+0075 
+0076 databases = loadDatabases('uniprot', modelAdapter);
 0077 
-0078 % An standard Kcat is defined for all the rxns which does not have a GPR
-0079 % rule defined. In this case, the kcat value for a particular reaction is
-0080 % defined as the mean of the kcat values of the reactions involved in the
-0081 % same subsystem in which the given reaction is involved. Nevertheless, if
-0082 % a subSystem have a number of reactions lower than a treshold, the kcat
-0083 % value will be the median of the kcat in all the reactions of the model.
-0084 
-0085 % Remove from the list those with kcat zero
-0086 rxnsKcatZero = model.ec.kcat > 0;
-0087 
-0088 % Get the kcat value based on all the kcats in the model
-0089 standardKcat = median(model.ec.kcat(rxnsKcatZero), 'omitnan');
-0090 
-0091 % If the model have subSystems assigned calculate kcat based on subSystem
-0092 if isfield(model,'subSystems') && ~all(cellfun(@isempty, model.subSystems))
-0093     standard = false;
-0094     if model.ec.geckoLight
-0095         modRxns = extractAfter(model.ec.rxns,4);
-0096     else
-0097         modRxns = model.ec.rxns;
-0098     end
-0099     % Map ec-rxns to model.rxns
-0100     [~,rxnIdx]  = ismember(modRxns,model.rxns);
-0101     % Choose first subSystem
-0102     enzSubSystems = flattenCell(model.subSystems(rxnIdx));
-0103     enzSubSystems = enzSubSystems(:,1);
-0104     if ~all(cellfun(@isempty, enzSubSystems))
-0105 
-0106     % Make list of unique subsystems, and which rxns are linked to them
-0107     [enzSubSystem_names, ~, rxnToSub] = unique(enzSubSystems);
-0108     % Make matrix of ec-rxns vs. unique subsystem index
-0109     ind = sub2ind([numel(enzSubSystem_names) numel(enzSubSystems)],rxnToSub',1:numel(rxnToSub));
-0110     kcatSubSystem = false([numel(enzSubSystem_names) numel(enzSubSystems)]);
-0111     kcatSubSystem(ind) = true;
-0112     % Number of kcats per subSystem
-0113     kcatsPerSubSystem = sum(kcatSubSystem,2);
-0114     % Calculate average kcat values per subSystem
-0115     kcatSubSystem = (kcatSubSystem*model.ec.kcat)./kcatsPerSubSystem;
-0116     kcatSubSystem(kcatsPerSubSystem < threshold) = standardKcat;
-0117     else
-0118         standard = true;
-0119         printOrange('WARNING: No subSystem-specific kcat values can be calculated')
-0120     end
-0121 else
-0122     standard = true;
-0123     printOrange('WARNING: No subSystem-specific kcat values can be calculated')
-0124 end
-0125 
-0126 % Find reactions without GPR
-0127 rxnsMissingGPR = find(cellfun(@isempty, model.grRules));
-0128 
-0129 % Get custom list of reaction IDs to ignore, if existing. First column
-0130 % contains reaction IDs, second column contains reaction names for
-0131 % reference only.
-0132 if exist(fullfile(params.path,'data','pseudoRxns.tsv'),'file')
-0133     fID        = fopen(fullfile(params.path,'data','pseudoRxns.tsv'));
-0134     fileData   = textscan(fID,'%s %s','delimiter','\t');
-0135     fclose(fID);
-0136     customRxns = fileData{1};
-0137     customRxns = find(ismember(model.rxns,customRxns));
-0138 else
-0139     customRxns = [];
-0140 end
-0141 % Get and remove exchange, transport, spontaneous and pseudo reactions
-0142 [~, exchangeRxns]  = getExchangeRxns(model);
-0143 transportRxns = getTransportRxns(model);
-0144 [spontaneousRxns, ~] = modelAdapter.getSpontaneousReactions(model);
-0145 pseudoRxns = contains(model.rxnNames,'pseudoreaction');
-0146 slimeRxns = contains(model.rxnNames,'SLIME rxn');
-0147 rxnsToIgnore = [customRxns; exchangeRxns; find(transportRxns); ...
-0148                 find(spontaneousRxns); find(pseudoRxns); find(slimeRxns)];
-0149 rxnsMissingGPR(ismember(rxnsMissingGPR, rxnsToIgnore)) = [];
-0150 
-0151 % Only add if not geckoLight & getStandardKcat was not run earlier
-0152 if ~any(strcmp(model.mets,'prot_standard'))
-0153     % Add a new gene to be consistent with ec field named standard
-0154     proteinStdGenes.genes = 'standard';
-0155     if isfield(model,'geneShortNames')
-0156         proteinStdGenes.geneShortNames = 'std';
-0157     end
-0158     model = addGenesRaven(model, proteinStdGenes);
-0159 
-0160     if ~model.ec.geckoLight
-0161         % Add a new metabolite named prot_standard
-0162         proteinStdMets.mets         = 'prot_standard';
-0163         proteinStdMets.metNames     = proteinStdMets.mets;
-0164         proteinStdMets.compartments = 'c';
-0165         if isfield(model,'metNotes')
-0166             proteinStdMets.metNotes = 'Standard enzyme-usage pseudometabolite';
-0167         end
-0168         model = addMets(model, proteinStdMets);
-0169 
-0170         % Add a protein usage reaction if not a light version
-0171         proteinStdUsageRxn.rxns         = {'usage_prot_standard'};
-0172         proteinStdUsageRxn.rxnNames     = proteinStdUsageRxn.rxns;
-0173         proteinStdUsageRxn.mets         = {proteinStdMets.mets, 'prot_pool'};
-0174         proteinStdUsageRxn.stoichCoeffs = [-1, 1];
-0175         proteinStdUsageRxn.lb           = -1000;
-0176         proteinStdUsageRxn.ub           = 0;
-0177         proteinStdUsageRxn.grRules      = proteinStdGenes.genes;
-0178 
-0179         model = addRxns(model, proteinStdUsageRxn);
-0180     end
-0181     % Update .ec structure in model
-0182     model.ec.genes(end+1)      = {'standard'};
-0183     model.ec.enzymes(end+1)    = {'standard'};
-0184     model.ec.mw(end+1)         = standardMW;
-0185     model.ec.sequence(end+1)   = {''};
-0186     % Additional info
-0187     if isfield(model.ec,'concs')
-0188         model.ec.concs(end+1)  = nan();
-0189     end
-0190 
-0191     % Expand the enzyme rxns matrix
-0192     model.ec.rxnEnzMat =  [model.ec.rxnEnzMat, zeros(length(model.ec.rxns), 1)]; % 1 new enzyme
-0193     model.ec.rxnEnzMat =  [model.ec.rxnEnzMat; zeros(length(rxnsMissingGPR), length(model.ec.enzymes))]; % new rxns
-0194 end
-0195 numRxns = length(model.ec.rxns);
-0196 stdMetIdx = find(strcmpi(model.ec.enzymes, 'standard'));
-0197 
-0198 % Remove previous standard kcat assignment
-0199 oldStandardEnz = find(strcmp(model.ec.source,'standard'));
-0200 if ~isempty(oldStandardEnz)
-0201     model.ec.rxns(oldStandardEnz)        = [];
-0202     model.ec.kcat(oldStandardEnz)        = [];
-0203     model.ec.source(oldStandardEnz)      = [];
-0204     model.ec.notes(oldStandardEnz)       = [];
-0205     model.ec.eccodes(oldStandardEnz)     = [];
-0206     model.ec.rxnEnzMat(oldStandardEnz,:) = [];
-0207 end
+0078 % An standard MW is defined for all the rxns which does not have a GPR
+0079 % rule defined. This is based in all the proteins reported for the specific
+0080 % organism in uniprot
+0081 standardMW = median(databases.uniprot.MW, 'omitnan');
+0082 
+0083 % An standard Kcat is defined for all the rxns which does not have a GPR
+0084 % rule defined. In this case, the kcat value for a particular reaction is
+0085 % defined as the mean of the kcat values of the reactions involved in the
+0086 % same subsystem in which the given reaction is involved. Nevertheless, if
+0087 % a subSystem have a number of reactions lower than a treshold, the kcat
+0088 % value will be the median of the kcat in all the reactions of the model.
+0089 
+0090 % Remove from the list those with kcat zero
+0091 rxnsKcatZero = model.ec.kcat > 0;
+0092 
+0093 % Get the kcat value based on all the kcats in the model
+0094 standardKcat = median(model.ec.kcat(rxnsKcatZero), 'omitnan');
+0095 
+0096 % If the model have subSystems assigned calculate kcat based on subSystem
+0097 if isfield(model,'subSystems') && ~all(cellfun(@isempty, model.subSystems))
+0098     standard = false;
+0099     if model.ec.geckoLight
+0100         modRxns = extractAfter(model.ec.rxns,4);
+0101     else
+0102         modRxns = model.ec.rxns;
+0103     end
+0104     % Map ec-rxns to model.rxns
+0105     [~,rxnIdx]  = ismember(modRxns,model.rxns);
+0106     % Choose first subSystem
+0107     enzSubSystems = flattenCell(model.subSystems(rxnIdx));
+0108     enzSubSystems = enzSubSystems(:,1);
+0109     if ~all(cellfun(@isempty, enzSubSystems))
+0110 
+0111     % Make list of unique subsystems, and which rxns are linked to them
+0112     [enzSubSystem_names, ~, rxnToSub] = unique(enzSubSystems);
+0113     % Make matrix of ec-rxns vs. unique subsystem index
+0114     ind = sub2ind([numel(enzSubSystem_names) numel(enzSubSystems)],rxnToSub',1:numel(rxnToSub));
+0115     kcatSubSystem = false([numel(enzSubSystem_names) numel(enzSubSystems)]);
+0116     kcatSubSystem(ind) = true;
+0117     % Number of kcats per subSystem
+0118     kcatsPerSubSystem = sum(kcatSubSystem,2);
+0119     % Calculate average kcat values per subSystem
+0120     kcatSubSystem = (kcatSubSystem*model.ec.kcat)./kcatsPerSubSystem;
+0121     kcatSubSystem(kcatsPerSubSystem < threshold) = standardKcat;
+0122     else
+0123         standard = true;
+0124         printOrange('WARNING: No subSystem-specific kcat values can be calculated')
+0125     end
+0126 else
+0127     standard = true;
+0128     printOrange('WARNING: No subSystem-specific kcat values can be calculated')
+0129 end
+0130 
+0131 % Find reactions without GPR
+0132 rxnsMissingGPR = find(cellfun(@isempty, model.grRules));
+0133 
+0134 % Find reactions with GPR but without model.ec entry (for instance due to
+0135 % no protein matching)
+0136 rxnsMissingEnzyme = find(~cellfun(@isempty, model.grRules));
+0137 rxnsMissingEnzyme = find(and(~ismember(model.rxns(rxnsMissingEnzyme),model.ec.rxns), ~contains(model.rxns(rxnsMissingEnzyme),'usage_prot_')));
+0138 rxnsMissingGPR = [rxnsMissingGPR;rxnsMissingEnzyme];
+0139 
+0140 % Get custom list of reaction IDs to ignore, if existing. First column
+0141 % contains reaction IDs, second column contains reaction names for
+0142 % reference only.
+0143 if exist(fullfile(params.path,'data','pseudoRxns.tsv'),'file')
+0144     fID        = fopen(fullfile(params.path,'data','pseudoRxns.tsv'));
+0145     fileData   = textscan(fID,'%s %s','delimiter','\t');
+0146     fclose(fID);
+0147     customRxns = fileData{1};
+0148     customRxns = find(ismember(model.rxns,customRxns));
+0149 else
+0150     customRxns = [];
+0151 end
+0152 % Get and remove exchange, transport, spontaneous and pseudo reactions
+0153 [~, exchangeRxns]  = getExchangeRxns(model);
+0154 transportRxns = getTransportRxns(model);
+0155 [spontaneousRxns, ~] = modelAdapter.getSpontaneousReactions(model);
+0156 pseudoRxns = contains(model.rxnNames,'pseudoreaction');
+0157 slimeRxns = contains(model.rxnNames,'SLIME rxn');
+0158 rxnsToIgnore = [customRxns; exchangeRxns; find(transportRxns); ...
+0159                 find(spontaneousRxns); find(pseudoRxns); find(slimeRxns)];
+0160 rxnsMissingGPR(ismember(rxnsMissingGPR, rxnsToIgnore)) = [];
+0161 
+0162 % Only add if not geckoLight & getStandardKcat was not run earlier
+0163 if ~any(strcmp(model.mets,'prot_standard'))
+0164     % Add a new gene to be consistent with ec field named standard
+0165     proteinStdGenes.genes = 'standard';
+0166     if isfield(model,'geneShortNames')
+0167         proteinStdGenes.geneShortNames = 'std';
+0168     end
+0169     model = addGenesRaven(model, proteinStdGenes);
+0170 
+0171     if ~model.ec.geckoLight
+0172         % Add a new metabolite named prot_standard
+0173         proteinStdMets.mets         = 'prot_standard';
+0174         proteinStdMets.metNames     = proteinStdMets.mets;
+0175         proteinStdMets.compartments = 'c';
+0176         if isfield(model,'metNotes')
+0177             proteinStdMets.metNotes = 'Standard enzyme-usage pseudometabolite';
+0178         end
+0179         model = addMets(model, proteinStdMets);
+0180 
+0181         % Add a protein usage reaction if not a light version
+0182         proteinStdUsageRxn.rxns         = {'usage_prot_standard'};
+0183         proteinStdUsageRxn.rxnNames     = proteinStdUsageRxn.rxns;
+0184         proteinStdUsageRxn.mets         = {proteinStdMets.mets, 'prot_pool'};
+0185         proteinStdUsageRxn.stoichCoeffs = [-1, 1];
+0186         proteinStdUsageRxn.lb           = -1000;
+0187         proteinStdUsageRxn.ub           = 0;
+0188         proteinStdUsageRxn.grRules      = proteinStdGenes.genes;
+0189 
+0190         model = addRxns(model, proteinStdUsageRxn);
+0191     end
+0192     % Update .ec structure in model
+0193     model.ec.genes(end+1)      = {'standard'};
+0194     model.ec.enzymes(end+1)    = {'standard'};
+0195     model.ec.mw(end+1)         = standardMW;
+0196     model.ec.sequence(end+1)   = {''};
+0197     % Additional info
+0198     if isfield(model.ec,'concs')
+0199         model.ec.concs(end+1)  = nan();
+0200     end
+0201 
+0202     % Expand the enzyme rxns matrix
+0203     model.ec.rxnEnzMat =  [model.ec.rxnEnzMat, zeros(length(model.ec.rxns), 1)]; % 1 new enzyme
+0204     model.ec.rxnEnzMat =  [model.ec.rxnEnzMat; zeros(length(rxnsMissingGPR), length(model.ec.enzymes))]; % new rxns
+0205 end
+0206 numRxns = length(model.ec.rxns);
+0207 stdMetIdx = find(strcmpi(model.ec.enzymes, 'standard'));
 0208 
-0209 for i = 1:numel(rxnsMissingGPR)
-0210     rxnIdx = rxnsMissingGPR(i);
-0211 
-0212     % Update .ec structure in model
-0213     if ~model.ec.geckoLight
-0214         model.ec.rxns(end+1)     = model.rxns(rxnIdx);
-0215         % Add prefix in case is light version
-0216     else
-0217         model.ec.rxns{end+1}     = ['001_' model.rxns{rxnIdx}];
-0218     end
+0209 % Remove previous standard kcat assignment
+0210 oldStandardEnz = find(strcmp(model.ec.source,'standard'));
+0211 if ~isempty(oldStandardEnz)
+0212     model.ec.rxns(oldStandardEnz)        = [];
+0213     model.ec.kcat(oldStandardEnz)        = [];
+0214     model.ec.source(oldStandardEnz)      = [];
+0215     model.ec.notes(oldStandardEnz)       = [];
+0216     model.ec.eccodes(oldStandardEnz)     = [];
+0217     model.ec.rxnEnzMat(oldStandardEnz,:) = [];
+0218 end
 0219 
-0220     if ~standard
-0221         kcatSubSystemIdx = strcmpi(enzSubSystem_names, model.subSystems{rxnIdx}(1));
-0222         if all(kcatSubSystemIdx)
-0223             model.ec.kcat(end+1) = kcatSubSystem(kcatSubSystemIdx);
-0224         else
-0225             model.ec.kcat(end+1) = standardKcat;
-0226         end
+0220 for i = 1:numel(rxnsMissingGPR)
+0221     rxnIdx = rxnsMissingGPR(i);
+0222 
+0223     % Update .ec structure in model
+0224     if ~model.ec.geckoLight
+0225         model.ec.rxns(end+1)     = model.rxns(rxnIdx);
+0226         % Add prefix in case is light version
 0227     else
-0228         model.ec.kcat(end+1) = standardKcat;
+0228         model.ec.rxns{end+1}     = ['001_' model.rxns{rxnIdx}];
 0229     end
 0230 
-0231     model.ec.source(end+1)   = {'standard'};
-0232     model.ec.notes(end+1)    = {''};
-0233     model.ec.eccodes(end+1)  = {''};
-0234 
-0235     % Update the enzyme rxns matrix
-0236     model.ec.rxnEnzMat(numRxns+i, stdMetIdx) = 1;
-0237 end
-0238 % Get the rxns identifiers of the updated rxns
-0239 rxnsMissingGPR = model.rxns(rxnsMissingGPR);
-0240 
-0241 if fillZeroKcat
-0242     zeroKcat = model.ec.kcat == 0 | isnan(model.ec.kcat);
-0243     model.ec.kcat(zeroKcat)     = standardKcat;
-0244     model.ec.source(zeroKcat)   = {'standard'};
-0245     rxnsNoKcat = model.ec.rxns(zeroKcat);
-0246 else
-0247     rxnsNoKcat = [];
+0231     if ~standard
+0232         kcatSubSystemIdx = strcmpi(enzSubSystem_names, model.subSystems{rxnIdx}(1));
+0233         if all(kcatSubSystemIdx)
+0234             model.ec.kcat(end+1) = kcatSubSystem(kcatSubSystemIdx);
+0235         else
+0236             model.ec.kcat(end+1) = standardKcat;
+0237         end
+0238     else
+0239         model.ec.kcat(end+1) = standardKcat;
+0240     end
+0241 
+0242     model.ec.source(end+1)   = {'standard'};
+0243     model.ec.notes(end+1)    = {''};
+0244     model.ec.eccodes(end+1)  = {''};
+0245 
+0246     % Update the enzyme rxns matrix
+0247     model.ec.rxnEnzMat(numRxns+i, stdMetIdx) = 1;
 0248 end
-0249 end
-0250 
-0251 function Cflat = flattenCell(C,strFlag)
-0252 %FLATTENCELL  Flatten a nested column cell array into a matrix cell array.
-0253 %
-0254 % CFLAT = FLATTENCELL(C) takes a column cell array in which one or more
-0255 % entries is a nested cell array, and flattens it into a 2D matrix cell
-0256 % array, where the nested entries are spread across new columns.
-0257 %
-0258 % CFLAT = FLATTENCELL(C,STRFLAG) if STRFLAG is TRUE, empty entries in the
-0259 % resulting CFLAT will be replaced with empty strings {''}. Default = FALSE
-0260 if nargin < 2
-0261     strFlag = false;
-0262 end
-0263 
-0264 % determine which entries are cells
-0265 cells = cellfun(@iscell,C);
-0266 
-0267 % determine number of elements in each nested cell
-0268 cellsizes = cellfun(@numel,C);
-0269 cellsizes(~cells) = 1;  % ignore non-cell entries
-0270 
-0271 % flatten single-entry cells
-0272 Cflat = C;
-0273 Cflat(cells & (cellsizes == 1)) = cellfun(@(x) x{1},Cflat(cells & (cellsizes == 1)),'UniformOutput',false);
+0249 % Get the rxns identifiers of the updated rxns
+0250 rxnsMissingGPR = model.rxns(rxnsMissingGPR);
+0251 
+0252 if fillZeroKcat
+0253     zeroKcat = model.ec.kcat == 0 | isnan(model.ec.kcat);
+0254     model.ec.kcat(zeroKcat)     = standardKcat;
+0255     model.ec.source(zeroKcat)   = {'standard'};
+0256     rxnsNoKcat = model.ec.rxns(zeroKcat);
+0257 else
+0258     rxnsNoKcat = [];
+0259 end
+0260 end
+0261 
+0262 function Cflat = flattenCell(C,strFlag)
+0263 %FLATTENCELL  Flatten a nested column cell array into a matrix cell array.
+0264 %
+0265 % CFLAT = FLATTENCELL(C) takes a column cell array in which one or more
+0266 % entries is a nested cell array, and flattens it into a 2D matrix cell
+0267 % array, where the nested entries are spread across new columns.
+0268 %
+0269 % CFLAT = FLATTENCELL(C,STRFLAG) if STRFLAG is TRUE, empty entries in the
+0270 % resulting CFLAT will be replaced with empty strings {''}. Default = FALSE
+0271 if nargin < 2
+0272     strFlag = false;
+0273 end
 0274 
-0275 % iterate through multi-entry cells
-0276 multiCells = find(cellsizes > 1);
-0277 for i = 1:length(multiCells)
-0278     cellContents = Cflat{multiCells(i)};
-0279     Cflat(multiCells(i),1:length(cellContents)) = cellContents;
-0280 end
+0275 % determine which entries are cells
+0276 cells = cellfun(@iscell,C);
+0277 
+0278 % determine number of elements in each nested cell
+0279 cellsizes = cellfun(@numel,C);
+0280 cellsizes(~cells) = 1;  % ignore non-cell entries
 0281 
-0282 % change empty elements to strings, if specified
-0283 if ( strFlag )
-0284     Cflat(cellfun(@isempty,Cflat)) = {''};
-0285 end
-0286 end
+0282 % flatten single-entry cells +0283 Cflat = C; +0284 Cflat(cells & (cellsizes == 1)) = cellfun(@(x) x{1},Cflat(cells & (cellsizes == 1)),'UniformOutput',false); +0285 +0286 % iterate through multi-entry cells +0287 multiCells = find(cellsizes > 1); +0288 for i = 1:length(multiCells) +0289 cellContents = Cflat{multiCells(i)}; +0290 Cflat(multiCells(i),1:length(cellContents)) = cellContents; +0291 end +0292 +0293 % change empty elements to strings, if specified +0294 if ( strFlag ) +0295 Cflat(cellfun(@isempty,Cflat)) = {''}; +0296 end +0297 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/index.html b/doc/src/geckomat/gather_kcats/index.html index 9dd7f6546..3f5abb635 100644 --- a/doc/src/geckomat/gather_kcats/index.html +++ b/doc/src/geckomat/gather_kcats/index.html @@ -19,7 +19,7 @@

Index for src\geckomat\gather_kcats

Matlab files in this directory:

-
 fuzzyKcatMatchingfuzzyKcatMatching
 getStandardKcatgetStandardKcat
 mergeDLKcatAndFuzzyKcatsmergeDlkcatAndFuzzyKcats
 readDLKcatOutputreadDLKcatOutput
 runDLKcatrunDLKcat
 selectKcatValueselectKcatValue
 writeDLKcatInputwriteDLKcatInput
+ fuzzyKcatMatchingfuzzyKcatMatching  getStandardKcatgetStandardKcat  mergeDLKcatAndFuzzyKcatsmergeDlkcatAndFuzzyKcats  readDLKcatOutputreadDLKcatOutput  removeStandardKcatremoveStandardKcat  runDLKcatrunDLKcat  selectKcatValueselectKcatValue  writeDLKcatInputwriteDLKcatInput diff --git a/doc/src/geckomat/gather_kcats/removeStandardKcat.html b/doc/src/geckomat/gather_kcats/removeStandardKcat.html new file mode 100644 index 000000000..66f03c331 --- /dev/null +++ b/doc/src/geckomat/gather_kcats/removeStandardKcat.html @@ -0,0 +1,127 @@ + + + + Description of removeStandardKcat + + + + + + + + + +
Home > src > geckomat > gather_kcats > removeStandardKcat.m
+ + + +

removeStandardKcat +

+ +

PURPOSE ^

+
removeStandardKcat
+ +

SYNOPSIS ^

+
function model = removeStandardKcat(model)
+ +

DESCRIPTION ^

+
 removeStandardKcat
+   Remove the "standard" pseudoenzyme and standard kcat and MW values, as
+   they were introduced by getStandardKcat. Also standard kcat values that
+   were assigned by getStandardKcat if fillZeroKcat was set to true are
+   removed from model.ec.kcat. Both the model.ec and model.S structures
+   are modified.
+
+ Input:
+   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
+                   that has standard kcat values implemented by
+                   getStandardKcat.
+
+ Output:
+   model           ecModel without standard pseudoprotein and standard
+                   kcat values
+
+ Usage:
+    model = removeStandardKcat(model);
+ + +

CROSS-REFERENCE INFORMATION ^

+This function calls: +
    +
+This function is called by: +
    +
+ + + + +

SOURCE CODE ^

+
0001 function model = removeStandardKcat(model)
+0002 % removeStandardKcat
+0003 %   Remove the "standard" pseudoenzyme and standard kcat and MW values, as
+0004 %   they were introduced by getStandardKcat. Also standard kcat values that
+0005 %   were assigned by getStandardKcat if fillZeroKcat was set to true are
+0006 %   removed from model.ec.kcat. Both the model.ec and model.S structures
+0007 %   are modified.
+0008 %
+0009 % Input:
+0010 %   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
+0011 %                   that has standard kcat values implemented by
+0012 %                   getStandardKcat.
+0013 %
+0014 % Output:
+0015 %   model           ecModel without standard pseudoprotein and standard
+0016 %                   kcat values
+0017 %
+0018 % Usage:
+0019 %    model = removeStandardKcat(model);
+0020 
+0021 % Remove standard enzyme from ec structure
+0022 stdEnzIdx = find(strcmpi(model.ec.enzymes, 'standard'));
+0023 if ~isempty(stdEnzIdx)
+0024     model.ec.genes(stdEnzIdx)       = [];
+0025     model.ec.enzymes(stdEnzIdx)     = [];
+0026     model.ec.mw(stdEnzIdx)          = [];
+0027     model.ec.sequence(stdEnzIdx)    = [];
+0028     if isfield(model.ec,'concs')
+0029         model.ec.concs(stdEnzIdx)   = [];
+0030     end
+0031     rxnEnzIdx = find(model.ec.rxnEnzMat(:,stdEnzIdx));
+0032     model.ec.rxns(rxnEnzIdx)        = [];
+0033     model.ec.kcat(rxnEnzIdx)        = [];
+0034     model.ec.source(rxnEnzIdx)      = [];
+0035     model.ec.notes(rxnEnzIdx)       = [];
+0036     model.ec.eccodes(rxnEnzIdx)     = [];
+0037     model.ec.rxnEnzMat(:,stdEnzIdx) = [];
+0038     model.ec.rxnEnzMat(rxnEnzIdx,:) = [];
+0039 end
+0040 
+0041 % Remove standard kcat values and reapply kcat constraints for those
+0042 % specific reactions
+0043 stdKcatIdx = find(strcmpi(model.ec.source, 'standard'));
+0044 if ~isempty(stdKcatIdx)
+0045     model.ec.source(stdKcatIdx)     = {''};
+0046     model.ec.kcat(stdKcatIdx)       = 0;
+0047     model = applyKcatConstraints(model,stdKcatIdx);
+0048 end
+0049 
+0050 % Remove standard protein, usage and gene from model structure
+0051 stdMetIdx = find(strcmpi(model.mets, 'prot_standard'));
+0052 if ~isempty(stdMetIdx)
+0053     model       = removeMets(model,stdMetIdx,false,false,false,false);
+0054 end
+0055 stdProtEx = find(strcmpi(model.rxns, 'usage_prot_standard'));
+0056 if ~isempty(stdProtEx)
+0057     model       = removeReactions(model,stdProtEx,false,false,false);
+0058 end
+0059 stdProtGene = find(strcmpi(model.genes, 'standard'));
+0060 if ~isempty(stdProtGene)
+0061     model       = removeGenes(model,stdProtGene,false,false,false);
+0062 end
+0063 end
+
Generated by m2html © 2005
+ + \ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/runDLKcat.html b/doc/src/geckomat/gather_kcats/runDLKcat.html index 1aeba1750..b11b22eef 100644 --- a/doc/src/geckomat/gather_kcats/runDLKcat.html +++ b/doc/src/geckomat/gather_kcats/runDLKcat.html @@ -75,31 +75,34 @@

SOURCE CODE ^end 0021 0022 params = modelAdapter.params; -0023 -0024 %% Check and install requirements -0025 % On macOS, Docker might not be properly loaded if MATLAB is started via -0026 % launcher and not terminal. -0027 if ismac -0028 setenv('PATH', strcat('/usr/local/bin', ':', getenv("PATH"))); -0029 end -0030 -0031 % Check if Docker is installed -0032 [checks.docker.status, checks.docker.out] = system('docker --version'); -0033 if checks.docker.status ~= 0 -0034 error('Cannot find Docker. Make sure it is installed.') -0035 end -0036 -0037 disp('Running DLKcat prediction, this may take many minutes, especially the first time.') -0038 status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']); +0023 % Make sure path is full, not relative +0024 [~, params.path] = fileattrib(params.path); +0025 params.path=params.path.Name; +0026 +0027 %% Check and install requirements +0028 % On macOS, Docker might not be properly loaded if MATLAB is started via +0029 % launcher and not terminal. +0030 if ismac +0031 setenv('PATH', strcat('/usr/local/bin', ':', getenv("PATH"))); +0032 end +0033 +0034 % Check if Docker is installed +0035 [checks.docker.status, checks.docker.out] = system('docker --version'); +0036 if checks.docker.status ~= 0 +0037 error('Cannot find Docker, make sure it is installed. If it is, it might be required to start Matlab from the command-line instead of the launcher in order for Docker to be detected and used.') +0038 end 0039 -0040 if status == 0 && exist(fullfile(params.path,'data/DLKcatOutput.tsv')) -0041 delete(fullfile(params.path,'/data/DLKcat.tsv')); -0042 movefile(fullfile(params.path,'/data/DLKcatOutput.tsv'), fullfile(params.path,'/data/DLKcat.tsv')); -0043 disp('DKLcat prediction completed.'); -0044 else -0045 error('DLKcat encountered an error or it did not create any output file.') -0046 end -0047 +0040 disp('Running DLKcat prediction, this may take many minutes, especially the first time.') +0041 status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']); +0042 +0043 if status == 0 && exist(fullfile(params.path,'data/DLKcatOutput.tsv')) +0044 delete(fullfile(params.path,'/data/DLKcat.tsv')); +0045 movefile(fullfile(params.path,'/data/DLKcatOutput.tsv'), fullfile(params.path,'/data/DLKcat.tsv')); +0046 disp('DKLcat prediction completed.'); +0047 else +0048 error('DLKcat encountered an error or it did not create any output file.') +0049 end +0050
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/get_enzyme_data/loadDatabases.html b/doc/src/geckomat/get_enzyme_data/loadDatabases.html index 80e12d51a..90cd975dc 100644 --- a/doc/src/geckomat/get_enzyme_data/loadDatabases.html +++ b/doc/src/geckomat/get_enzyme_data/loadDatabases.html @@ -124,159 +124,160 @@

SOURCE CODE ^'%2Cec%2Cmass%2Csequence&format=tsv&compressed=false&sort=protein_name%20asc']; 0063 try 0064 urlwrite(url,uniprotPath,'Timeout',30); -0065 fprintf('Model-specific KEGG database stored at %s\n',uniprotPath); +0065 fprintf('Model-specific UniProt database stored at %s\n',uniprotPath); 0066 catch -0067 error(['Download failed, check your internet connection and try again, or manually download: ' url]) -0068 end -0069 end -0070 if exist(uniprotPath,'file') -0071 fid = fopen(uniprotPath,'r'); -0072 fileContent = textscan(fid,'%q %q %q %q %q','Delimiter','\t','HeaderLines',1); -0073 fclose(fid); -0074 databases.uniprot.ID = fileContent{1}; -0075 databases.uniprot.genes = fileContent{2}; -0076 databases.uniprot.eccodes = fileContent{3}; -0077 databases.uniprot.MW = str2double(fileContent{4}); -0078 databases.uniprot.seq = fileContent{5}; -0079 else -0080 databases.uniprot = []; -0081 end -0082 if ~isempty(databases.uniprot) -0083 [uniqueIDs,uniqueIdx] = unique(databases.uniprot.ID,'stable'); -0084 if numel(uniqueIDs) < numel(databases.uniprot.ID) -0085 duplID = setdiff(1:numel(databases.uniprot.ID),uniqueIdx); -0086 dispEM(['Duplicate entries are found for the following proteins. '... -0087 'Manually curate the ''uniprot.tsv'' file, or adjust the uniprot parameters '... -0088 'in the model adapter:'],true,databases.uniprot.ID(duplID)); -0089 end -0090 end -0091 end -0092 -0093 %% KEGG -0094 if any(strcmp(selectDatabase,{'kegg','both'})) -0095 keggPath = fullfile(filePath,'kegg.tsv'); -0096 if ~exist(keggPath,'file') -0097 if isempty(kegg.ID) -0098 printOrange('WARNING: No kegg.ID is specified, unable to download KEGG DB.\n') -0099 else -0100 downloadKEGG(kegg.ID,keggPath,kegg.geneID); -0101 end -0102 end -0103 if exist(keggPath,'file') -0104 fid = fopen(keggPath,'r'); -0105 fileContent = textscan(fid,'%q %q %q %q %q %q %q','Delimiter',',','HeaderLines',0); -0106 fclose(fid); -0107 databases.kegg.uniprot = fileContent{1}; -0108 databases.kegg.genes = fileContent{2}; -0109 databases.kegg.keggGene = fileContent{3}; -0110 databases.kegg.eccodes = fileContent{4}; -0111 databases.kegg.MW = str2double(fileContent{5}); -0112 databases.kegg.pathway = fileContent{6}; -0113 databases.kegg.seq = fileContent{7}; -0114 else -0115 databases.kegg = []; -0116 end -0117 end +0067 error(['Download failed, check your internet connection and try again, or manually download: ' url ... +0068 ' After downloading, store the file as ' uniprotPath]) +0069 end +0070 end +0071 if exist(uniprotPath,'file') +0072 fid = fopen(uniprotPath,'r'); +0073 fileContent = textscan(fid,'%q %q %q %q %q','Delimiter','\t','HeaderLines',1); +0074 fclose(fid); +0075 databases.uniprot.ID = fileContent{1}; +0076 databases.uniprot.genes = fileContent{2}; +0077 databases.uniprot.eccodes = fileContent{3}; +0078 databases.uniprot.MW = str2double(fileContent{4}); +0079 databases.uniprot.seq = fileContent{5}; +0080 else +0081 databases.uniprot = []; +0082 end +0083 if ~isempty(databases.uniprot) +0084 [uniqueIDs,uniqueIdx] = unique(databases.uniprot.ID,'stable'); +0085 if numel(uniqueIDs) < numel(databases.uniprot.ID) +0086 duplID = setdiff(1:numel(databases.uniprot.ID),uniqueIdx); +0087 dispEM(['Duplicate entries are found for the following proteins. '... +0088 'Manually curate the ''uniprot.tsv'' file, or adjust the uniprot parameters '... +0089 'in the model adapter:'],true,databases.uniprot.ID(duplID)); +0090 end +0091 end +0092 end +0093 +0094 %% KEGG +0095 if any(strcmp(selectDatabase,{'kegg','both'})) +0096 keggPath = fullfile(filePath,'kegg.tsv'); +0097 if ~exist(keggPath,'file') +0098 if isempty(kegg.ID) +0099 printOrange('WARNING: No kegg.ID is specified, unable to download KEGG DB.\n') +0100 else +0101 downloadKEGG(kegg.ID,keggPath,kegg.geneID); +0102 end +0103 end +0104 if exist(keggPath,'file') +0105 fid = fopen(keggPath,'r'); +0106 fileContent = textscan(fid,'%q %q %q %q %q %q %q','Delimiter',',','HeaderLines',0); +0107 fclose(fid); +0108 databases.kegg.uniprot = fileContent{1}; +0109 databases.kegg.genes = fileContent{2}; +0110 databases.kegg.keggGene = fileContent{3}; +0111 databases.kegg.eccodes = fileContent{4}; +0112 databases.kegg.MW = str2double(fileContent{5}); +0113 databases.kegg.pathway = fileContent{6}; +0114 databases.kegg.seq = fileContent{7}; +0115 else +0116 databases.kegg = []; +0117 end 0118 end -0119 -0120 function downloadKEGG(keggID, filePath, keggGeneID) -0121 %% Download gene information -0122 progressbar(['Downloading KEGG data for organism code ' keggID]) -0123 webOptions = weboptions('Timeout',30); -0124 try -0125 gene_list = webread(['http://rest.kegg.jp/list/' keggID],webOptions); -0126 catch ME -0127 switch ME.identifier -0128 case 'MATLAB:webservices:HTTP400StatusCodeError' -0129 error(['Unable to download data form KEGG with a potentially invalid ID: ' keggID ]) -0130 end -0131 end -0132 gene_list = regexpi(gene_list, '[^\n]+','match')'; -0133 gene_id = regexpi(gene_list,['(?<=' keggID ':)\S+'],'match'); -0134 -0135 % Retrieve information for every gene in the list, 10 genes per query -0136 genesPerQuery = 10; -0137 queries = ceil(numel(gene_id)/genesPerQuery); -0138 keggData = cell(numel(gene_id),1); -0139 for i = 1:queries -0140 % Download batches of genes -0141 firstIdx = i*genesPerQuery-(genesPerQuery-1); -0142 lastIdx = i*genesPerQuery; -0143 if lastIdx > numel(gene_id) % Last query has probably less genes -0144 lastIdx = numel(gene_id); -0145 end -0146 url = ['http://rest.kegg.jp/get/' keggID ':' strjoin([gene_id{firstIdx:lastIdx}],['+' keggID ':'])]; -0147 -0148 retry = true; -0149 while retry -0150 try -0151 retry = false; -0152 out = webread(url,webOptions); -0153 catch -0154 retry = true; -0155 end -0156 end -0157 outSplit = strsplit(out,['///' 10]); %10 is new line character -0158 if numel(outSplit) < lastIdx-firstIdx+2 -0159 error('KEGG returns less genes per query') %Reduce genesPerQuery -0160 end -0161 keggData(firstIdx:lastIdx) = outSplit(1:end-1); -0162 progressbar(i/queries) -0163 end -0164 -0165 %% Parsing of info to keggDB format -0166 sequence = regexprep(keggData,'.*AASEQ\s+\d+\s+([A-Z\s])+?\s+NTSEQ.*','$1'); -0167 %No AASEQ -> no protein -> not of interest -0168 noProt = startsWith(sequence,'ENTRY '); -0169 uni = regexprep(keggData,'.*UniProt: (\S+?)\s.*','$1'); -0170 noUni = startsWith(uni,'ENTRY '); -0171 uni(noProt | noUni) = []; -0172 keggData(noProt | noUni) = []; -0173 sequence(noProt | noUni) = []; -0174 sequence = regexprep(sequence,'\s*',''); -0175 keggGene = regexprep(keggData,'ENTRY\s+(\S+?)\s.+','$1'); -0176 -0177 switch keggGeneID -0178 case {'kegg',''} -0179 gene_name = keggGene; -0180 otherwise -0181 % In case there are special characters: -0182 keggGeneIDT = regexptranslate('escape',keggGeneID); -0183 gene_name = regexprep(keggData,['.+' keggGeneIDT ': (\S+?)\n.+'],'$1'); -0184 noID = ~contains(keggData,keggGeneIDT); -0185 if all(noID) -0186 error(['None of the KEGG entries are annotated with the gene identifier ' keggGeneID]) -0187 else -0188 gene_name(noID)= []; -0189 keggData(noID) = []; -0190 keggGene(noID) = []; -0191 sequence(noID) = []; -0192 uni(noID) = []; -0193 end -0194 end -0195 -0196 EC_names = regexprep(keggData,'.*ORTHOLOGY.*\[EC:(.*?)\].*','$1'); -0197 EC_names(startsWith(EC_names,'ENTRY ')) = {''}; -0198 -0199 MW = cell(numel(sequence),1); -0200 for i=1:numel(sequence) -0201 if ~isempty(sequence{i}) -0202 MW{i} = num2str(round(calculateMW(sequence{i}))); -0203 end -0204 end -0205 -0206 pathway = regexprep(keggData,'.*PATHWAY\s+(.*?)(BRITE|MODULE).*','$1'); -0207 pathway(startsWith(pathway,'ENTRY ')) = {''}; -0208 pathway = strrep(pathway,[keggID '01100 Metabolic pathways'],''); -0209 pathway = regexprep(pathway,'\n',''); -0210 pathway = regexprep(pathway,' ',''); -0211 -0212 out = [uni, gene_name, keggGene, EC_names, MW, pathway, sequence]; -0213 out = cell2table(out); -0214 -0215 writetable(out, filePath, 'FileType', 'text', 'WriteVariableNames',false); -0216 fprintf('Model-specific KEGG database stored at %s\n',filePath); -0217 end +0119 end +0120 +0121 function downloadKEGG(keggID, filePath, keggGeneID) +0122 %% Download gene information +0123 progressbar(['Downloading KEGG data for organism code ' keggID]) +0124 webOptions = weboptions('Timeout',30); +0125 try +0126 gene_list = webread(['http://rest.kegg.jp/list/' keggID],webOptions); +0127 catch ME +0128 switch ME.identifier +0129 case 'MATLAB:webservices:HTTP400StatusCodeError' +0130 error(['Unable to download data form KEGG with a potentially invalid ID: ' keggID ]) +0131 end +0132 end +0133 gene_list = regexpi(gene_list, '[^\n]+','match')'; +0134 gene_id = regexpi(gene_list,['(?<=' keggID ':)\S+'],'match'); +0135 +0136 % Retrieve information for every gene in the list, 10 genes per query +0137 genesPerQuery = 10; +0138 queries = ceil(numel(gene_id)/genesPerQuery); +0139 keggData = cell(numel(gene_id),1); +0140 for i = 1:queries +0141 % Download batches of genes +0142 firstIdx = i*genesPerQuery-(genesPerQuery-1); +0143 lastIdx = i*genesPerQuery; +0144 if lastIdx > numel(gene_id) % Last query has probably less genes +0145 lastIdx = numel(gene_id); +0146 end +0147 url = ['http://rest.kegg.jp/get/' keggID ':' strjoin([gene_id{firstIdx:lastIdx}],['+' keggID ':'])]; +0148 +0149 retry = true; +0150 while retry +0151 try +0152 retry = false; +0153 out = webread(url,webOptions); +0154 catch +0155 retry = true; +0156 end +0157 end +0158 outSplit = strsplit(out,['///' 10]); %10 is new line character +0159 if numel(outSplit) < lastIdx-firstIdx+2 +0160 error('KEGG returns less genes per query') %Reduce genesPerQuery +0161 end +0162 keggData(firstIdx:lastIdx) = outSplit(1:end-1); +0163 progressbar(i/queries) +0164 end +0165 +0166 %% Parsing of info to keggDB format +0167 sequence = regexprep(keggData,'.*AASEQ\s+\d+\s+([A-Z\s])+?\s+NTSEQ.*','$1'); +0168 %No AASEQ -> no protein -> not of interest +0169 noProt = startsWith(sequence,'ENTRY '); +0170 uni = regexprep(keggData,'.*UniProt: (\S+?)\s.*','$1'); +0171 noUni = startsWith(uni,'ENTRY '); +0172 uni(noProt | noUni) = []; +0173 keggData(noProt | noUni) = []; +0174 sequence(noProt | noUni) = []; +0175 sequence = regexprep(sequence,'\s*',''); +0176 keggGene = regexprep(keggData,'ENTRY\s+(\S+?)\s.+','$1'); +0177 +0178 switch keggGeneID +0179 case {'kegg',''} +0180 gene_name = keggGene; +0181 otherwise +0182 % In case there are special characters: +0183 keggGeneIDT = regexptranslate('escape',keggGeneID); +0184 gene_name = regexprep(keggData,['.+' keggGeneIDT ': (\S+?)\n.+'],'$1'); +0185 noID = ~contains(keggData,keggGeneIDT); +0186 if all(noID) +0187 error(['None of the KEGG entries are annotated with the gene identifier ' keggGeneID]) +0188 else +0189 gene_name(noID)= []; +0190 keggData(noID) = []; +0191 keggGene(noID) = []; +0192 sequence(noID) = []; +0193 uni(noID) = []; +0194 end +0195 end +0196 +0197 EC_names = regexprep(keggData,'.*ORTHOLOGY.*\[EC:(.*?)\].*','$1'); +0198 EC_names(startsWith(EC_names,'ENTRY ')) = {''}; +0199 +0200 MW = cell(numel(sequence),1); +0201 for i=1:numel(sequence) +0202 if ~isempty(sequence{i}) +0203 MW{i} = num2str(round(calculateMW(sequence{i}))); +0204 end +0205 end +0206 +0207 pathway = regexprep(keggData,'.*PATHWAY\s+(.*?)(BRITE|MODULE).*','$1'); +0208 pathway(startsWith(pathway,'ENTRY ')) = {''}; +0209 pathway = strrep(pathway,[keggID '01100 Metabolic pathways'],''); +0210 pathway = regexprep(pathway,'\n',''); +0211 pathway = regexprep(pathway,' ',''); +0212 +0213 out = [uni, gene_name, keggGene, EC_names, MW, pathway, sequence]; +0214 out = cell2table(out); +0215 +0216 writetable(out, filePath, 'FileType', 'text', 'WriteVariableNames',false); +0217 fprintf('Model-specific KEGG database stored at %s\n',filePath); +0218 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.html b/doc/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.html index 23d5bf937..7d71baf63 100644 --- a/doc/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.html +++ b/doc/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.html @@ -139,11 +139,11 @@

SOURCE CODE ^while true 0068 [res,hs] = solveLP(m,0,[],hs); %skip parsimonius, only takes time -0069 if (lastGrowth == -res.f) +0069 if (lastGrowth == res.f) 0070 printOrange('WARNING: No growth increase from increased kcats - check if the constraints on the uptake reactions are too tight.\n') 0071 break; 0072 end -0073 lastGrowth = -res.f; +0073 lastGrowth = res.f; 0074 if verbose; disp(['Iteration ' num2str(iteration) ': Growth: ' num2str(lastGrowth)]); end 0075 if (lastGrowth >= desiredGrowthRate) 0076 break; @@ -181,11 +181,11 @@

SOURCE CODE ^while true 0110 res = solveLP(m,0); %skip parsimonius, only takes time -0111 if (lastGrowth == -res.f) +0111 if (lastGrowth == res.f) 0112 printOrange('No growth increase from increased kcats - check if the constraints on the uptake reactions are too tight.\n') 0113 break; 0114 end -0115 lastGrowth = -res.f; +0115 lastGrowth = res.f; 0116 if (lastGrowth >= desiredGrowthRate) 0117 break; 0118 end diff --git a/doc/src/geckomat/limit_proteins/calculateFfactor.html b/doc/src/geckomat/limit_proteins/calculateFfactor.html index 495bb5d3f..0a0ad575d 100644 --- a/doc/src/geckomat/limit_proteins/calculateFfactor.html +++ b/doc/src/geckomat/limit_proteins/calculateFfactor.html @@ -106,7 +106,7 @@

SOURCE CODE ^if ischar(protData) && endsWith(protData,'paxDB.tsv') 0047 fID = fopen(fullfile(protData),'r'); 0048 fileContent = textscan(fID,'%s','delimiter','\n'); -0049 headerLines = sum(startsWith(fileContent{1},'#')); +0049 headerLines = find(startsWith(fileContent{1},'#'),1,'last'); 0050 fclose(fID); 0051 0052 %Read data file, excluding headerlines diff --git a/doc/src/geckomat/limit_proteins/flexibilizeEnzConcs.html b/doc/src/geckomat/limit_proteins/flexibilizeEnzConcs.html index 766f571c8..8ef414ab0 100644 --- a/doc/src/geckomat/limit_proteins/flexibilizeEnzConcs.html +++ b/doc/src/geckomat/limit_proteins/flexibilizeEnzConcs.html @@ -190,7 +190,7 @@

SOURCE CODE ^% model = constrainEnzConcs(model); 0096 0097 [sol,hs] = solveLP(model); -0098 predGrowth = abs(sol.f); +0098 predGrowth = sol.f; 0099 0100 % Get those proteins with a concentration defined 0101 protConcs = find(~isnan(model.ec.concs)); @@ -231,7 +231,7 @@

SOURCE CODE ^% Get the new growth rate 0138 sol = solveLP(model,0,[],hs); -0139 predGrowth = abs(sol.f); +0139 predGrowth = sol.f; 0140 0141 if verbose 0142 disp(['Protein ' proteins{maxIdx} ' LB adjusted. Grow: ' num2str(predGrowth)]) @@ -248,13 +248,13 @@

SOURCE CODE ^'lb',protPoolIdx,-1000); 0154 tempModel = setParam(tempModel,'ub',bioRxnIdx,expGrowth); 0155 sol = solveLP(tempModel); -0156 if (abs(sol.f)-predGrowth)>1e-10 % There is improvement in growth rate +0156 if (sol.f-predGrowth)>1e-10 % There is improvement in growth rate 0157 % Find new protein pool constraint -0158 predGrowth = abs(sol.f); +0158 predGrowth = sol.f; 0159 tempModel = setParam(tempModel,'lb',bioRxnIdx,predGrowth); 0160 tempModel = setParam(tempModel,'obj',protPoolIdx,1); 0161 sol = solveLP(tempModel); -0162 newProtPool = abs(sol.f); +0162 newProtPool = sol.f; 0163 model.lb(protPoolIdx) = -newProtPool; 0164 fprintf(['Changing the lower bound of protein pool exchange from %s ',... 0165 'to %s enabled a\ngrowth rate of %s. It can be helpful to set ', ... diff --git a/doc/src/geckomat/limit_proteins/getConcControlCoeffs.html b/doc/src/geckomat/limit_proteins/getConcControlCoeffs.html index f70d018b2..85739751d 100644 --- a/doc/src/geckomat/limit_proteins/getConcControlCoeffs.html +++ b/doc/src/geckomat/limit_proteins/getConcControlCoeffs.html @@ -96,7 +96,7 @@

SOURCE CODE ^% Get enzyme index 0041 [~, protIdx] = ismember(proteins, model.ec.enzymes); @@ -124,7 +124,7 @@

SOURCE CODE ^% Get the new growth rate after the adjustment 0065 [tempSol,hs] = solveLP(tempModel,0,[],hs); -0066 tempGrowth = abs(tempSol.f); +0066 tempGrowth = tempSol.f; 0067 0068 % Calculate the coeff only if new growth rate is significantly 0069 % higher than initial value diff --git a/doc/src/geckomat/utilities/enzymeUsage.html b/doc/src/geckomat/utilities/enzymeUsage.html index b8c45650d..763f93d6a 100644 --- a/doc/src/geckomat/utilities/enzymeUsage.html +++ b/doc/src/geckomat/utilities/enzymeUsage.html @@ -54,6 +54,7 @@

DESCRIPTION ^SOURCE CODE ^% absUsage vector of absolute enzyme usages 0027 % UB vector of enzyme exchange reaction upper bounds 0028 % protID string array of matching protein IDs -0029 % -0030 % Usage: -0031 % usageData = enzymeUsage(ecModel,fluxes,zero) -0032 -0033 if nargin<3 -0034 zero=true; -0035 end -0036 if ecModel.ec.geckoLight -0037 error('This function does not work on GECKO light models.') -0038 end -0039 usageData.protID = ecModel.ec.enzymes; -0040 [~,rxnIdx] = ismember(strcat('usage_prot_',ecModel.ec.enzymes),ecModel.rxns); -0041 -0042 usageData.LB = ecModel.lb(rxnIdx); -0043 usageData.absUsage = abs(fluxes(rxnIdx)); -0044 usageData.capUsage = abs(usageData.absUsage./usageData.LB); -0045 -0046 if ~zero -0047 nonzero = usageData.absUsage<0; -0048 usageData.absUsage = usageData.absUsage(nonzero); -0049 usageData.capUsage = usageData.capUsage(nonzero); -0050 usageData.LB = usageData.LB(nonzero); -0051 usageData.protID = usageData.protID(nonzero); -0052 end -0053 end +0029 % fluxes vector of fluxes, copy of input fluxes +0030 % +0031 % Usage: +0032 % usageData = enzymeUsage(ecModel,fluxes,zero) +0033 +0034 if nargin<3 +0035 zero=true; +0036 end +0037 if ecModel.ec.geckoLight +0038 error('This function does not work on GECKO light models.') +0039 end +0040 usageData.protID = ecModel.ec.enzymes; +0041 [~,rxnIdx] = ismember(strcat('usage_prot_',ecModel.ec.enzymes),ecModel.rxns); +0042 +0043 usageData.LB = ecModel.lb(rxnIdx); +0044 usageData.absUsage = abs(fluxes(rxnIdx)); +0045 usageData.capUsage = abs(usageData.absUsage./usageData.LB); +0046 usageData.fluxes = fluxes; +0047 +0048 if ~zero +0049 nonzero = usageData.absUsage<0; +0050 usageData.absUsage = usageData.absUsage(nonzero); +0051 usageData.capUsage = usageData.capUsage(nonzero); +0052 usageData.LB = usageData.LB(nonzero); +0053 usageData.protID = usageData.protID(nonzero); +0054 end +0055 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/reportEnzymeUsage.html b/doc/src/geckomat/utilities/reportEnzymeUsage.html index 63c5269b4..798d2cc07 100644 --- a/doc/src/geckomat/utilities/reportEnzymeUsage.html +++ b/doc/src/geckomat/utilities/reportEnzymeUsage.html @@ -24,7 +24,7 @@

PURPOSE ^reportEnzymeUsage

SYNOPSIS ^

-
function usageReport = topEnzymeUsage(ecModel, usageData, highCapUsage, topAbsUsage)
+
function usageReport = reportEnzymeUsage(ecModel, usageData, highCapUsage, topAbsUsage)

DESCRIPTION ^

 reportEnzymeUsage
@@ -32,7 +32,7 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

-
0001 function usageReport = topEnzymeUsage(ecModel, usageData, highCapUsage, topAbsUsage)
+
+0064 % Calculate the protein pool flux from the 'prot_pool_exchange' reaction
+0065 protPoolExchangeFlux = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange'));
+0066 
+0067 fluxValues = usageData.fluxes;
+0068 
+0069 % Sum fluxes for all 'usage_prot_' reactions, excluding the 'usage_prot_standard'
+0070 usageProtIndices = startsWith(ecModel.rxns, 'usage_prot_') & ...
+0071                    ~contains(ecModel.rxns, 'standard');
+0072 
+0073 % Sum the absolute values of the usage fluxes
+0074 totalUsageProtFlux = sum(abs(fluxValues(usageProtIndices)));
+0075 
+0076 % Define the new protein pool as the sum of prot_pool_exchange flux and total usage_prot fluxes
+0077 protPool = (protPoolExchangeFlux + totalUsageProtFlux)/100;
+0078 
+0079 for i=1:numel(topEnzyme)
+0080     [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i});
+0081     rxnNumber = numel(rxns);
+0082     topUsage.protID(end+1:end+rxnNumber,1)      = topEnzyme(i);
+0083     topUsage.geneID(end+1:end+rxnNumber,1)      = geneIDs(i);
+0084     topUsage.absUsage(end+1:end+rxnNumber,1)    = usageData.absUsage(topUse(i));
+0085     topUsage.percUsage(end+1:end+rxnNumber,1)   = topUsage.absUsage(end-(rxnNumber-1):end,1)/protPool;
+0086     topUsage.kcat(end+1:end+rxnNumber,1)        = kcat;
+0087     topUsage.source(end+1:end+rxnNumber,1)      = ecModel.ec.source(idx);
+0088     topUsage.rxnID(end+1:end+rxnNumber,1)       = rxns;
+0089     topUsage.rxnNames(end+1:end+rxnNumber,1)    = rxnNames;
+0090     topUsage.grRules(end+1:end+rxnNumber,1)     = grRules;
+0091 end
+0092 usageReport.topAbsUsage     = struct2table(topUsage);
+0093 usageReport.totalProtPool   = protPoolExchangeFlux;
+0094 usageReport.totalUsageFlux  = totalUsageProtFlux;
+0095 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/saveEcModel.html b/doc/src/geckomat/utilities/saveEcModel.html index b08233440..304a2d2fc 100644 --- a/doc/src/geckomat/utilities/saveEcModel.html +++ b/doc/src/geckomat/utilities/saveEcModel.html @@ -106,12 +106,12 @@

SOURCE CODE ^switch filename(end-3:end) 0042 case {'.xml','sbml'} -0043 exportModel(ecModel,[filename '.xml']); +0043 exportModel(ecModel, filename); 0044 %Convert notation "e-005" to "e-05 " in stoich. coeffs. to avoid 0045 %inconsistencies between Windows and MAC: -0046 copyfile([filename '.xml'],'backup.xml') +0046 copyfile(filename,'backup.xml') 0047 fin = fopen('backup.xml', 'r'); -0048 fout = fopen([filename '.xml'], 'w'); +0048 fout = fopen(filename, 'w'); 0049 still_reading = true; 0050 while still_reading 0051 inline = fgets(fin); diff --git a/src/geckomat/change_model/applyKcatConstraints.m b/src/geckomat/change_model/applyKcatConstraints.m index 7c3594710..947fd431b 100644 --- a/src/geckomat/change_model/applyKcatConstraints.m +++ b/src/geckomat/change_model/applyKcatConstraints.m @@ -65,30 +65,33 @@ kcatFirst=0; for i=1:numel(updateRxns) j=updateRxns(i); - enzymes = find(model.ec.rxnEnzMat(j,:)); - kcatLast = kcatFirst+numel(enzymes); - kcatFirst = kcatFirst+1; - newKcats(kcatFirst:kcatLast,1) = j; - newKcats(kcatFirst:kcatLast,2) = enzymes; - newKcats(kcatFirst:kcatLast,3) = model.ec.rxnEnzMat(j,enzymes); - newKcats(kcatFirst:kcatLast,4) = model.ec.kcat(j); - newKcats(kcatFirst:kcatLast,5) = model.ec.mw(enzymes); - kcatFirst = kcatLast; + if model.ec.kcat(j) ~= 0 + enzymes = find(model.ec.rxnEnzMat(j,:)); + kcatLast = kcatFirst+numel(enzymes); + kcatFirst = kcatFirst+1; + newKcats(kcatFirst:kcatLast,1) = j; + newKcats(kcatFirst:kcatLast,2) = enzymes; + newKcats(kcatFirst:kcatLast,3) = model.ec.rxnEnzMat(j,enzymes); + newKcats(kcatFirst:kcatLast,4) = model.ec.kcat(j); + newKcats(kcatFirst:kcatLast,5) = model.ec.mw(enzymes); + kcatFirst = kcatLast; + end + end + if exist('kcatLast','var') % If it does not, then no kcats are found + newKcats(kcatLast+1:end,:)=[]; + + sel = newKcats(:,4) > 0; %Only apply to non-zero kcat + newKcats(sel,4) = newKcats(sel,4) * 3600; %per second -> per hour + newKcats(sel,4) = newKcats(sel,5) ./ newKcats(sel,4); %MW/kcat + newKcats(sel,4) = newKcats(sel,3) .* newKcats(sel,4); %Multicopy subunits. + newKcats(~sel,4) = 0; %Results in zero-cost + + %Replace rxns and enzymes with their location in model + [~,newKcats(:,1)] = ismember(model.ec.rxns(newKcats(:,1)),model.rxns); + [~,newKcats(:,2)] = ismember(strcat('prot_',model.ec.enzymes(newKcats(:,2))),model.mets); + linearIndices = sub2ind(size(model.S),newKcats(:,2),newKcats(:,1)); + model.S(linearIndices) = -newKcats(:,4); %Substrate = negative end - newKcats(kcatLast+1:end,:)=[]; - - sel = newKcats(:,4) > 0; %Only apply to non-zero kcat - newKcats(sel,4) = newKcats(sel,4) * 3600; %per second -> per hour - newKcats(sel,4) = newKcats(sel,5) ./ newKcats(sel,4); %MW/kcat - newKcats(sel,4) = newKcats(sel,3) .* newKcats(sel,4); %Multicopy subunits. - newKcats(~sel,4) = 0; %Results in zero-cost - - %Replace rxns and enzymes with their location in model - [~,newKcats(:,1)] = ismember(model.ec.rxns(newKcats(:,1)),model.rxns); - [~,newKcats(:,2)] = ismember(strcat('prot_',model.ec.enzymes(newKcats(:,2))),model.mets); - linearIndices = sub2ind(size(model.S),newKcats(:,2),newKcats(:,1)); - model.S(linearIndices) = -newKcats(:,4); %Substrate = negative - else %GECKO light formulation, where prot_pool represents all usages prot_pool_idx = find(ismember(model.mets,'prot_pool')); %first remove the prefix of all rxns diff --git a/src/geckomat/gather_kcats/getStandardKcat.m b/src/geckomat/gather_kcats/getStandardKcat.m index 7f32ff059..be1074be1 100644 --- a/src/geckomat/gather_kcats/getStandardKcat.m +++ b/src/geckomat/gather_kcats/getStandardKcat.m @@ -1,10 +1,14 @@ function [model, rxnsMissingGPR, standardMW, standardKcat, rxnsNoKcat] = getStandardKcat(model, modelAdapter, threshold, fillZeroKcat) % getStandardKcat -% Calculate an standard kcat and standard molecular weight (MW) that can be -% used to apply enzyme constraints to reactions without any associated genes. -% This is done by adding those reactions to model.ec, assign a "standard" -% pseudoenzyme with the standard MW (median of all proteins in the organism) -% and standard kcat (median from all kcat, or subsystem specific kcat). +% Calculate an standard kcat and standard molecular weight (MW) that can +% be used to apply enzyme constraints to reactions without any associated +% enzymes. Such reactions have either an empty model.grRules field, or +% they have no match in model.ec.rxns (which can be the case if the genes +% in the model.grRules field could not be mapped to enzymes). This is +% done by adding those reactions to model.ec, assign a "standard" +% pseudoenzyme with the standard MW (median of all proteins in the +% organism) and standard kcat (median from all kcat, or subsystem +% specific kcat). % % A reaction is assigned a subSystem specific kcat values if the model % has a subSystems field and the reaction is annotated with a subSystem. @@ -17,25 +21,26 @@ % % In addition, reactions that are annotated with an enzyme (and therefore % already in model.ec), but not assigned any reaction-specific kcat value -% (their model.ec.kcat entry is either 0 or NaN), can be assigned standard -% kcat values by a similar approach. However, those reactions will not be -% linked to the "standard" pseudoenzyme, but will use the enzyme that they had -% already been associated with. +% (their model.ec.kcat entry is either 0 or NaN), can be assigned +% standard kcat values by a similar approach. However, those reactions +% will not be linked to the "standard" pseudoenzyme, but will use the +% enzyme that they had already been associated with. % % Any pre-existing standard kcat assignments (identified by 'standard' % entires in model.ec.source) are removed when applying this function. % % Input: % model an ecModel in GECKO 3 format (with ecModel.ec structure) -% modelAdapter a loaded model adapter (Optional, will otherwise use the -% default model adapter). +% modelAdapter a loaded model adapter (Optional, will otherwise use +% the default model adapter). % threshold a threshold to determine when use a kcat value based on % the mean kcat of the reactions in the same subSystem or % based on the median value of all the kcat in the model. % Second option is used when the number of reactions in a -% determined subSystem is < threshold. (Optional, default = 10) -% fillZeroKcat logical whether zero kcat values should be replaced with -% standard kcat values. (Optional, default = true). +% determined subSystem is < threshold. (Optional, default +% = 10) +% fillZeroKcat logical whether zero kcat values should be replaced +% with standard kcat values. (Optional, default = true). % % Output: % model ecModel where model.ec is expanded with a standard @@ -43,11 +48,11 @@ % reactions without gene associations. % rxnsMissingGPR a list of updated rxns identifiers with a standard value % standardMW the standard MW value calculated -% standardKcat the standard Kcat value calculated +% standardKcat the standard Kcat value calculated % rxnsNoKcat a list of rxns identifiers whose zero kcat has been replaced % -% While model.ec.kcat is populated, applyKcatConstraints would still need to -% be run to apply the new constraints to the S-matrix. +% While model.ec.kcat is populated, applyKcatConstraints would still need +% to be run to apply the new constraints to the S-matrix. % % Usage: % [model, rxnsMissingGPR, standardMW, standardKcat, rxnsNoKcat] = getStandardKcat(model, modelAdapter, threshold, fillZeroKcat); @@ -126,6 +131,12 @@ % Find reactions without GPR rxnsMissingGPR = find(cellfun(@isempty, model.grRules)); +% Find reactions with GPR but without model.ec entry (for instance due to +% no protein matching) +rxnsMissingEnzyme = find(~cellfun(@isempty, model.grRules)); +rxnsMissingEnzyme = find(and(~ismember(model.rxns(rxnsMissingEnzyme),model.ec.rxns), ~contains(model.rxns(rxnsMissingEnzyme),'usage_prot_'))); +rxnsMissingGPR = [rxnsMissingGPR;rxnsMissingEnzyme]; + % Get custom list of reaction IDs to ignore, if existing. First column % contains reaction IDs, second column contains reaction names for % reference only. diff --git a/src/geckomat/gather_kcats/removeStandardKcat.m b/src/geckomat/gather_kcats/removeStandardKcat.m new file mode 100644 index 000000000..0c2dcea9e --- /dev/null +++ b/src/geckomat/gather_kcats/removeStandardKcat.m @@ -0,0 +1,63 @@ +function model = removeStandardKcat(model) +% removeStandardKcat +% Remove the "standard" pseudoenzyme and standard kcat and MW values, as +% they were introduced by getStandardKcat. Also standard kcat values that +% were assigned by getStandardKcat if fillZeroKcat was set to true are +% removed from model.ec.kcat. Both the model.ec and model.S structures +% are modified. +% +% Input: +% model an ecModel in GECKO 3 format (with ecModel.ec structure) +% that has standard kcat values implemented by +% getStandardKcat. +% +% Output: +% model ecModel without standard pseudoprotein and standard +% kcat values +% +% Usage: +% model = removeStandardKcat(model); + +% Remove standard enzyme from ec structure +stdEnzIdx = find(strcmpi(model.ec.enzymes, 'standard')); +if ~isempty(stdEnzIdx) + model.ec.genes(stdEnzIdx) = []; + model.ec.enzymes(stdEnzIdx) = []; + model.ec.mw(stdEnzIdx) = []; + model.ec.sequence(stdEnzIdx) = []; + if isfield(model.ec,'concs') + model.ec.concs(stdEnzIdx) = []; + end + rxnEnzIdx = find(model.ec.rxnEnzMat(:,stdEnzIdx)); + model.ec.rxns(rxnEnzIdx) = []; + model.ec.kcat(rxnEnzIdx) = []; + model.ec.source(rxnEnzIdx) = []; + model.ec.notes(rxnEnzIdx) = []; + model.ec.eccodes(rxnEnzIdx) = []; + model.ec.rxnEnzMat(:,stdEnzIdx) = []; + model.ec.rxnEnzMat(rxnEnzIdx,:) = []; +end + +% Remove standard kcat values and reapply kcat constraints for those +% specific reactions +stdKcatIdx = find(strcmpi(model.ec.source, 'standard')); +if ~isempty(stdKcatIdx) + model.ec.source(stdKcatIdx) = {''}; + model.ec.kcat(stdKcatIdx) = 0; + model = applyKcatConstraints(model,stdKcatIdx); +end + +% Remove standard protein, usage and gene from model structure +stdMetIdx = find(strcmpi(model.mets, 'prot_standard')); +if ~isempty(stdMetIdx) + model = removeMets(model,stdMetIdx,false,false,false,false); +end +stdProtEx = find(strcmpi(model.rxns, 'usage_prot_standard')); +if ~isempty(stdProtEx) + model = removeReactions(model,stdProtEx,false,false,false); +end +stdProtGene = find(strcmpi(model.genes, 'standard')); +if ~isempty(stdProtGene) + model = removeGenes(model,stdProtGene,false,false,false); +end +end diff --git a/src/geckomat/gather_kcats/runDLKcat.m b/src/geckomat/gather_kcats/runDLKcat.m index d82b4861d..4701cc268 100644 --- a/src/geckomat/gather_kcats/runDLKcat.m +++ b/src/geckomat/gather_kcats/runDLKcat.m @@ -20,6 +20,9 @@ function runDLKcat(modelAdapter) end params = modelAdapter.params; +% Make sure path is full, not relative +[~, params.path] = fileattrib(params.path); +params.path=params.path.Name; %% Check and install requirements % On macOS, Docker might not be properly loaded if MATLAB is started via diff --git a/src/geckomat/get_enzyme_data/loadDatabases.m b/src/geckomat/get_enzyme_data/loadDatabases.m index d882a2447..3e4493257 100644 --- a/src/geckomat/get_enzyme_data/loadDatabases.m +++ b/src/geckomat/get_enzyme_data/loadDatabases.m @@ -62,9 +62,10 @@ '%2Cec%2Cmass%2Csequence&format=tsv&compressed=false&sort=protein_name%20asc']; try urlwrite(url,uniprotPath,'Timeout',30); - fprintf('Model-specific KEGG database stored at %s\n',uniprotPath); + fprintf('Model-specific UniProt database stored at %s\n',uniprotPath); catch - error(['Download failed, check your internet connection and try again, or manually download: ' url]) + error(['Download failed, check your internet connection and try again, or manually download: ' url ... + ' After downloading, store the file as ' uniprotPath]) end end if exist(uniprotPath,'file') diff --git a/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.m b/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.m index d20a0692e..169ae74a6 100644 --- a/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.m +++ b/src/geckomat/kcat_sensitivity_analysis/sensitivityTuning.m @@ -66,11 +66,11 @@ iteration = 1; while true [res,hs] = solveLP(m,0,[],hs); %skip parsimonius, only takes time - if (lastGrowth == -res.f) + if (lastGrowth == res.f) printOrange('WARNING: No growth increase from increased kcats - check if the constraints on the uptake reactions are too tight.\n') break; end - lastGrowth = -res.f; + lastGrowth = res.f; if verbose; disp(['Iteration ' num2str(iteration) ': Growth: ' num2str(lastGrowth)]); end if (lastGrowth >= desiredGrowthRate) break; @@ -108,11 +108,11 @@ iteration = 1; while true res = solveLP(m,0); %skip parsimonius, only takes time - if (lastGrowth == -res.f) + if (lastGrowth == res.f) printOrange('No growth increase from increased kcats - check if the constraints on the uptake reactions are too tight.\n') break; end - lastGrowth = -res.f; + lastGrowth = res.f; if (lastGrowth >= desiredGrowthRate) break; end diff --git a/src/geckomat/limit_proteins/calculateFfactor.m b/src/geckomat/limit_proteins/calculateFfactor.m index fdc953cdd..790e9b970 100644 --- a/src/geckomat/limit_proteins/calculateFfactor.m +++ b/src/geckomat/limit_proteins/calculateFfactor.m @@ -46,7 +46,7 @@ if ischar(protData) && endsWith(protData,'paxDB.tsv') fID = fopen(fullfile(protData),'r'); fileContent = textscan(fID,'%s','delimiter','\n'); - headerLines = sum(startsWith(fileContent{1},'#')); + headerLines = find(startsWith(fileContent{1},'#'),1,'last'); fclose(fID); %Read data file, excluding headerlines diff --git a/src/geckomat/limit_proteins/flexibilizeEnzConcs.m b/src/geckomat/limit_proteins/flexibilizeEnzConcs.m index 26f41d477..e4a10e371 100644 --- a/src/geckomat/limit_proteins/flexibilizeEnzConcs.m +++ b/src/geckomat/limit_proteins/flexibilizeEnzConcs.m @@ -95,7 +95,7 @@ % model = constrainEnzConcs(model); [sol,hs] = solveLP(model); -predGrowth = abs(sol.f); +predGrowth = sol.f; % Get those proteins with a concentration defined protConcs = find(~isnan(model.ec.concs)); @@ -136,7 +136,7 @@ % Get the new growth rate sol = solveLP(model,0,[],hs); - predGrowth = abs(sol.f); + predGrowth = sol.f; if verbose disp(['Protein ' proteins{maxIdx} ' LB adjusted. Grow: ' num2str(predGrowth)]) @@ -153,13 +153,13 @@ tempModel = setParam(model,'lb',protPoolIdx,-1000); tempModel = setParam(tempModel,'ub',bioRxnIdx,expGrowth); sol = solveLP(tempModel); - if (abs(sol.f)-predGrowth)>1e-10 % There is improvement in growth rate + if (sol.f-predGrowth)>1e-10 % There is improvement in growth rate % Find new protein pool constraint - predGrowth = abs(sol.f); + predGrowth = sol.f; tempModel = setParam(tempModel,'lb',bioRxnIdx,predGrowth); tempModel = setParam(tempModel,'obj',protPoolIdx,1); sol = solveLP(tempModel); - newProtPool = abs(sol.f); + newProtPool = sol.f; model.lb(protPoolIdx) = -newProtPool; fprintf(['Changing the lower bound of protein pool exchange from %s ',... 'to %s enabled a\ngrowth rate of %s. It can be helpful to set ', ... diff --git a/src/geckomat/limit_proteins/getConcControlCoeffs.m b/src/geckomat/limit_proteins/getConcControlCoeffs.m index 1b5c91fa8..9f7ebf833 100644 --- a/src/geckomat/limit_proteins/getConcControlCoeffs.m +++ b/src/geckomat/limit_proteins/getConcControlCoeffs.m @@ -35,7 +35,7 @@ controlCoeffs = zeros(length(proteins),1); [sol,hs] = solveLP(model); -initialGrowth = abs(sol.f); +initialGrowth = sol.f; % Get enzyme index [~, protIdx] = ismember(proteins, model.ec.enzymes); @@ -63,7 +63,7 @@ % Get the new growth rate after the adjustment [tempSol,hs] = solveLP(tempModel,0,[],hs); - tempGrowth = abs(tempSol.f); + tempGrowth = tempSol.f; % Calculate the coeff only if new growth rate is significantly % higher than initial value diff --git a/test/manual_tests/TestYeastGEMSimpleWorkflow.m b/test/manual_tests/TestYeastGEMSimpleWorkflow.m index 177983dc5..3e3bfc461 100644 --- a/test/manual_tests/TestYeastGEMSimpleWorkflow.m +++ b/test/manual_tests/TestYeastGEMSimpleWorkflow.m @@ -35,10 +35,10 @@ fullECModelMerged.lb(fullECModelMerged.lb == -10) = -1000; fullECModelMerged.ub(fullECModelMerged.ub == 1) = 1000; sol = solveLP(fullECModelMerged, 1); -growthBef = -sol.f; +growthBef = sol.f; fullECModelTuned = sensitivityTuning(fullECModelMerged, 0.4, yeastAdapter); sol = solveLP(fullECModelTuned, 1); -growthAfter = -sol.f; +growthAfter = sol.f; disp(['Growth before: ' num2str(growthBef) ', Growth after: ' num2str(growthAfter)]) %Light model @@ -70,9 +70,9 @@ lightECModelMerged.lb(lightECModelMerged.lb == -10) = -1000; lightECModelMerged.ub(lightECModelMerged.ub == 1) = 1000; sol = solveLP(lightECModelMerged, 1); -growthBef = -sol.f; +growthBef = sol.f; lightECModelTuned = sensitivityTuning(lightECModelMerged, 0.4, yeastAdapter); sol = solveLP(lightECModelTuned, 1); -growthAfter = -sol.f; +growthAfter = sol.f; disp(['Growth before: ' num2str(growthBef) ', Growth after: ' num2str(growthAfter)]) diff --git a/tutorials/full_ecModel/code/plotlightVSfull.m b/tutorials/full_ecModel/code/plotlightVSfull.m index e9aa90ccb..b230f651d 100644 --- a/tutorials/full_ecModel/code/plotlightVSfull.m +++ b/tutorials/full_ecModel/code/plotlightVSfull.m @@ -55,7 +55,7 @@ fprintf('Mapping fluxes: %.0f%% (%.3f vs %.3f seconds)\n', (lightTime/fullTime)*100, lightTime, fullTime); % -fprintf('Growth rate that is reached: %.4f vs %.4f\n', abs(solFull.f) , abs(solLight.f)) +fprintf('Growth rate that is reached: %.4f vs %.4f\n', solFull.f , solLight.f) % solF(abs(solF)<1e-8) = 0; @@ -70,7 +70,7 @@ set(gca,'FontSize',11) text(1e-7,3,'Growth rate(/hour)','FontSize',11) text(1e-7,0.5,'full:','FontSize',11) -text(1.e-6,0.5,num2str(abs(solFull.f)),'FontSize',11) +text(1.e-6,0.5,num2str(solFull.f),'FontSize',11) text(1e-7,0.1,'light:','FontSize',11) -text(1.e-6,0.1,num2str(abs(solLight.f)),'FontSize',11) +text(1.e-6,0.1,num2str(solLight.f),'FontSize',11) saveas(gca, fullfile(findGECKOroot,'tutorials','full_ecModel','output','lightVSfull.pdf')) diff --git a/tutorials/full_ecModel/protocol.m b/tutorials/full_ecModel/protocol.m index 19805f4ad..ae85482dc 100644 --- a/tutorials/full_ecModel/protocol.m +++ b/tutorials/full_ecModel/protocol.m @@ -24,7 +24,7 @@ % - Simplest, RAVEN can be installed as MATLAB Add-On: % https://se.mathworks.com/help/matlab/matlab_env/get-add-ons.html % - The installation of Gurobi as LP solver is highly recommended -checkInstallation; % Confirm that RAVEN is functional, should be 2.8.3 or later. +checkInstallation; % Confirm that RAVEN is functional, should be 2.9.1 or later. % - Install GECKO by following the installation instructions: % https://github.com/SysBioChalmers/GECKO/wiki/Installation-and-upgrade @@ -343,7 +343,7 @@ ecModel = constrainFluxData(ecModel,fluxData,1,'max','loose'); % Observe if the intended growth rate was reached. sol = solveLP(ecModel); -fprintf('Growth rate that is reached: %f /hour.\n', abs(sol.f)) +fprintf('Growth rate that is reached: %f /hour.\n', sol.f) % The growth rate of 0.1 is far from being reached. Therefore, the next % step is to flexibilize enzyme concentrations. @@ -358,7 +358,7 @@ %model = loadConventionalGEM(); model = constrainFluxData(model,fluxData); sol = solveLP(model) -fprintf('Growth rate that is reached: %f /hour.\n', abs(sol.f)) +fprintf('Growth rate that is reached: %f /hour.\n', sol.f) % The starting model reaches a similar growth rate as the ecModel after % flexibilizing enzyme concentrations. So the metabolic network would not @@ -423,16 +423,16 @@ % STEP 69-70 Selecting objective functions ecModel = setParam(ecModel,'obj',params.bioRxn,1); sol = solveLP(ecModel) -fprintf('Growth rate that is reached: %f /hour.\n', abs(sol.f)) +fprintf('Growth rate that is reached: %f /hour.\n', sol.f) % Set growth lower bound to 99% of the previous value. -ecModel = setParam(ecModel,'lb',params.bioRxn,0.99*abs(sol.f)); +ecModel = setParam(ecModel,'lb',params.bioRxn,0.99*sol.f); % Minimize protein pool usage. As protein pool exchange is defined in the % reverse direction (with negative flux), minimization of protein pool % usage is computationally represented by maximizing the prot_pool_exchange % reaction. ecModel = setParam(ecModel,'obj','prot_pool_exchange',1); sol = solveLP(ecModel) -fprintf('Minimum protein pool usage: %.2f mg/gDCW.\n', abs(sol.f)) +fprintf('Minimum protein pool usage: %.2f mg/gDCW.\n', sol.f) % STEP 71 Inspect enzyme usage % Show the result from the earlier simulation, without mapping to diff --git a/tutorials/light_ecModel/protocol.m b/tutorials/light_ecModel/protocol.m index bccc053bc..7a2ad9185 100644 --- a/tutorials/light_ecModel/protocol.m +++ b/tutorials/light_ecModel/protocol.m @@ -133,13 +133,13 @@ ecModel = loadEcModel(); ecModel = setParam(ecModel,'obj',adapter.params.bioRxn,1); sol = solveLP(ecModel) -fprintf('Growth rate reached: %g /hour.\n', abs(sol.f)) +fprintf('Growth rate reached: %g /hour.\n', sol.f) % Set growth lower bound to 99% of the previous value. -ecModel = setParam(ecModel,'lb',adapter.params.bioRxn,0.99*abs(sol.f)); +ecModel = setParam(ecModel,'lb',adapter.params.bioRxn,0.99*sol.f); % Minimize protein pool usage. ecModel = setParam(ecModel,'obj','prot_pool_exchange',1); sol = solveLP(ecModel) -fprintf('Minimum protein pool usage: %g mg/gDCW.\n', abs(sol.f)) +fprintf('Minimum protein pool usage: %g mg/gDCW.\n', sol.f) % STEP 71 % Individual enzyme usages cannot be investigated in light ecModels, as @@ -169,8 +169,8 @@ % it is clear that both the enzyme constraints and contextualization have % had its impact as the maximum growth rate is affected: sol = solveLP(HT29); -fprintf('Growth rate in HT29-GEM: %.3f /hour.\n', abs(sol.f)) +fprintf('Growth rate in HT29-GEM: %.3f /hour.\n', sol.f) sol = solveLP(ecModel); -fprintf('Growth rate in ecHuman-GEM: %.3f /hour.\n', abs(sol.f)) +fprintf('Growth rate in ecHuman-GEM: %.3f /hour.\n', sol.f) sol = solveLP(ecHT29); -fprintf('Growth rate in ecHT29-GEM: %.3f /hour.\n', abs(sol.f)) +fprintf('Growth rate in ecHT29-GEM: %.3f /hour.\n', sol.f) From cb5474794462b6adcd8cc3ab612c7ccaf5af5cf7 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Mon, 27 May 2024 11:39:13 +0200 Subject: [PATCH 8/9] feat: prot_usage always draws from prot_pool (#379) * fix: calculateFfactor can take protData input * feat: constrainEnzConcs keep prot pool draw addresses #375 * fix: updateProtPool obsolete * doc: update full_ecModel for prot_usage rxns * fix: enzymeUsage correct mention of output units solves #376 * doc: updateGECKOdoc * doc: update README.md with protocol change * fix: README.md link --- README.md | 3 + .../limit_proteins/calculateFfactor.html | 31 ++-- .../limit_proteins/constrainEnzConcs.html | 2 +- .../limit_proteins/updateProtPool.html | 146 +++++++++++------- doc/src/geckomat/utilities/enzymeUsage.html | 4 +- .../limit_proteins/calculateFfactor.m | 17 +- .../limit_proteins/constrainEnzConcs.m | 2 +- src/geckomat/limit_proteins/updateProtPool.m | 30 +++- src/geckomat/utilities/enzymeUsage.m | 2 +- tutorials/full_ecModel/protocol.m | 38 +++-- 10 files changed, 174 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index 77aab20e5..d7c451e7f 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,9 @@ The **GECKO** toolbox enhances a **G**enome-scale model to account for **E**nzym 💡 In the [`GECKO/tutorials`](https://github.com/SysBioChalmers/GECKO/tree/main/tutorials) folder there are examples of how GECKO can be applied to GEMs, in either of its _full_ or _light_ forms. Each `protocol.m` contains instructions on how to reconstruct and analyze an ecModel, demonstrating how different fuctions in GECKO can be used. These two scripts complement the [protocols paper](#citation). +### Significant changes since protocol publication +- GECKO **3.2.0**: all protein usage reactions draw from the protein pool, even if they are constrained by proteomics data. This affects **Step 58** in the protocol, changing behaviour of `constrainEnzConcs` and making `updateProtPool` obsolete, `tutorials/full_ecModel/protocol.m` is updated to reflect this change. See [#357](https://github.com/SysBioChalmers/GECKO/issues/375) for more details. + _**Note:** Regarding code and model compatibility with earlier GECKO versions, see [Previous versions: GECKO 1 and 2](https://github.com/SysBioChalmers/GECKO/wiki/Previous-versions:-GECKO-1-and-2)_. ## Cite us diff --git a/doc/src/geckomat/limit_proteins/calculateFfactor.html b/doc/src/geckomat/limit_proteins/calculateFfactor.html index 0a0ad575d..855823fed 100644 --- a/doc/src/geckomat/limit_proteins/calculateFfactor.html +++ b/doc/src/geckomat/limit_proteins/calculateFfactor.html @@ -121,23 +121,22 @@

SOURCE CODE ^end -0067 -0068 % Get MW and abundance (unit does not matter, f is fraction) -0069 [~,idx] = ismember(protData.uniprot,uniprotDB.ID); -0070 protData.MW = uniprotDB.MW(idx); -0071 protData.abundance = protData.level .* protData.MW; -0072 -0073 totalProt = sum(protData.abundance); -0074 -0075 % Get enzymes in model -0076 enzymesInModel = ismember(protData.uniprot,enzymes); -0077 totalEnz = sum(protData.abundance(enzymesInModel)); -0078 -0079 f = totalEnz/totalProt; -0080 end

+0066 % Get MW and abundance (unit does not matter, f is fraction) +0067 [~,idx] = ismember(protData.uniprotIDs,uniprotDB.ID); +0068 protData.MW = uniprotDB.MW(idx); +0069 protData.abundances = protData.level .* protData.MW; +0070 end +0071 +0072 totalProt = sum(protData.abundances); +0073 +0074 % Get enzymes in model +0075 enzymesInModel = ismember(protData.uniprotIDs,enzymes); +0076 totalEnz = sum(protData.abundances(enzymesInModel)); +0077 +0078 f = totalEnz/totalProt; +0079 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/limit_proteins/constrainEnzConcs.html b/doc/src/geckomat/limit_proteins/constrainEnzConcs.html index 1ee4e2b56..9c3b7d133 100644 --- a/doc/src/geckomat/limit_proteins/constrainEnzConcs.html +++ b/doc/src/geckomat/limit_proteins/constrainEnzConcs.html @@ -103,7 +103,7 @@

SOURCE CODE ^%If non-NaN in model.ec.concs, then constrain by UB 0045 if any(protCons) -0046 model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; +0046 % model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; % Since GECKO 3.2.0 0047 model.lb(usageRxnsIdx(protCons)) = -model.ec.concs(protCons); 0048 end 0049 end diff --git a/doc/src/geckomat/limit_proteins/updateProtPool.html b/doc/src/geckomat/limit_proteins/updateProtPool.html index ce88a1a0b..e757f1f7c 100644 --- a/doc/src/geckomat/limit_proteins/updateProtPool.html +++ b/doc/src/geckomat/limit_proteins/updateProtPool.html @@ -28,8 +28,13 @@

SYNOPSIS ^DESCRIPTION ^

 updateProtPool
-   Updates the protein pool to compensate for measured proteomics data (in
-   model.ec.concs).
+   Obsolete since GECKO 3.2.0, as all (measured and unmeasured) enzymes
+   are drawn from the protein pool. Instead, use setProtPoolSize. See
+   https://github.com/SysBioChalmers/GECKO/issues/375 for explanation.
+
+   Before GECKO 3.2.0: updates the protein pool to compensate for measured
+   proteomics data (in model.ec.concs), as only the unmeasured enzymes
+   draw from the protein pool.
 
  Input:
    ecModel         an ecModel in GECKO 3 format (with ecModel.ec structure)
@@ -39,6 +44,7 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^
 <h2><a name=SOURCE CODE ^

0001 function ecModel  = updateProtPool(ecModel, Ptot, modelAdapter)
 0002 % updateProtPool
-0003 %   Updates the protein pool to compensate for measured proteomics data (in
-0004 %   model.ec.concs).
-0005 %
-0006 % Input:
-0007 %   ecModel         an ecModel in GECKO 3 format (with ecModel.ec structure)
-0008 %   Ptot            total protein content in g/gDCW, overwrites the value
-0009 %                   from modelAdapter. For instance, condition-specific
-0010 %                   fluxData.Ptot from loadFluxData can be used. If nothing
-0011 %                   is provided, the modelAdapter value is used.
-0012 %   modelAdapter    a loaded model adapter (Optional, will otherwise use the
-0013 %                   default model adapter).
-0014 % Output:
-0015 %   model           an ecModel where model.ec.concs is populated with
-0016 %                   protein concentrations
-0017 % Usage:
-0018 %   ecModel  = updateProtPool(ecModel, Ptot, modelAdapter)
-0019 
-0020 if nargin < 3 || isempty(modelAdapter)
-0021     modelAdapter = ModelAdapterManager.getDefault();
-0022     if isempty(modelAdapter)
-0023         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
-0024     end
-0025 end
-0026 params = modelAdapter.params;
-0027 
-0028 if nargin < 2 || isempty(Ptot)
-0029     Ptot = params.Ptot;
-0030     disp(['Total protein content used: ' num2str(Ptot) ' [g protein/gDw]'])
-0031 end
-0032 
-0033 % Convert Ptot to mg/gDW if provided in g/gDCW (which is default)
-0034 if Ptot < 1
-0035     Ptot = Ptot * 1000;
-0036 end
-0037 
-0038 Pmeas = sum(ecModel.ec.concs,'omitnan');
-0039 if Pmeas == 0
-0040     error('The model has not yet been constrained with proteomics, as ecModel.ec.concs is empty.')
-0041 end
-0042 
-0043 Pnew = (Ptot - Pmeas) * params.f;
-0044 
-0045 if Pnew > 0
-0046     PoolRxnIdx = strcmp(ecModel.rxns,'prot_pool_exchange');
-0047     ecModel.lb(PoolRxnIdx) = -Pnew*params.sigma;
-0048     sol = solveLP(ecModel);
-0049     if isempty(sol.x)
-0050         error(['Estimating the remaining protein pool by subtracting the ' ...
-0051                'sum of measured enzyme concentrations (in ecModel.ec.concs) ' ...
-0052                'from the total protein pool (Ptot) does not yield a functional ' ...
-0053                'model.'])
-0054     end
-0055 else
-0056     error('The total measured protein mass exceeds the total protein content.')
-0057 end
-0058 end
+0003 % Obsolete since GECKO 3.2.0, as all (measured and unmeasured) enzymes +0004 % are drawn from the protein pool. Instead, use setProtPoolSize. See +0005 % https://github.com/SysBioChalmers/GECKO/issues/375 for explanation. +0006 % +0007 % Before GECKO 3.2.0: updates the protein pool to compensate for measured +0008 % proteomics data (in model.ec.concs), as only the unmeasured enzymes +0009 % draw from the protein pool. +0010 % +0011 % Input: +0012 % ecModel an ecModel in GECKO 3 format (with ecModel.ec structure) +0013 % Ptot total protein content in g/gDCW, overwrites the value +0014 % from modelAdapter. For instance, condition-specific +0015 % fluxData.Ptot from loadFluxData can be used. If nothing +0016 % is provided, the modelAdapter value is used. +0017 % modelAdapter a loaded model adapter (Optional, will otherwise use the +0018 % default model adapter). +0019 % +0020 % Output: +0021 % model an ecModel where model.ec.concs is populated with +0022 % protein concentrations +0023 % Usage: +0024 % ecModel = updateProtPool(ecModel, Ptot, modelAdapter) +0025 +0026 % Do not run from GECKO version 3.2.0 onwards. This can be recognized by +0027 % prot_usage reactions that are constrained by proteomics and still draw +0028 % from the protein pool +0029 protRxns = find(startsWith(ecModel.rxns,'usage_prot_')); +0030 poolMetIdx = find(strcmp(ecModel.mets,'prot_pool')); +0031 % Selected proteins with proteomics constraints +0032 constProtRxns = ~(ecModel.lb(protRxns)==-1000); +0033 % Are any still drawing from prot_pool? This is introduced in GECKO 3.2.0. +0034 if any(full(ecModel.S(poolMetIdx,protRxns(constProtRxns)))) +0035 error(['In the provided ecModel, all protein usage reactions, both with ' ... +0036 'and without concentration constraints, draw from the protein pool. ' ... +0037 'This was introduced with GECKO 3.2.0. Since then, updateProtPool ' ... +0038 'has become obsolete, use setProtPoolSize instead to constrain the ' ... +0039 'total protein pool with condition-specific total protein content. See '... +0040 '<a href="https://github.com/SysBioChalmers/GECKO/issues/375">here</a> ' ... +0041 'for more explanation.']) +0042 end +0043 +0044 if nargin < 3 || isempty(modelAdapter) +0045 modelAdapter = ModelAdapterManager.getDefault(); +0046 if isempty(modelAdapter) +0047 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') +0048 end +0049 end +0050 params = modelAdapter.params; +0051 +0052 if nargin < 2 || isempty(Ptot) +0053 Ptot = params.Ptot; +0054 disp(['Total protein content used: ' num2str(Ptot) ' [g protein/gDw]']) +0055 end +0056 +0057 % Convert Ptot to mg/gDW if provided in g/gDCW (which is default) +0058 if Ptot < 1 +0059 Ptot = Ptot * 1000; +0060 end +0061 +0062 Pmeas = sum(ecModel.ec.concs,'omitnan'); +0063 if Pmeas == 0 +0064 error('The model has not yet been populated with proteomics, as ecModel.ec.concs is empty.') +0065 end +0066 +0067 Pnew = (Ptot - Pmeas) * params.f; +0068 +0069 if Pnew > 0 +0070 PoolRxnIdx = strcmp(ecModel.rxns,'prot_pool_exchange'); +0071 ecModel.lb(PoolRxnIdx) = -Pnew*params.sigma; +0072 sol = solveLP(ecModel); +0073 if isempty(sol.x) +0074 error(['Estimating the remaining protein pool by subtracting the ' ... +0075 'sum of measured enzyme concentrations (in ecModel.ec.concs) ' ... +0076 'from the total protein pool (Ptot) does not yield a functional ' ... +0077 'model.']) +0078 end +0079 else +0080 error('The total measured protein mass exceeds the total protein content.') +0081 end +0082 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/enzymeUsage.html b/doc/src/geckomat/utilities/enzymeUsage.html index 763f93d6a..012d491c0 100644 --- a/doc/src/geckomat/utilities/enzymeUsage.html +++ b/doc/src/geckomat/utilities/enzymeUsage.html @@ -30,7 +30,7 @@

DESCRIPTION ^
 enzymeUsage
    Gives enzyme usages based on a provided flux distribution, as obtained
    from a full GECKO model. It can give:
-   1)  absolute usage: the specific enzyme usage in ug/gDCW/h, which can
+   1)  absolute usage: the specific enzyme usage in mg/gDCW, which can
        be given for enzymes with- and without concentration information;
    2)  capacity usage: the ratio of available enzyme that is used, calcuted
        by (absUsage/UB) (note that capacity usage is 0 if an enzyme
@@ -76,7 +76,7 @@ 

SOURCE CODE ^% enzymeUsage 0003 % Gives enzyme usages based on a provided flux distribution, as obtained 0004 % from a full GECKO model. It can give: -0005 % 1) absolute usage: the specific enzyme usage in ug/gDCW/h, which can +0005 % 1) absolute usage: the specific enzyme usage in mg/gDCW, which can 0006 % be given for enzymes with- and without concentration information; 0007 % 2) capacity usage: the ratio of available enzyme that is used, calcuted 0008 % by (absUsage/UB) (note that capacity usage is 0 if an enzyme diff --git a/src/geckomat/limit_proteins/calculateFfactor.m b/src/geckomat/limit_proteins/calculateFfactor.m index 790e9b970..81cc87b6e 100644 --- a/src/geckomat/limit_proteins/calculateFfactor.m +++ b/src/geckomat/limit_proteins/calculateFfactor.m @@ -61,20 +61,19 @@ uniprot = uniprotDB.ID(b(a)); level(~a) = []; clear protData - protData.uniprot = uniprot; + protData.uniprotIDs = uniprot; protData.level = level; + % Get MW and abundance (unit does not matter, f is fraction) + [~,idx] = ismember(protData.uniprotIDs,uniprotDB.ID); + protData.MW = uniprotDB.MW(idx); + protData.abundances = protData.level .* protData.MW; end -% Get MW and abundance (unit does not matter, f is fraction) -[~,idx] = ismember(protData.uniprot,uniprotDB.ID); -protData.MW = uniprotDB.MW(idx); -protData.abundance = protData.level .* protData.MW; - -totalProt = sum(protData.abundance); +totalProt = sum(protData.abundances); % Get enzymes in model -enzymesInModel = ismember(protData.uniprot,enzymes); -totalEnz = sum(protData.abundance(enzymesInModel)); +enzymesInModel = ismember(protData.uniprotIDs,enzymes); +totalEnz = sum(protData.abundances(enzymesInModel)); f = totalEnz/totalProt; end diff --git a/src/geckomat/limit_proteins/constrainEnzConcs.m b/src/geckomat/limit_proteins/constrainEnzConcs.m index 736f30c28..66094b53b 100644 --- a/src/geckomat/limit_proteins/constrainEnzConcs.m +++ b/src/geckomat/limit_proteins/constrainEnzConcs.m @@ -43,7 +43,7 @@ %If non-NaN in model.ec.concs, then constrain by UB if any(protCons) - model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; +% model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; % Since GECKO 3.2.0 model.lb(usageRxnsIdx(protCons)) = -model.ec.concs(protCons); end end diff --git a/src/geckomat/limit_proteins/updateProtPool.m b/src/geckomat/limit_proteins/updateProtPool.m index 3cc4f54c3..dcbc20f21 100644 --- a/src/geckomat/limit_proteins/updateProtPool.m +++ b/src/geckomat/limit_proteins/updateProtPool.m @@ -1,7 +1,12 @@ function ecModel = updateProtPool(ecModel, Ptot, modelAdapter) % updateProtPool -% Updates the protein pool to compensate for measured proteomics data (in -% model.ec.concs). +% Obsolete since GECKO 3.2.0, as all (measured and unmeasured) enzymes +% are drawn from the protein pool. Instead, use setProtPoolSize. See +% https://github.com/SysBioChalmers/GECKO/issues/375 for explanation. +% +% Before GECKO 3.2.0: updates the protein pool to compensate for measured +% proteomics data (in model.ec.concs), as only the unmeasured enzymes +% draw from the protein pool. % % Input: % ecModel an ecModel in GECKO 3 format (with ecModel.ec structure) @@ -11,12 +16,31 @@ % is provided, the modelAdapter value is used. % modelAdapter a loaded model adapter (Optional, will otherwise use the % default model adapter). +% % Output: % model an ecModel where model.ec.concs is populated with % protein concentrations % Usage: % ecModel = updateProtPool(ecModel, Ptot, modelAdapter) +% Do not run from GECKO version 3.2.0 onwards. This can be recognized by +% prot_usage reactions that are constrained by proteomics and still draw +% from the protein pool +protRxns = find(startsWith(ecModel.rxns,'usage_prot_')); +poolMetIdx = find(strcmp(ecModel.mets,'prot_pool')); +% Selected proteins with proteomics constraints +constProtRxns = ~(ecModel.lb(protRxns)==-1000); +% Are any still drawing from prot_pool? This is introduced in GECKO 3.2.0. +if any(full(ecModel.S(poolMetIdx,protRxns(constProtRxns)))) + error(['In the provided ecModel, all protein usage reactions, both with ' ... + 'and without concentration constraints, draw from the protein pool. ' ... + 'This was introduced with GECKO 3.2.0. Since then, updateProtPool ' ... + 'has become obsolete, use setProtPoolSize instead to constrain the ' ... + 'total protein pool with condition-specific total protein content. See '... + 'here ' ... + 'for more explanation.']) +end + if nargin < 3 || isempty(modelAdapter) modelAdapter = ModelAdapterManager.getDefault(); if isempty(modelAdapter) @@ -37,7 +61,7 @@ Pmeas = sum(ecModel.ec.concs,'omitnan'); if Pmeas == 0 - error('The model has not yet been constrained with proteomics, as ecModel.ec.concs is empty.') + error('The model has not yet been populated with proteomics, as ecModel.ec.concs is empty.') end Pnew = (Ptot - Pmeas) * params.f; diff --git a/src/geckomat/utilities/enzymeUsage.m b/src/geckomat/utilities/enzymeUsage.m index 8f622ccd5..f9a0faca8 100644 --- a/src/geckomat/utilities/enzymeUsage.m +++ b/src/geckomat/utilities/enzymeUsage.m @@ -2,7 +2,7 @@ % enzymeUsage % Gives enzyme usages based on a provided flux distribution, as obtained % from a full GECKO model. It can give: -% 1) absolute usage: the specific enzyme usage in ug/gDCW/h, which can +% 1) absolute usage: the specific enzyme usage in mg/gDCW, which can % be given for enzymes with- and without concentration information; % 2) capacity usage: the ratio of available enzyme that is used, calcuted % by (absUsage/UB) (note that capacity usage is 0 if an enzyme diff --git a/tutorials/full_ecModel/protocol.m b/tutorials/full_ecModel/protocol.m index ae85482dc..8260f7e81 100644 --- a/tutorials/full_ecModel/protocol.m +++ b/tutorials/full_ecModel/protocol.m @@ -250,7 +250,7 @@ % STEP 39-42 Relax protein pool constraint % As a simplistic way to ensure the model to reach the growth rate, the % upper bound of the protein pool exchange reaction can be increased to -% whatever is required. This works, but STEP 17 is preferred. +% whatever is required. This works, but STEP 43 is preferred. ecModel = setParam(ecModel, 'lb', 'r_4041', 0.41); ecModel = setParam(ecModel, 'lb', 'prot_pool_exchange', -1000); ecModel = setParam(ecModel, 'obj', 'prot_pool_exchange', 1); @@ -266,7 +266,7 @@ % STEP 43-44 Sensitivity tuning % First reset the protein pool constraint to a more realistic value, -% reverting STEP 16. +% reverting STEP 42. ecModel = setProtPoolSize(ecModel); [ecModel, tunedKcats] = sensitivityTuning(ecModel); @@ -325,17 +325,35 @@ ecModel = constrainEnzConcs(ecModel); % STEP 58 Update protein pool -% The protein pool reaction will be constraint by the remaining, unmeasured -% enzyme content. This is calculated by subtracting the sum of -% ecModel.ec.concs from the condition-specific total protein content. The -% latter is stored together with the flux data that otherwise will be used -% in Step 21. +% +% ==> Since GECKO 3.2.0: +% All protein usage reactions draw from the protein pool, both for +% proteins with and without concentration constraints. As a +% consequence, updateProtPool has become obsolete. To set the protein +% pool exchange with condition-specific total protein content, use +% setProtPoolSize instead. For more explanation, see +% https://github.com/SysBioChalmers/GECKO/issues/375 +% +% See STEP 32 for considerations about the f-factor. Here, we can +% recalculate the f-factor based on the proteomics dataset. + +f = calculateFfactor(ecModel,protData); fluxData = loadFluxData(); -ecModel = updateProtPool(ecModel,fluxData.Ptot(1)); +ecModel = setProtPoolSize(ecModel,fluxData.Ptot(1),f); + +% ==> Before GECKO 3.2.0: +% The legacy code is still shown here, but should not be run. + % The protein pool reaction will be constraint by the remaining, unmeasured + % enzyme content. This is calculated by subtracting the sum of + % ecModel.ec.concs from the condition-specific total protein content. The + % latter is stored together with the flux data that otherwise will be used + % in STEP 59. + % fluxData = loadFluxData(); + % ecModel = updateProtPool(ecModel,fluxData.Ptot(1)); % STEP 59-63 Load flux data % Matching the proteomics sample(s), condition-specific flux data needs to -% be loaded to constrain the model. This was already loaded in Step 20 for +% be loaded to constrain the model. This was already loaded in STEP 58 for % gathering Ptot, but is repeated here nonetheless. Flux data is read from % /data/fluxData.tsv. fluxData = loadFluxData(); @@ -411,7 +429,7 @@ % It is obvious that no total protein constraint is reached, and Crabtree % effect is not observed. -% Perform the Crabtree simulation on the pre-Step 33 ecModel (where kcat +% Perform the Crabtree simulation on the pre-STEP 33 ecModel (where kcat % sensitivity tuning has not yet been applied). ecModel_preTuning = loadEcModel('ecYeastGEM_stage2.yml'); ecModel_preTuning = setParam(ecModel_preTuning,'lb','r_1714',-1000); From 10cfd74f114c75c039c93b3c6f5c1d9b5c1645ee Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Mon, 27 May 2024 11:57:21 +0200 Subject: [PATCH 9/9] revert #372 (#380) * Revert "Merge pull request #372 from HossFir/update-reportEnzymeUsage" This reverts commit cc8e449c8b7488ea3eeaa820831b1d41fd8f27bc, reversing changes made to 883b40db1d8cfbc76f10de076445ea6a3d9b2d7a. * fix: reportEnzymeUsage gives total protein usage --- doc/src/geckomat/utilities/enzymeUsage.html | 53 +++++++++---------- .../geckomat/utilities/reportEnzymeUsage.html | 50 +++++++---------- src/geckomat/utilities/enzymeUsage.m | 2 - src/geckomat/utilities/reportEnzymeUsage.m | 18 +------ 4 files changed, 45 insertions(+), 78 deletions(-) diff --git a/doc/src/geckomat/utilities/enzymeUsage.html b/doc/src/geckomat/utilities/enzymeUsage.html index 012d491c0..030ecaa31 100644 --- a/doc/src/geckomat/utilities/enzymeUsage.html +++ b/doc/src/geckomat/utilities/enzymeUsage.html @@ -54,7 +54,6 @@

DESCRIPTION ^SOURCE CODE ^% absUsage vector of absolute enzyme usages 0027 % UB vector of enzyme exchange reaction upper bounds 0028 % protID string array of matching protein IDs -0029 % fluxes vector of fluxes, copy of input fluxes -0030 % -0031 % Usage: -0032 % usageData = enzymeUsage(ecModel,fluxes,zero) -0033 -0034 if nargin<3 -0035 zero=true; -0036 end -0037 if ecModel.ec.geckoLight -0038 error('This function does not work on GECKO light models.') -0039 end -0040 usageData.protID = ecModel.ec.enzymes; -0041 [~,rxnIdx] = ismember(strcat('usage_prot_',ecModel.ec.enzymes),ecModel.rxns); -0042 -0043 usageData.LB = ecModel.lb(rxnIdx); -0044 usageData.absUsage = abs(fluxes(rxnIdx)); -0045 usageData.capUsage = abs(usageData.absUsage./usageData.LB); -0046 usageData.fluxes = fluxes; -0047 -0048 if ~zero -0049 nonzero = usageData.absUsage<0; -0050 usageData.absUsage = usageData.absUsage(nonzero); -0051 usageData.capUsage = usageData.capUsage(nonzero); -0052 usageData.LB = usageData.LB(nonzero); -0053 usageData.protID = usageData.protID(nonzero); -0054 end -0055 end

+0029 % +0030 % Usage: +0031 % usageData = enzymeUsage(ecModel,fluxes,zero) +0032 +0033 if nargin<3 +0034 zero=true; +0035 end +0036 if ecModel.ec.geckoLight +0037 error('This function does not work on GECKO light models.') +0038 end +0039 usageData.protID = ecModel.ec.enzymes; +0040 [~,rxnIdx] = ismember(strcat('usage_prot_',ecModel.ec.enzymes),ecModel.rxns); +0041 +0042 usageData.LB = ecModel.lb(rxnIdx); +0043 usageData.absUsage = abs(fluxes(rxnIdx)); +0044 usageData.capUsage = abs(usageData.absUsage./usageData.LB); +0045 +0046 if ~zero +0047 nonzero = usageData.absUsage<0; +0048 usageData.absUsage = usageData.absUsage(nonzero); +0049 usageData.capUsage = usageData.capUsage(nonzero); +0050 usageData.LB = usageData.LB(nonzero); +0051 usageData.protID = usageData.protID(nonzero); +0052 end +0053 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/reportEnzymeUsage.html b/doc/src/geckomat/utilities/reportEnzymeUsage.html index 798d2cc07..f1a33b97a 100644 --- a/doc/src/geckomat/utilities/reportEnzymeUsage.html +++ b/doc/src/geckomat/utilities/reportEnzymeUsage.html @@ -123,38 +123,24 @@

SOURCE CODE ^% Calculate the protein pool flux from the 'prot_pool_exchange' reaction -0065 protPoolExchangeFlux = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); -0066 -0067 fluxValues = usageData.fluxes; -0068 -0069 % Sum fluxes for all 'usage_prot_' reactions, excluding the 'usage_prot_standard' -0070 usageProtIndices = startsWith(ecModel.rxns, 'usage_prot_') & ... -0071 ~contains(ecModel.rxns, 'standard'); -0072 -0073 % Sum the absolute values of the usage fluxes -0074 totalUsageProtFlux = sum(abs(fluxValues(usageProtIndices))); -0075 -0076 % Define the new protein pool as the sum of prot_pool_exchange flux and total usage_prot fluxes -0077 protPool = (protPoolExchangeFlux + totalUsageProtFlux)/100; -0078 -0079 for i=1:numel(topEnzyme) -0080 [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); -0081 rxnNumber = numel(rxns); -0082 topUsage.protID(end+1:end+rxnNumber,1) = topEnzyme(i); -0083 topUsage.geneID(end+1:end+rxnNumber,1) = geneIDs(i); -0084 topUsage.absUsage(end+1:end+rxnNumber,1) = usageData.absUsage(topUse(i)); -0085 topUsage.percUsage(end+1:end+rxnNumber,1) = topUsage.absUsage(end-(rxnNumber-1):end,1)/protPool; -0086 topUsage.kcat(end+1:end+rxnNumber,1) = kcat; -0087 topUsage.source(end+1:end+rxnNumber,1) = ecModel.ec.source(idx); -0088 topUsage.rxnID(end+1:end+rxnNumber,1) = rxns; -0089 topUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames; -0090 topUsage.grRules(end+1:end+rxnNumber,1) = grRules; -0091 end -0092 usageReport.topAbsUsage = struct2table(topUsage); -0093 usageReport.totalProtPool = protPoolExchangeFlux; -0094 usageReport.totalUsageFlux = totalUsageProtFlux; -0095 end +0064 protPool = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange'))/100; +0065 +0066 for i=1:numel(topEnzyme) +0067 [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); +0068 rxnNumber = numel(rxns); +0069 topUsage.protID(end+1:end+rxnNumber,1) = topEnzyme(i); +0070 topUsage.geneID(end+1:end+rxnNumber,1) = geneIDs(i); +0071 topUsage.absUsage(end+1:end+rxnNumber,1) = usageData.absUsage(topUse(i)); +0072 topUsage.percUsage(end+1:end+rxnNumber,1) = topUsage.absUsage(end-(rxnNumber-1):end,1)/protPool; +0073 topUsage.kcat(end+1:end+rxnNumber,1) = kcat; +0074 topUsage.source(end+1:end+rxnNumber,1) = ecModel.ec.source(idx); +0075 topUsage.rxnID(end+1:end+rxnNumber,1) = rxns; +0076 topUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames; +0077 topUsage.grRules(end+1:end+rxnNumber,1) = grRules; +0078 end +0079 usageReport.topAbsUsage = struct2table(topUsage); +0080 usageReport.totalUsageFlux = protPool; +0081 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/src/geckomat/utilities/enzymeUsage.m b/src/geckomat/utilities/enzymeUsage.m index f9a0faca8..5d80bf19c 100644 --- a/src/geckomat/utilities/enzymeUsage.m +++ b/src/geckomat/utilities/enzymeUsage.m @@ -26,7 +26,6 @@ % absUsage vector of absolute enzyme usages % UB vector of enzyme exchange reaction upper bounds % protID string array of matching protein IDs -% fluxes vector of fluxes, copy of input fluxes % % Usage: % usageData = enzymeUsage(ecModel,fluxes,zero) @@ -43,7 +42,6 @@ usageData.LB = ecModel.lb(rxnIdx); usageData.absUsage = abs(fluxes(rxnIdx)); usageData.capUsage = abs(usageData.absUsage./usageData.LB); -usageData.fluxes = fluxes; if ~zero nonzero = usageData.absUsage<0; diff --git a/src/geckomat/utilities/reportEnzymeUsage.m b/src/geckomat/utilities/reportEnzymeUsage.m index 5d793990d..b487bd543 100644 --- a/src/geckomat/utilities/reportEnzymeUsage.m +++ b/src/geckomat/utilities/reportEnzymeUsage.m @@ -61,20 +61,7 @@ topUsage.rxnNames = {}; topUsage.grRules = {}; -% Calculate the protein pool flux from the 'prot_pool_exchange' reaction -protPoolExchangeFlux = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); - -fluxValues = usageData.fluxes; - -% Sum fluxes for all 'usage_prot_' reactions, excluding the 'usage_prot_standard' -usageProtIndices = startsWith(ecModel.rxns, 'usage_prot_') & ... - ~contains(ecModel.rxns, 'standard'); - -% Sum the absolute values of the usage fluxes -totalUsageProtFlux = sum(abs(fluxValues(usageProtIndices))); - -% Define the new protein pool as the sum of prot_pool_exchange flux and total usage_prot fluxes -protPool = (protPoolExchangeFlux + totalUsageProtFlux)/100; +protPool = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange'))/100; for i=1:numel(topEnzyme) [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); @@ -90,6 +77,5 @@ topUsage.grRules(end+1:end+rxnNumber,1) = grRules; end usageReport.topAbsUsage = struct2table(topUsage); -usageReport.totalProtPool = protPoolExchangeFlux; -usageReport.totalUsageFlux = totalUsageProtFlux; +usageReport.totalUsageFlux = protPool; end