From 9cd80f72534087efa474b9c5f28617e9f09f9043 Mon Sep 17 00:00:00 2001 From: danielgeiszler Date: Wed, 1 May 2024 15:25:36 +0300 Subject: [PATCH] PSMFile.getColumnValues improve error message IterativeLocalizer.calculateFalseLocalizationRates add model q-values to PSM table bump to RC5 --- build.gradle | 2 +- .../umich/andykong/ptmshepherd/PSMFile.java | 4 +- .../andykong/ptmshepherd/PTMShepherd.java | 2 +- .../IterativeLocalizer.java | 132 ++++++++++++++---- 4 files changed, 105 insertions(+), 35 deletions(-) diff --git a/build.gradle b/build.gradle index 648acc5..afc4670 100644 --- a/build.gradle +++ b/build.gradle @@ -30,7 +30,7 @@ java { targetCompatibility = JavaVersion.VERSION_1_9 } -version = '3.0.0-rc4' +version = '3.0.0-rc5' application { // Define the main class for the application diff --git a/src/edu/umich/andykong/ptmshepherd/PSMFile.java b/src/edu/umich/andykong/ptmshepherd/PSMFile.java index 46a1d8b..b24e494 100644 --- a/src/edu/umich/andykong/ptmshepherd/PSMFile.java +++ b/src/edu/umich/andykong/ptmshepherd/PSMFile.java @@ -1027,8 +1027,8 @@ public ArrayList getColumnValues(String header) { //TODO I think you can int colIndx = getColumn(header); if (colIndx < 0 || colIndx >= (this.headers.length)) { - throw new ArrayIndexOutOfBoundsException(String.format("Column index %d is out of bounds for a %d column" + - "wide table", colIndx, this.headers.length)); + throw new ArrayIndexOutOfBoundsException(String.format("Cannot fetch %s column. Column index %d is out of bounds for a %d column" + + "wide table", header, colIndx, this.headers.length)); } ArrayList values = new ArrayList<>(this.data.size()); diff --git a/src/edu/umich/andykong/ptmshepherd/PTMShepherd.java b/src/edu/umich/andykong/ptmshepherd/PTMShepherd.java index 0b19f14..5a9f9ec 100644 --- a/src/edu/umich/andykong/ptmshepherd/PTMShepherd.java +++ b/src/edu/umich/andykong/ptmshepherd/PTMShepherd.java @@ -69,7 +69,7 @@ public class PTMShepherd { public static final String name = "PTM-Shepherd"; - public static final String version = "3.0.0-rc4"; + public static final String version = "3.0.0-rc5"; static HashMap params; static TreeMap> datasets; diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java index 155e9f3..9ee33d3 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java @@ -5,6 +5,7 @@ import edu.umich.andykong.ptmshepherd.core.MXMLReader; import edu.umich.andykong.ptmshepherd.core.Spectrum; import edu.umich.andykong.ptmshepherd.utils.Peptide; +import org.jetbrains.annotations.NotNull; import java.io.File; import java.text.DecimalFormat; @@ -363,13 +364,13 @@ private void calculateLocalizationProbabilities() throws Exception { if (finalPass) { // Update PSM table with new columns psmf.addColumn(psmf.getColumn("Observed Modifications") + 1, - "delta_mass_loc", specNames, strOutputProbs); - psmf.addColumn(psmf.getColumn("delta_mass_loc") + 1, - "delta_mass_maxloc", specNames, strMaxProbs); - psmf.addColumn(psmf.getColumn("delta_mass_maxloc") + 1, - "delta_mass_entropy", specNames, strEntropies); - psmf.addColumn(psmf.getColumn("delta_mass_entropy") + 1, - "delta_mass_maxprob", specNames, strMaxProbs2); + "PTM-Shepherd Localization", specNames, strOutputProbs); + psmf.addColumn(psmf.getColumn("PTM-Shepherd Localization") + 1, + "PTM-Shepherd Best Localization", specNames, strMaxProbs); + //psmf.addColumn(psmf.getColumn("delta_mass_maxloc") + 1, + // "delta_mass_entropy", specNames, strEntropies); + //psmf.addColumn(psmf.getColumn("PTM-Shepherd Best Localization") + 1, + // "PTM-Shepherd Max Probability", specNames, strMaxProbs2); psmf.save(true); // Do not overwrite complete = true; } @@ -478,6 +479,9 @@ public static boolean[] parseAllowedPositions(String seq, String allowedAAs, flo * which may not be desirable under all circumstances. This is calculated using the unbiased estimator (d+1)/t * from Levitsky J Proteome Res. (2017), but adjusted by the ratio of decoy AAs/target AAs. * + * //TODO clean up + * //TODO figure out double-decoy system + * * @return */ private void calculateFalseLocalizationRates() throws Exception { //TODO this needs to be modularized so it can be unit tested @@ -495,6 +499,31 @@ private void calculateFalseLocalizationRates() throws Exception { //TODO this ne int[] targetEntropies = new int[1000+1]; //4 digit accuracy, plus one bin for 1.0000 int[] decoyEntropies = new int[1000+1]; + + // Create container that will allow us to sort on probabilities while retaining PSM level mapping + class SpecProbQ implements Comparable { + String spec; + double prob; + double q; + + SpecProbQ(String spec, double prob) { + this.spec = spec; + this.prob = prob; + } + + @Override + public int compareTo(SpecProbQ o) { + if (this.prob < o.prob) { + return -1; // Return -1 if this object's prob is less than the other object's prob + } else if (this.prob > o.prob) { + return 1; // Return 1 if this object's prob is greater than the other object's prob + } + return 0; // Return 0 if they are equal + } + } + + ArrayList spqs = new ArrayList<>(); + // Loop through datasets for (String ds : this.datasets.keySet()) { ArrayList dsData = this.datasets.get(ds); @@ -503,9 +532,10 @@ private void calculateFalseLocalizationRates() throws Exception { //TODO this ne // Get values we're working with on first pass PSMFile psmf = new PSMFile(new File(dsData.get(i)[0])); + ArrayList specs = psmf.getColumnValues("Spectrum"); ArrayList peps = psmf.getColumnValues("Peptide"); - ArrayList maxProbs = psmf.getColumnValues("delta_mass_maxloc"); - ArrayList entropies = psmf.getColumnValues("delta_mass_entropy"); + ArrayList maxProbs = psmf.getColumnValues("PTM-Shepherd Best Localization"); + //ArrayList entropies = psmf.getColumnValues("delta_mass_entropy"); // Add probs to target and decoy histos and calculate nTarget and nDecoyAAs for (int j = 0; j < peps.size(); j++) { @@ -527,18 +557,57 @@ private void calculateFalseLocalizationRates() throws Exception { //TODO this ne // Add max probability and entropy float p = Float.parseFloat(subString(maxProb, "(", ")")); - float entropy = Float.parseFloat(entropies.get(j)); + //float entropy = Float.parseFloat(entropies.get(j)); if (isDecoyAA(maxProb.charAt(0))) { decoyProbs[(int) (p * 1000.0)]++; - decoyEntropies[(int) (entropy * 1000.0)]++; + // decoyEntropies[(int) (entropy * 1000.0)]++; } else { targetProbs[(int) (p * 1000.0)]++; - targetEntropies[(int) (entropy * 1000.0)]++; + // targetEntropies[(int) (entropy * 1000.0)]++; } } + for (int j = 0; j < specs.size(); j++) { + String maxProb = maxProbs.get(j); + if (maxProb.equals("")) + continue; + double prob = Double.parseDouble(subString(maxProb, "(", ")")); + spqs.add(new SpecProbQ(specs.get(j), prob)); + } + } } + // Sort in descending probability order + Collections.sort(spqs, Collections.reverseOrder()); + + // Calculate local q-val + double runningQSum = 0.0; + int runningCount = 0; + for (SpecProbQ spq : spqs) { + runningQSum += 1.0 - spq.prob; + runningCount++; + spq.q = runningQSum / runningCount; + } + + // Ensure q-val monotonicity + double cMin = 10000.0; + for (int i = spqs.size() - 1; i >= 0; i--) { + double tQ = spqs.get(i).q; + if (spqs.get(i).q < cMin) { + cMin = tQ; + } else { + while ((i >= 0) && (spqs.get(i).q >= cMin)) { + spqs.get(i).q = cMin; + i--; + } + i++; + } + } + // Extract spectrum to q-val and probability mappings in matched order + HashMap qValsMap = new HashMap<>(); + for (SpecProbQ spq : spqs) + qValsMap.put(spq.spec, spq.q); + // Calculate the FLRs of each type // Three different FLRs to compute double[] flrProb = new double[1000+1]; // Assumes model probabilities are valid, does not use decoys @@ -610,6 +679,9 @@ private void calculateFalseLocalizationRates() throws Exception { //TODO this ne qEntropyDecoyModel[i] = min; } + + + /** // Print to test for (int i = flrProb.length-1; i >= 0; i--) { @@ -645,46 +717,44 @@ private void calculateFalseLocalizationRates() throws Exception { //TODO this ne // Get values to map to q-vals PSMFile psmf = new PSMFile(new File(dsData.get(i)[0])); ArrayList specNames = psmf.getColumnValues("Spectrum"); - ArrayList maxProbs = psmf.getColumnValues("delta_mass_maxloc"); - ArrayList entropies = psmf.getColumnValues("delta_mass_entropy"); + ArrayList maxProbs = psmf.getColumnValues("PTM-Shepherd Best Localization"); + //ArrayList entropies = psmf.getColumnValues("delta_mass_entropy"); // Assign each q-Val - ArrayList probModelVals = new ArrayList<>(specNames.size()); - ArrayList probDecoyModelVals = new ArrayList<>(specNames.size()); - ArrayList entropyDecoyModelVals = new ArrayList<>(specNames.size()); + ArrayList probModelQVals = new ArrayList<>(specNames.size()); + //ArrayList probDecoyModelVals = new ArrayList<>(specNames.size()); + //ArrayList entropyDecoyModelVals = new ArrayList<>(specNames.size()); for (int j = 0; j < specNames.size(); j++) { - // Check if peptide is unmod and deal with it if it is - boolean unmodFlag = maxProbs.get(i).equals("") ? true : false; + boolean unmodFlag = maxProbs.get(j).equals("") ? true : false; if (unmodFlag) { // prob model - probModelVals.add(""); + probModelQVals.add(""); // prob model with decoys - probDecoyModelVals.add(""); + //probDecoyModelVals.add(""); // entropy model - entropyDecoyModelVals.add(""); + //entropyDecoyModelVals.add(""); } else { // prob model - double maxProb = Double.parseDouble(subString(maxProbs.get(i), "(", ")")); - probModelVals.add(new DecimalFormat("0.0000").format(qProbModel[(int) (maxProb * 1000)])); + probModelQVals.add(new DecimalFormat("0.0000").format(qValsMap.get(specNames.get(j)))); // prob model with decoys - probDecoyModelVals.add(new DecimalFormat("0.0000").format(qProbDecoyModel[(int) (maxProb * 1000)])); + //probDecoyModelVals.add(new DecimalFormat("0.0000").format(qProbDecoyModel[(int) (maxProb * 1000)])); // entropy model - double entropy = Double.parseDouble(entropies.get(i)); - entropyDecoyModelVals.add(new DecimalFormat("0.0000").format(qEntropyDecoyModel[(int) (entropy * 1000)])); + //double entropy = Double.parseDouble(entropies.get(i)); + //entropyDecoyModelVals.add(new DecimalFormat("0.0000").format(qEntropyDecoyModel[(int) (entropy * 1000)])); } } // Send to PSM file - psmf.addColumn(psmf.getColumn("delta_mass_maxloc") + 1, "delta_mass_BH_loc_q", - specNames, probModelVals); + psmf.addColumn(psmf.getColumn("PTM-Shepherd Best Localization") + 1, "PTM-Shepherd q-val", + specNames, probModelQVals); /** //TODO figure out what's going on with these before implementing them, assuming they're even worth doing psmf.addColumn(psmf.getColumn("delta_mass_BH_loc_q") + 1, "delta_mass_prob_decoyAA_q", specNames, probDecoyModelVals); psmf.addColumn(psmf.getColumn("delta_mass_entropy") + 1, "delta_mass_entropy_decoyAA_q", specNames, entropyDecoyModelVals); - psmf.save(true); //TODO this will add these columns - **/ + **/ + psmf.save(true); // add columns } }