From 2bd17911e14109d708c4a67c664dd68482f6f72d Mon Sep 17 00:00:00 2001 From: danielgeiszler Date: Fri, 6 Oct 2023 18:32:51 +0300 Subject: [PATCH] Peptide.calculatePeptideFragments skips a1/b1 ions Peptide.generateDecoy fixed error not fixing C-term in place MatchedIonDistribution added poisson-binomial model with decoys for intensity modeling IterativeLocalizer added RNG seed for reproducible decoy generation IterativeLocalizer added PoissonBinomial model with decoys IterativeLocalizer added temporary decoy generation to check effectiveness of --- .../IterativeLocalizer.java | 57 ++++++++--- .../MatchedIonDistribution.java | 99 ++++++++++++++++--- .../PoissonBinomialLikelihood.java | 17 ++-- .../andykong/ptmshepherd/utils/Peptide.java | 6 +- 4 files changed, 142 insertions(+), 37 deletions(-) diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java index 8abf8f2..f80043e 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java @@ -43,6 +43,9 @@ public class IterativeLocalizer { static int scanNum; static boolean debugFlag; boolean printIonDistribution = true; // TODO make this a parameter + boolean poissonBinomialDistribution = true; // TODO make this a parameter + int seed = 3341; + Random rng; //1. Learn distribution of intensities from unmodified peptides //TODO change this description... // Loop through files @@ -70,6 +73,7 @@ public IterativeLocalizer(double[][] peakBounds, double peakTol, int precursorMa this.convCriterion = convCriterion; this.maxEpoch = maxEpoch; this.decoyAAs = "ALIV"; // Well, you can tell by the way I use my walks //TODO make this a parameter if its worth it + this.rng = new Random(seed); } private void initPrecursorPeakBounds(double[][] peakBounds, double peakTol, int precursorMassUnits) { @@ -90,7 +94,7 @@ public void localize() throws Exception { private void fitMatchedIonDistribution() throws Exception { System.out.println("\tFitting distribution to matched zero-bin fragments"); // Set up distribution - this.matchedIonDist = new MatchedIonDistribution(0.01f); + this.matchedIonDist = new MatchedIonDistribution(0.01f, this.poissonBinomialDistribution); // Set up zero bin boundaries for faster comp double zbL = this.peaks[1][this.zeroBin]; double zbR = this.peaks[2][this.zeroBin]; @@ -128,13 +132,29 @@ private void fitMatchedIonDistribution() throws Exception { continue; } - float [] matchedInts = spec.getMatchedFrags(pep, mods, this.fragTol, this.ionTypes, 0.0F); - this.matchedIonDist.addIons(matchedInts); + if (!this.poissonBinomialDistribution) { + float[] matchedInts = spec.getMatchedFrags( + pep, mods, this.fragTol, this.ionTypes, 0.0F); + this.matchedIonDist.addIons(matchedInts); + } else { + // Add real values + float[] matchedInts = spec.getMatchedFrags( + pep, mods, this.fragTol, this.ionTypes, 0.0F); + this.matchedIonDist.addIons(matchedInts, false); + // Add decoy values + Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng); + matchedInts = spec.getMatchedFrags( + decoyPep.pepSeq, decoyPep.mods, this.fragTol, this.ionTypes, 0.0F); + this.matchedIonDist.addIons(matchedInts, true); + } } } } } - this.matchedIonDist.calculateCdf(); + if (!this.poissonBinomialDistribution) + this.matchedIonDist.calculateCdf(); + else + this.matchedIonDist.calculateIonPosterior(); if (this.printIonDistribution) this.matchedIonDist.printHisto("matched_ion_distribution.tsv"); @@ -249,8 +269,22 @@ private void calculateLocalizationProbabilities() throws Exception { strMaxProbs.add(maxProbToString(maxProb, maxProbAA)); double locEntropy = calculateLocalizationEntropy(siteProbs, allowedPoses); strEntropies.add(entropyToString(locEntropy)); - } + // TODO check decoy usefulness + Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng); + boolean[] decoyAllowedPoses = parseAllowedPositions(decoyPep.pepSeq, + this.allowedAAs, decoyPep.mods); + double[] decoySiteProbs = localizePsm(spec, decoyPep.pepSeq, decoyPep.mods, dMassApex, + cBin, decoyAllowedPoses); + double decoyMaxProb = findMaxLocalizationProbability(decoySiteProbs); + String decoyMaxProbAA = findMaxLocalizationProbabilitySite(decoySiteProbs, decoyPep.pepSeq); + if (decoyMaxProb >= maxProb) { + System.out.println(specName + "\t" + pep + "\t" + decoyPep.pepSeq + "\t" + + maxProbToString(maxProb, maxProbAA) + "\t" + + maxProbToString(decoyMaxProb, decoyMaxProbAA)); + } + + } } } // If models have converged, update PSM tables @@ -720,11 +754,12 @@ private double[] localizePsm (Spectrum spec, String pep, float[] mods, float dMa /** * Computes the likelihood P(Spec_i|Pep_{ij}) of all localization sites using the Poisson Binomial model. Matched * ions currently map to the adapted PTMiner function. - * @param pep - * @param allowedPoses - * @param peakMzs - * @param peakInts - * @return + * @param pep peptide sequence as string + * @param mods modifications on the peptides as float array + * @param allowedPoses allowed positions on the peptides as boolean array + * @param peakMzs reduced peak M/Z float array matching at least one site + * @param peakInts reduced peak intensity float array matching at least one site + * @return likelihoods of each site as double array */ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, float dMass, boolean[] allowedPoses, float[] peakMzs, float[] peakInts) { @@ -754,8 +789,6 @@ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, floa System.out.println(Arrays.toString(matchedIonIntensities)); System.out.println(Arrays.toString(matchedIonProbabilities)); System.out.println("Matched ions done"); - - } // Format these for Poisson Binomial input ionProbs[cAllowedPos] = matchedIonProbabilities; diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/MatchedIonDistribution.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/MatchedIonDistribution.java index 0970782..4209478 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/MatchedIonDistribution.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/MatchedIonDistribution.java @@ -10,18 +10,21 @@ public class MatchedIonDistribution { int[] pdf; - int[] pdfUnmatched; + int[] pdfDecoy; double[] cdf; + double[] ionPosterior; float resolution; int binMult; - public final String mode = "ptminer-unmatched-theoretical"; + public String mode = "ptminer-unmatched-theoretical"; - public MatchedIonDistribution(float resolution) { + public MatchedIonDistribution(float resolution, boolean poissonBinomial) { this.resolution = resolution; this.binMult = (int) (1.0f/resolution); this.pdf = new int[((int) (100.0 * this.binMult + 1))]; - this.pdfUnmatched = new int[((int) (100.0 * this.binMult + 1))]; //TODO implement and test this alternative model + this.pdfDecoy = new int[((int) (100.0 * this.binMult + 1))]; //TODO implement and test this alternative model + if (poissonBinomial) + this.mode = "poissonbinomial-matched-theoretical-decoy"; } // Original was ptminer mode @@ -39,10 +42,10 @@ public void addIon(float intensity) { int idx = (int) (intensity * this.binMult); this.pdf[idx]++; } - } else if (this.mode.equals("ptminer-unmatched-theoretical-experimental")) { + } else if (this.mode.equals("poissonbinomial-matched-theoretical-decoy")) { if (intensity < 0.0f) { int idx = (int) (-1 * intensity * this.binMult); - this.pdfUnmatched[idx]++; + this.pdfDecoy[idx]++; } else { int idx = (int) (-1 * intensity * this.binMult); this.pdf[idx]++; @@ -50,18 +53,43 @@ public void addIon(float intensity) { } } - public synchronized void addIons(float [] intensities) { + /** + * Adds ion to ion posterior probability distribution. Currently only using MATCHED ions. + * @param intensity + * @param isDecoy + */ + public void addIon(float intensity, boolean isDecoy) { + if (!isDecoy) { + if (intensity > 0.0f) { + int idx = (int) (intensity * this.binMult); + this.pdf[idx]++; + } + } else { + if (intensity > 0.0f) { + int idx = (int) (intensity * this.binMult); + this.pdfDecoy[idx]++; + } + } + } + + public void addIons(float [] intensities) { for(int i = 0; i < intensities.length; i++) { addIon(intensities[i]); } } + public void addIons(float [] intensities, boolean isDecoy) { + for(int i = 0; i < intensities.length; i++) { + addIon(intensities[i], isDecoy); + } + } + public void addIon_Original(float intensity) { int idx = (int) (intensity * this.binMult); this.pdf[idx]++; } - public synchronized void addIons_Original(float [] intensities) { + public void addIons_Original(float [] intensities) { for(int i = 0; i < intensities.length; i++) { if (intensities[i] < 0) continue; @@ -92,6 +120,37 @@ public void calculateCdf() { this.cdf[this.cdf.length-1] = this.cdf[this.cdf.length-2] = lastBinsAverage; } + public void calculateIonPosterior() { + // Calculate local q-val estimate + this.ionPosterior = new double[((int) (100.0 * this.binMult + 1))]; + int sumTarget = 0; + int sumDecoy = 0; + for (int i = this.pdf.length - 1; i >= 0; i--) { + sumTarget += this.pdf[i]; + sumDecoy += this.pdfDecoy[i]; + this.ionPosterior[i] = (double) sumTarget / ((double) sumDecoy + (double) sumTarget); + } + + // Find q-val monotonic increasing + // leftStop is always 0 + int rightStop = this.ionPosterior.length; + while (rightStop >= 0) { + double max = -1; + int maxIndx = -1; + // Search within window for max + for (int i = 0; i < rightStop; i++) { + if (this.ionPosterior[i] > max) { + max = this.ionPosterior[i]; + maxIndx = i; + } + } + for (int i = maxIndx; i < rightStop; i++) { + this.ionPosterior[i] = max; + } + rightStop = maxIndx - 1; + } + } + public void calculateCdf_UnmatchedExperimentalTheoretical() { //TODO test this.cdf = new double[((int) (100.0 * this.binMult + 1))]; int sum = 0; @@ -120,7 +179,11 @@ public double calcIonProbability(float intensity) { if (intensity < 0) prob = -1; else - prob = this.cdf[(int) intensity * this.binMult]; + if (!this.mode.equals("poissonbinomial-matched-theoretical-decoy")) { + prob = this.cdf[(int) intensity * this.binMult]; + } else { + prob = this.ionPosterior[(int) intensity * this.binMult]; + } return prob; } @@ -134,11 +197,19 @@ public double[] calcIonProbabilities(float [] intensities) { public String toString() { StringBuffer sb = new StringBuffer(); - sb.append("intensity\tcount\n"); - for (int i = 0; i < this.pdf.length; i++) - sb.append(new DecimalFormat("0.00").format((double) i / (double) this.binMult) - + "\t" + this.pdf[i] + "\n"); - + if (!this.mode.equals("poissonbinomial-matched-theoretical-decoy")) { + sb.append("intensity\tcount\n"); + for (int i = 0; i < this.pdf.length; i++) { + sb.append(new DecimalFormat("0.00").format((double) i / (double) this.binMult) + + "\t" + this.pdf[i] + "\n"); + } + } else { + sb.append("intensity\tcount_target\tcount_decoy\tion_PEP\n"); + for (int i = 0; i < this.pdf.length; i++) { + sb.append(new DecimalFormat("0.00").format((double) i / (double) this.binMult) + + "\t" + this.pdf[i] + "\t" + this.pdfDecoy[i] + "\t" + this.ionPosterior[i] + "\n"); + } + } return sb.toString(); } diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/PoissonBinomialLikelihood.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/PoissonBinomialLikelihood.java index 33c8690..e04b6c0 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/PoissonBinomialLikelihood.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/PoissonBinomialLikelihood.java @@ -136,15 +136,14 @@ public static double calculateProbXGreaterThanY(double[] xIonProbs, double[] yIo int nMatchedIonsX = 0; int nMatchedIonsY = 0; int nSharedIons = 0; - for (int i = 0; i < xIonProbs.length; i++) { - if (xIonProbs[i] > 0.0) - nMatchedIonsX++; - if (yIonProbs[i] > 0.0) - nMatchedIonsY++; - if (xIonProbs[i] > 0.0 && yIonProbs[i] > 0.0) - nSharedIons++; - } - + for (int i = 0; i < xIonProbs.length; i++) { + if (xIonProbs[i] > 0.0) + nMatchedIonsX++; + if (yIonProbs[i] > 0.0) + nMatchedIonsY++; + if (xIonProbs[i] > 0.0 && yIonProbs[i] > 0.0) + nSharedIons++; + } // Set up arrays to be convolved double[][] xBinPoiParams = new double[nMatchedIonsX - nSharedIons][2]; double[][] yBinPoiParams = new double[nMatchedIonsY - nSharedIons][2]; diff --git a/src/edu/umich/andykong/ptmshepherd/utils/Peptide.java b/src/edu/umich/andykong/ptmshepherd/utils/Peptide.java index 560a353..8b264ab 100644 --- a/src/edu/umich/andykong/ptmshepherd/utils/Peptide.java +++ b/src/edu/umich/andykong/ptmshepherd/utils/Peptide.java @@ -19,6 +19,7 @@ public static ArrayList calculatePeptideFragments(String seq, float[] mod ArrayList nIonTypes = new ArrayList<>(); ArrayList cIonTypes = new ArrayList<>(); + // Todo this should start at b2 for (int i = 0; i < ionTypes.length(); i++) { char curIonType = ionTypes.charAt(i); if (curIonType == 'a' || curIonType == 'b' || curIonType == 'c') @@ -38,7 +39,8 @@ else if (curIonType == 'x' || curIonType == 'y' || curIonType == 'z') float cmass = AAMasses.monoisotopic_nterm_mass + nTermMass; for (int i = 0; i < cLen - 1; i++) { //loop through positions on the peptide cmass += (aaMasses[seq.charAt(i) - 'A'] + mods[i]) / ccharge; - knownFrags.add(cmass); + if (i != 0) // todo skip a1/b1 ion, check if this needs to be skipped for c too + knownFrags.add(cmass); } } } @@ -75,7 +77,7 @@ public static Peptide generateDecoy(String pep, float[] mods, Random rng) { newMods[i+1] = sites.get(i).mod; } // C-term AA - newPep.append(sites.get(sites.size()-1).aa); + newPep.append(pep.charAt(pep.length()-1)); newMods[newMods.length-1] = mods[mods.length-1]; return new Peptide(newPep.toString(), newMods);