From 43f946619fe5d148b10ad4e8095ce8a4a2a903a5 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 12 Feb 2025 18:52:17 -0600 Subject: [PATCH 1/8] Fixed bug where recursive peak cutting functions did not function as intended --- mzLib/FlashLFQ/FlashLfqEngine.cs | 30 ++-- .../TestFlashLFQ/ChromatographicPeakTests.cs | 2 +- mzLib/TestFlashLFQ/TestFlashLFQ.cs | 143 +++++++++++++++++- .../TestFlashLFQ/TestIdentificationAdapter.cs | 2 +- mzLib/TestFlashLFQ/TestPipEcho.cs | 2 +- 5 files changed, 153 insertions(+), 26 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 01d3c6dc0..76a1dd44f 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1828,11 +1828,13 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) return; } + // Ordered list of all time points where the apex charge state had a valid isotopic envelope List timePointsForApexZ = peak.IsotopicEnvelopes .Where(p => p.ChargeState == peak.Apex.ChargeState).ToList(); HashSet scanNumbers = new HashSet(timePointsForApexZ.Select(p => p.IndexedPeak.ZeroBasedMs1ScanIndex)); int apexIndex = timePointsForApexZ.IndexOf(peak.Apex); IsotopicEnvelope valleyEnvelope = null; + int numberOfMs1Scans = _ms1Scans[peak.SpectraFileInfo].Length; // -1 checks the left side, +1 checks the right side int[] directions = { 1, -1 }; @@ -1846,6 +1848,7 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) { IsotopicEnvelope timepoint = timePointsForApexZ[i]; + // Valley envelope is the lowest intensity point that has been encountered thus far if (valleyEnvelope == null || timepoint.Intensity < valleyEnvelope.Intensity) { valleyEnvelope = timepoint; @@ -1855,35 +1858,20 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) double discriminationFactor = (timepoint.Intensity - valleyEnvelope.Intensity) / timepoint.Intensity; + // If the time point is at least discriminationFactor times more intense than the valley + // We perform an additional check to see if the time point is more intense than the point next to the valley if (discriminationFactor > DiscriminationFactorToCutPeak && (indexOfValley + direction < timePointsForApexZ.Count && indexOfValley + direction >= 0)) { + IsotopicEnvelope secondValleyTimepoint = timePointsForApexZ[indexOfValley + direction]; discriminationFactor = (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; - if (discriminationFactor > DiscriminationFactorToCutPeak) - { - cutThisPeak = true; - break; - } - - int nextMs1ScanNum = -1; - for (int j = valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex - 1; - j < _ms1Scans[peak.SpectraFileInfo].Length && j >= 0; - j += direction) - { - if (_ms1Scans[peak.SpectraFileInfo][j].OneBasedScanNumber >= 0 && - _ms1Scans[peak.SpectraFileInfo][j].OneBasedScanNumber != - valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex) - { - nextMs1ScanNum = j + 1; - break; - } - } - - if (!scanNumbers.Contains(nextMs1ScanNum)) + // If the current timepoint is more intense than the second valley, we cut the peak + // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak + if (discriminationFactor > DiscriminationFactorToCutPeak || !scanNumbers.Contains(valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex + direction)) { cutThisPeak = true; break; diff --git a/mzLib/TestFlashLFQ/ChromatographicPeakTests.cs b/mzLib/TestFlashLFQ/ChromatographicPeakTests.cs index cd6301a91..aa7d25000 100644 --- a/mzLib/TestFlashLFQ/ChromatographicPeakTests.cs +++ b/mzLib/TestFlashLFQ/ChromatographicPeakTests.cs @@ -7,7 +7,7 @@ using System.Text; using System.Threading.Tasks; -namespace TestFlashLFQ +namespace Test { public class ChromatographicPeakTests { diff --git a/mzLib/TestFlashLFQ/TestFlashLFQ.cs b/mzLib/TestFlashLFQ/TestFlashLFQ.cs index 430f75d67..b0133d0d6 100644 --- a/mzLib/TestFlashLFQ/TestFlashLFQ.cs +++ b/mzLib/TestFlashLFQ/TestFlashLFQ.cs @@ -869,6 +869,145 @@ public static void TestPeakSplittingLeftWithEmptyScan() Assert.That(peak.IsotopicEnvelopes.Count == 6); } + [Test] + public static void TestPeakSplittingRightWithEmptyScanAndMs2Spectra() + { + string fileToWrite = "myMzml.mzML"; + string peptide = "PEPTIDE"; + double intensity = 1e6; + + Loaders.LoadElements(); + + // generate mzml file + + // 1 MS1 scan per peptide + MsDataScan[] scans = new MsDataScan[20]; + double[] intensityMultipliers = { 1, 3, 5, 10, 5, 3, 1, 1, 3, 1 }; + + for (int s = 0; s < 10; s++) + { + ChemicalFormula cf = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula(); + IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8); + double[] mz = dist.Masses.Select(v => v.ToMz(1)).ToArray(); + double[] intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray(); + + if (s == 7) + { + mz = new[] { 401.0 }; + intensities = new[] { 1000.0 }; + } + + int zeroBasedScanIndex = s * 2; + // add the MS1 scan + scans[zeroBasedScanIndex] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: zeroBasedScanIndex + 1, msnOrder: 1, isCentroid: true, + polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f", + mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (zeroBasedScanIndex + 1)); + + // add the MS2 scan + scans[zeroBasedScanIndex + 1] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: zeroBasedScanIndex + 2, msnOrder: 2, isCentroid: true, + polarity: Polarity.Positive, retentionTime: 1.5 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f", + mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (zeroBasedScanIndex + 2), + oneBasedPrecursorScanNumber: zeroBasedScanIndex + 1, + selectedIonMz: mz.First(), + dissociationType: DissociationType.HCD); + } + + // write the .mzML + Readers.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), + Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), false); + + // set up spectra file info + SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), "", 0, 0, 0); + + // create some PSMs + var pg = new ProteinGroup("MyProtein", "gene", "org"); + + Identification id1 = new Identification(file1, peptide, peptide, + new Proteomics.AminoAcidPolymer.Peptide(peptide).MonoisotopicMass, 1.3 + 0.001, 1, new List { pg }); + + // create the FlashLFQ engine + FlashLfqEngine engine = new FlashLfqEngine(new List { id1 }); + + // run the engine + var results = engine.Run(); + ChromatographicPeak peak = results.Peaks.First().Value.First(); + + Assert.That(peak.Apex.IndexedPeak.RetentionTime == 1.3); + Assert.That(peak.SplitRT == 1.6); + Assert.That(!peak.IsotopicEnvelopes.Any(p => p.IndexedPeak.RetentionTime > 1.6)); + Assert.That(peak.IsotopicEnvelopes.Count == 6); + } + + [Test] + public static void TestPeakSplittingLeftWithEmptyScanAndMs2Spectra() + { + string fileToWrite = "myMzml.mzML"; + string peptide = "PEPTIDE"; + double intensity = 1e6; + + Loaders.LoadElements(); + + // generate mzml file + + // 1 MS1 scan per peptide + MsDataScan[] scans = new MsDataScan[20]; + double[] intensityMultipliers = { 1, 3, 1, 1, 3, 5, 10, 5, 3, 1 }; + + for (int s = 0; s < 10; s++) + { + ChemicalFormula cf = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula(); + IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8); + double[] mz = dist.Masses.Select(v => v.ToMz(1)).ToArray(); + double[] intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray(); + + if (s == 2) + { + mz = new[] { 401.0 }; + intensities = new[] { 1000.0 }; + } + + int zeroBasedScanIndex = s * 2; + // add the MS1 scan + scans[zeroBasedScanIndex] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: zeroBasedScanIndex + 1, msnOrder: 1, isCentroid: true, + polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f", + mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (zeroBasedScanIndex + 1)); + + // add the MS2 scan + scans[zeroBasedScanIndex+1] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: zeroBasedScanIndex + 2, msnOrder: 2, isCentroid: true, + polarity: Polarity.Positive, retentionTime: 1.5 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f", + mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (zeroBasedScanIndex + 2), + oneBasedPrecursorScanNumber: zeroBasedScanIndex + 1, + selectedIonMz: mz.First(), + dissociationType: DissociationType.HCD); + } + + // write the .mzML + Readers.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), + Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), false); + + // set up spectra file info + SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), "", 0, 0, 0); + + // create some PSMs + var pg = new ProteinGroup("MyProtein", "gene", "org"); + + Identification id1 = new Identification(file1, peptide, peptide, + new Proteomics.AminoAcidPolymer.Peptide(peptide).MonoisotopicMass, 1.3 + 0.001, 1, new List { pg }); + + // create the FlashLFQ engine + FlashLfqEngine engine = new FlashLfqEngine(new List { id1 }); + + // run the engine + var results = engine.Run(); + ChromatographicPeak peak = results.Peaks.First().Value.First(); + + Assert.That(peak.Apex.IndexedPeak.RetentionTime == 1.6); + Assert.That(peak.SplitRT == 1.3); + Assert.That(!peak.IsotopicEnvelopes.Any(p => p.IndexedPeak.RetentionTime < 1.3)); + Assert.That(peak.IsotopicEnvelopes.Count == 6); + } + + [Test] public static void TestToString() { @@ -1387,8 +1526,8 @@ public static void RealDataMbrTest() // Any change to ML.NET or the PEP Analysis engine will cause these to change. Console.WriteLine("r1 PIP event count: " + f1r1MbrResults.Count); Console.WriteLine("r2 PIP event count: " + f1r2MbrResults.Count); - Assert.AreEqual(138, f1r1MbrResults.Count); - Assert.AreEqual(70, f1r2MbrResults.Count); + Assert.AreEqual(141, f1r1MbrResults.Count); + Assert.AreEqual(78, f1r2MbrResults.Count); // Check that MS/MS identified peaks and MBR identified peaks have similar intensities List<(double, double)> peptideIntensities = f1r1MbrResults.Select(pep => (Math.Log(pep.Value.GetIntensity(f1r1)), Math.Log(pep.Value.GetIntensity(f1r2)))).ToList(); diff --git a/mzLib/TestFlashLFQ/TestIdentificationAdapter.cs b/mzLib/TestFlashLFQ/TestIdentificationAdapter.cs index cd6412731..95b76462c 100644 --- a/mzLib/TestFlashLFQ/TestIdentificationAdapter.cs +++ b/mzLib/TestFlashLFQ/TestIdentificationAdapter.cs @@ -6,7 +6,7 @@ using Assert = NUnit.Framework.Legacy.ClassicAssert; using System.IO; -namespace TestFlashLFQ +namespace Test { internal class TestIdentificationAdapter { diff --git a/mzLib/TestFlashLFQ/TestPipEcho.cs b/mzLib/TestFlashLFQ/TestPipEcho.cs index 0d2388142..37392b469 100644 --- a/mzLib/TestFlashLFQ/TestPipEcho.cs +++ b/mzLib/TestFlashLFQ/TestPipEcho.cs @@ -14,7 +14,7 @@ using UsefulProteomicsDatabases; -namespace TestFlashLFQ +namespace Test { [TestFixture] [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage] From 4b1a67dbddc8ef75f43208b57ed365d20df23526 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 12 Feb 2025 18:54:46 -0600 Subject: [PATCH 2/8] deleted unused variable --- mzLib/FlashLFQ/FlashLfqEngine.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 76a1dd44f..488f1c771 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1834,11 +1834,9 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) HashSet scanNumbers = new HashSet(timePointsForApexZ.Select(p => p.IndexedPeak.ZeroBasedMs1ScanIndex)); int apexIndex = timePointsForApexZ.IndexOf(peak.Apex); IsotopicEnvelope valleyEnvelope = null; - int numberOfMs1Scans = _ms1Scans[peak.SpectraFileInfo].Length; // -1 checks the left side, +1 checks the right side int[] directions = { 1, -1 }; - foreach (int direction in directions) { valleyEnvelope = null; From 3a90a4529c30af03c361376182c8149bcb252f5d Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 13 Feb 2025 16:34:58 -0600 Subject: [PATCH 3/8] Implemented ITraceable and ISingleScanDatum interfaces, refactored CutPeak --- mzLib/FlashLFQ/ChromatographicPeak.cs | 14 ++- mzLib/FlashLFQ/FlashLfqEngine.cs | 96 +++++++++++-------- mzLib/FlashLFQ/IndexedMassSpectralPeak.cs | 13 ++- mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs | 25 +++++ mzLib/FlashLFQ/Interfaces/ITraceable.cs | 29 ++++++ mzLib/FlashLFQ/IsotopicEnvelope.cs | 7 +- 6 files changed, 132 insertions(+), 52 deletions(-) create mode 100644 mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs create mode 100644 mzLib/FlashLFQ/Interfaces/ITraceable.cs diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index d7bac2195..f821775c0 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -4,17 +4,20 @@ using System.Text; using ClassExtensions = Chemistry.ClassExtensions; using FlashLFQ.PEP; +using FlashLFQ.Interfaces; namespace FlashLFQ { - public class ChromatographicPeak + public class ChromatographicPeak : ITraceable { public double Intensity; public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1; public readonly SpectraFileInfo SpectraFileInfo; - public List IsotopicEnvelopes; + public List IsotopicEnvelopes { get; set; } + public List ScanOrderedPoints => IsotopicEnvelopes; + public List ApexChargeStateScanOrderedPoints => IsotopicEnvelopes.Where(p => p.ChargeState == Apex.ChargeState).ToList(); public int ScanCount => IsotopicEnvelopes.Count; - public double SplitRT; + public double SplitRT { get; private set; } public readonly bool IsMbrPeak; public double MbrScore; public double PpmScore { get; set; } @@ -125,6 +128,11 @@ public void MergeFeatureWith(ChromatographicPeak otherFeature, bool integrate) } } + public void SetSplitLocation(IsotopicEnvelope splitEnvelope) + { + SplitRT = splitEnvelope.IndexedPeak.RetentionTime; + } + /// /// Sets two ChromatographicPeak properties: NumIdentificationsByBaseSeq and NumIdentificationsByFullSeq /// diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 488f1c771..d1f17f59b 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -15,6 +15,7 @@ using FlashLFQ.PEP; using System.IO; using System.Threading; +using FlashLFQ.Interfaces; [assembly: InternalsVisibleTo("TestFlashLFQ")] @@ -507,7 +508,7 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo) } msmsFeature.CalculateIntensityForThisFeature(Integrate); - CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes); + CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes); if (!msmsFeature.IsotopicEnvelopes.Any()) { @@ -1292,7 +1293,7 @@ internal ChromatographicPeak FindIndividualAcceptorPeak( acceptorPeak.IsotopicEnvelopes.AddRange(bestChargeEnvelopes); acceptorPeak.CalculateIntensityForThisFeature(Integrate); - CutPeak(acceptorPeak, seedEnv.IndexedPeak.RetentionTime); + CutPeak(acceptorPeak, seedEnv.IndexedPeak.RetentionTime, apexTimePoints: acceptorPeak.ApexChargeStateScanOrderedPoints); var claimedPeaks = new HashSet(acceptorPeak.IsotopicEnvelopes.Select(p => p.IndexedPeak)) { @@ -1811,65 +1812,55 @@ public List Peakfind(double idRetentionTime, double mas } /// - /// Recursively cuts ChromatographicPeaks, removing all IsotopicEnvelopes - /// that occur before or after potential "valleys" surrounding the identification's - /// MS2 retention time. Then, the peak intensity is recalculated + /// Determines whether a peak should be cut based on the intensity of the surrounding time points. /// - /// Peak to be cut, where envelopes are sorted by MS1 scan number - /// MS2 scan retention time - private void CutPeak(ChromatographicPeak peak, double identificationTime) + /// The type of the time points, which must implement ISingleScanDatum. + /// The list of time points + /// The index of the apex (most intense, best, whatever) in the list of time points. + /// The valley point, which is the lowest intensity point encountered. + /// The discrimination factor to determine if the peak should be cut. Default is 0.6. + /// True if the peak should be cut, otherwise false. + public static bool ShouldCutPeak(List timePoints, int indexOfApexInTimePoints, + out T valley, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum { - // find out if we need to split this peak by using the discrimination factor - // this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number + valley = default(T); bool cutThisPeak = false; - - if (peak.IsotopicEnvelopes.Count < 5) - { - return; - } - - // Ordered list of all time points where the apex charge state had a valid isotopic envelope - List timePointsForApexZ = peak.IsotopicEnvelopes - .Where(p => p.ChargeState == peak.Apex.ChargeState).ToList(); - HashSet scanNumbers = new HashSet(timePointsForApexZ.Select(p => p.IndexedPeak.ZeroBasedMs1ScanIndex)); - int apexIndex = timePointsForApexZ.IndexOf(peak.Apex); - IsotopicEnvelope valleyEnvelope = null; + HashSet zeroIndexedScanNumbers = timePoints.Select(p => p.ZeroBasedScanIndex).ToHashSet(); // -1 checks the left side, +1 checks the right side int[] directions = { 1, -1 }; foreach (int direction in directions) { - valleyEnvelope = null; + valley = default(T); int indexOfValley = 0; - for (int i = apexIndex + direction; i < timePointsForApexZ.Count && i >= 0; i += direction) + for (int i = indexOfApexInTimePoints + direction; i < timePoints.Count && i >= 0; i += direction) { - IsotopicEnvelope timepoint = timePointsForApexZ[i]; + ISingleScanDatum timepoint = timePoints[i]; // Valley envelope is the lowest intensity point that has been encountered thus far - if (valleyEnvelope == null || timepoint.Intensity < valleyEnvelope.Intensity) + if (valley == null || timepoint.Intensity < valley.Intensity) { - valleyEnvelope = timepoint; - indexOfValley = timePointsForApexZ.IndexOf(valleyEnvelope); + valley = (T)timepoint; + indexOfValley = i; } double discriminationFactor = - (timepoint.Intensity - valleyEnvelope.Intensity) / timepoint.Intensity; + (timepoint.Intensity - valley.Intensity) / timepoint.Intensity; // If the time point is at least discriminationFactor times more intense than the valley // We perform an additional check to see if the time point is more intense than the point next to the valley - if (discriminationFactor > DiscriminationFactorToCutPeak && - (indexOfValley + direction < timePointsForApexZ.Count && indexOfValley + direction >= 0)) + if (discriminationFactor > discriminationFactorToCutPeak && + (indexOfValley + direction < timePoints.Count && indexOfValley + direction >= 0)) { - - IsotopicEnvelope secondValleyTimepoint = timePointsForApexZ[indexOfValley + direction]; + ISingleScanDatum secondValleyTimepoint = timePoints[indexOfValley + direction]; discriminationFactor = (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; // If the current timepoint is more intense than the second valley, we cut the peak // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak - if (discriminationFactor > DiscriminationFactorToCutPeak || !scanNumbers.Contains(valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex + direction)) + if (discriminationFactor > discriminationFactorToCutPeak || !zeroIndexedScanNumbers.Contains(valley.ZeroBasedScanIndex + direction)) { cutThisPeak = true; break; @@ -1883,28 +1874,49 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) } } + return cutThisPeak; + } + + /// + /// Recursively cuts ITraceable objects, removing all datapoints + /// that occur before or after potential "valleys" surrounding the identification's + /// MS2 retention time. Then, the peak intensity is recalculated and the split location is set + /// + /// ITraceable object to be cut + /// Time representing the center of the peak + /// List of ISingleScanDatum objects that represent the trace apex. Most commonly used to pass in the apex charge state Isotopic Envelopes from a Chromatographic peak object + private void CutPeak(T peak, double identificationTime, List apexTimePoints = null) where T : ITraceable where TU : ISingleScanDatum + { + // find out if we need to split this peak by using the discrimination factor + // this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number + if (peak.ScanOrderedPoints.Count < 5) + { + return; + } + + List timePointsToSplitOn = apexTimePoints ?? peak.ScanOrderedPoints; + int apexIndex = timePointsToSplitOn.IndexOf(peak.Apex); + // cut - if (cutThisPeak) + if (ShouldCutPeak(timePointsToSplitOn, apexIndex, out var valleyEnvelope, discriminationFactorToCutPeak: DiscriminationFactorToCutPeak)) { - if (identificationTime > valleyEnvelope.IndexedPeak.RetentionTime) + if (identificationTime > valleyEnvelope.RelativeSeparationValue) { // MS2 identification is to the right of the valley; remove all peaks left of the valley - peak.IsotopicEnvelopes.RemoveAll(p => - p.IndexedPeak.RetentionTime <= valleyEnvelope.IndexedPeak.RetentionTime); + peak.ScanOrderedPoints.RemoveAll(p => p.RelativeSeparationValue <= valleyEnvelope.RelativeSeparationValue); } else { // MS2 identification is to the left of the valley; remove all peaks right of the valley - peak.IsotopicEnvelopes.RemoveAll(p => - p.IndexedPeak.RetentionTime >= valleyEnvelope.IndexedPeak.RetentionTime); + peak.ScanOrderedPoints.RemoveAll(p => p.RelativeSeparationValue >= valleyEnvelope.RelativeSeparationValue); } // recalculate intensity for the peak peak.CalculateIntensityForThisFeature(Integrate); - peak.SplitRT = valleyEnvelope.IndexedPeak.RetentionTime; + peak.SetSplitLocation(valleyEnvelope); // recursively cut - CutPeak(peak, identificationTime); + CutPeak(peak, identificationTime, apexTimePoints); } } } diff --git a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs index cdadc56eb..82092dc5e 100644 --- a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs +++ b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs @@ -3,12 +3,15 @@ namespace FlashLFQ { [Serializable] - public class IndexedMassSpectralPeak + public class IndexedMassSpectralPeak : ISingleScanDatum { - public readonly int ZeroBasedMs1ScanIndex; - public readonly double Mz; - public readonly double RetentionTime; - public readonly double Intensity; + public int ZeroBasedMs1ScanIndex { get; init; } + public double Mz { get; init; } + public double RetentionTime { get; init; } + public double Intensity { get; init; } + // ISingleScanDatum properties + public double RelativeSeparationValue => RetentionTime; + public int ZeroBasedScanIndex => ZeroBasedMs1ScanIndex; public IndexedMassSpectralPeak(double mz, double intensity, int zeroBasedMs1ScanIndex, double retentionTime) { diff --git a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs new file mode 100644 index 000000000..ae04eb8bc --- /dev/null +++ b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlashLFQ +{ + /// + /// An ISingleScanDatum represents information that exists in a single mass spectrometric scan! + /// E.g., a single m/z peak, a single isotopic envelope + /// + public interface ISingleScanDatum + { + public double Mz { get; } + public double Intensity { get; } + + /// + /// The position of the datum in separation domain, + /// e.g., m/z, ion mobility + /// + public double RelativeSeparationValue { get; } + public int ZeroBasedScanIndex { get; } + } +} diff --git a/mzLib/FlashLFQ/Interfaces/ITraceable.cs b/mzLib/FlashLFQ/Interfaces/ITraceable.cs new file mode 100644 index 000000000..1d3fb9a17 --- /dev/null +++ b/mzLib/FlashLFQ/Interfaces/ITraceable.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlashLFQ.Interfaces +{ + /// + /// Represent an object that can be traced over a separation domain + /// E.g., A chromatographic peak trace composed of multiple IsotopicEnvelopes + /// + public interface ITraceable where T : ISingleScanDatum + { + /// + /// The most intense point in the trace + /// + public T Apex { get; } + /// + /// A list of data points that compose the ITraceable object + /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) + /// + public List ScanOrderedPoints { get; } + + public void CalculateIntensityForThisFeature(bool integrate); + + public void SetSplitLocation(T splitPoint); + } +} diff --git a/mzLib/FlashLFQ/IsotopicEnvelope.cs b/mzLib/FlashLFQ/IsotopicEnvelope.cs index 938ac7850..61929ca75 100644 --- a/mzLib/FlashLFQ/IsotopicEnvelope.cs +++ b/mzLib/FlashLFQ/IsotopicEnvelope.cs @@ -3,7 +3,7 @@ /// /// Contains the summed intensities of all isotope peaks detected in a single MS1 scan for a given species. /// - public class IsotopicEnvelope + public class IsotopicEnvelope : ISingleScanDatum { /// /// The most abundant isotopic peak used for peak finding. @@ -11,6 +11,10 @@ public class IsotopicEnvelope public readonly IndexedMassSpectralPeak IndexedPeak; public readonly int ChargeState; + public double Mz => IndexedPeak.Mz; + public double RelativeSeparationValue => IndexedPeak.RetentionTime; + public int ZeroBasedScanIndex => IndexedPeak.ZeroBasedMs1ScanIndex; + public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeState, double intensity, double pearsonCorrelation) { IndexedPeak = monoisotopicPeak; @@ -26,7 +30,6 @@ public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeStat /// public double Intensity { get; private set; } - public double PearsonCorrelation { get; init; } public void Normalize(double normalizationFactor) From cbeaff77b7221f840681ccf5b0fc1199fa4aa78c Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 13 Feb 2025 17:52:32 -0600 Subject: [PATCH 4/8] Moved peak cutting to TraceablePeak abstract class --- mzLib/FlashLFQ/ChromatographicPeak.cs | 48 +++++--- mzLib/FlashLFQ/FlashLfqEngine.cs | 16 ++- mzLib/FlashLFQ/Interfaces/ITraceable.cs | 29 ----- mzLib/FlashLFQ/Interfaces/TraceablePeak.cs | 131 +++++++++++++++++++++ 4 files changed, 171 insertions(+), 53 deletions(-) delete mode 100644 mzLib/FlashLFQ/Interfaces/ITraceable.cs create mode 100644 mzLib/FlashLFQ/Interfaces/TraceablePeak.cs diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index f821775c0..017dc8b7f 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -5,10 +5,11 @@ using ClassExtensions = Chemistry.ClassExtensions; using FlashLFQ.PEP; using FlashLFQ.Interfaces; +using MathNet.Numerics; namespace FlashLFQ { - public class ChromatographicPeak : ITraceable + public class ChromatographicPeak : TraceablePeak { public double Intensity; public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1; @@ -34,6 +35,18 @@ public class ChromatographicPeak : ITraceable internal double MbrQValue { get; set; } public ChromatographicPeakData PepPeakData { get; set; } public double? MbrPep { get; set; } + public IsotopicEnvelope Apex { get; private set; } + public List Identifications { get; private set; } + public int NumChargeStatesObserved { get; private set; } + public int NumIdentificationsByBaseSeq { get; private set; } + public int NumIdentificationsByFullSeq { get; private set; } + public double MassError { get; private set; } + /// + /// Bool that describes whether the retention time of this peak was randomized + /// If true, implies that this peak is a decoy peak identified by the MBR algorithm + /// + public bool RandomRt { get; } + public bool DecoyPeptide => Identifications.First().IsDecoy; public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, bool randomRt = false) { @@ -56,18 +69,22 @@ public bool Equals(ChromatographicPeak peak) && ApexRetentionTime == peak.ApexRetentionTime; } - public IsotopicEnvelope Apex { get; private set; } - public List Identifications { get; private set; } - public int NumChargeStatesObserved { get; private set; } - public int NumIdentificationsByBaseSeq { get; private set; } - public int NumIdentificationsByFullSeq { get; private set; } - public double MassError { get; private set; } - /// - /// Bool that describes whether the retention time of this peak was randomized - /// If true, implies that this peak is a decoy peak identified by the MBR algorithm - /// - public bool RandomRt { get; } - public bool DecoyPeptide => Identifications.First().IsDecoy; + public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutPeak = 0.6, bool integrate = false) + { + // Find all envelopes associated with the apex peak charge state + List envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList(); + var centerEnvelope = envelopesForApexCharge.MinBy(e => Math.Abs(e.RelativeSeparationValue - ms2IdRetentionTime)); + // Find the boundaries of the peak, based on the apex charge state envelopes + var peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(centerEnvelope), discriminationFactorToCutPeak); + // Remove all points outside the peak boundaries (inclusive) + CutPeak(peakBoundaries, ms2IdRetentionTime); + + // Recalculate the intensity of the peak and update the split RT + CalculateIntensityForThisFeature(integrate); + // technically, there could be two "SplitRT" values if the peak is split into two separate peaks + // however, this edge case wasn't handled in the legacy code and won't be handled here + SplitRT = peakBoundaries.Count > 0 ? peakBoundaries.Last().RelativeSeparationValue : 0; + } public void CalculateIntensityForThisFeature(bool integrate) { @@ -128,11 +145,6 @@ public void MergeFeatureWith(ChromatographicPeak otherFeature, bool integrate) } } - public void SetSplitLocation(IsotopicEnvelope splitEnvelope) - { - SplitRT = splitEnvelope.IndexedPeak.RetentionTime; - } - /// /// Sets two ChromatographicPeak properties: NumIdentificationsByBaseSeq and NumIdentificationsByFullSeq /// diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index d1f17f59b..4232a9929 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -508,7 +508,7 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo) } msmsFeature.CalculateIntensityForThisFeature(Integrate); - CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes); + CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes, msmsFeature.ApexChargeStateScanOrderedPoints); if (!msmsFeature.IsotopicEnvelopes.Any()) { @@ -1880,12 +1880,13 @@ public static bool ShouldCutPeak(List timePoints, int indexOfApexInTimePoi /// /// Recursively cuts ITraceable objects, removing all datapoints /// that occur before or after potential "valleys" surrounding the identification's - /// MS2 retention time. Then, the peak intensity is recalculated and the split location is set + /// MS2 retention time. Then, the peak intensity is recalculated and the split location is set. + /// If using this on a ChromatographicPeak, it is important to pass in the apexTimePoints parameter /// /// ITraceable object to be cut /// Time representing the center of the peak /// List of ISingleScanDatum objects that represent the trace apex. Most commonly used to pass in the apex charge state Isotopic Envelopes from a Chromatographic peak object - private void CutPeak(T peak, double identificationTime, List apexTimePoints = null) where T : ITraceable where TU : ISingleScanDatum + private void CutPeak(T peak, double identificationTime, List apexTimePoints = null) where T : TraceablePeak where TU : ISingleScanDatum { // find out if we need to split this peak by using the discrimination factor // this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number @@ -1894,7 +1895,10 @@ private void CutPeak(T peak, double identificationTime, List apexTime return; } - List timePointsToSplitOn = apexTimePoints ?? peak.ScanOrderedPoints; + List timePointsToSplitOn; + + + timePointsToSplitOn = apexTimePoints ?? peak.ScanOrderedPoints; int apexIndex = timePointsToSplitOn.IndexOf(peak.Apex); // cut @@ -1912,8 +1916,8 @@ private void CutPeak(T peak, double identificationTime, List apexTime } // recalculate intensity for the peak - peak.CalculateIntensityForThisFeature(Integrate); - peak.SetSplitLocation(valleyEnvelope); + //peak.CalculateIntensityForThisFeature(Integrate); + //peak.SetSplitLocation(valleyEnvelope); // recursively cut CutPeak(peak, identificationTime, apexTimePoints); diff --git a/mzLib/FlashLFQ/Interfaces/ITraceable.cs b/mzLib/FlashLFQ/Interfaces/ITraceable.cs deleted file mode 100644 index 1d3fb9a17..000000000 --- a/mzLib/FlashLFQ/Interfaces/ITraceable.cs +++ /dev/null @@ -1,29 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace FlashLFQ.Interfaces -{ - /// - /// Represent an object that can be traced over a separation domain - /// E.g., A chromatographic peak trace composed of multiple IsotopicEnvelopes - /// - public interface ITraceable where T : ISingleScanDatum - { - /// - /// The most intense point in the trace - /// - public T Apex { get; } - /// - /// A list of data points that compose the ITraceable object - /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) - /// - public List ScanOrderedPoints { get; } - - public void CalculateIntensityForThisFeature(bool integrate); - - public void SetSplitLocation(T splitPoint); - } -} diff --git a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs new file mode 100644 index 000000000..5a5614f6b --- /dev/null +++ b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs @@ -0,0 +1,131 @@ +using Easy.Common.Extensions; +using MathNet.Numerics; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlashLFQ.Interfaces +{ + /// + /// Represent an object that can be traced over a separation domain + /// E.g., A chromatographic peak trace composed of multiple IsotopicEnvelopes + /// + public abstract class TraceablePeak where T : ISingleScanDatum + { + /// + /// The most intense point in the trace + /// + public T Apex { get; } + /// + /// A list of data points that compose the ITraceable object + /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) + /// + public List ScanOrderedPoints { get; } + + /// + /// Determines whether a peak should be cut based on the intensity of the surrounding time points. + /// + /// The type of the time points, which must implement ISingleScanDatum. + /// The list of time points + /// The index of the apex (most intense, best, whatever) in the list of time points. + /// The valley point, which is the lowest intensity point encountered. + /// The discrimination factor to determine if the peak should be cut. Default is 0.6. + /// True if the peak should be cut, otherwise false. + public static List FindPeakBoundaries(List timePoints, int indexOfPeakCenterInTimePoints, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum + { + List peakBoundaries = new List(); + HashSet zeroIndexedScanNumbers = timePoints.Select(p => p.ZeroBasedScanIndex).ToHashSet(); + + // -1 checks the left side, +1 checks the right side + int[] directions = { -1, 1 }; + foreach (int direction in directions) + { + T valley = default(T); + int indexOfValley = 0; + + for (int i = indexOfPeakCenterInTimePoints + direction; i < timePoints.Count && i >= 0; i += direction) + { + ISingleScanDatum timepoint = timePoints[i]; + + // Valley envelope is the lowest intensity point that has been encountered thus far + if (valley == null || timepoint.Intensity < valley.Intensity) + { + valley = (T)timepoint; + indexOfValley = i; + } + + double discriminationFactor = (timepoint.Intensity - valley.Intensity) / timepoint.Intensity; + + // If the time point is at least discriminationFactor times more intense than the valley + // We perform an additional check to see if the time point is more intense than the point next to the valley + if (discriminationFactor > discriminationFactorToCutPeak && + (indexOfValley + direction < timePoints.Count && indexOfValley + direction >= 0)) + { + ISingleScanDatum secondValleyTimepoint = timePoints[indexOfValley + direction]; + + discriminationFactor = + (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; + + // If the current timepoint is more intense than the second valley, we cut the peak + // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak + if (discriminationFactor > discriminationFactorToCutPeak || !zeroIndexedScanNumbers.Contains(valley.ZeroBasedScanIndex + direction)) + { + peakBoundaries.Add(valley); + break; + } + } + } + } + + return peakBoundaries; + } + + public List FindPeakBoundaries(double separationValueAtPeakCenter, double discriminationFactorToCutPeak = 0.6) + { + if(ScanOrderedPoints.Count < 5) return null; + T peakCenter = ScanOrderedPoints.MinBy(d => Math.Abs(d.RelativeSeparationValue - separationValueAtPeakCenter)); + return FindPeakBoundaries(ScanOrderedPoints, ScanOrderedPoints.IndexOf(peakCenter), discriminationFactorToCutPeak); + } + + /// + /// Recursively cuts ITraceable objects, removing all datapoints + /// that occur before or after potential "valleys" surrounding the "separationValueAtPeakCenter", + /// which in the case of a Chromatographic peak will be the associated identification's + /// MS2 retention time. + /// + /// Time representing the center of the peak + /// The discrimination factor to determine if the peak should be cut. Default is 0.6. + public virtual void CutPeak(double separationValueAtPeakCenter, double discriminationFactorToCutPeak = 0.6) + { + var peakBoundaries = FindPeakBoundaries(separationValueAtPeakCenter, discriminationFactorToCutPeak); + CutPeak(peakBoundaries, separationValueAtPeakCenter); + } + + /// + /// Recursively cuts ITraceable objects, removing all datapoints outside of the peak boundaries, inclusive + /// + /// + /// + public void CutPeak(List peakBoundaries, double separationValueAtPeakCenter) + { + if (peakBoundaries.IsNotNullOrEmpty()) + { + foreach (var boundary in peakBoundaries) + { + if (boundary.RelativeSeparationValue > separationValueAtPeakCenter) + { + ScanOrderedPoints.RemoveAll(d => d.RelativeSeparationValue >= boundary.RelativeSeparationValue); + } + else + { + ScanOrderedPoints.RemoveAll(d => d.RelativeSeparationValue <= boundary.RelativeSeparationValue); + } + } + } + } + + + } +} From 959434414ff2e733bc88cc28d905b7035998689c Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 13 Feb 2025 17:59:32 -0600 Subject: [PATCH 5/8] idk --- mzLib/FlashLFQ/FlashLfqEngine.cs | 117 +------------------------------ 1 file changed, 3 insertions(+), 114 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 4232a9929..67f2e42f8 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -16,6 +16,7 @@ using System.IO; using System.Threading; using FlashLFQ.Interfaces; +using MassSpectrometry; [assembly: InternalsVisibleTo("TestFlashLFQ")] @@ -508,7 +509,7 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo) } msmsFeature.CalculateIntensityForThisFeature(Integrate); - CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes, msmsFeature.ApexChargeStateScanOrderedPoints); + msmsFeature.CutPeak(identification.Ms2RetentionTimeInMinutes, DiscriminationFactorToCutPeak, Integrate); if (!msmsFeature.IsotopicEnvelopes.Any()) { @@ -1293,7 +1294,7 @@ internal ChromatographicPeak FindIndividualAcceptorPeak( acceptorPeak.IsotopicEnvelopes.AddRange(bestChargeEnvelopes); acceptorPeak.CalculateIntensityForThisFeature(Integrate); - CutPeak(acceptorPeak, seedEnv.IndexedPeak.RetentionTime, apexTimePoints: acceptorPeak.ApexChargeStateScanOrderedPoints); + acceptorPeak.CutPeak(seedEnv.IndexedPeak.RetentionTime, DiscriminationFactorToCutPeak, Integrate); var claimedPeaks = new HashSet(acceptorPeak.IsotopicEnvelopes.Select(p => p.IndexedPeak)) { @@ -1811,117 +1812,5 @@ public List Peakfind(double idRetentionTime, double mas return xic; } - /// - /// Determines whether a peak should be cut based on the intensity of the surrounding time points. - /// - /// The type of the time points, which must implement ISingleScanDatum. - /// The list of time points - /// The index of the apex (most intense, best, whatever) in the list of time points. - /// The valley point, which is the lowest intensity point encountered. - /// The discrimination factor to determine if the peak should be cut. Default is 0.6. - /// True if the peak should be cut, otherwise false. - public static bool ShouldCutPeak(List timePoints, int indexOfApexInTimePoints, - out T valley, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum - { - valley = default(T); - bool cutThisPeak = false; - HashSet zeroIndexedScanNumbers = timePoints.Select(p => p.ZeroBasedScanIndex).ToHashSet(); - - // -1 checks the left side, +1 checks the right side - int[] directions = { 1, -1 }; - foreach (int direction in directions) - { - valley = default(T); - int indexOfValley = 0; - - for (int i = indexOfApexInTimePoints + direction; i < timePoints.Count && i >= 0; i += direction) - { - ISingleScanDatum timepoint = timePoints[i]; - - // Valley envelope is the lowest intensity point that has been encountered thus far - if (valley == null || timepoint.Intensity < valley.Intensity) - { - valley = (T)timepoint; - indexOfValley = i; - } - - double discriminationFactor = - (timepoint.Intensity - valley.Intensity) / timepoint.Intensity; - - // If the time point is at least discriminationFactor times more intense than the valley - // We perform an additional check to see if the time point is more intense than the point next to the valley - if (discriminationFactor > discriminationFactorToCutPeak && - (indexOfValley + direction < timePoints.Count && indexOfValley + direction >= 0)) - { - ISingleScanDatum secondValleyTimepoint = timePoints[indexOfValley + direction]; - - discriminationFactor = - (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; - - // If the current timepoint is more intense than the second valley, we cut the peak - // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak - if (discriminationFactor > discriminationFactorToCutPeak || !zeroIndexedScanNumbers.Contains(valley.ZeroBasedScanIndex + direction)) - { - cutThisPeak = true; - break; - } - } - } - - if (cutThisPeak) - { - break; - } - } - - return cutThisPeak; - } - - /// - /// Recursively cuts ITraceable objects, removing all datapoints - /// that occur before or after potential "valleys" surrounding the identification's - /// MS2 retention time. Then, the peak intensity is recalculated and the split location is set. - /// If using this on a ChromatographicPeak, it is important to pass in the apexTimePoints parameter - /// - /// ITraceable object to be cut - /// Time representing the center of the peak - /// List of ISingleScanDatum objects that represent the trace apex. Most commonly used to pass in the apex charge state Isotopic Envelopes from a Chromatographic peak object - private void CutPeak(T peak, double identificationTime, List apexTimePoints = null) where T : TraceablePeak where TU : ISingleScanDatum - { - // find out if we need to split this peak by using the discrimination factor - // this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number - if (peak.ScanOrderedPoints.Count < 5) - { - return; - } - - List timePointsToSplitOn; - - - timePointsToSplitOn = apexTimePoints ?? peak.ScanOrderedPoints; - int apexIndex = timePointsToSplitOn.IndexOf(peak.Apex); - - // cut - if (ShouldCutPeak(timePointsToSplitOn, apexIndex, out var valleyEnvelope, discriminationFactorToCutPeak: DiscriminationFactorToCutPeak)) - { - if (identificationTime > valleyEnvelope.RelativeSeparationValue) - { - // MS2 identification is to the right of the valley; remove all peaks left of the valley - peak.ScanOrderedPoints.RemoveAll(p => p.RelativeSeparationValue <= valleyEnvelope.RelativeSeparationValue); - } - else - { - // MS2 identification is to the left of the valley; remove all peaks right of the valley - peak.ScanOrderedPoints.RemoveAll(p => p.RelativeSeparationValue >= valleyEnvelope.RelativeSeparationValue); - } - - // recalculate intensity for the peak - //peak.CalculateIntensityForThisFeature(Integrate); - //peak.SetSplitLocation(valleyEnvelope); - - // recursively cut - CutPeak(peak, identificationTime, apexTimePoints); - } - } } } \ No newline at end of file From 5d7fd4384bfb48a1542830447da34de93dcdecf4 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 13 Feb 2025 18:34:53 -0600 Subject: [PATCH 6/8] Finished implementation, MBR results change though... --- mzLib/FlashLFQ/ChromatographicPeak.cs | 33 ++++++++++++++-------- mzLib/FlashLFQ/Interfaces/TraceablePeak.cs | 32 ++++++++------------- mzLib/TestFlashLFQ/TestFlashLFQ.cs | 4 +-- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index 017dc8b7f..e73965a11 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -6,6 +6,7 @@ using FlashLFQ.PEP; using FlashLFQ.Interfaces; using MathNet.Numerics; +using Easy.Common.Extensions; namespace FlashLFQ { @@ -15,8 +16,7 @@ public class ChromatographicPeak : TraceablePeak public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1; public readonly SpectraFileInfo SpectraFileInfo; public List IsotopicEnvelopes { get; set; } - public List ScanOrderedPoints => IsotopicEnvelopes; - public List ApexChargeStateScanOrderedPoints => IsotopicEnvelopes.Where(p => p.ChargeState == Apex.ChargeState).ToList(); + public override List ScanOrderedPoints => IsotopicEnvelopes; public int ScanCount => IsotopicEnvelopes.Count; public double SplitRT { get; private set; } public readonly bool IsMbrPeak; @@ -73,17 +73,26 @@ public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutP { // Find all envelopes associated with the apex peak charge state List envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList(); - var centerEnvelope = envelopesForApexCharge.MinBy(e => Math.Abs(e.RelativeSeparationValue - ms2IdRetentionTime)); + // Find the boundaries of the peak, based on the apex charge state envelopes - var peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(centerEnvelope), discriminationFactorToCutPeak); - // Remove all points outside the peak boundaries (inclusive) - CutPeak(peakBoundaries, ms2IdRetentionTime); - - // Recalculate the intensity of the peak and update the split RT - CalculateIntensityForThisFeature(integrate); - // technically, there could be two "SplitRT" values if the peak is split into two separate peaks - // however, this edge case wasn't handled in the legacy code and won't be handled here - SplitRT = peakBoundaries.Count > 0 ? peakBoundaries.Last().RelativeSeparationValue : 0; + var peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak); + + // This is done in a while loop, as it's possible that there are multiple distinct peaks in the trace + while(peakBoundaries.IsNotNullOrEmpty()) + { + // Remove all points outside the peak boundaries (inclusive) + CutPeak(peakBoundaries, ms2IdRetentionTime); + + // Recalculate the intensity of the peak and update the split RT + CalculateIntensityForThisFeature(integrate); + // technically, there could be two "SplitRT" values if the peak is split into two separate peaks + // however, this edge case wasn't handled in the legacy code and won't be handled here + SplitRT = peakBoundaries.Count > 0 ? peakBoundaries.Last().RelativeSeparationValue : 0; + + // Update the list of envelopes for the apex charge state (as some were removed during the peak cutting) + envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList(); + peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak); + } } public void CalculateIntensityForThisFeature(bool integrate) diff --git a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs index 5a5614f6b..17dfe52c1 100644 --- a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs +++ b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs @@ -17,23 +17,22 @@ public abstract class TraceablePeak where T : ISingleScanDatum /// /// The most intense point in the trace /// - public T Apex { get; } + public virtual T Apex { get; } /// /// A list of data points that compose the ITraceable object /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) /// - public List ScanOrderedPoints { get; } + public virtual List ScanOrderedPoints { get; } /// /// Determines whether a peak should be cut based on the intensity of the surrounding time points. /// /// The type of the time points, which must implement ISingleScanDatum. /// The list of time points - /// The index of the apex (most intense, best, whatever) in the list of time points. - /// The valley point, which is the lowest intensity point encountered. + /// The index of the apex (most intense, best, whatever) in the list of time points. /// The discrimination factor to determine if the peak should be cut. Default is 0.6. /// True if the peak should be cut, otherwise false. - public static List FindPeakBoundaries(List timePoints, int indexOfPeakCenterInTimePoints, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum + public static List FindPeakBoundaries(List timePoints, int apexTimepointIndex, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum { List peakBoundaries = new List(); HashSet zeroIndexedScanNumbers = timePoints.Select(p => p.ZeroBasedScanIndex).ToHashSet(); @@ -45,7 +44,7 @@ public static List FindPeakBoundaries(List timePoints, int indexOfPeakC T valley = default(T); int indexOfValley = 0; - for (int i = indexOfPeakCenterInTimePoints + direction; i < timePoints.Count && i >= 0; i += direction) + for (int i = apexTimepointIndex + direction; i < timePoints.Count && i >= 0; i += direction) { ISingleScanDatum timepoint = timePoints[i]; @@ -82,29 +81,20 @@ public static List FindPeakBoundaries(List timePoints, int indexOfPeakC return peakBoundaries; } - public List FindPeakBoundaries(double separationValueAtPeakCenter, double discriminationFactorToCutPeak = 0.6) - { - if(ScanOrderedPoints.Count < 5) return null; - T peakCenter = ScanOrderedPoints.MinBy(d => Math.Abs(d.RelativeSeparationValue - separationValueAtPeakCenter)); - return FindPeakBoundaries(ScanOrderedPoints, ScanOrderedPoints.IndexOf(peakCenter), discriminationFactorToCutPeak); - } - /// /// Recursively cuts ITraceable objects, removing all datapoints - /// that occur before or after potential "valleys" surrounding the "separationValueAtPeakCenter", - /// which in the case of a Chromatographic peak will be the associated identification's - /// MS2 retention time. + /// that occur before or after potential "valleys" surrounding the Apex. /// - /// Time representing the center of the peak /// The discrimination factor to determine if the peak should be cut. Default is 0.6. - public virtual void CutPeak(double separationValueAtPeakCenter, double discriminationFactorToCutPeak = 0.6) + public virtual void CutPeak(double discriminationFactorToCutPeak = 0.6) { - var peakBoundaries = FindPeakBoundaries(separationValueAtPeakCenter, discriminationFactorToCutPeak); - CutPeak(peakBoundaries, separationValueAtPeakCenter); + var peakBoundaries = FindPeakBoundaries(ScanOrderedPoints, ScanOrderedPoints.IndexOf(Apex), discriminationFactorToCutPeak); + CutPeak(peakBoundaries, Apex.RelativeSeparationValue); } /// - /// Recursively cuts ITraceable objects, removing all datapoints outside of the peak boundaries, inclusive + /// Cuts a TraceablePeak objects, starting at the separationValueAtPeakCenter and removing all datapoints + /// that occur before or after the peakBoundaries /// /// /// diff --git a/mzLib/TestFlashLFQ/TestFlashLFQ.cs b/mzLib/TestFlashLFQ/TestFlashLFQ.cs index b0133d0d6..922627d70 100644 --- a/mzLib/TestFlashLFQ/TestFlashLFQ.cs +++ b/mzLib/TestFlashLFQ/TestFlashLFQ.cs @@ -1526,8 +1526,8 @@ public static void RealDataMbrTest() // Any change to ML.NET or the PEP Analysis engine will cause these to change. Console.WriteLine("r1 PIP event count: " + f1r1MbrResults.Count); Console.WriteLine("r2 PIP event count: " + f1r2MbrResults.Count); - Assert.AreEqual(141, f1r1MbrResults.Count); - Assert.AreEqual(78, f1r2MbrResults.Count); + Assert.AreEqual(140, f1r1MbrResults.Count); + Assert.AreEqual(77, f1r2MbrResults.Count); // Check that MS/MS identified peaks and MBR identified peaks have similar intensities List<(double, double)> peptideIntensities = f1r1MbrResults.Select(pep => (Math.Log(pep.Value.GetIntensity(f1r1)), Math.Log(pep.Value.GetIntensity(f1r2)))).ToList(); From 4502ae215e666097ee6bb48fa5ee714b8048e3ab Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 13 Feb 2025 18:45:44 -0600 Subject: [PATCH 7/8] Added more comments --- mzLib/FlashLFQ/ChromatographicPeak.cs | 11 +++++++++++ mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index e73965a11..a4da81a1e 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -69,6 +69,11 @@ public bool Equals(ChromatographicPeak peak) && ApexRetentionTime == peak.ApexRetentionTime; } + /// + /// Cuts the peak in such a way as to ensure that the final peak contains the ms2IdRetention time + /// + /// The time at which the MS2 scan matched to a peptide was collected + /// Passed to CalculateIntensityForThisFeature public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutPeak = 0.6, bool integrate = false) { // Find all envelopes associated with the apex peak charge state @@ -95,6 +100,12 @@ public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutP } } + /// + /// Calculated the intensity of the peak. If Integrate is false, this is equal to the summed intensity of all + /// m/z peak in the the most intense isotopic envelope of the most intense charge state. If Integrate is true, + /// intensity is set as the summed intensity of every m/z peak in every isotopic envelope in every charge state. + /// + /// public void CalculateIntensityForThisFeature(bool integrate) { if (IsotopicEnvelopes.Any()) diff --git a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs index ae04eb8bc..964d3df80 100644 --- a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs +++ b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs @@ -16,8 +16,8 @@ public interface ISingleScanDatum public double Intensity { get; } /// - /// The position of the datum in separation domain, - /// e.g., m/z, ion mobility + /// The position of the datum in the separation domain, + /// e.g., retention time, ion mobility /// public double RelativeSeparationValue { get; } public int ZeroBasedScanIndex { get; } From 2711f041f8fc05785f2bae6c711fa78b801e8ddb Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 14 Feb 2025 12:47:38 -0600 Subject: [PATCH 8/8] figured out abstract properties --- mzLib/FlashLFQ/ChromatographicPeak.cs | 7 ++++--- mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs | 1 - mzLib/FlashLFQ/Interfaces/TraceablePeak.cs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index a4da81a1e..987648f31 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -35,7 +35,8 @@ public class ChromatographicPeak : TraceablePeak internal double MbrQValue { get; set; } public ChromatographicPeakData PepPeakData { get; set; } public double? MbrPep { get; set; } - public IsotopicEnvelope Apex { get; private set; } + public override IsotopicEnvelope Apex => _apex; + private IsotopicEnvelope _apex; public List Identifications { get; private set; } public int NumChargeStatesObserved { get; private set; } public int NumIdentificationsByBaseSeq { get; private set; } @@ -110,7 +111,7 @@ public void CalculateIntensityForThisFeature(bool integrate) { if (IsotopicEnvelopes.Any()) { - Apex = IsotopicEnvelopes.MaxBy(p => p.Intensity); + _apex = IsotopicEnvelopes.MaxBy(p => p.Intensity); if (integrate) { @@ -140,7 +141,7 @@ public void CalculateIntensityForThisFeature(bool integrate) Intensity = 0; MassError = double.NaN; NumChargeStatesObserved = 0; - Apex = null; + _apex = null; } } diff --git a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs index 964d3df80..0576b9ab6 100644 --- a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs +++ b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs @@ -12,7 +12,6 @@ namespace FlashLFQ /// public interface ISingleScanDatum { - public double Mz { get; } public double Intensity { get; } /// diff --git a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs index 17dfe52c1..003c099fe 100644 --- a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs +++ b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs @@ -17,12 +17,12 @@ public abstract class TraceablePeak where T : ISingleScanDatum /// /// The most intense point in the trace /// - public virtual T Apex { get; } + public abstract T Apex { get; } /// /// A list of data points that compose the ITraceable object /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) /// - public virtual List ScanOrderedPoints { get; } + public abstract List ScanOrderedPoints { get; } /// /// Determines whether a peak should be cut based on the intensity of the surrounding time points.