diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs index d7bac219..987648f3 100644 --- a/mzLib/FlashLFQ/ChromatographicPeak.cs +++ b/mzLib/FlashLFQ/ChromatographicPeak.cs @@ -4,17 +4,21 @@ using System.Text; using ClassExtensions = Chemistry.ClassExtensions; using FlashLFQ.PEP; +using FlashLFQ.Interfaces; +using MathNet.Numerics; +using Easy.Common.Extensions; namespace FlashLFQ { - public class ChromatographicPeak + public class ChromatographicPeak : TraceablePeak { public double Intensity; public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1; public readonly SpectraFileInfo SpectraFileInfo; - public List IsotopicEnvelopes; + public List IsotopicEnvelopes { get; set; } + public override List ScanOrderedPoints => IsotopicEnvelopes; public int ScanCount => IsotopicEnvelopes.Count; - public double SplitRT; + public double SplitRT { get; private set; } public readonly bool IsMbrPeak; public double MbrScore; public double PpmScore { get; set; } @@ -31,6 +35,19 @@ public class ChromatographicPeak internal double MbrQValue { get; set; } public ChromatographicPeakData PepPeakData { get; set; } public double? MbrPep { get; set; } + public override IsotopicEnvelope Apex => _apex; + private IsotopicEnvelope _apex; + public List Identifications { get; private set; } + public int NumChargeStatesObserved { get; private set; } + public int NumIdentificationsByBaseSeq { get; private set; } + public int NumIdentificationsByFullSeq { get; private set; } + public double MassError { get; private set; } + /// + /// Bool that describes whether the retention time of this peak was randomized + /// If true, implies that this peak is a decoy peak identified by the MBR algorithm + /// + public bool RandomRt { get; } + public bool DecoyPeptide => Identifications.First().IsDecoy; public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, bool randomRt = false) { @@ -53,24 +70,48 @@ public bool Equals(ChromatographicPeak peak) && ApexRetentionTime == peak.ApexRetentionTime; } - public IsotopicEnvelope Apex { get; private set; } - public List Identifications { get; private set; } - public int NumChargeStatesObserved { get; private set; } - public int NumIdentificationsByBaseSeq { get; private set; } - public int NumIdentificationsByFullSeq { get; private set; } - public double MassError { get; private set; } /// - /// Bool that describes whether the retention time of this peak was randomized - /// If true, implies that this peak is a decoy peak identified by the MBR algorithm + /// Cuts the peak in such a way as to ensure that the final peak contains the ms2IdRetention time /// - public bool RandomRt { get; } - public bool DecoyPeptide => Identifications.First().IsDecoy; + /// The time at which the MS2 scan matched to a peptide was collected + /// Passed to CalculateIntensityForThisFeature + public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutPeak = 0.6, bool integrate = false) + { + // Find all envelopes associated with the apex peak charge state + List envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList(); + + // Find the boundaries of the peak, based on the apex charge state envelopes + var peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak); + // This is done in a while loop, as it's possible that there are multiple distinct peaks in the trace + while(peakBoundaries.IsNotNullOrEmpty()) + { + // Remove all points outside the peak boundaries (inclusive) + CutPeak(peakBoundaries, ms2IdRetentionTime); + + // Recalculate the intensity of the peak and update the split RT + CalculateIntensityForThisFeature(integrate); + // technically, there could be two "SplitRT" values if the peak is split into two separate peaks + // however, this edge case wasn't handled in the legacy code and won't be handled here + SplitRT = peakBoundaries.Count > 0 ? peakBoundaries.Last().RelativeSeparationValue : 0; + + // Update the list of envelopes for the apex charge state (as some were removed during the peak cutting) + envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList(); + peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak); + } + } + + /// + /// Calculated the intensity of the peak. If Integrate is false, this is equal to the summed intensity of all + /// m/z peak in the the most intense isotopic envelope of the most intense charge state. If Integrate is true, + /// intensity is set as the summed intensity of every m/z peak in every isotopic envelope in every charge state. + /// + /// public void CalculateIntensityForThisFeature(bool integrate) { if (IsotopicEnvelopes.Any()) { - Apex = IsotopicEnvelopes.MaxBy(p => p.Intensity); + _apex = IsotopicEnvelopes.MaxBy(p => p.Intensity); if (integrate) { @@ -100,7 +141,7 @@ public void CalculateIntensityForThisFeature(bool integrate) Intensity = 0; MassError = double.NaN; NumChargeStatesObserved = 0; - Apex = null; + _apex = null; } } diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 488f1c77..67f2e42f 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -15,6 +15,8 @@ using FlashLFQ.PEP; using System.IO; using System.Threading; +using FlashLFQ.Interfaces; +using MassSpectrometry; [assembly: InternalsVisibleTo("TestFlashLFQ")] @@ -507,7 +509,7 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo) } msmsFeature.CalculateIntensityForThisFeature(Integrate); - CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes); + msmsFeature.CutPeak(identification.Ms2RetentionTimeInMinutes, DiscriminationFactorToCutPeak, Integrate); if (!msmsFeature.IsotopicEnvelopes.Any()) { @@ -1292,7 +1294,7 @@ internal ChromatographicPeak FindIndividualAcceptorPeak( acceptorPeak.IsotopicEnvelopes.AddRange(bestChargeEnvelopes); acceptorPeak.CalculateIntensityForThisFeature(Integrate); - CutPeak(acceptorPeak, seedEnv.IndexedPeak.RetentionTime); + acceptorPeak.CutPeak(seedEnv.IndexedPeak.RetentionTime, DiscriminationFactorToCutPeak, Integrate); var claimedPeaks = new HashSet(acceptorPeak.IsotopicEnvelopes.Select(p => p.IndexedPeak)) { @@ -1810,102 +1812,5 @@ public List Peakfind(double idRetentionTime, double mas return xic; } - /// - /// Recursively cuts ChromatographicPeaks, removing all IsotopicEnvelopes - /// that occur before or after potential "valleys" surrounding the identification's - /// MS2 retention time. Then, the peak intensity is recalculated - /// - /// Peak to be cut, where envelopes are sorted by MS1 scan number - /// MS2 scan retention time - private void CutPeak(ChromatographicPeak peak, double identificationTime) - { - // find out if we need to split this peak by using the discrimination factor - // this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number - bool cutThisPeak = false; - - if (peak.IsotopicEnvelopes.Count < 5) - { - return; - } - - // Ordered list of all time points where the apex charge state had a valid isotopic envelope - List timePointsForApexZ = peak.IsotopicEnvelopes - .Where(p => p.ChargeState == peak.Apex.ChargeState).ToList(); - HashSet scanNumbers = new HashSet(timePointsForApexZ.Select(p => p.IndexedPeak.ZeroBasedMs1ScanIndex)); - int apexIndex = timePointsForApexZ.IndexOf(peak.Apex); - IsotopicEnvelope valleyEnvelope = null; - - // -1 checks the left side, +1 checks the right side - int[] directions = { 1, -1 }; - foreach (int direction in directions) - { - valleyEnvelope = null; - int indexOfValley = 0; - - for (int i = apexIndex + direction; i < timePointsForApexZ.Count && i >= 0; i += direction) - { - IsotopicEnvelope timepoint = timePointsForApexZ[i]; - - // Valley envelope is the lowest intensity point that has been encountered thus far - if (valleyEnvelope == null || timepoint.Intensity < valleyEnvelope.Intensity) - { - valleyEnvelope = timepoint; - indexOfValley = timePointsForApexZ.IndexOf(valleyEnvelope); - } - - double discriminationFactor = - (timepoint.Intensity - valleyEnvelope.Intensity) / timepoint.Intensity; - - // If the time point is at least discriminationFactor times more intense than the valley - // We perform an additional check to see if the time point is more intense than the point next to the valley - if (discriminationFactor > DiscriminationFactorToCutPeak && - (indexOfValley + direction < timePointsForApexZ.Count && indexOfValley + direction >= 0)) - { - - IsotopicEnvelope secondValleyTimepoint = timePointsForApexZ[indexOfValley + direction]; - - discriminationFactor = - (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; - - // If the current timepoint is more intense than the second valley, we cut the peak - // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak - if (discriminationFactor > DiscriminationFactorToCutPeak || !scanNumbers.Contains(valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex + direction)) - { - cutThisPeak = true; - break; - } - } - } - - if (cutThisPeak) - { - break; - } - } - - // cut - if (cutThisPeak) - { - if (identificationTime > valleyEnvelope.IndexedPeak.RetentionTime) - { - // MS2 identification is to the right of the valley; remove all peaks left of the valley - peak.IsotopicEnvelopes.RemoveAll(p => - p.IndexedPeak.RetentionTime <= valleyEnvelope.IndexedPeak.RetentionTime); - } - else - { - // MS2 identification is to the left of the valley; remove all peaks right of the valley - peak.IsotopicEnvelopes.RemoveAll(p => - p.IndexedPeak.RetentionTime >= valleyEnvelope.IndexedPeak.RetentionTime); - } - - // recalculate intensity for the peak - peak.CalculateIntensityForThisFeature(Integrate); - peak.SplitRT = valleyEnvelope.IndexedPeak.RetentionTime; - - // recursively cut - CutPeak(peak, identificationTime); - } - } } } \ No newline at end of file diff --git a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs index cdadc56e..82092dc5 100644 --- a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs +++ b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs @@ -3,12 +3,15 @@ namespace FlashLFQ { [Serializable] - public class IndexedMassSpectralPeak + public class IndexedMassSpectralPeak : ISingleScanDatum { - public readonly int ZeroBasedMs1ScanIndex; - public readonly double Mz; - public readonly double RetentionTime; - public readonly double Intensity; + public int ZeroBasedMs1ScanIndex { get; init; } + public double Mz { get; init; } + public double RetentionTime { get; init; } + public double Intensity { get; init; } + // ISingleScanDatum properties + public double RelativeSeparationValue => RetentionTime; + public int ZeroBasedScanIndex => ZeroBasedMs1ScanIndex; public IndexedMassSpectralPeak(double mz, double intensity, int zeroBasedMs1ScanIndex, double retentionTime) { diff --git a/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs new file mode 100644 index 00000000..0576b9ab --- /dev/null +++ b/mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlashLFQ +{ + /// + /// An ISingleScanDatum represents information that exists in a single mass spectrometric scan! + /// E.g., a single m/z peak, a single isotopic envelope + /// + public interface ISingleScanDatum + { + public double Intensity { get; } + + /// + /// The position of the datum in the separation domain, + /// e.g., retention time, ion mobility + /// + public double RelativeSeparationValue { get; } + public int ZeroBasedScanIndex { get; } + } +} diff --git a/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs new file mode 100644 index 00000000..003c099f --- /dev/null +++ b/mzLib/FlashLFQ/Interfaces/TraceablePeak.cs @@ -0,0 +1,121 @@ +using Easy.Common.Extensions; +using MathNet.Numerics; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlashLFQ.Interfaces +{ + /// + /// Represent an object that can be traced over a separation domain + /// E.g., A chromatographic peak trace composed of multiple IsotopicEnvelopes + /// + public abstract class TraceablePeak where T : ISingleScanDatum + { + /// + /// The most intense point in the trace + /// + public abstract T Apex { get; } + /// + /// A list of data points that compose the ITraceable object + /// This list must be ordered by the separation domain in ascending order! (e.g., retention time) + /// + public abstract List ScanOrderedPoints { get; } + + /// + /// Determines whether a peak should be cut based on the intensity of the surrounding time points. + /// + /// The type of the time points, which must implement ISingleScanDatum. + /// The list of time points + /// The index of the apex (most intense, best, whatever) in the list of time points. + /// The discrimination factor to determine if the peak should be cut. Default is 0.6. + /// True if the peak should be cut, otherwise false. + public static List FindPeakBoundaries(List timePoints, int apexTimepointIndex, double discriminationFactorToCutPeak = 0.6) where T : ISingleScanDatum + { + List peakBoundaries = new List(); + HashSet zeroIndexedScanNumbers = timePoints.Select(p => p.ZeroBasedScanIndex).ToHashSet(); + + // -1 checks the left side, +1 checks the right side + int[] directions = { -1, 1 }; + foreach (int direction in directions) + { + T valley = default(T); + int indexOfValley = 0; + + for (int i = apexTimepointIndex + direction; i < timePoints.Count && i >= 0; i += direction) + { + ISingleScanDatum timepoint = timePoints[i]; + + // Valley envelope is the lowest intensity point that has been encountered thus far + if (valley == null || timepoint.Intensity < valley.Intensity) + { + valley = (T)timepoint; + indexOfValley = i; + } + + double discriminationFactor = (timepoint.Intensity - valley.Intensity) / timepoint.Intensity; + + // If the time point is at least discriminationFactor times more intense than the valley + // We perform an additional check to see if the time point is more intense than the point next to the valley + if (discriminationFactor > discriminationFactorToCutPeak && + (indexOfValley + direction < timePoints.Count && indexOfValley + direction >= 0)) + { + ISingleScanDatum secondValleyTimepoint = timePoints[indexOfValley + direction]; + + discriminationFactor = + (timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity; + + // If the current timepoint is more intense than the second valley, we cut the peak + // If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak + if (discriminationFactor > discriminationFactorToCutPeak || !zeroIndexedScanNumbers.Contains(valley.ZeroBasedScanIndex + direction)) + { + peakBoundaries.Add(valley); + break; + } + } + } + } + + return peakBoundaries; + } + + /// + /// Recursively cuts ITraceable objects, removing all datapoints + /// that occur before or after potential "valleys" surrounding the Apex. + /// + /// The discrimination factor to determine if the peak should be cut. Default is 0.6. + public virtual void CutPeak(double discriminationFactorToCutPeak = 0.6) + { + var peakBoundaries = FindPeakBoundaries(ScanOrderedPoints, ScanOrderedPoints.IndexOf(Apex), discriminationFactorToCutPeak); + CutPeak(peakBoundaries, Apex.RelativeSeparationValue); + } + + /// + /// Cuts a TraceablePeak objects, starting at the separationValueAtPeakCenter and removing all datapoints + /// that occur before or after the peakBoundaries + /// + /// + /// + public void CutPeak(List peakBoundaries, double separationValueAtPeakCenter) + { + if (peakBoundaries.IsNotNullOrEmpty()) + { + foreach (var boundary in peakBoundaries) + { + if (boundary.RelativeSeparationValue > separationValueAtPeakCenter) + { + ScanOrderedPoints.RemoveAll(d => d.RelativeSeparationValue >= boundary.RelativeSeparationValue); + } + else + { + ScanOrderedPoints.RemoveAll(d => d.RelativeSeparationValue <= boundary.RelativeSeparationValue); + } + } + } + } + + + } +} diff --git a/mzLib/FlashLFQ/IsotopicEnvelope.cs b/mzLib/FlashLFQ/IsotopicEnvelope.cs index 938ac785..61929ca7 100644 --- a/mzLib/FlashLFQ/IsotopicEnvelope.cs +++ b/mzLib/FlashLFQ/IsotopicEnvelope.cs @@ -3,7 +3,7 @@ /// /// Contains the summed intensities of all isotope peaks detected in a single MS1 scan for a given species. /// - public class IsotopicEnvelope + public class IsotopicEnvelope : ISingleScanDatum { /// /// The most abundant isotopic peak used for peak finding. @@ -11,6 +11,10 @@ public class IsotopicEnvelope public readonly IndexedMassSpectralPeak IndexedPeak; public readonly int ChargeState; + public double Mz => IndexedPeak.Mz; + public double RelativeSeparationValue => IndexedPeak.RetentionTime; + public int ZeroBasedScanIndex => IndexedPeak.ZeroBasedMs1ScanIndex; + public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeState, double intensity, double pearsonCorrelation) { IndexedPeak = monoisotopicPeak; @@ -26,7 +30,6 @@ public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeStat /// public double Intensity { get; private set; } - public double PearsonCorrelation { get; init; } public void Normalize(double normalizationFactor) diff --git a/mzLib/TestFlashLFQ/TestFlashLFQ.cs b/mzLib/TestFlashLFQ/TestFlashLFQ.cs index b0133d0d..922627d7 100644 --- a/mzLib/TestFlashLFQ/TestFlashLFQ.cs +++ b/mzLib/TestFlashLFQ/TestFlashLFQ.cs @@ -1526,8 +1526,8 @@ public static void RealDataMbrTest() // Any change to ML.NET or the PEP Analysis engine will cause these to change. Console.WriteLine("r1 PIP event count: " + f1r1MbrResults.Count); Console.WriteLine("r2 PIP event count: " + f1r2MbrResults.Count); - Assert.AreEqual(141, f1r1MbrResults.Count); - Assert.AreEqual(78, f1r2MbrResults.Count); + Assert.AreEqual(140, f1r1MbrResults.Count); + Assert.AreEqual(77, f1r2MbrResults.Count); // Check that MS/MS identified peaks and MBR identified peaks have similar intensities List<(double, double)> peptideIntensities = f1r1MbrResults.Select(pep => (Math.Log(pep.Value.GetIntensity(f1r1)), Math.Log(pep.Value.GetIntensity(f1r2)))).ToList();