Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FlashLFQ refactoring to enable generic peak operations #837

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
71 changes: 56 additions & 15 deletions mzLib/FlashLFQ/ChromatographicPeak.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@
using System.Text;
using ClassExtensions = Chemistry.ClassExtensions;
using FlashLFQ.PEP;
using FlashLFQ.Interfaces;
using MathNet.Numerics;
using Easy.Common.Extensions;

namespace FlashLFQ
{
public class ChromatographicPeak
public class ChromatographicPeak : TraceablePeak<IsotopicEnvelope>
{
public double Intensity;
public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1;
public readonly SpectraFileInfo SpectraFileInfo;
public List<IsotopicEnvelope> IsotopicEnvelopes;
public List<IsotopicEnvelope> IsotopicEnvelopes { get; set; }
public override List<IsotopicEnvelope> ScanOrderedPoints => IsotopicEnvelopes;
public int ScanCount => IsotopicEnvelopes.Count;
public double SplitRT;
public double SplitRT { get; private set; }
public readonly bool IsMbrPeak;
public double MbrScore;
public double PpmScore { get; set; }
Expand All @@ -31,6 +35,19 @@ public class ChromatographicPeak
internal double MbrQValue { get; set; }
public ChromatographicPeakData PepPeakData { get; set; }
public double? MbrPep { get; set; }
public override IsotopicEnvelope Apex => _apex;
private IsotopicEnvelope _apex;
public List<Identification> Identifications { get; private set; }
public int NumChargeStatesObserved { get; private set; }
public int NumIdentificationsByBaseSeq { get; private set; }
public int NumIdentificationsByFullSeq { get; private set; }
public double MassError { get; private set; }
/// <summary>
/// Bool that describes whether the retention time of this peak was randomized
/// If true, implies that this peak is a decoy peak identified by the MBR algorithm
/// </summary>
public bool RandomRt { get; }
public bool DecoyPeptide => Identifications.First().IsDecoy;

public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, bool randomRt = false)
{
Expand All @@ -53,24 +70,48 @@ public bool Equals(ChromatographicPeak peak)
&& ApexRetentionTime == peak.ApexRetentionTime;
}

public IsotopicEnvelope Apex { get; private set; }
public List<Identification> Identifications { get; private set; }
public int NumChargeStatesObserved { get; private set; }
public int NumIdentificationsByBaseSeq { get; private set; }
public int NumIdentificationsByFullSeq { get; private set; }
public double MassError { get; private set; }
/// <summary>
/// Bool that describes whether the retention time of this peak was randomized
/// If true, implies that this peak is a decoy peak identified by the MBR algorithm
/// Cuts the peak in such a way as to ensure that the final peak contains the ms2IdRetention time
/// </summary>
public bool RandomRt { get; }
public bool DecoyPeptide => Identifications.First().IsDecoy;
/// <param name="ms2IdRetentionTime"> The time at which the MS2 scan matched to a peptide was collected</param>
/// <param name="integrate"> Passed to CalculateIntensityForThisFeature </param>
public void CutPeak(double ms2IdRetentionTime, double discriminationFactorToCutPeak = 0.6, bool integrate = false)
{
// Find all envelopes associated with the apex peak charge state
List<IsotopicEnvelope> envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList();

// Find the boundaries of the peak, based on the apex charge state envelopes
var peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak);

// This is done in a while loop, as it's possible that there are multiple distinct peaks in the trace
while(peakBoundaries.IsNotNullOrEmpty())
{
// Remove all points outside the peak boundaries (inclusive)
CutPeak(peakBoundaries, ms2IdRetentionTime);

// Recalculate the intensity of the peak and update the split RT
CalculateIntensityForThisFeature(integrate);
// technically, there could be two "SplitRT" values if the peak is split into two separate peaks
// however, this edge case wasn't handled in the legacy code and won't be handled here
SplitRT = peakBoundaries.Count > 0 ? peakBoundaries.Last().RelativeSeparationValue : 0;

// Update the list of envelopes for the apex charge state (as some were removed during the peak cutting)
envelopesForApexCharge = IsotopicEnvelopes.Where(e => e.ChargeState == Apex.ChargeState).ToList();
peakBoundaries = FindPeakBoundaries(envelopesForApexCharge, envelopesForApexCharge.IndexOf(Apex), discriminationFactorToCutPeak);
}
}

/// <summary>
/// Calculated the intensity of the peak. If Integrate is false, this is equal to the summed intensity of all
/// m/z peak in the the most intense isotopic envelope of the most intense charge state. If Integrate is true,
/// intensity is set as the summed intensity of every m/z peak in every isotopic envelope in every charge state.
/// </summary>
/// <param name="integrate"></param>
public void CalculateIntensityForThisFeature(bool integrate)
{
if (IsotopicEnvelopes.Any())
{
Apex = IsotopicEnvelopes.MaxBy(p => p.Intensity);
_apex = IsotopicEnvelopes.MaxBy(p => p.Intensity);

if (integrate)
{
Expand Down Expand Up @@ -100,7 +141,7 @@ public void CalculateIntensityForThisFeature(bool integrate)
Intensity = 0;
MassError = double.NaN;
NumChargeStatesObserved = 0;
Apex = null;
_apex = null;
}
}

Expand Down
103 changes: 4 additions & 99 deletions mzLib/FlashLFQ/FlashLfqEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
using FlashLFQ.PEP;
using System.IO;
using System.Threading;
using FlashLFQ.Interfaces;
using MassSpectrometry;

[assembly: InternalsVisibleTo("TestFlashLFQ")]

Expand Down Expand Up @@ -507,7 +509,7 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo)
}

msmsFeature.CalculateIntensityForThisFeature(Integrate);
CutPeak(msmsFeature, identification.Ms2RetentionTimeInMinutes);
msmsFeature.CutPeak(identification.Ms2RetentionTimeInMinutes, DiscriminationFactorToCutPeak, Integrate);

if (!msmsFeature.IsotopicEnvelopes.Any())
{
Expand Down Expand Up @@ -1292,7 +1294,7 @@ internal ChromatographicPeak FindIndividualAcceptorPeak(
acceptorPeak.IsotopicEnvelopes.AddRange(bestChargeEnvelopes);
acceptorPeak.CalculateIntensityForThisFeature(Integrate);

CutPeak(acceptorPeak, seedEnv.IndexedPeak.RetentionTime);
acceptorPeak.CutPeak(seedEnv.IndexedPeak.RetentionTime, DiscriminationFactorToCutPeak, Integrate);

var claimedPeaks = new HashSet<IndexedMassSpectralPeak>(acceptorPeak.IsotopicEnvelopes.Select(p => p.IndexedPeak))
{
Expand Down Expand Up @@ -1810,102 +1812,5 @@ public List<IndexedMassSpectralPeak> Peakfind(double idRetentionTime, double mas
return xic;
}

/// <summary>
/// Recursively cuts ChromatographicPeaks, removing all IsotopicEnvelopes
/// that occur before or after potential "valleys" surrounding the identification's
/// MS2 retention time. Then, the peak intensity is recalculated
/// </summary>
/// <param name="peak"> Peak to be cut, where envelopes are sorted by MS1 scan number </param>
/// <param name="identificationTime"> MS2 scan retention time </param>
private void CutPeak(ChromatographicPeak peak, double identificationTime)
{
// find out if we need to split this peak by using the discrimination factor
// this method assumes that the isotope envelopes in a chromatographic peak are already sorted by MS1 scan number
bool cutThisPeak = false;

if (peak.IsotopicEnvelopes.Count < 5)
{
return;
}

// Ordered list of all time points where the apex charge state had a valid isotopic envelope
List<IsotopicEnvelope> timePointsForApexZ = peak.IsotopicEnvelopes
.Where(p => p.ChargeState == peak.Apex.ChargeState).ToList();
HashSet<int> scanNumbers = new HashSet<int>(timePointsForApexZ.Select(p => p.IndexedPeak.ZeroBasedMs1ScanIndex));
int apexIndex = timePointsForApexZ.IndexOf(peak.Apex);
IsotopicEnvelope valleyEnvelope = null;

// -1 checks the left side, +1 checks the right side
int[] directions = { 1, -1 };
foreach (int direction in directions)
{
valleyEnvelope = null;
int indexOfValley = 0;

for (int i = apexIndex + direction; i < timePointsForApexZ.Count && i >= 0; i += direction)
{
IsotopicEnvelope timepoint = timePointsForApexZ[i];

// Valley envelope is the lowest intensity point that has been encountered thus far
if (valleyEnvelope == null || timepoint.Intensity < valleyEnvelope.Intensity)
{
valleyEnvelope = timepoint;
indexOfValley = timePointsForApexZ.IndexOf(valleyEnvelope);
}

double discriminationFactor =
(timepoint.Intensity - valleyEnvelope.Intensity) / timepoint.Intensity;

// If the time point is at least discriminationFactor times more intense than the valley
// We perform an additional check to see if the time point is more intense than the point next to the valley
if (discriminationFactor > DiscriminationFactorToCutPeak &&
(indexOfValley + direction < timePointsForApexZ.Count && indexOfValley + direction >= 0))
{

IsotopicEnvelope secondValleyTimepoint = timePointsForApexZ[indexOfValley + direction];

discriminationFactor =
(timepoint.Intensity - secondValleyTimepoint.Intensity) / timepoint.Intensity;

// If the current timepoint is more intense than the second valley, we cut the peak
// If the scan following the valley isn't in the timePointsForApexZ list (i.e., no isotopic envelope is observed in the scan immediately after the valley), we also cut the peak
if (discriminationFactor > DiscriminationFactorToCutPeak || !scanNumbers.Contains(valleyEnvelope.IndexedPeak.ZeroBasedMs1ScanIndex + direction))
{
cutThisPeak = true;
break;
}
}
}

if (cutThisPeak)
{
break;
}
}

// cut
if (cutThisPeak)
{
if (identificationTime > valleyEnvelope.IndexedPeak.RetentionTime)
{
// MS2 identification is to the right of the valley; remove all peaks left of the valley
peak.IsotopicEnvelopes.RemoveAll(p =>
p.IndexedPeak.RetentionTime <= valleyEnvelope.IndexedPeak.RetentionTime);
}
else
{
// MS2 identification is to the left of the valley; remove all peaks right of the valley
peak.IsotopicEnvelopes.RemoveAll(p =>
p.IndexedPeak.RetentionTime >= valleyEnvelope.IndexedPeak.RetentionTime);
}

// recalculate intensity for the peak
peak.CalculateIntensityForThisFeature(Integrate);
peak.SplitRT = valleyEnvelope.IndexedPeak.RetentionTime;

// recursively cut
CutPeak(peak, identificationTime);
}
}
}
}
13 changes: 8 additions & 5 deletions mzLib/FlashLFQ/IndexedMassSpectralPeak.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
namespace FlashLFQ
{
[Serializable]
public class IndexedMassSpectralPeak
public class IndexedMassSpectralPeak : ISingleScanDatum
{
public readonly int ZeroBasedMs1ScanIndex;
public readonly double Mz;
public readonly double RetentionTime;
public readonly double Intensity;
public int ZeroBasedMs1ScanIndex { get; init; }
public double Mz { get; init; }
public double RetentionTime { get; init; }
public double Intensity { get; init; }
// ISingleScanDatum properties
public double RelativeSeparationValue => RetentionTime;
public int ZeroBasedScanIndex => ZeroBasedMs1ScanIndex;

Check warning on line 14 in mzLib/FlashLFQ/IndexedMassSpectralPeak.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/FlashLFQ/IndexedMassSpectralPeak.cs#L13-L14

Added lines #L13 - L14 were not covered by tests

public IndexedMassSpectralPeak(double mz, double intensity, int zeroBasedMs1ScanIndex, double retentionTime)
{
Expand Down
24 changes: 24 additions & 0 deletions mzLib/FlashLFQ/Interfaces/ISingleScanDatum.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FlashLFQ
{
/// <summary>
/// An ISingleScanDatum represents information that exists in a single mass spectrometric scan!
/// E.g., a single m/z peak, a single isotopic envelope
/// </summary>
public interface ISingleScanDatum
{
public double Intensity { get; }

/// <summary>
/// The position of the datum in the separation domain,
/// e.g., retention time, ion mobility
/// </summary>
public double RelativeSeparationValue { get; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use retention time here instead?

public int ZeroBasedScanIndex { get; }
}
}
Loading
Loading