Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectral library new decoy #2004

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 78 additions & 12 deletions EngineLayer/ClassicSearch/ClassicSearchEngine.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using MzLibUtil;
using Chemistry;
using MzLibUtil;
using Proteomics;
using Proteomics.Fragmentation;
using Proteomics.ProteolyticDigestion;
Expand All @@ -11,7 +12,8 @@ namespace EngineLayer.ClassicSearch
{
public class ClassicSearchEngine : MetaMorpheusEngine
{
private readonly SpectralLibrary SpectralLibrary;
public SpectralLibrary SpectralLibrary { get; set; }
public Dictionary<PeptideWithSetModifications, PeptideWithSetModifications> decoyTargetPairs;
private readonly MassDiffAcceptor SearchMode;
private readonly List<Protein> Proteins;
private readonly List<Modification> FixedModifications;
Expand Down Expand Up @@ -49,9 +51,10 @@ protected override MetaMorpheusEngineResults RunSpecific()

double proteinsSearched = 0;
int oldPercentProgress = 0;
decoyTargetPairs = new Dictionary<PeptideWithSetModifications, PeptideWithSetModifications>();

// one lock for each MS2 scan; a scan can only be accessed by one thread at a time
var myLocks = new object[PeptideSpectralMatches.Length];
// one lock for each MS2 scan; a scan can only be accessed by one thread at a time
var myLocks = new object[PeptideSpectralMatches.Length];
for (int i = 0; i < myLocks.Length; i++)
{
myLocks[i] = new object();
Expand All @@ -66,6 +69,7 @@ protected override MetaMorpheusEngineResults RunSpecific()
Parallel.ForEach(threads, (i) =>
{
var peptideTheorProducts = new List<Product>();
var decoyPeptideTheorProducts = new List<Product>();

for (; i < Proteins.Count; i += maxThreadsPerFile)
{
Expand All @@ -75,19 +79,42 @@ protected override MetaMorpheusEngineResults RunSpecific()
// digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels))
{
//if (peptide.FullSequence != "TTGIVMDSGDGVTHTVPIYEGYALPHAILR" && peptide.FullSequence != "VTDEILHLVPNIDNFR")
//{
// continue;
//}
peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
int[] newAAlocations = new int[peptide.BaseSequence.Length];
PeptideWithSetModifications decoy = DecoyOnTheFly.GetReverseDecoyFromTarget(peptide, newAAlocations);
decoy.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, decoyPeptideTheorProducts);

foreach (ScanWithIndexAndNotchInfo scan in GetAcceptableScans(peptide.MonoisotopicMass, SearchMode))
// we need a function to get the original target sequence of a decoy peptide

lock (decoyTargetPairs)
{
if (SpectralLibrary != null && !SpectralLibrary.ContainsSpectrum(peptide.FullSequence, scan.TheScan.PrecursorCharge))
if (!decoyTargetPairs.ContainsKey(decoy))
{
continue;
decoyTargetPairs.Add(decoy, peptide);
}
}




foreach (ScanWithIndexAndNotchInfo scan in GetAcceptableScans(peptide.MonoisotopicMass, SearchMode))
{

//if (SpectralLibrary != null && !SpectralLibrary.ContainsSpectrum(peptide.FullSequence, scan.TheScan.PrecursorCharge))
//{
// continue;
//}

List<MatchedFragmentIon> matchedIons = MatchFragmentIons(scan.TheScan, peptideTheorProducts, CommonParameters);
List<MatchedFragmentIon> decoyMatchedIons = MatchFragmentIons(scan.TheScan, decoyPeptideTheorProducts, CommonParameters);

double thisScore = CalculatePeptideScore(scan.TheScan.TheScan, matchedIons);
bool meetsScoreCutoff = thisScore >= CommonParameters.ScoreCutoff;
double decoyScore = CalculatePeptideScore(scan.TheScan.TheScan, decoyMatchedIons);
bool meetsScoreCutoff = Math.Max(thisScore, decoyScore) >= CommonParameters.ScoreCutoff;

// this is thread-safe because even if the score improves from another thread writing to this PSM,
// the lock combined with AddOrReplace method will ensure thread safety
Expand All @@ -96,24 +123,42 @@ protected override MetaMorpheusEngineResults RunSpecific()
// valid hit (met the cutoff score); lock the scan to prevent other threads from accessing it
lock (myLocks[scan.ScanIndex])
{
bool scoreImprovement = PeptideSpectralMatches[scan.ScanIndex] == null || (thisScore - PeptideSpectralMatches[scan.ScanIndex].RunnerUpScore) > -PeptideSpectralMatch.ToleranceForScoreDifferentiation;
bool scoreImprovement = PeptideSpectralMatches[scan.ScanIndex] == null || (Math.Max(thisScore, decoyScore) - PeptideSpectralMatches[scan.ScanIndex].RunnerUpScore) > -PeptideSpectralMatch.ToleranceForScoreDifferentiation;

if (scoreImprovement)
{
if (PeptideSpectralMatches[scan.ScanIndex] == null)
{
PeptideSpectralMatches[scan.ScanIndex] = new PeptideSpectralMatch(peptide, scan.Notch, thisScore, scan.ScanIndex, scan.TheScan, CommonParameters, matchedIons, 0);
if (thisScore >= decoyScore)
{
PeptideSpectralMatches[scan.ScanIndex] = new PeptideSpectralMatch(peptide, scan.Notch, thisScore, scan.ScanIndex, scan.TheScan, CommonParameters, matchedIons, 0);
}
else
{
PeptideSpectralMatches[scan.ScanIndex] = new PeptideSpectralMatch(decoy, scan.Notch, decoyScore, scan.ScanIndex, scan.TheScan, CommonParameters, decoyMatchedIons, 0);

}
}
else
{
PeptideSpectralMatches[scan.ScanIndex].AddOrReplace(peptide, thisScore, scan.Notch, CommonParameters.ReportAllAmbiguity, matchedIons, 0);
if (thisScore >= decoyScore)
{
PeptideSpectralMatches[scan.ScanIndex].AddOrReplace(peptide, thisScore, scan.Notch, CommonParameters.ReportAllAmbiguity, matchedIons, 0);
}
else
{
PeptideSpectralMatches[scan.ScanIndex].AddOrReplace(decoy, decoyScore, scan.Notch, CommonParameters.ReportAllAmbiguity, decoyMatchedIons, 0);
}

}
}


}
}

}
}

// report search progress (proteins searched so far out of total proteins in database)
proteinsSearched++;
var percentProgress = (int)((proteinsSearched / Proteins.Count) * 100);
Expand All @@ -131,10 +176,31 @@ protected override MetaMorpheusEngineResults RunSpecific()
{
psm.ResolveAllAmbiguities();
}
if (SpectralLibrary != null)
{
SpectralLibrary.DecoyTargetPairs = decoyTargetPairs;
}

return new MetaMorpheusEngineResults(this);
}


private List<E> ShuffleList<E>(List<E> inputList)
{
List<E> randomList = new List<E>();

Random r = new Random();
int randomIndex = 0;
while (inputList.Count > 0)
{
randomIndex = r.Next(0, inputList.Count); //Choose a random object in the list
randomList.Add(inputList[randomIndex]); //add it to the new, random list
inputList.RemoveAt(randomIndex); //remove to avoid duplicates
}

return randomList; //return the new random list
}

private IEnumerable<ScanWithIndexAndNotchInfo> GetAcceptableScans(double peptideMonoisotopicMass, MassDiffAcceptor searchMode)
{
foreach (AllowedIntervalWithNotch allowedIntervalWithNotch in searchMode.GetAllowedPrecursorMassIntervalsFromTheoreticalMass(peptideMonoisotopicMass).ToList())
Expand Down
168 changes: 168 additions & 0 deletions EngineLayer/DecoyOnTheFly.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
using Proteomics;
using Proteomics.ProteolyticDigestion;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace EngineLayer
{
public class DecoyOnTheFly
{
//This function maintains the amino acids associated with the protease motif and reverses all other amino acids.
//N-terminal modificatons are preserved. Other modifications travel with their respective amino acids. this results
//in a decoy peptide composed the same amino acids and modifications as the original.
//Occasionally, this process results in peptide with exactly the same sequence. Therefore, there is a stop-gap measure
//the returns the mirror image of the original. N-terminal mods are preserved, but other mods are also reversed.
//this should yield a unique decoy for each target sequence.
public static PeptideWithSetModifications GetReverseDecoyFromTarget(PeptideWithSetModifications pwsm, int[] revisedAminoAcidOrder)
{
Dictionary<int, Modification> newModificationsDictionary = new Dictionary<int, Modification>();
//Copy N-terminal modifications from target dictionary to decoy dictionary.
if (pwsm.AllModsOneIsNterminus.ContainsKey(1))
{
newModificationsDictionary.Add(1, pwsm.AllModsOneIsNterminus[1]);
}
char[] newBase = new char[pwsm.BaseSequence.Length];
ProteomicsExtenstionMethods.Fill(newBase, '0');
char[] evaporatingBase = pwsm.BaseSequence.ToCharArray();
List<DigestionMotif> motifs = pwsm.DigestionParams.Protease.DigestionMotifs;
if (motifs != null && motifs.Count > 0)
{
foreach (var motif in motifs.Where(m => m.InducingCleavage != ""))//check the empty "" for topdown
{
string cleavingMotif = motif.InducingCleavage;
List<int> cleavageMotifLocations = new List<int>();

for (int i = 0; i < pwsm.BaseSequence.Length; i++)
{
bool Fits;
bool Prevents;
(Fits, Prevents) = motif.Fits(pwsm.BaseSequence, i);

if (Fits && !Prevents)
{
cleavageMotifLocations.Add(i);
}
}

foreach (int location in cleavageMotifLocations)
{
char[] motifArray = pwsm.BaseSequence.Substring(location, cleavingMotif.Length).ToCharArray();
for (int i = 0; i < cleavingMotif.Length; i++)
{
newBase[location + i] = motifArray[i];
revisedAminoAcidOrder[location + i] = location + i;//
//directly copy mods that were on amino acids in the motif. Those amino acids don't change position.
if (pwsm.AllModsOneIsNterminus.ContainsKey(location + i + 2))
{
newModificationsDictionary.Add(location + i + 2, pwsm.AllModsOneIsNterminus[location + i + 2]);
}

evaporatingBase[location + i] = '0';//can null a char so i use a number which doesnt' appear in peptide string
}
}
}
}

//We've kept amino acids in the digestion motif in the same position in the decoy peptide.
//Now we will fill the remaining open positions in the decoy with the reverse of amino acids from the target.
int fillPosition = 0;
int extractPosition = pwsm.BaseSequence.Length - 1;
while (fillPosition < pwsm.BaseSequence.Length && extractPosition >= 0)
{
if (evaporatingBase[extractPosition] != '0')
{
while (newBase[fillPosition] != '0')
{
fillPosition++;
}
newBase[fillPosition] = evaporatingBase[extractPosition];
revisedAminoAcidOrder[fillPosition] = extractPosition;
if (pwsm.AllModsOneIsNterminus.ContainsKey(extractPosition + 2))
{
newModificationsDictionary.Add(fillPosition + 2, pwsm.AllModsOneIsNterminus[extractPosition + 2]);
}
fillPosition++;
}
extractPosition--;
}

string newBaseString = new string(newBase);

var proteinSequence = pwsm.Protein.BaseSequence;
var aStringBuilder = new StringBuilder(proteinSequence);
aStringBuilder.Remove(pwsm.OneBasedStartResidueInProtein - 1, pwsm.BaseSequence.Length);
aStringBuilder.Insert(pwsm.OneBasedStartResidueInProtein - 1, newBaseString);
proteinSequence = aStringBuilder.ToString();

Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + pwsm.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);

DigestionParams d = pwsm.DigestionParams;

if (newBaseString != pwsm.BaseSequence)
{
return new PeptideWithSetModifications(decoyProtein, d, pwsm.OneBasedStartResidueInProtein, pwsm.OneBasedEndResidueInProtein, pwsm.CleavageSpecificityForFdrCategory, pwsm.PeptideDescription, pwsm.MissedCleavages, newModificationsDictionary, pwsm.NumFixedMods, newBaseString);
}
else
{
//The reverse decoy procedure failed to create a PeptideWithSetModificatons with a different sequence. Therefore,
//we retrun the mirror image peptide.
return GetPeptideMirror(pwsm, revisedAminoAcidOrder);
}

}

//Returns a PeptideWithSetModifications mirror image. Used when reverse decoy sequence is same as target sequence
public static PeptideWithSetModifications GetPeptideMirror(PeptideWithSetModifications pwsm, int[] revisedOrderNisOne)
{
Dictionary<int, Modification> newModificationsDictionary = new Dictionary<int, Modification>();
//Copy N-terminal modifications from target dictionary to decoy dictionary.
if (pwsm.AllModsOneIsNterminus.ContainsKey(1))
{
newModificationsDictionary.Add(1, pwsm.AllModsOneIsNterminus[1]);
}

//First step is to reverse the position of all modifications except the mod on the peptide N-terminus.
if (pwsm.AllModsOneIsNterminus.Any())
{
int newPosition = pwsm.BaseSequence.Length + 1;
for (int i = 2; i < pwsm.BaseSequence.Length + 2; i++)
{
if (pwsm.AllModsOneIsNterminus.ContainsKey(i))
{
newModificationsDictionary.Add(newPosition, pwsm.AllModsOneIsNterminus[i]);
newPosition--;
}
else
{
newPosition--;
}
}
}

//Second step is to reverse the sequence.
string newBaseString = ProteomicsExtenstionMethods.Reverse(pwsm.BaseSequence);

var proteinSequence = pwsm.Protein.BaseSequence;
var aStringBuilder = new StringBuilder(proteinSequence);
aStringBuilder.Remove(pwsm.OneBasedStartResidueInProtein - 1, pwsm.BaseSequence.Length);
aStringBuilder.Insert(pwsm.OneBasedStartResidueInProtein - 1, newBaseString);
proteinSequence = aStringBuilder.ToString();

Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + pwsm.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);
DigestionParams d = pwsm.DigestionParams;

//now fill in the revised amino acid order
int oldStringPosition = pwsm.BaseSequence.Length - 1;
for (int i = 0; i < newBaseString.Length; i++)
{
revisedOrderNisOne[i] = oldStringPosition;
oldStringPosition--;
}
return new PeptideWithSetModifications(decoyProtein, d, pwsm.OneBasedStartResidueInProtein, pwsm.OneBasedEndResidueInProtein, pwsm.CleavageSpecificityForFdrCategory, pwsm.PeptideDescription, pwsm.MissedCleavages, newModificationsDictionary, pwsm.NumFixedMods, newBaseString);
}


}
}
Loading