Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional Digestion Count Output #2460

Merged
merged 19 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
90b2ce5
Add WriteDigestCountCheckBox to SearchTaskWindow and Search Task Para…
nbollis Jan 28, 2025
f8bfbdb
Add DictionaryExtensions class with AddOrCreate method with tests
nbollis Jan 28, 2025
83d956d
moved anlyte type to util folder
nbollis Jan 28, 2025
2cc25f6
Add Increment method to DictionaryExtensions
nbollis Jan 28, 2025
ccdf5c9
Add feature to track digestion product counts per protein to ClassicS…
nbollis Jan 28, 2025
7f5a2ad
Only write tsv file for classic search
nbollis Jan 28, 2025
3657dc6
Add IsNullOrEmpty method to DictionaryExtensions
nbollis Jan 28, 2025
7c33067
Add feature to track and write digestion product counts
nbollis Jan 29, 2025
4429039
Ensured writing consistency with multiple data files
nbollis Jan 29, 2025
ffcd713
Cleanup and Additional Comments
nbollis Jan 29, 2025
7c3e2ca
Added more comments to tooltop
nbollis Jan 29, 2025
ca062ca
Ensured thread safety for extension methods with concurent dictionary.
nbollis Jan 29, 2025
6d7471a
Added tests for thread safety
nbollis Jan 29, 2025
73bedd9
AddOrCreateThreadSafe take in lock object
nbollis Jan 30, 2025
1cb8dfe
Adjusted Dictionary Extensions Increment method to
nbollis Jan 30, 2025
5d67fd4
Update digestion count tracking and file output format
nbollis Jan 30, 2025
7c58288
Merge branch 'master' into DigestionCountsInOutput
nbollis Jan 30, 2025
c144951
Merge branch 'master' into DigestionCountsInOutput
nbollis Feb 3, 2025
d749c32
Merge branch 'master' into DigestionCountsInOutput
nbollis Feb 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Threading;
using Omics.Modifications;
using System.Collections.Concurrent;

Expand All @@ -26,12 +25,14 @@ public class ClassicSearchEngine : MetaMorpheusEngine
private readonly Ms2ScanWithSpecificMass[] ArrayOfSortedMS2Scans;
private readonly double[] MyScanPrecursorMasses;
private readonly bool WriteSpectralLibrary;
private readonly bool WriteDigestionCounts;
private readonly object[] Locks;
public readonly ConcurrentDictionary<(string Accession, string BaseSequence), int> DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled.

public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans,
List<Modification> variableModifications, List<Modification> fixedModifications, List<SilacLabel> silacLabels, SilacLabel startLabel, SilacLabel endLabel,
List<Protein> proteinList, MassDiffAcceptor searchMode, CommonParameters commonParameters, List<(string FileName, CommonParameters Parameters)> fileSpecificParameters,
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary)
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary, bool writeDigestionCounts = false)
: base(commonParameters, fileSpecificParameters, nestedIds)
{
PeptideSpectralMatches = globalPsms;
Expand All @@ -48,6 +49,8 @@ public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[]
SearchMode = searchMode;
SpectralLibrary = spectralLibrary;
WriteSpectralLibrary = writeSpectralLibrary;
WriteDigestionCounts = writeDigestionCounts;
DigestionCountDictionary = new();

// Create one lock for each PSM to ensure thread safety
Locks = new object[PeptideSpectralMatches.Length];
Expand Down Expand Up @@ -108,6 +111,9 @@ protected override MetaMorpheusEngineResults RunSpecific()
// digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels))
{
if (WriteDigestionCounts)
DigestionCountDictionary.Increment((peptide.Parent.Accession, peptide.BaseSequence));

PeptideWithSetModifications reversedOnTheFlyDecoy = null;

if (SpectralLibrary != null)
Expand Down
121 changes: 121 additions & 0 deletions MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
using Nett;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Numerics;

namespace EngineLayer;

/// <summary>
/// Extension helpers for two recurring dictionary patterns: appending to the list stored
/// under a key (creating the list on first use) and incrementing the counter stored under a key.
/// </summary>
public static class DictionaryExtensions
{
    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <remarks>
    /// This is not thread-safe!
    /// </remarks>
    public static void AddOrCreate<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value)
    {
        if (dictionary.TryGetValue(key, out IList<TValues> values))
        {
            values.Add(value);
        }
        else
        {
            dictionary.Add(key, new List<TValues> { value });
        }
    }

    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <param name="lockObject">Object used to lock this specific dictionary</param>
    /// <remarks>
    /// Every mutation of an existing list, and every access to a non-concurrent dictionary, happens
    /// while holding <paramref name="lockObject"/>. Callers must therefore pass the same lock object
    /// for every call that targets the same dictionary.
    /// </remarks>
    public static void AddOrCreateThreadSafe<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value, object lockObject)
    {
        if (dictionary is ConcurrentDictionary<TKey, IList<TValues>> concurrentDictionary)
        {
            concurrentDictionary.AddOrUpdate(
                key,
                // Factory overload: the new list is only built when the key is actually missing,
                // instead of allocating a throwaway list on every call.
                _ => new List<TValues> { value },
                (_, existing) =>
                {
                    // must lock inside the add or update as the List.Add method is not thread safe.
                    lock (lockObject)
                    {
                        existing.Add(value);
                    }

                    // The same list instance is handed back, so the stored reference never changes.
                    return existing;
                });
            return;
        }

        // A plain dictionary offers no protection of its own: guard the lookup and the insert together.
        lock (lockObject)
        {
            if (dictionary.TryGetValue(key, out IList<TValues> values))
            {
                values.Add(value);
            }
            else
            {
                dictionary.Add(key, new List<TValues> { value });
            }
        }
    }

    /// <summary>
    /// Increments the value associated with the specified key in the dictionary by a defined number with a default of one.
    /// If the key does not exist, a new entry is created whose value is the increment amount.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary, which must implement <see cref="INumber{TValue}"/>.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value to increment or create.</param>
    /// <param name="incrementBy">
    /// The amount to increment by. When omitted, or when it equals the default value of
    /// <typeparamref name="TValue"/> (zero for the built-in numeric types), the increment is one;
    /// an explicit zero is therefore also treated as one.
    /// </param>
    /// <remarks>
    /// A <see cref="ConcurrentDictionary{TKey, TValue}"/> is updated through its AddOrUpdate method.
    /// Any other dictionary is updated with a separate lookup and write, which is not thread-safe.
    /// </remarks>
    public static void Increment<TKey, TValue>(this IDictionary<TKey, TValue> dictionary, TKey key, TValue incrementBy = default)
        where TValue : INumber<TValue>
    {
        // EqualityComparer tolerates a null incrementBy (reference-type numerics), where calling
        // Equals on the argument itself would throw.
        bool useDefaultStep = EqualityComparer<TValue>.Default.Equals(incrementBy, default);
        TValue incrementValue = useDefaultStep ? TValue.One : incrementBy;

        if (dictionary is ConcurrentDictionary<TKey, TValue> concurrentDictionary)
        {
            concurrentDictionary.AddOrUpdate(key, incrementValue, (_, current) => current + incrementValue);
            return;
        }

        if (dictionary.TryGetValue(key, out TValue value))
        {
            dictionary[key] = value + incrementValue;
        }
        else
        {
            dictionary.Add(key, incrementValue);
        }
    }

    /// <summary>
    /// Determines whether the dictionary is null or has no elements.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary.</typeparam>
    /// <param name="dictionary">The dictionary to check.</param>
    /// <returns>
    /// <c>true</c> if the dictionary is null or has no elements; otherwise, <c>false</c>.
    /// </returns>
    public static bool IsNullOrEmpty<TKey, TValue>(this IDictionary<TKey, TValue> dictionary)
    {
        return dictionary == null || dictionary.Count == 0;
    }
}
12 changes: 12 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,18 @@
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
<!-- Output option for the digestion product count files. The IsEnabled binding makes the box
     usable only while ClassicSearchRadioButton is checked. -->
<CheckBox x:Name="WriteDigestCountCheckBox" Margin="20 0 0 0"
Content="Write digestion product count histogram" IsEnabled="{Binding IsChecked, ElementName=ClassicSearchRadioButton}">
<ToolTipService.ToolTip>
<TextBlock>
Checking this box will create an additional output file with a histogram of the number of digestion products per protein.
<LineBreak/>
The number of digestion products is limited by the MaxModsPerPeptide parameter and limited to the MaxModifiedIsoforms parameter for each primary sequence generated by each database entry
<LineBreak/>
Multiple base sequences can be generated per protein due to variable methionine and splice variants if annotated in a database
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
</StackPanel>
</Expander>
</GroupBox>
Expand Down
2 changes: 2 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ private void UpdateFieldsFromTask(SearchTask task)

OutputFileNameTextBox.Text = task.CommonParameters.TaskDescriptor;
CkbMzId.IsChecked = task.SearchParameters.WriteMzId;
WriteDigestCountCheckBox.IsChecked = task.SearchParameters.WriteDigestionProductCountFile;
WriteHighQPsmsCheckBox.IsChecked = task.SearchParameters.WriteHighQValuePsms;
WriteDecoyCheckBox.IsChecked = task.SearchParameters.WriteDecoys;
WriteContaminantCheckBox.IsChecked = task.SearchParameters.WriteContaminants;
Expand Down Expand Up @@ -651,6 +652,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
TheTask.SearchParameters.UpdateSpectralLibrary = UpdateSpectralLibraryCheckBox.IsChecked.Value;
TheTask.SearchParameters.CompressIndividualFiles = CompressIndividualResultsCheckBox.IsChecked.Value;
TheTask.SearchParameters.IncludeModMotifInMzid = IncludeMotifInModNamesCheckBox.IsChecked.Value;
TheTask.SearchParameters.WriteDigestionProductCountFile = WriteDigestCountCheckBox.IsChecked.Value;

if (RemoveContaminantRadioBox.IsChecked.Value)
{
Expand Down
68 changes: 67 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ public class PostSearchAnalysisTask : MetaMorpheusTask
/// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary()
/// </summary>
private Dictionary<(string,string),string> ResultsDictionary { get; set; }

/// <summary>
/// Used for storage of results for writing digestion product counts to a .tsv.
/// Each key is an (accession, base sequence) pair and each value is the digestion product count written for it.
/// </summary>
/// <remarks>
/// May be null; the writer methods check for a null or empty dictionary before using it.
/// NOTE(review): the tuple element name "BaseSeqeunce" is misspelled. Renaming it requires updating every usage of that element name.
/// </remarks>
internal IDictionary<(string Accession, string BaseSeqeunce), int> DigestionCountDictionary { get; set; }
public PostSearchAnalysisTask()
: base(MyTask.Search)
{
Expand Down Expand Up @@ -110,6 +113,12 @@ public MyTaskResults Run()
UpdateSpectralLibrary();
}

if (Parameters.SearchParameters.WriteDigestionProductCountFile)
{
WriteDigestionCountByProtein();
WriteDigestionCountHistogram();
}

WriteFlashLFQResults();

if (Parameters.ProteinList.Any((p => p.AppliedSequenceVariations.Count > 0)))
Expand Down Expand Up @@ -1939,5 +1948,62 @@ private void WritePeakQuantificationResultsToTsv(FlashLfqResults flashLFQResults

FinishedWritingFile(peaksPath, nestedIds);
}

/// <summary>
/// Writes the digestion product counts for each protein to a .tsv file.
/// </summary>
/// <remarks>
/// One row is written per (accession, primary sequence) entry of <see cref="DigestionCountDictionary"/>.
/// Nothing is written when that dictionary is null or empty. Entries whose accession starts with
/// "DECOY" are skipped unless the WriteDecoys search parameter is set.
/// </remarks>
private void WriteDigestionCountByProtein()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
    {
        return;
    }

    var nestedIds = new List<string> { Parameters.SearchTaskId };
    var countByProteinPath = Path.Combine(Parameters.OutputFolder, $"DigestionCountsBy{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s.tsv");

    // Loop-invariant: read the setting once rather than for every entry.
    bool writeDecoys = Parameters.SearchParameters.WriteDecoys;

    // write all values to file
    using (var writer = new StreamWriter(countByProteinPath))
    {
        writer.WriteLine("Protein Accession\tPrimary Sequence\tDigestion Products");
        foreach (var proteinEntry in DigestionCountDictionary!)
        {
            // Positional deconstruction keeps this independent of the tuple element names.
            var (accession, primarySequence) = proteinEntry.Key;

            // Accessions are identifiers, not linguistic text: compare ordinally, not by current culture.
            if (!writeDecoys && accession.StartsWith("DECOY", System.StringComparison.Ordinal))
            {
                continue;
            }

            writer.WriteLine($"{accession}\t{primarySequence}\t{proteinEntry.Value}");
        }
    }
    FinishedWritingFile(countByProteinPath, nestedIds);
}

/// <summary>
/// Writes a histogram of digestion product counts to a .tsv file.
/// </summary>
/// <remarks>
/// Each row pairs a digestion product count with the number of <see cref="DigestionCountDictionary"/>
/// entries that have exactly that count, ordered by ascending count. Nothing is written when the
/// dictionary is null or empty. Entries whose accession starts with "DECOY" are skipped unless the
/// WriteDecoys search parameter is set.
/// </remarks>
private void WriteDigestionCountHistogram()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
    {
        return;
    }

    var nestedIds = new List<string> { Parameters.SearchTaskId };
    var countHistogramPath = Path.Combine(Parameters.OutputFolder, "DigestionCountHistogram.tsv");

    // Loop-invariant: read the setting once rather than for every entry.
    bool writeDecoys = Parameters.SearchParameters.WriteDecoys;

    // Create Histogram: key = digestion product count, value = number of entries with that count.
    var countDictionary = new Dictionary<int, int>(CommonParameters.DigestionParams.MaxModificationIsoforms);
    foreach (var proteinEntry in DigestionCountDictionary!)
    {
        // Accessions are identifiers, not linguistic text: compare ordinally, not by current culture.
        if (!writeDecoys && proteinEntry.Key.Accession.StartsWith("DECOY", System.StringComparison.Ordinal))
        {
            continue;
        }

        countDictionary.Increment(proteinEntry.Value);
    }

    // Write Histogram
    using (StreamWriter writer = new(countHistogramPath))
    {
        writer.WriteLine($"Digestion Products\tCount of {GlobalVariables.AnalyteType.GetBioPolymerLabel()}s");
        foreach (var count in countDictionary.OrderBy(p => p.Key))
        {
            writer.WriteLine($"{count.Key}\t{count.Value}");
        }
    }
    FinishedWritingFile(countHistogramPath, nestedIds);
}
}
}
3 changes: 2 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using UsefulProteomicsDatabases;
using EngineLayer;
using Omics.Modifications;
using Proteomics;

namespace TaskLayer
{
Expand Down Expand Up @@ -32,6 +31,7 @@ public SearchParameters()
WriteMzId = true;
WritePepXml = false;
IncludeModMotifInMzid = false;
WriteDigestionProductCountFile = false;

ModsToWriteSelection = new Dictionary<string, int>
{
Expand Down Expand Up @@ -103,5 +103,6 @@ public SearchParameters()
public SilacLabel EndTurnoverLabel { get; set; } //used for SILAC turnover experiments
public TargetContaminantAmbiguity TCAmbiguity { get; set; }
public bool IncludeModMotifInMzid { get; set; }
public bool WriteDigestionProductCountFile { get; set; }
}
}
16 changes: 13 additions & 3 deletions MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
bool collectedDigestionInformation = false;
IDictionary<(string Accession, string BaseSequence), int> digestionCountDictionary = null;
for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
{
if (GlobalVariables.StopLoops) { break; }
Expand Down Expand Up @@ -374,8 +376,15 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Status("Starting search...", thisId);
var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary);
newClassicSearchEngine.Run();
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary, SearchParameters.WriteDigestionProductCountFile);
var result = newClassicSearchEngine.Run();

// The same proteins (all of them) get digested by every classic search engine, so we only need to collect this from the first file that runs
if (!collectedDigestionInformation)
{
collectedDigestionInformation = true;
digestionCountDictionary = (result.MyEngine as ClassicSearchEngine).DigestionCountDictionary;
}

ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
}
Expand Down Expand Up @@ -447,7 +456,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Parameters = parameters,
FileSpecificParameters = this.FileSpecificParameters,
CommonParameters = CommonParameters
CommonParameters = CommonParameters,
DigestionCountDictionary = digestionCountDictionary
};
return postProcessing.Run();
}
Expand Down
Loading