Skip to content

Commit

Permalink
Revert "PrSM output for top-down (#2062)" (#2079)
Browse files Browse the repository at this point in the history
This reverts commit 97335a4.
  • Loading branch information
rmmiller22 authored Jun 29, 2021
1 parent 97335a4 commit fc72929
Show file tree
Hide file tree
Showing 11 changed files with 27 additions and 4,602 deletions.
10 changes: 2 additions & 8 deletions EngineLayer/GlobalVariables.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,7 @@ public static class GlobalVariables
private static char[] _InvalidAminoAcids;

// this affects output labels, etc. and can be changed to "Proteoform" for top-down searches
public static Analyte AnalyteType;

/// <summary>
/// The kind of analyte a search reports on; used as the type of GlobalVariables.AnalyteType.
/// </summary>
public enum Analyte
{
// value assigned by SetUpGlobalVariables, and by DetermineAnalyteType when the protease is not "top-down"
Peptide,
// value assigned by MetaMorpheusTask.DetermineAnalyteType when the protease is named "top-down"
Proteoform
}
public static string AnalyteType;

public static List<string> ErrorsReadingMods;

Expand Down Expand Up @@ -69,7 +63,7 @@ public static void SetUpGlobalVariables()
Loaders.LoadElements();
AcceptedDatabaseFormats = new List<string> { ".fasta", ".fa", ".xml", ".msp" };
AcceptedSpectraFormats = new List<string> { ".raw", ".mzml", ".mgf" };
AnalyteType = Analyte.Peptide;
AnalyteType = "Peptide";
_InvalidAminoAcids = new char[] { 'X', 'B', 'J', 'Z', ':', '|', ';', '[', ']', '{', '}', '(', ')', '+', '-' };
ExperimentalDesignFileName = "ExperimentalDesign.tsv";
SeparationTypes = new List<string> { { "HPLC" }, { "CZE" } };
Expand Down
2 changes: 1 addition & 1 deletion EngineLayer/MetaMorpheusEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -216,4 +216,4 @@ private void FinishedSingleEngine(MetaMorpheusEngineResults myResults)
FinishedSingleEngineHandler?.Invoke(this, new SingleEngineFinishedEventArgs(myResults));
}
}
}
}
4 changes: 2 additions & 2 deletions GUI/TaskWindows/GPTMDTaskWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ private void CancelButton_Click(object sender, RoutedEventArgs e)
DialogResult = false;
CustomFragmentationWindow.Close();
}

private void ProteaseSpecificUpdate(object sender, SelectionChangedEventArgs e)
{
string proteaseName = ((Protease)ProteaseComboBox.SelectedItem).Name;
Expand Down Expand Up @@ -456,7 +456,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
dissociationType: dissociationType,
scoreCutoff: double.Parse(MinScoreAllowed.Text, CultureInfo.InvariantCulture),
precursorMassTolerance: precursorMassTolerance,
productMassTolerance: productMassTolerance,
productMassTolerance: productMassTolerance,
trimMs1Peaks: TrimMs1Peaks,
trimMsMsPeaks: TrimMsMsPeaks,
numberOfPeaksToKeepPerWindow: numPeaksToKeep,
Expand Down
4 changes: 2 additions & 2 deletions TaskLayer/MetaMorpheusTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -946,11 +946,11 @@ public static void DetermineAnalyteType(CommonParameters commonParameters)
&& commonParameters.DigestionParams.Protease != null
&& commonParameters.DigestionParams.Protease.Name == "top-down")
{
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Proteoform;
GlobalVariables.AnalyteType = "Proteoform";
}
else
{
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Peptide;
GlobalVariables.AnalyteType = "Peptide";
}
}

Expand Down
31 changes: 14 additions & 17 deletions TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ private void ProteinAnalysis()
List<PeptideSpectralMatch> psmsForProteinParsimony = Parameters.AllPsms;

// run parsimony
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());

// score protein groups and calculate FDR
ProteinScoringAndFdrResults proteinScoringAndFdrResults = (ProteinScoringAndFdrResults)new ProteinScoringAndFdrEngine(proteinAnalysisResults.ProteinGroups, psmsForProteinParsimony,
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();

ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups;

Expand Down Expand Up @@ -516,10 +516,7 @@ private void HistogramAnalysis()

private void WritePsmResults()
{
//if doing proteoform analysis, then output is proteoform-spectrum match (PrSM) instead of peptide-spectrum match (PSM)
string analyteString = GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform ? "PrSM" : "PSM";

Status("Writing " + analyteString + " results...", Parameters.SearchTaskId);
Status("Writing PSM results...", Parameters.SearchTaskId);
var FilteredPsmListForOutput = Parameters.AllPsms
.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter
&& p.FdrInfo.QValueNotch <= CommonParameters.QValueOutputFilter).ToList();
Expand All @@ -534,18 +531,18 @@ private void WritePsmResults()
}

// write PSMs
string writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s.psmtsv");
string writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs.psmtsv");
WritePsmsToTsv(FilteredPsmListForOutput, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

// write PSMs for percolator
// percolator native read format is .tab
writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s_FormattedForPercolator.tab");
writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs_FormattedForPercolator.tab");
WritePsmsForPercolator(FilteredPsmListForOutput, writtenFile);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

// write summary text
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + analyteString + "s within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target PSMS within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
if (Parameters.SearchParameters.DoParsimony)
{
Parameters.SearchTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy)
Expand All @@ -562,7 +559,7 @@ private void WritePsmResults()

Parameters.SearchTaskResults.AddTaskSummaryText("MS2 spectra in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][0]);
Parameters.SearchTaskResults.AddTaskSummaryText("Precursors fragmented in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][1]);
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + analyteString + "s within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
Parameters.SearchTaskResults.AddTaskSummaryText("Target PSMs within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));

// writes all individual spectra file search results to subdirectory
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
Expand All @@ -571,12 +568,12 @@ private void WritePsmResults()
Directory.CreateDirectory(Parameters.IndividualResultsOutputFolder);

// write PSMs
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "s.psmtsv");
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMs.psmtsv");
WritePsmsToTsv(psmsForThisFile, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });

// write PSMs for percolator
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "sFormattedForPercolator.tab");
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMsFormattedForPercolator.tab");
WritePsmsForPercolator(psmsForThisFile, writtenFile);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });
}
Expand Down Expand Up @@ -656,10 +653,10 @@ private void WriteProteinResults()
{
mzidFilePath = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + ".mzID");
}
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
subsetProteinGroupsForThisFile, Parameters.VariableModifications, Parameters.FixedModifications, Parameters.SearchParameters.SilacLabels,
new List<Protease> { CommonParameters.DigestionParams.Protease }, CommonParameters.QValueOutputFilter, CommonParameters.ProductMassTolerance,
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
Parameters.SearchParameters.IncludeModMotifInMzid);

FinishedWritingFile(mzidFilePath, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", fullFilePath });
Expand Down Expand Up @@ -1005,16 +1002,16 @@ private void WritePeptideResults()
WritePsmsToTsv(peptides, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));

foreach (var file in PsmsGroupedByFile)
{
// write summary text
var psmsForThisFile = file.ToList();
string strippedFileName = Path.GetFileNameWithoutExtension(file.First().FullFilePath);
var peptidesForFile = psmsForThisFile.GroupBy(b => b.FullSequence).Select(b => b.FirstOrDefault()).OrderByDescending(b => b.Score).ToList();
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);

// writes all individual spectra file search results to subdirectory
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
Expand Down
2 changes: 1 addition & 1 deletion Test/MyTaskTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ public static void MakeSureFdrDoesntSkip()

// RUN!
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName }, "taskId1").ToString();
Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 1"));
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 1"));
Directory.Delete(outputFolder, true);
File.Delete(xmlName);
File.Delete(mzmlName);
Expand Down
2 changes: 1 addition & 1 deletion Test/SilacTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public static void TestSilacQuantification()
Directory.CreateDirectory(outputFolder);
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName, mzmlName2 }, "taskId1").ToString();

Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm

///Normal Peptide
//test proteins
Expand Down
6 changes: 0 additions & 6 deletions Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,6 @@
<None Update="TestVariantPep.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\ProteoformAmbiguity.fasta">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\ProteoformAmbiguity.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\slicedTDYeast.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
58 changes: 4 additions & 54 deletions Test/TopDownTest.cs → Test/TestTopDown.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
namespace Test
{
[TestFixture]
public class TopDownTest
public class TestTopDown
{
[Test]
public static void TestClassicSearchEngineTopDown()
Expand All @@ -29,7 +29,7 @@ public static void TestClassicSearchEngineTopDown()
MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

// test the analyte type (should be proteoform and not peptide)
Assert.That(GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform);
Assert.That(GlobalVariables.AnalyteType == "Proteoform");

var variableModifications = new List<Modification>();
var fixedModifications = new List<Modification>();
Expand All @@ -47,63 +47,13 @@ public static void TestClassicSearchEngineTopDown()
var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
proteinList, searchMode, CommonParameters, null, null, new List<string>()).Run();

var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();
Assert.That(psm.MatchedFragmentIons.Count == 47);
}

/// <summary>
/// Runs a single top-down search task on the ProteoformAmbiguity test data and checks the number of lines
/// written to the PrSM, proteoform, and protein group output files.
/// TODO: MetaMorpheus currently reports ambiguity at the PrSM level, but starts tossing things when we get to the proteoform/protein level. See issue #2061
/// Example 1: a base sequence is needed for parsimony, but an ambiguous sequence means the base sequence is null.
/// Example 2: a full sequence is needed for determining which peptides/proteoforms are unique, but ambiguous localization means the full sequence is null.
/// </summary>
[Test]
public static void TestAmbiguousProteoformOutput()
{
    CommonParameters commonParameters = new CommonParameters(
        digestionParams: new DigestionParams(protease: "top-down"),
        scoreCutoff: 1,
        useProvidedPrecursorInfo: false,
        deconvolutionMaxAssumedChargeState: 60,
        trimMsMsPeaks: false,
        listOfModsVariable: new List<(string, string)> { ("Common Variable", "Oxidation on M"), ("Common Biological", "Acetylation on K"), ("Common Biological", "Trimethylation on K") },
        listOfModsFixed: new List<(string, string)> { ("Common Fixed", "Carbamidomethyl on C") }
    );

    SearchParameters searchParameters = new SearchParameters
    {
        DoQuantification = false
    };

    SearchTask searchTask = new SearchTask
    {
        CommonParameters = commonParameters,
        SearchParameters = searchParameters
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", searchTask) };
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestProteoformAmbiguity");
    string mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.mzML");
    string fastaName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.fasta");

    // RUN!
    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
    engine.Run();

    // There are 8 PrSMs, each with a unique proteoform and protein.
    // All 8 PrSMs are expected in the PrSM file; fewer proteoforms and protein groups are expected
    // because of the ambiguity handling described in the TODO above.
    // NUnit's Assert.AreEqual takes (expected, actual), so the expected constant goes first.
    string[] prsmLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllPrSMs.psmtsv"));
    Assert.AreEqual(9, prsmLines.Length); //8 + header
    string[] proteoformLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteoforms.psmtsv"));
    Assert.AreEqual(4, proteoformLines.Length); //3 + header, five of the PrSMs have ambiguous full sequences, which are needed to determine individuality
    string[] proteinLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteinGroups.tsv"));
    Assert.AreEqual(7, proteinLines.Length); //6 + header, two of the PrSMs have ambiguous base sequences, which prevents their use in parsimony

    Directory.Delete(outputFolder, true);
}

[Test]
public static void TestModernSearchEngineTopDown()
{
Expand Down Expand Up @@ -139,4 +89,4 @@ public static void TestModernSearchEngineTopDown()
Assert.That(psm.MatchedFragmentIons.Count == 47);
}
}
}
}
Loading

0 comments on commit fc72929

Please sign in to comment.