Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "PrSM output for top-down" #2079

Merged
merged 1 commit into from
Jun 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions EngineLayer/GlobalVariables.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,7 @@ public static class GlobalVariables
private static char[] _InvalidAminoAcids;

// this affects output labels, etc. and can be changed to "Proteoform" for top-down searches
public static Analyte AnalyteType;

/// <summary>
/// Kind of analyte a search reports on; the value affects output labels.
/// </summary>
public enum Analyte
{
// default value assigned in SetUpGlobalVariables
Peptide,
// assigned by MetaMorpheusTask.DetermineAnalyteType when the protease is named "top-down"
Proteoform
}
public static string AnalyteType;

public static List<string> ErrorsReadingMods;

Expand Down Expand Up @@ -69,7 +63,7 @@ public static void SetUpGlobalVariables()
Loaders.LoadElements();
AcceptedDatabaseFormats = new List<string> { ".fasta", ".fa", ".xml", ".msp" };
AcceptedSpectraFormats = new List<string> { ".raw", ".mzml", ".mgf" };
AnalyteType = Analyte.Peptide;
AnalyteType = "Peptide";
_InvalidAminoAcids = new char[] { 'X', 'B', 'J', 'Z', ':', '|', ';', '[', ']', '{', '}', '(', ')', '+', '-' };
ExperimentalDesignFileName = "ExperimentalDesign.tsv";
SeparationTypes = new List<string> { { "HPLC" }, { "CZE" } };
Expand Down
2 changes: 1 addition & 1 deletion EngineLayer/MetaMorpheusEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -216,4 +216,4 @@ private void FinishedSingleEngine(MetaMorpheusEngineResults myResults)
FinishedSingleEngineHandler?.Invoke(this, new SingleEngineFinishedEventArgs(myResults));
}
}
}
}
4 changes: 2 additions & 2 deletions GUI/TaskWindows/GPTMDTaskWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ private void CancelButton_Click(object sender, RoutedEventArgs e)
DialogResult = false;
CustomFragmentationWindow.Close();
}

private void ProteaseSpecificUpdate(object sender, SelectionChangedEventArgs e)
{
string proteaseName = ((Protease)ProteaseComboBox.SelectedItem).Name;
Expand Down Expand Up @@ -456,7 +456,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
dissociationType: dissociationType,
scoreCutoff: double.Parse(MinScoreAllowed.Text, CultureInfo.InvariantCulture),
precursorMassTolerance: precursorMassTolerance,
productMassTolerance: productMassTolerance,
productMassTolerance: productMassTolerance,
trimMs1Peaks: TrimMs1Peaks,
trimMsMsPeaks: TrimMsMsPeaks,
numberOfPeaksToKeepPerWindow: numPeaksToKeep,
Expand Down
4 changes: 2 additions & 2 deletions TaskLayer/MetaMorpheusTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -946,11 +946,11 @@ public static void DetermineAnalyteType(CommonParameters commonParameters)
&& commonParameters.DigestionParams.Protease != null
&& commonParameters.DigestionParams.Protease.Name == "top-down")
{
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Proteoform;
GlobalVariables.AnalyteType = "Proteoform";
}
else
{
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Peptide;
GlobalVariables.AnalyteType = "Peptide";
}
}

Expand Down
31 changes: 14 additions & 17 deletions TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ private void ProteinAnalysis()
List<PeptideSpectralMatch> psmsForProteinParsimony = Parameters.AllPsms;

// run parsimony
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());

// score protein groups and calculate FDR
ProteinScoringAndFdrResults proteinScoringAndFdrResults = (ProteinScoringAndFdrResults)new ProteinScoringAndFdrEngine(proteinAnalysisResults.ProteinGroups, psmsForProteinParsimony,
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();

ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups;

Expand Down Expand Up @@ -516,10 +516,7 @@ private void HistogramAnalysis()

private void WritePsmResults()
{
//if doing proteoform analysis, then output is proteoform-spectrum match (PrSM) instead of peptide-spectrum match (PSM)
string analyteString = GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform ? "PrSM" : "PSM";

Status("Writing " + analyteString + " results...", Parameters.SearchTaskId);
Status("Writing PSM results...", Parameters.SearchTaskId);
var FilteredPsmListForOutput = Parameters.AllPsms
.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter
&& p.FdrInfo.QValueNotch <= CommonParameters.QValueOutputFilter).ToList();
Expand All @@ -534,18 +531,18 @@ private void WritePsmResults()
}

// write PSMs
string writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s.psmtsv");
string writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs.psmtsv");
WritePsmsToTsv(FilteredPsmListForOutput, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

// write PSMs for percolator
// percolator native read format is .tab
writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s_FormattedForPercolator.tab");
writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs_FormattedForPercolator.tab");
WritePsmsForPercolator(FilteredPsmListForOutput, writtenFile);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

// write summary text
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + analyteString + "s within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target PSMS within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
if (Parameters.SearchParameters.DoParsimony)
{
Parameters.SearchTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy)
Expand All @@ -562,7 +559,7 @@ private void WritePsmResults()

Parameters.SearchTaskResults.AddTaskSummaryText("MS2 spectra in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][0]);
Parameters.SearchTaskResults.AddTaskSummaryText("Precursors fragmented in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][1]);
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + analyteString + "s within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
Parameters.SearchTaskResults.AddTaskSummaryText("Target PSMs within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));

// writes all individual spectra file search results to subdirectory
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
Expand All @@ -571,12 +568,12 @@ private void WritePsmResults()
Directory.CreateDirectory(Parameters.IndividualResultsOutputFolder);

// write PSMs
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "s.psmtsv");
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMs.psmtsv");
WritePsmsToTsv(psmsForThisFile, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });

// write PSMs for percolator
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "sFormattedForPercolator.tab");
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMsFormattedForPercolator.tab");
WritePsmsForPercolator(psmsForThisFile, writtenFile);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });
}
Expand Down Expand Up @@ -656,10 +653,10 @@ private void WriteProteinResults()
{
mzidFilePath = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + ".mzID");
}
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
subsetProteinGroupsForThisFile, Parameters.VariableModifications, Parameters.FixedModifications, Parameters.SearchParameters.SilacLabels,
new List<Protease> { CommonParameters.DigestionParams.Protease }, CommonParameters.QValueOutputFilter, CommonParameters.ProductMassTolerance,
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
Parameters.SearchParameters.IncludeModMotifInMzid);

FinishedWritingFile(mzidFilePath, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", fullFilePath });
Expand Down Expand Up @@ -1005,16 +1002,16 @@ private void WritePeptideResults()
WritePsmsToTsv(peptides, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));

foreach (var file in PsmsGroupedByFile)
{
// write summary text
var psmsForThisFile = file.ToList();
string strippedFileName = Path.GetFileNameWithoutExtension(file.First().FullFilePath);
var peptidesForFile = psmsForThisFile.GroupBy(b => b.FullSequence).Select(b => b.FirstOrDefault()).OrderByDescending(b => b.Score).ToList();
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);

// writes all individual spectra file search results to subdirectory
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
Expand Down
2 changes: 1 addition & 1 deletion Test/MyTaskTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ public static void MakeSureFdrDoesntSkip()

// RUN!
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName }, "taskId1").ToString();
Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 1"));
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 1"));
Directory.Delete(outputFolder, true);
File.Delete(xmlName);
File.Delete(mzmlName);
Expand Down
2 changes: 1 addition & 1 deletion Test/SilacTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public static void TestSilacQuantification()
Directory.CreateDirectory(outputFolder);
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName, mzmlName2 }, "taskId1").ToString();

Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm

///Normal Peptide
//test proteins
Expand Down
6 changes: 0 additions & 6 deletions Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,6 @@
<None Update="TestVariantPep.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\ProteoformAmbiguity.fasta">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\ProteoformAmbiguity.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TopDownTestData\slicedTDYeast.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
58 changes: 4 additions & 54 deletions Test/TopDownTest.cs → Test/TestTopDown.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
namespace Test
{
[TestFixture]
public class TopDownTest
public class TestTopDown
{
[Test]
public static void TestClassicSearchEngineTopDown()
Expand All @@ -29,7 +29,7 @@ public static void TestClassicSearchEngineTopDown()
MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

// test output file name (should be proteoform and not peptide)
Assert.That(GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform);
Assert.That(GlobalVariables.AnalyteType == "Proteoform");

var variableModifications = new List<Modification>();
var fixedModifications = new List<Modification>();
Expand All @@ -47,63 +47,13 @@ public static void TestClassicSearchEngineTopDown()
var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
proteinList, searchMode, CommonParameters, null, null, new List<string>()).Run();

var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();
Assert.That(psm.MatchedFragmentIons.Count == 47);
}

/// <summary>
/// Runs a top-down search task on the ProteoformAmbiguity test data and checks how many lines are written to the
/// PrSM, proteoform, and protein group output files.
/// TODO: MetaMorpheus currently reports ambiguity at the PrSM level, but starts tossing things when we get to the proteoform/protein level. See issue #2061
/// Example 1: a base sequence is needed for parsimony, but an ambiguous sequence means the base sequence is null.
/// Example 2: a full sequence is needed for determining which peptides/proteoforms are unique, but ambiguous localization means the full sequence is null.
/// </summary>
[Test]
public static void TestAmbiguousProteoformOutput()
{
    // top-down "protease" with three variable mods and one fixed mod
    CommonParameters commonParameters = new CommonParameters(
        digestionParams: new DigestionParams(protease: "top-down"),
        scoreCutoff: 1,
        useProvidedPrecursorInfo: false,
        deconvolutionMaxAssumedChargeState: 60,
        trimMsMsPeaks: false,
        listOfModsVariable: new List<(string, string)> { ("Common Variable", "Oxidation on M"), ("Common Biological", "Acetylation on K"), ("Common Biological", "Trimethylation on K") },
        listOfModsFixed: new List<(string, string)> { ("Common Fixed", "Carbamidomethyl on C") }
    );

    SearchParameters searchParameters = new SearchParameters
    {
        DoQuantification = false
    };

    SearchTask searchTask = new SearchTask
    {
        CommonParameters = commonParameters,
        SearchParameters = searchParameters
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", searchTask) };
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestProteoformAmbiguity");
    string mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.mzML");
    string fastaName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.fasta");

    // RUN!
    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
    engine.Run();

    //There are 8 PrSMs, each with a unique proteoform and protein
    //check that all 8 PrSMs are reported, all 8 unique proteoforms, and all 8 proteins
    // NUnit's Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly
    string[] prsmLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllPrSMs.psmtsv"));
    Assert.AreEqual(9, prsmLines.Length); //8 + header
    string[] proteoformLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteoforms.psmtsv"));
    Assert.AreEqual(4, proteoformLines.Length); //3 + header, five of the PrSMs have ambiguous full sequences, which are needed to determine individuality
    string[] proteinLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteinGroups.tsv"));
    Assert.AreEqual(7, proteinLines.Length); //6 + header, two of the PrSMs have ambiguous base sequences, which prevents their use in parsimony

    Directory.Delete(outputFolder, true);
}

[Test]
public static void TestModernSearchEngineTopDown()
{
Expand Down Expand Up @@ -139,4 +89,4 @@ public static void TestModernSearchEngineTopDown()
Assert.That(psm.MatchedFragmentIons.Count == 47);
}
}
}
}
Loading