diff --git a/MetaMorpheus/EngineLayer/EngineLayer.csproj b/MetaMorpheus/EngineLayer/EngineLayer.csproj index 5b15cc792..d59bb2d6a 100644 --- a/MetaMorpheus/EngineLayer/EngineLayer.csproj +++ b/MetaMorpheus/EngineLayer/EngineLayer.csproj @@ -62,6 +62,9 @@ PreserveNewest + + Always + PreserveNewest diff --git a/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb b/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb new file mode 100644 index 000000000..427e960d1 --- /dev/null +++ b/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb @@ -0,0 +1,13 @@ +(N) +(N(H)) +(N(A)) +(N(H)(N)) +(N(H(A))) +(N(N(K))) +(N(H)(N(H))) +(N(H(A))(N)) +(N(H(A))(A)) +(N(H(A))(N(H))) +(N(H)(N(H(A))(F))) +(N(H(A))(N(H(A)))) +(N(H(A))(N(H(A))(F))) \ No newline at end of file diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs index 31843be86..d7d20b5db 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs @@ -4,6 +4,7 @@ namespace EngineLayer.GlycoSearch { + //the class is for localization graph matrix. Each node in the matrix is represented by AdjNode. public class AdjNode { //AdjNode -> Adjactent node is used to build graph matrix for localizaiton. Each node in graph matrix contain Sources, max cost, current cost, etc. diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs index 0ce4f1361..21b0654e9 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs @@ -13,14 +13,14 @@ public class GlycanIon { public GlycanIon(string ionStruct, int ionMass, byte[] ionKind, int lossIonMass) { - IonStruct = ionStruct; + IonStruct = ionStruct; // Always set null, deprecated. 
IonMass = ionMass; IonKind = ionKind; - LossIonMass = lossIonMass; + LossIonMass = lossIonMass; // Neutral loss mass = Glycan.Mass - IonMass } public string IonStruct { get; set; } public int IonMass { get; set; } - public int LossIonMass { get; set; }//Glycan.Mass - IonMass + public int LossIonMass { get; set; } public byte[] IonKind { get; set; } } @@ -41,13 +41,13 @@ public Glycan(byte[] kind) Mass = GetMass(kind); } - public int GlyId { get; set; } - public string Struc { get; private set; } + public int GlyId { get; set; } // Glycan ID, which is the index of glycan in the glycan database. + public string Struc { get; private set; } // Glycan structure string represented the glycan structure and linkage. Ex. (N(H(A))(N(H(A))(F))) public int Mass { get; private set; } - //Glycans are composed of several different types of mono saccharides. In Kind, each number correspond to one type of mono saccharide in the same order as Glycan.CharMassDic. - public byte[] Kind { get; private set; } - public string Composition + + public byte[] Kind { get; private set; } // Glycans are composed of several types of mono suagr. In Kind, each number correspond to one type (corresponded order as Glycan.CharMassDic). + public string Composition // Glycan composition string. Ex. H2N2A2F1. { get { @@ -57,18 +57,18 @@ public string Composition public List Ions { get; set; } public bool Decoy { get; private set; } - public HashSet DiagnosticIons + public HashSet DiagnosticIons // B ions (the sugar fragment dropped from the glycopeptide), used for the N-glycan. There are more ions to set... { get - { + { HashSet diagnosticIons = new HashSet(); - if (Kind[0] >= 1) + if (Kind[0] >= 1) //if we have Hexose(the number more than one), then we have the corresponding diagonsitic ions as below. 
{ diagnosticIons.Add(10902895 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(11503951 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(16306064 - hydrogenAtomMonoisotopicMass); } - if (Kind[1] >= 1) + if (Kind[1] >= 1) // If the glycan contains at least one HexNAc, add the corresponding diagnostic ions below. { diagnosticIons.Add(12605550 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(13805550 - hydrogenAtomMonoisotopicMass); @@ -77,16 +77,16 @@ public HashSet DiagnosticIons diagnosticIons.Add(18607663 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(20408720 - hydrogenAtomMonoisotopicMass); } - if (Kind[1] >= 1 && Kind[0] >= 1) + if (Kind[1] >= 1 && Kind[0] >= 1) // If the glycan contains both HexNAc and Hexose, add the corresponding diagnostic ion below. { diagnosticIons.Add(36614002 - hydrogenAtomMonoisotopicMass); } - if (Kind[2] >= 1) + if (Kind[2] >= 1) // If the glycan contains at least one Neu5Ac, add the corresponding diagnostic ions below. { diagnosticIons.Add(27409268 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(29210324 - hydrogenAtomMonoisotopicMass); } - if (Kind[3] >= 1) + if (Kind[3] >= 1) // If the glycan contains at least one Neu5Gc, add the corresponding diagnostic ions below. { diagnosticIons.Add(29008759 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(30809816 - hydrogenAtomMonoisotopicMass); @@ -105,7 +105,6 @@ public HashSet DiagnosticIons //H: C6O5H10 Hexose, N: C8O5NH13 HexNAc, A: C11O8NH17 Neu5Ac, G: C11H17NO9 Neu5Gc, F: C6O4H10 Fucose, //P: PO3H Phosphate, S: SO3H Sulfo, Y: Na Sodium, C:Acetyl for Neu5Ac //X: C5H10O5 Xylose - //If add more monosacchrades here, please change GetMass, GetKind, GetKindString, GlycanBox constructor, search byte[]. 
private readonly static Dictionary CharMassDic = new Dictionary { { 'H', 16205282 }, { 'N', 20307937 }, @@ -117,9 +116,10 @@ public HashSet DiagnosticIons { 'Y', 2298977 }, { 'C', 4201056 }, { 'X', 15005282 }, + { 'K', 25006897 }, }; - //Compitable with Byonic, for loading glycan by Kind. + // The corresponding index for sugar and Kind. public readonly static Dictionary> NameCharDic = new Dictionary> { {"Hex", new Tuple('H', 0) }, @@ -131,16 +131,19 @@ public HashSet DiagnosticIons {"Sulfo", new Tuple('S', 6) }, {"Na", new Tuple('Y', 7) }, {"Ac", new Tuple('C', 8) }, - {"Xylose", new Tuple('X', 9) } + {"Xylose", new Tuple('X', 9) }, + {"Kdn", new Tuple('K',10)} }; - public readonly static HashSet CommonOxoniumIons = new HashSet + //The same ion as we describe above in the diagnostic ions. That just for the initial filtering for glycopeptide peaks. Not used now. + public readonly static HashSet CommonOxoniumIons = new HashSet {13805550, 16806607, 18607663, 20408720, 36614002 }; - public readonly static int[] AllOxoniumIons = new int[] + //The same ion as we describe above in the diagnostic ions. Used for building the oxoniumIntensity list. + public readonly static int[] AllOxoniumIons = new int[] {10902895, 11503951, 12605550, 12703952, 13805550, 14406607, 16306064, 16806607, 18607663, 20408720, 27409268, 29008759, 29210324, 30809816, 36614002, 65723544, 67323035}; - //TrimannosylCore is only useful for N-Glyco peptides. + //TrimannosylCore. Only useful for N-Glyco peptides. public readonly static Dictionary TrimannosylCores = new Dictionary { //Each of the mass represent as a N-Glycan core. @@ -160,21 +163,30 @@ public HashSet DiagnosticIons #region Glycan Structure manipulation - //There are two ways to represent a glycan in string, one only combination, the other structure. - //The method generate a glycan by read in a glycan structure string from database. 
+ //There are two ways to represent a glycan in string + //Composition: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1), + //Struct(Linkage): (N(H(A))(N(H(A))(F))) + + /// + /// Only for Gdb. The method generate a glycan object by reading the glycan structure string from database. + /// + /// structrue string ex. (N(H(A))(N(H(A))(F))) + /// + /// + /// Glycan Object public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglycan = false) { - Node node = Struct2Node(theGlycanStruct); - List nodeIons = GetAllChildrenCombination(node); - int mass = Glycan.GetMass(theGlycanStruct); - byte[] kind = Glycan.GetKind(theGlycanStruct); + Node node = Struct2Node(theGlycanStruct); // String to tree structure. + List nodeIons = GetAllChildrenCombination(node); // Get all possible fragmentation & neutralLoss of a glycan. + int mass = Glycan.GetMass(theGlycanStruct); // Get glycan mass. + byte[] kind = Glycan.GetKind(theGlycanStruct); // Get glycan composition array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1]. List glycanIons = new List(); HashSet ionMasses = new HashSet(); foreach (var aNodeIon in nodeIons) { - var ionMass = Glycan.GetMass(Node2Struct(aNodeIon)); - if (!ionMasses.Contains(ionMass) && ionMass != mass) - { + var ionMass = Glycan.GetMass(Node2Struct(aNodeIon)); // Get the ionMass + if (!ionMasses.Contains(ionMass) && ionMass != mass) // Avoid duplicate ions with the same mass. Ex. N(H)N and N(N(H)) have the same ionMass. + { // We also avoid the ionMass equals to the glycan mass. Because we won't assume the whole glycan is a fragment ion. 
ionMasses.Add(ionMass); var ionKind = Glycan.GetKind(Node2Struct(aNodeIon)); var lossIonMass = GetIonLossMass(kind, ionKind); @@ -184,34 +196,39 @@ public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglyca } if (!isOglycan) { - glycanIons.Add(new GlycanIon(null, 8303819, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, mass - 8303819)); //Cross-ring mass + glycanIons.Add(new GlycanIon(null, 8303819, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, mass - 8303819)); //Cross-ring mass } - glycanIons.Add(new GlycanIon(null, 0, kind, mass)); + glycanIons.Add(new GlycanIon(null, 0, kind, mass)); //That is Y0 ion. The whole glycan dropped from the glycopeptide. Like a netural loss. Glycan glycan = new Glycan(theGlycanStruct, mass, kind, glycanIons.OrderBy(p => p.IonMass).ToList(), false); glycan.GlyId = id; return glycan; } - //Glycan are represented in tree structures composed of Node. The function here is to transfer a string into connected Node. + + /// + /// Convert the glycan structure string to tree format + /// + /// linkage inforamtion ex. (N(H)) + /// glycan tree node ex. Current Nonde = Node(N, 0), left Child = Node(H, 1) public static Node Struct2Node(string theGlycanStruct) { int level = 0; - Node curr = new Node(theGlycanStruct[1], level); - for (int i = 2; i < theGlycanStruct.Length - 1; i++) + Node curr = new Node(theGlycanStruct[1], level); // The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root. + for (int i = 2; i < theGlycanStruct.Length - 1; i++) // Try to extract the following characters. { - if (theGlycanStruct[i] == '(') + if (theGlycanStruct[i] == '(') // Skip the '(' character. { continue; } - if (theGlycanStruct[i] == ')') + if (theGlycanStruct[i] == ')') // When we meet a ')', we need to go back to the parent node. { curr = curr.Father; level--; } - else + else // While meeting a character, we need to decide where to put it in the tree. 
(putting priority: left -> right side -> middle) { - level++; + level++; // Move to the level.(Deeper/Child level) if (curr.LeftChild == null) { curr.LeftChild = new Node(theGlycanStruct[i], level); @@ -233,11 +250,16 @@ public static Node Struct2Node(string theGlycanStruct) } } } - return curr; + return curr; + } - //The function is to generate all possible fragmentation/neutral loss of a glycan, which is a subset of glycan. - //Node is tree structured glycan. subset of glycans are also represented by Node. + + /// + /// Generate all possible fragments(subset) of a glycan. The fragments are also represented by a Node. + /// + /// + /// The all combination of the Glycan fragment. Presented by Node private static List GetAllChildrenCombination(Node node) { List nodes = new List(); @@ -364,6 +386,7 @@ private static List GetAllChildrenCombination(Node node) } //Node structure to string structure. + // input: Node(N, 0) -> left Child = Node(H, 1), output: (N(H)) private static string Node2Struct(Node node) { string output = ""; @@ -374,7 +397,12 @@ private static string Node2Struct(Node node) return output; } - //kind are compositions of glycan. The function here is to generate mass difference of two glycan. + /// + /// Calculate the mass difference of two glycan kind. 
+ /// + /// Composition of the glycan + /// Composition of the glycanIon + /// Mass difference between the glycan and its glycanIon public static int GetIonLossMass(byte[] Kind, byte[] ionKind) { byte[] lossKind = new byte[Kind.Length]; @@ -388,8 +416,12 @@ public static int GetIonLossMass(byte[] Kind, byte[] ionKind) #endregion #region Transfer information - - private static int GetMass(string structure) + /// + /// Get glycan mass by glycan structure string + /// + /// ex. (N(H(A))(N(H(A))(F))) + /// The glycan mass + private static int GetMass(string structure) { int y = CharMassDic['H'] * structure.Count(p => p == 'H') + CharMassDic['N'] * structure.Count(p => p == 'N') + @@ -400,12 +432,18 @@ private static int GetMass(string structure) CharMassDic['S'] * structure.Count(p => p == 'S') + CharMassDic['Y'] * structure.Count(p => p == 'Y') + CharMassDic['C'] * structure.Count(p => p == 'C') + - CharMassDic['X'] * structure.Count(p => p == 'X') + CharMassDic['X'] * structure.Count(p => p == 'X') + + CharMassDic['K'] * structure.Count(p => p == 'K') ; return y; } - public static int GetMass(byte[] kind) + /// + /// Get glycan mass by glycan composition + /// + /// ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0] (11 entries; index 10 is the 'K' count, which this method reads) + /// The glycan mass + public static int GetMass(byte[] kind) { int mass = CharMassDic['H'] * kind[0] + CharMassDic['N'] * kind[1] + @@ -416,13 +454,20 @@ public static int GetMass(byte[] kind) CharMassDic['S'] * kind[6] + CharMassDic['Y'] * kind[7] + CharMassDic['C'] * kind[8] + - CharMassDic['X'] * kind[9] + CharMassDic['X'] * kind[9] + + CharMassDic['K'] * kind[10] ; return mass; } - public static byte[] GetKind(string structure) + + /// + /// Get glycan composition by the structure string + /// + /// structure format : (N(H(A))(N(H(A))(F))) + /// The kind array (11 entries), ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0]. 
+ public static byte[] GetKind(string structure) { var kind = new byte[] { Convert.ToByte(structure.Count(p => p == 'H')), @@ -435,10 +480,17 @@ public static byte[] GetKind(string structure) Convert.ToByte(structure.Count(p => p == 'Y')), Convert.ToByte(structure.Count(p => p == 'C')), Convert.ToByte(structure.Count(p => p == 'X')), + Convert.ToByte(structure.Count(p => p == 'K')) }; return kind; } + + /// + /// Get glycan composition text from the glycan kind[]. + /// + /// ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] + /// The composition text ex. H2N2A2F1 public static string GetKindString(byte[] Kind) { string H = Kind[0]==0 ? "" : "H" + Kind[0].ToString(); @@ -451,7 +503,8 @@ public static string GetKindString(byte[] Kind) string Y = Kind[7] == 0 ? "" : "Y" + Kind[7].ToString(); string C = Kind[8] == 0 ? "" : "C" + Kind[8].ToString(); string X = Kind[9] == 0 ? "" : "X" + Kind[9].ToString(); - string kindString = H + N + A + G + F + P + S + Y + C + X; + string K = Kind[10] == 0 ? "" : "K" + Kind[10].ToString(); + string kindString = H + N + A + G + F + P + S + Y + C + X + K; return kindString; } @@ -459,6 +512,12 @@ public static string GetKindString(byte[] Kind) //TO THINK: Is it reasonable to transfer Glycan to Modification the first time Glycan is read in? Which could save time. //Use glycan index and modification index to reduce space. + + /// + /// Input the N-glycan object, and transfer it to the modification object. + /// + /// + /// public static Modification NGlycanToModification(Glycan glycan) { Dictionary> neutralLosses = new Dictionary>(); @@ -488,7 +547,12 @@ public static Modification NGlycanToModification(Glycan glycan) return modification; } - public static Modification OGlycanToModification(Glycan glycan) + /// + /// Input the O-glycan object, and transfer it to the modification object. + /// + /// + /// The modification object + public static Modification OGlycanToModification(Glycan glycan) //try to transfer the glycan object to modification object. 
{ //TO THINK: what the neutralLoss for O-Glyco? Dictionary> neutralLosses = new Dictionary>(); @@ -522,9 +586,10 @@ public static Modification OGlycanToModification(Glycan glycan) #region Combination or Permutation functions not directly related to glycan, use carefully these function don't deal duplicate elements. + public static IEnumerable> GetKCombs(IEnumerable list, int length) where T : IComparable { - if (length == 1) return list.Select(t => new T[] { t }); + if (length == 1) return list.Select(t => new T[] { t }); // Base case: wrap each element in its own single-element array. return GetKCombs(list, length - 1).SelectMany(t => list.Where(o => o.CompareTo(t.Last()) > 0), (t1, t2) => t1.Concat(new T[] { t2 })); } @@ -538,7 +603,7 @@ public static IEnumerable> GetPermutations(IEnumerable list { if (length == 1) { - return list.Select(t => new T[] { t }); + return list.Select(t => new T[] { t }); } return GetPermutations(list, length - 1).SelectMany(t => list.Where(o => !t.Contains(o)), (t1, t2) => t1.Concat(new T[] { t2 })); } @@ -553,6 +618,12 @@ public static IEnumerable> GetPermutationsWithRept(IEnumerable #region Functions are not used now, could be useful in the future. + /// + /// Test the equality of two glycan objects. Two glycans are considered equal when both the glycan mass and the glycan ions are identical. + /// + /// + /// + /// public static bool Equals(Glycan glycan1, Glycan glycan2) { if (glycan1.Mass == glycan2.Mass) @@ -573,7 +644,7 @@ public static bool Equals(Glycan glycan1, Glycan glycan2) return false; } - public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans) + public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans) //Build target-decoy glycans for testing. 
{ List allGlycans = new List(); diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs index 9a1d0f5d2..b00776f18 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs @@ -9,20 +9,25 @@ namespace EngineLayer { - //One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization. - //GlycanBox -- A defined combination of glycans will be considered to modify on one peptide. - //The GlycanBoxMass is the total mass of all glycans on the peptide + + /// + /// A defined combination of glycans to modify on one peptide. Ex. if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3]) + /// public class GlycanBox:ModBox { - public static Glycan[] GlobalOGlycans { get; set; } + public static Glycan[] GlobalOGlycans { get; set; } // The glycan list in the database file + + public GlycanBox[] ChildGlycanBoxes { get; set; } // all possible glycan combinations in the glycanBox public static Modification[] GlobalOGlycanModifications { get; set; } - public static GlycanBox[] OGlycanBoxes { get; set; } + public static GlycanBox[] OGlycanBoxes { get; set; } // all possible glycan boxes + + public byte[] Kind { get; private set; } //TO DO: Decoy O-glycan can be created, but the results need to be reasoned. //public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 }; - private readonly static int[] SugarShift = new int[] + private readonly static int[] SugarShift = new int[] //still unclear about the shift... 
{ 7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050, 11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330, @@ -31,7 +36,11 @@ public class GlycanBox:ModBox }; - //After O-glycans are read in from database, we build combinations of glycans into GlycanBox. The maxNum is maximum glycans allowed on one peptides. + /// + /// Use the glycan from database to create all possible combination glycan set into GlycanBox. + /// + /// The maxNum is maximum glycans allowed on one peptides + /// The glycanBox collection, glycanBox[] public static IEnumerable BuildOGlycanBoxes(int maxNum) { return BuildOGlycanBoxes(maxNum, false); @@ -51,7 +60,7 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec if (buildDecoy) { - GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray()); + GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray(),false); // decoy glycanBox glycanBox_decoy.TargetDecoy = false; glycanBox_decoy.ChildGlycanBoxes = BuildChildOGlycanBoxes(glycanBox_decoy.NumberOfMods, glycanBox_decoy.ModIds, glycanBox_decoy.TargetDecoy).ToArray(); yield return glycanBox_decoy; @@ -60,8 +69,11 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec } } - //After O-glycans are read in from database, we transfer the glycans into 'Modification' class type for MetaMorpheus to manipulate sequences. - //In the future we may able to combine the two type together. + /// + /// Convert the glycan into Modification type for MetaMorpheus to manipulate sequences. In the future we may able to combine the two type together. 
+ /// + /// + /// public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGlycans) { Modification[] globalOGlycanModifications = new Modification[globalOGlycans.Length]; @@ -73,20 +85,26 @@ public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGly return globalOGlycanModifications; } - //The function here is to build GlycanBoxes used for LocalizationGraph. - //In LocalizationGraph matrix, for each AdjNode, it represent a ChildOGlycanBox here at certain glycosite. + + /// + /// Generate all possible child/fragment boxes of the specific glycanBox. The child boxes are used for LocalizationGraph. + /// + /// + /// The glycanBox, ex. [0,0,1] means glycan0 + glycan0 + glycan1 + /// + /// The ChildBox collection, ChildBox[] public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] glycanIds, bool targetDecoy = true) { yield return new GlycanBox(new int[0], targetDecoy); HashSet seen = new HashSet(); for (int i = 1; i <= maxNum; i++) { - foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) - { - List ids = new List(); - foreach (var id in idCombine) + foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) // Get all combinations of the glycans on the peptide, ex. three glycans (A,B,C) in the box. + { // The combinations can be (A),(B),(C),(A+B),(A+C),(B+C),(A+B+C): up to seven, in addition to the empty box yielded first; repeated id sets are skipped via 'seen'. + List ids = new List(); + foreach (var id in idCombine) { - ids.Add(glycanIds[id]); + ids.Add(glycanIds[id]); } if (!seen.Contains(string.Join(",", ids.Select(p => p.ToString())))) @@ -102,19 +120,24 @@ public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] gl } } - public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) + /// + /// Constructor of GlycanBox. 
+ /// + /// The glycanBox composition, each number represent one glycan index in the database + /// + public GlycanBox(int[] ids, bool Istarget = true):base(ids) { - byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - foreach (var id in ModIds) + byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + foreach (var id in ModIds) //ModIds is the same as ids. { - for (int i = 0; i < kind.Length; i++) + for (int i = 0; i < kind.Length; i++) { - kind[i] += GlobalOGlycans[id].Kind[i]; + kind[i] += GlobalOGlycans[id].Kind[i]; //kind is the sum of all glycan Kind in the Box. } } Kind = kind; - if (targetDecoy) + if (Istarget) { Mass = (double)Glycan.GetMass(Kind) / 1E5; } @@ -125,18 +148,13 @@ public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) Mass = (double)(Glycan.GetMass(Kind) + SugarShift[shiftInd]) / 1E5; } } - - public GlycanBox[] ChildGlycanBoxes { get; set; } - - public string GlycanIdString + + public string GlycanIdString // the composition of glycanBox. Example: [1,2,3] means glycan1 + glycan2 + glycan3 are on the peptide. { get { return string.Join(",", ModIds.Select(p => p.ToString())); } } - - public byte[] Kind{ get; private set; } - } } diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs index ddc64d7f6..4b3148b52 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs @@ -6,10 +6,18 @@ namespace EngineLayer { - - public static class GlycanDatabase + // in our database, the N-glycan.gdb should be correct to the new format + // the class for loading glycan database then creeat the glycan object. + public static class GlycanDatabase { - //Load Glycan. Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions. + + /// + /// Load Glycan from the database file. 
Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions + /// + /// Database file path + /// Do we need to generate the glycanIon? + /// + /// A glycan object collection public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch) { bool isKind = true; @@ -18,7 +26,7 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon while(lines.Peek() != -1) { string line = lines.ReadLine(); - if (!line.Contains("HexNAc")) + if (!line.Contains("HexNAc")) // use the first line to determine the format (kind / structure) of glycan database. { isKind = false; } @@ -28,15 +36,22 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon if (isKind) { - return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); + return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); // open the file of the kind format, example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1) } else { - return LoadStructureGlycan(filePath, IsOGlycanSearch); + return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A)) } } - //Load KindGlycan. Compatible with Byonic. + + /// + /// Load composition format Glycan database, then convert to kind format followed by generating the glycan object. + /// + /// + /// + /// + /// The glycan collection public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch) { using (StreamReader lines = new StreamReader(filePath)) @@ -46,14 +61,14 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat { string line = lines.ReadLine().Split('\t').First(); - if (!(line.Contains("HexNAc") || line.Contains("Hex"))) + if (!(line.Contains("HexNAc") || line.Contains("Hex"))) // Make sure the line is a glycan line. The line should contain HexNAc or Hex. 
{ continue; } - var kind = String2Kind(line); + var kind = String2Kind(line); // Convert the database string to kind[] format (byte array). - var glycan = new Glycan(kind); + var glycan = new Glycan(kind); // Use the kind[] to create a glycan object. glycan.GlyId = id++; if (ToGenerateIons) { @@ -71,9 +86,14 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat } } - public static byte[] String2Kind(string line) + /// + /// Convert the glycan composition string to the Kind array + /// + /// ex. HexNAc(2)Hex(5)NeuAc(1)Fuc(1) + /// The glycan Kind array (11 entries, in Glycan.NameCharDic index order: Hex, HexNAc, NeuAc, NeuGc, Fuc, ...), ex. [5, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0] + public static byte[] String2Kind(string line) { - byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; var x = line.Split(new char[] { '(', ')' }); int i = 0; while (i < x.Length - 1) @@ -85,7 +105,12 @@ public static byte[] String2Kind(string line) return kind; } - //Load structured Glycan database. + /// + /// Load structured format Glycan database and generate the glycan object. + /// + /// + /// + /// The Glycan object collection public static IEnumerable LoadStructureGlycan(string filePath, bool IsOGlycan) { using (StreamReader glycans = new StreamReader(filePath)) @@ -93,8 +118,8 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG int id = 1; while (glycans.Peek() != -1) { - string line = glycans.ReadLine(); - yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); + string line = glycans.ReadLine(); // Read the line from the database file. Ex. (N(H(A))(A)) + yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); // Directly convert the string to Glycan object. } } } @@ -102,28 +127,33 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG //This function build fragments based on the general core of NGlyco fragments. 
//From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py#L408 //The fragment generation is not as good as structure based method. So it is better to use a structure based N-Glycan database. - public static List NGlycanCompositionFragments(byte[] kind) + // The function is used to load the database from the different formats, but we don't use it now. + public static List NGlycanCompositionFragments(byte[] kind, bool isfucExtended = false) { int glycan_mass = Glycan.GetMass(kind); - int core_count = 1; + // int core_count = 1; int iteration_count = 0; + int hexnac_Core = 2; + int hexose_Core = 3; bool extended = true; - bool extended_fucosylation = false; + bool extended_fucosylation = isfucExtended; int fuc_count = kind[4]; int xyl_count = kind[9]; - int hexnac_inaggregate = kind[0]; - int hexose_inaggregate = kind[1]; + int hexnac_total = kind[1]; + int hexose_total = kind[0]; List glycanIons = new List(); - int base_hexnac = Math.Min(hexnac_inaggregate + 1, 3); - for (int hexnac_count = 0; hexnac_count < base_hexnac; hexnac_count++) + int base_hexnac = Math.Min(hexnac_total, hexnac_Core); // base_hexnac is the first priority hexnac count, they all come from the core. 
+ for (int hexnac_count = 0; hexnac_count < base_hexnac + 1 ; hexnac_count++) { if (hexnac_count == 0) { - GlycanIon glycanIon = new GlycanIon(null, 8303819, new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, glycan_mass - 8303819); + byte[] startKind = new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + string glycanName = Glycan.GetKindString(startKind); + GlycanIon glycanIon = new GlycanIon(glycanName, 8303819, startKind, glycan_mass - 8303819); glycanIons.Add(glycanIon); } else if (hexnac_count == 1) @@ -165,7 +195,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -183,22 +213,25 @@ public static List NGlycanCompositionFragments(byte[] kind) } - int min_hexose_inaggregate = Math.Min(hexose_inaggregate + 1, 4); - for (int hexose_count = 1; hexose_count <= min_hexose_inaggregate; hexose_count++) + int base_hexose = Math.Min(hexose_total, hexose_Core); // base_hexose is the first priority hexose count, they all come from the core. 
+ for (int hexose_count = 1; hexose_count <= base_hexose + 1; hexose_count++) { GlycanIon hexose_glycanIon = GenerateGlycanIon((byte)hexose_count, (byte)hexnac_count, 0, 0, glycan_mass); glycanIons.Add(hexose_glycanIon); if (!extended_fucosylation) { - GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); - glycanIons.Add(fuc_glycanIon); - - if (iteration_count < xyl_count) + if (iteration_count < fuc_count) { - GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass); - glycanIons.Add(xyl_fuc_glycanIon); - } + GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); + glycanIons.Add(fuc_glycanIon); + + if (iteration_count < xyl_count) + { + GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass); + glycanIons.Add(xyl_fuc_glycanIon); + } + } } else if (fuc_count > 0) { @@ -207,7 +240,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -224,11 +257,11 @@ public static List NGlycanCompositionFragments(byte[] kind) glycanIons.Add(xyl_glycanIon); } - if (hexose_count == 3 && hexnac_count >= 2 * core_count && extended) + if (hexose_count == hexose_Core && hexnac_count >= hexnac_Core && extended) //After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted. 
{ - for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count++) + for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count++) { - if (extra_hexnac_count + hexnac_count > hexnac_inaggregate) + if (extra_hexnac_count + hexnac_count > hexnac_total) // this part is doesn't make sense, because the hexnac_count cannot be larger than total-hexnac { continue; } @@ -257,7 +290,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -276,9 +309,9 @@ public static List NGlycanCompositionFragments(byte[] kind) } - for (int extra_hexose_count = 1; extra_hexose_count < hexose_inaggregate - hexose_count + 1; extra_hexose_count++) + for (int extra_hexose_count = 1; extra_hexose_count < hexose_total - hexose_Core + 1; extra_hexose_count++) { - if (extra_hexose_count + hexose_count > hexose_inaggregate) + if (extra_hexose_count + hexose_count > hexose_total) // this part is doesn't make sense, because the hexnac_count cannot be larger than total-hexnac { continue; } @@ -305,7 +338,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -337,11 +370,13 @@ public static List NGlycanCompositionFragments(byte[] kind) private static GlycanIon GenerateGlycanIon(byte hexose_count, byte hexnac_count, byte fuc_count, byte xyl_count, int glycan_mass) { - byte[] 
ionKind = new byte[] { hexose_count, hexnac_count, 0, 0, fuc_count, 0, 0, 0, 0, xyl_count }; + byte[] ionKind = new byte[] { hexose_count, hexnac_count, 0, 0, fuc_count, 0, 0, 0, 0, xyl_count,0 }; int ionMass = Glycan.GetMass(ionKind); - GlycanIon glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass); + String glycanName = Glycan.GetKindString(ionKind); + + GlycanIon glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass); return glycanIon; } @@ -355,8 +390,9 @@ private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count, ionKind[9] += xyl_count; int ionMass = Glycan.GetMass(ionKind); + string glycanName = Glycan.GetKindString(ionKind); - GlycanIon extend_glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass); + GlycanIon extend_glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass); return extend_glycanIon; } @@ -364,6 +400,7 @@ private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count, //This function build fragments based on the general core of OGlyco fragments. //From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py //The fragment generation is not as good as structure based method. So it is better to use a structure based O-Glycan database. + // We don't use this function now, alternatively, we use the 'OGlycanCompositionCombinationChildIons'. 
public static List OGlycanCompositionFragments(byte[] kind) { List glycanIons = new List(); @@ -374,12 +411,12 @@ public static List OGlycanCompositionFragments(byte[] kind) bool extended = true; int fuc_count = kind[4]; - int hexnac_inaggregate = kind[0]; - int hexose_inaggregate = kind[1]; + int hexnac_total = kind[1]; + int hexose_total = kind[0]; for (int hexnac_count = 0; hexnac_count < 3; hexnac_count++) { - if (hexnac_inaggregate < hexnac_count) + if (hexnac_total < hexnac_count) { continue; } @@ -400,7 +437,7 @@ public static List OGlycanCompositionFragments(byte[] kind) for (int hexose_count = 0; hexose_count < 2; hexose_count++) { - if (hexose_inaggregate < hexose_count) + if (hexose_total < hexose_count) { continue; } @@ -420,9 +457,9 @@ public static List OGlycanCompositionFragments(byte[] kind) // After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted. - if (extended && hexnac_inaggregate - hexnac_count >= 0) + if (extended && hexnac_total - hexnac_count >= 0) { - for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count ++) + for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count ++) { if (extra_hexnac_count > 0) { @@ -440,9 +477,9 @@ public static List OGlycanCompositionFragments(byte[] kind) } - if (hexose_inaggregate > hexose_count && hexose_count > 0) + if (hexose_total > hexose_count && hexose_count > 0) { - for (int extra_hexose_count = 0; extra_hexose_count < hexose_inaggregate - hexose_count; extra_hexose_count++) + for (int extra_hexose_count = 0; extra_hexose_count < hexose_total - hexose_count; extra_hexose_count++) { if (extra_hexose_count > 0 && extra_hexose_count + hexose_count >0) { @@ -473,7 +510,11 @@ public static List OGlycanCompositionFragments(byte[] kind) return glycanIons; } - //The OGlycanCompositionFragments just generate some core GlycanIons. 
We need a combination solution. + /// + /// Generate some child ions based on the kind array. The kind array is the combination of the monosaccharides then filter by the rules. + /// + /// glycan Kind[] + /// The glycanIon collection public static List OGlycanCompositionCombinationChildIons(byte[] kind) { List glycanIons = new List(); @@ -488,7 +529,7 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind foreach (var k in _kinds) { - //Rules to build OGlycan child ions. + //Rules to build OGlycan child ions. Filter the kind array which doesn't meet the rules. //At least one HexNAc if (k[1] == 0) { @@ -515,15 +556,21 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind return glycanIons.OrderBy(p=>p.IonMass).ToList(); } - private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys) - { - if (kind.Sum(p=>p) == 0) + /// + /// Try to create all possible combinations from the glycan kind[]. And store the combination array in the _kinds list. + /// + /// ex. [2,2,0] + /// + /// + private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys) + { + if (kind.Sum(p=>p) == 0) { - return; + return; // if we don't have any monosaccharide, no need to generate the child ions. 
} else { - for (int i = 0; i < kind.Length; i++) + for (int i = 0; i < kind.Length; i++) //traverse the kind array { if (kind[i] >= 1) { diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs index b7557c83e..019c926e9 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs @@ -10,8 +10,14 @@ namespace EngineLayer.GlycoSearch { - public static class GlycoPeptides + public static class GlycoPeptides { + /// + /// Generate a list of isotopic intesitry of the oxonium ions + /// + /// The MS2 Scan + /// + /// int[], The intensity list public static double[] ScanOxoniumIonFilter(Ms2ScanWithSpecificMass theScan, MassDiffAcceptor massDiffAcceptor) { double[] oxoniumIonsintensities = new double[Glycan.AllOxoniumIons.Length]; @@ -180,7 +186,7 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType, return true; } - if (dissociationType == DissociationType.Custom ) + if (dissociationType == DissociationType.Custom ) //Use the fragment type to determine the dissociation type. { if (customIons.Contains(ProductType.zDot) || customIons.Contains(ProductType.c)) { @@ -192,13 +198,22 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType, } //TO THINK: filter reasonable fragments here. The final solution is to change mzLib.Proteomics.PeptideWithSetModifications.Fragment + + /// + /// Get the theoretical fragments of the peptide with the glycan modification. With different dissociation type, the fragment ions are different. 
+ /// + /// + /// + /// + /// + /// product[], Fragments list public static List OGlyGetTheoreticalFragments(DissociationType dissociationType, List customIons, PeptideWithSetModifications peptide, PeptideWithSetModifications modPeptide) { List theoreticalProducts = new List(); HashSet masses = new HashSet(); List products = new List(); - if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID) + if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID) { List diag = new List(); modPeptide.Fragment(dissociationType, FragmentationTerminus.Both, diag); @@ -241,7 +256,7 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci } - foreach (var fragment in products) + foreach (var fragment in products) //this part just for the unique fragment ions. (filter the fragment with the same neturalMass) { if (!masses.Contains(fragment.NeutralMass)) { @@ -253,23 +268,31 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci return theoreticalProducts; } + + /// + /// Generate the theroertical glycan modified peptide. With the glycanBox, modPos, and the peptide. + /// + /// + /// + /// + /// A modfiied peptide. public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theModPositions, PeptideWithSetModifications peptide, GlycanBox glycanBox) { Modification[] modifications = new Modification[glycanBox.NumberOfMods]; for (int i = 0; i < glycanBox.NumberOfMods; i++) { - modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)]; + modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)]; // transfer the glycanBox information to a new list. } Dictionary testMods = new Dictionary(); foreach (var mod in peptide.AllModsOneIsNterminus) { - testMods.Add(mod.Key, mod.Value); + testMods.Add(mod.Key, mod.Value); // transfer the AllMod information to a new list. 
} for (int i = 0; i < theModPositions.Count(); i++) { - testMods.Add(theModPositions.ElementAt(i), modifications[i]); + testMods.Add(theModPositions.ElementAt(i), modifications[i]); //combine the glycanBox information to the AllMod list } var testPeptide = new PeptideWithSetModifications(peptide.Protein, peptide.DigestionParams, peptide.OneBasedStartResidue, @@ -278,6 +301,12 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theMod return testPeptide; } + /// + /// Generate the theroertical glycan modified peptide. With the route the peptide. Because the route contains the glycanBox and modPos information. + /// + /// + /// + /// A modfiied peptide public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theModPositions, PeptideWithSetModifications peptide) { Modification[] modifications = new Modification[theModPositions.Mods.Count]; @@ -303,16 +332,24 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theMod return testPeptide; } - //The function here is to calculate permutation localization which could be used to compare with Graph-Localization. + //Should be revised for easier understanding. + /// + /// Generate all possible glycosite for the glycan set. Supposed we will put the glycan on the glycosite in sequence. + /// + /// Ex. [3,5,2,7] + /// Ex. [2,2,3] means id2 + id2 + id3 + /// A glycosite set collection. Ex. ([2,5,7],[3,5,7]...), each one list means the glcosites for glycanBox. + /// [2,5,7] means we will put the glycan on position 2, 5, 7. + /// public static List GetPermutations(List allModPos, int[] glycanBoxId) { var length = glycanBoxId.Length; - var indexes = Enumerable.Range(0, length).ToArray(); + var indexes = Enumerable.Range(0, length).ToArray(); // just the index for the glycanBoxId to keep the order. 
int[] orderGlycan = new int[length]; - List permutateModPositions = new List(); + List permutateModPositions = new List(); //The list to store all possible permutation localization. - var combinations = Glycan.GetKCombs(allModPos, length); + var combinations = Glycan.GetKCombs(allModPos, length); //Get all possible combinations of the mod sites. ex. four site[1,2,3,4], length:3 -> combination [1,2,3], [1,2,4], [1,3,4], [2,3,4] foreach (var com in combinations) { @@ -332,7 +369,7 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId orderGlycan[i] = glycanBoxId[indexes[i]]; } var key = string.Join(",", orderGlycan.Select(p => p.ToString())); - if (!keys.Contains(key)) + if (!keys.Contains(key)) //Remove the duplicate permutation localization. { keys.Add(key); permutateModPositions.Add(per.ToArray()); @@ -343,25 +380,32 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId return permutateModPositions; } - //The purpose of the funtion is to generate hash fragment ions without generate the PeptideWithMod. keyValuePair key:GlycanBoxId, Value:mod sites + + /// + /// Generate the new fragment list, we add the glycan mass to the c ions and z ions from the peptide fragment list + /// + /// + /// + /// + /// + /// public static int[] GetFragmentHash(List products, Tuple keyValuePair, GlycanBox[] OGlycanBoxes, int FragmentBinsPerDalton) { - double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray(); + double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray(); // store the fragment mass in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3 var len = products.Count / 3; if (keyValuePair.Item2!=null) { - for (int i = 0; i < keyValuePair.Item2.Length; i++) - { + for (int i = 0; i < keyValuePair.Item2.Length; i++) // we want to add the glycan mass to the c ions and z ions that contain the glycan. 
+ { // y ions didn't change in EThcD for O-glyco, so we just need to deal with c ions and z ions. var j = keyValuePair.Item2[i]; - while (j <= len + 1) + while (j <= len + 1) // for c ions { newFragments[j - 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5; j++; } - j = keyValuePair.Item2[i]; - while (j >= 3) + j = keyValuePair.Item2[i]; // reset the j to the position of the glycan + while (j >= 3) // for z ions { - //y ions didn't change in EThcD for O-glyco newFragments[len * 3 - j + 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5; j--; } @@ -369,7 +413,7 @@ public static int[] GetFragmentHash(List products, Tuple ke } - int[] fragmentHash = new int[products.Count]; + int[] fragmentHash = new int[products.Count]; // store the fragment mass in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3 and with the umit of FragmentBinsPerDalton for (int i = 0; i < products.Count; i++) { fragmentHash[i] = (int)Math.Round(newFragments[i] * FragmentBinsPerDalton); @@ -377,8 +421,16 @@ public static int[] GetFragmentHash(List products, Tuple ke return fragmentHash; } - //Find FragmentHash for current box at modInd. - //y-ion didn't change for O-Glycopeptide. + + /// + /// Generate the fragment list with the specific childBox located on specific modPos. At here, the ModInd is the index for modPos. Not used in the current version. + /// + /// + /// ModPos list + /// Specific ModPos, index in ModPos + /// Whole glycanBox + /// Partial glycanBox, at here is the childBox + /// public static List GetLocalFragment(List products, int[] modPoses, int modInd, ModBox OGlycanBox, ModBox localOGlycanBox) { List newFragments = new List(); @@ -433,21 +485,38 @@ public static List GetUnlocalFragment(List products, int[] modP } - //The oxoniumIonIntensities is related with Glycan.AllOxoniumIons. - //Rules are coded in the function. 
- public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBox glycanBox) + /// + /// Use the oxonium ions to determine the glycan type. + /// + /// From the Scan + /// The glycanBox to be tested + /// True : The Oglycan pass the filter, False : The OGl + public static bool DiagonsticFilter(double[] oxoniumIonsintensities, GlycanBox glycanBox) { + double HexNAc_diagnostic = oxoniumIonsintensities[4]; + double NeuAc_diagnostic1 = oxoniumIonsintensities[10]; + double NeuAc_diagnostic2 = oxoniumIonsintensities[12]; + double HexNAcPlusHex_diagnostic = oxoniumIonsintensities[14]; + //If a glycopeptide spectrum does not have 292.1027 or 274.0921, then remove all glycans that have sialic acids from the search. - if (oxoniumIonsintensities[10] <= 0 && oxoniumIonsintensities[12] <= 0) + if (NeuAc_diagnostic1 / HexNAc_diagnostic > 0.02 && NeuAc_diagnostic2 / HexNAc_diagnostic > 0.02) + { + if (glycanBox.Kind[2] == 0 ) + { + return false; + } + } + + if(NeuAc_diagnostic1 / HexNAc_diagnostic < 0.02 && NeuAc_diagnostic2 / HexNAc_diagnostic < 0.02) { - if (glycanBox.Kind[2] != 0 || glycanBox.Kind[3] != 0) + if (glycanBox.Kind[2] != 0) { return false; } } //If a spectrum has 366.1395, remove glycans that do not have HexNAc(1)Hex(1) or more. Here use the total glycan of glycanBox to calculate. - if (oxoniumIonsintensities[14] > 0) + else if (HexNAcPlusHex_diagnostic / HexNAc_diagnostic > 0.02) { if (glycanBox.Kind[0] < 1 && glycanBox.Kind[1] < 1) { @@ -458,6 +527,7 @@ public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBo //Other rules: //A spectrum needs to have 204.0867 to be considered as a glycopeptide. //Ratio of 138.055 to 144.0655 can seperate O/N glycan. + // use some other oxonium ions to determine the glycan type. 
return true; } diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs index b1e8bccf4..af9ba0e58 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs @@ -15,13 +15,13 @@ namespace EngineLayer.GlycoSearch public class GlycoSearchEngine : ModernSearchEngine { public static readonly double ToleranceForMassDifferentiation = 1e-9; - private readonly int OxoniumIon204Index = 9; //Check Glycan.AllOxoniumIons - protected readonly List[] GlobalCsms; + private readonly int OxoniumIon204Index = 9; // Check Glycan.AllOxoniumIons + protected readonly List[] GlobalGsms; // Why don't we call it GlobalGsms? private GlycoSearchType GlycoSearchType; - private readonly int TopN; + private readonly int TopN; // DDA top Peak number. private readonly int _maxOGlycanNum; - private readonly bool OxoniumIonFilter; //To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc). + private readonly bool OxoniumIonFilter; // To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc). private readonly string _oglycanDatabase; private readonly string _nglycanDatabase; @@ -30,12 +30,13 @@ public class GlycoSearchEngine : ModernSearchEngine private readonly List[] SecondFragmentIndex; + // The constructor for GlycoSearchEngine, we can load the parameter for the searhcing like mode, topN, maxOGlycanNum, oxoniumIonFilter, datsbase, etc. 
public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List peptideIndex, List[] fragmentIndex, List[] secondFragmentIndex, int currentPartition, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string oglycanDatabase, string nglycanDatabase, GlycoSearchType glycoSearchType, int glycoSearchTopNum, int maxOGlycanNum, bool oxoniumIonFilter, List nestedIds) : base(null, listOfSortedms2Scans, peptideIndex, fragmentIndex, currentPartition, commonParameters, fileSpecificParameters, new OpenSearchMode(), 0, nestedIds) { - this.GlobalCsms = globalCsms; + this.GlobalGsms = globalCsms; this.GlycoSearchType = glycoSearchType; this.TopN = glycoSearchTopNum; this._maxOGlycanNum = maxOGlycanNum; @@ -48,19 +49,19 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci ProductSearchMode = new SinglePpmAroundZeroSearchMode(20); //For Oxonium ion only - if (glycoSearchType == GlycoSearchType.OGlycanSearch) + if (glycoSearchType == GlycoSearchType.OGlycanSearch) //if we do the O-glycan search, we need to load the O-glycan database and generate the glycoBox. { GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray(); GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans); - GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray(); + GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray(); //generate glycan box for O-glycan search } - else if (glycoSearchType == GlycoSearchType.NGlycanSearch) + else if (glycoSearchType == GlycoSearchType.NGlycanSearch) //because the there is only one glycan in N-glycanpeptide, so we don't need to build the n-glycanBox here. 
{ NGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.NGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _nglycanDatabase).First(), true, false).OrderBy(p => p.Mass).ToArray(); //TO THINK: Glycan Decoy database. //DecoyGlycans = Glycan.BuildTargetDecoyGlycans(NGlycans); } - else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch) + else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch) //search both N-glycan and O-glycan is still not tested and build completely yet. { GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray(); GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans); @@ -76,6 +77,15 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci private Glycan[] NGlycans { get; } //private Glycan[] DecoyGlycans { get; } + /// + /// Run the glycoSearchEngine, the main function for the glycoSearchEngine. + /// Four steps: + /// (1) run a modern search engine to get the peptide candidates. + /// (2) match the peptide candidates with the precursor mass. + /// (3) use the mass shift to generate the route for the glycan localization. + /// (4) evaluate the highest score for the glycan localization and generate the glycoSpectralMatch. + /// + /// SearchResult protected override MetaMorpheusEngineResults RunSpecific() { double progress = 0; @@ -84,14 +94,14 @@ protected override MetaMorpheusEngineResults RunSpecific() byte byteScoreCutoff = (byte)CommonParameters.ScoreCutoff; - int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; - int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); + int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; // MaxThreads = deafult is 7. 
+ int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); // We can do the parallel search on different threads Parallel.ForEach(threads, (scanIndex) => { byte[] scoringTable = new byte[PeptideIndex.Count]; List idsOfPeptidesPossiblyObserved = new List(); - byte[] secondScoringTable = new byte[PeptideIndex.Count]; + byte[] secondScoringTable = new byte[PeptideIndex.Count]; // We didn't use that right now. List childIdsOfPeptidesPossiblyObserved = new List(); List idsOfPeptidesTopN = new List(); @@ -110,7 +120,7 @@ protected override MetaMorpheusEngineResults RunSpecific() var scan = ListOfSortedMs2Scans[scanIndex]; - // get fragment bins for this scan + // get fragment bins for this scan List allBinsToSearch = GetBinsToSearch(scan, FragmentIndex, CommonParameters.DissociationType); //Limit the high bound limitation, here assume it is possible to has max 3 Da shift. This allows for correcting precursor in the future. @@ -146,23 +156,23 @@ protected override MetaMorpheusEngineResults RunSpecific() // } //} - // done with indexed scoring; refine scores and create PSMs - if (idsOfPeptidesPossiblyObserved.Any()) + // filtering the peptides candidate with the cufoff and limit the topN peptides. + if (idsOfPeptidesPossiblyObserved.Any()) { scoreAtTopN = 0; peptideCount = 0; - foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p])) + foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p])) //from the higest score to the lowest score { - if (scoringTable[id] < (int)byteScoreCutoff) + if (scoringTable[id] < (int)byteScoreCutoff) //if the score is lower than the cutoff, we can skip this peptide. { continue; } peptideCount++; if (peptideCount == TopN) { - scoreAtTopN = scoringTable[id]; + scoreAtTopN = scoringTable[id]; //ScoreAtTopN = The score of the last peptide in the TopN list. 
} - if (scoringTable[id] < scoreAtTopN) + if (scoringTable[id] < scoreAtTopN) { break; } @@ -173,7 +183,7 @@ protected override MetaMorpheusEngineResults RunSpecific() if (GlycoSearchType == GlycoSearchType.OGlycanSearch) { - gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff); + gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff); // Use the peptide candidate and the scan to generate the gsms. } else if(GlycoSearchType == GlycoSearchType.NGlycanSearch) { @@ -191,14 +201,14 @@ protected override MetaMorpheusEngineResults RunSpecific() continue; } - if (GlobalCsms[scanIndex] == null) + if (GlobalGsms[scanIndex] == null) { - GlobalCsms[scanIndex] = new List(); + GlobalGsms[scanIndex] = new List(); //the first one finished task, create teh new gsms list. } else { - gsms.AddRange(GlobalCsms[scanIndex]); - GlobalCsms[scanIndex].Clear(); + gsms.AddRange(GlobalGsms[scanIndex]); + GlobalGsms[scanIndex].Clear(); } Add2GlobalGsms(ref gsms, scanIndex); @@ -213,11 +223,11 @@ protected override MetaMorpheusEngineResults RunSpecific() { oldPercentProgress = percentProgress; ReportProgress(new ProgressEventArgs(percentProgress, "Performing glyco search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds)); - } + } //percentProgress = 100, "Performing glyco search...1/1", NestedIds = 3. } }); - return new MetaMorpheusEngineResults(this); + return new MetaMorpheusEngineResults(this); //Storage the result information into the result class. } private void Add2GlobalGsms(ref List gsms, int scanIndex) @@ -229,37 +239,37 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex) foreach (var gsm in gsms.Where(p => p != null).OrderByDescending(p => p.Score).ThenBy(c => c.FullSequence)) { - if (gsmsCount <= 10) + if (gsmsCount <= 10) { - gsm.ResolveAllAmbiguities(); + gsm.ResolveAllAmbiguities(); //Try to resolve any case that have the same sequence in the PSM. 
- if (gsmsCount == 1) + if (gsmsCount == 1) //If the gsms number is 1, we don't need to check the score and sequence. { preScore = gsm.Score; preString = gsm.FullSequence; - GlobalCsms[scanIndex].Add(gsm); + GlobalGsms[scanIndex].Add(gsm); gsmsCount++; } - else + else { - if (gsm.Score - preScore < ToleranceForMassDifferentiation && + if (gsm.Score - preScore < ToleranceForMassDifferentiation && gsm.Score - preScore > -ToleranceForMassDifferentiation) { string currentString = gsm.FullSequence; - if (preString == currentString) + if (preString == currentString) //If peptides have the same sequence and their score is almost the same { - foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods) - { - GlobalCsms[scanIndex].Last().AddProteinMatch(bestMatchPeptide, gsm.BioPolymersWithSetModsToMatchingFragments[bestMatchPeptide.Peptide]); + foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods) // We should add tje new ProteinMatch to the gsm. + { // Because the indentical sequence may from the different protein. 
+ GlobalGsms[scanIndex].Last().AddProteinMatch(bestMatchPeptide, gsm.BioPolymersWithSetModsToMatchingFragments[bestMatchPeptide.Peptide]); } } else { preString = currentString; - GlobalCsms[scanIndex].Add(gsm); + GlobalGsms[scanIndex].Add(gsm); gsmsCount++; } } @@ -272,7 +282,7 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex) } } - //For FindOGlycan + //For FindOGlycan, generate the gsms for O-glycan search private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIndex, int rank, PeptideWithSetModifications peptide, Route localization, double[] oxoniumIonIntensities, List localizationGraphs) { var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(localization, peptide); @@ -348,12 +358,21 @@ private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIn } else { - psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5]; + psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5]; // if the ratio is high, that means the glycan is more likely to be N-glycan. Oppsitely, ration is small means close to O-glycan. } return psmGlyco; } + /// + /// If the peptide mass is perfectly match with the precursor mass, we can directly generate the gsms for the peptide. Store the gsms into the possibleMatches. + /// + /// + /// + /// + /// The peptide candidate + /// + /// The space to store the gsms private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, ref List possibleMatches) { List products = new List(); @@ -370,16 +389,30 @@ private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scor } } + /// + /// Match the mass of the peptide candidate with the precursor mass. Try to generate the Gsms for the Scan. Gsms will be stored in the possibleMatches. 
+ /// + /// + /// + /// + /// peptide candidate + /// + /// The precursor mass + /// + /// The space to store the gsms private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, double possibleGlycanMassLow, double[] oxoniumIonIntensities, ref List possibleMatches) { - int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); + // The glycanBoxes will be filtered by the oxonium ions. If the oxonium ions don't make sense, we will remove the glycanBox. + - int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray(); + int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); // try to find the index that closet match to the "possibleGlycanMassLow" within the glycanBox + + int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray(); //list all of the possible glycoslation site/postition var localizationScan = theScan; - List products = new List(); + List products = new List(); // product list for the theoretical fragment ions - //For HCD-pd-ETD or CD-pd-EThcD type of data + //For HCD-pd-ETD or CD-pd-EThcD type of data, we generate the different rpoducts. if (theScan.ChildScans.Count > 0 && GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons)) { localizationScan = theScan.ChildScans.First(); @@ -396,33 +429,33 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco //No localization can be done with MS2-HCD spectrum //TO THINK: there is a special situation. The HCD only scan from HCD-pd-EThcD data can be a glycopeptide, but there is no ETD, so there is no localization. What to do with this? 
bool is_HCD_only_data = !GlycoPeptides.DissociationTypeContainETD(CommonParameters.DissociationType, CommonParameters.CustomIons) && !GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons); - if (is_HCD_only_data) + if (is_HCD_only_data) // In the HCD, there is no Y ion, so we don't need to consider the modification here. { theScanBestPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products); } double bestLocalizedScore = 0; - List localizationGraphs = new List(); + List localizationGraphs = new List(); // if we also have ETD, then we will search the localization - while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) + while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) // verify the glycan mass is invaild (within the range and match with mass shift) { - if (OxoniumIonFilter && !GlycoPeptides.OxoniumIonsAnalysis(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow])) + if (OxoniumIonFilter && !GlycoPeptides.DiagonsticFilter(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow])) // if the filter is turned on, we need to check does the oxoiums make sense. { - iDLow++; + iDLow++; // if the oxonium ions don't make sense (there is no 204, or without their diagnostic ion), we can skip this glycan. continue; } - if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods) + if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods) // the glycosite number should be larger than the possible glycan number. 
{ LocalizationGraph localizationGraph = new LocalizationGraph(modPos, GlycanBox.OGlycanBoxes[iDLow], GlycanBox.OGlycanBoxes[iDLow].ChildGlycanBoxes, iDLow); - LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products); + LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products); //create the localization graph with the glycan mass and the possible glycosite. double currentLocalizationScore = localizationGraph.TotalScore; - if (currentLocalizationScore > bestLocalizedScore) + if (currentLocalizationScore > bestLocalizedScore) //Try to find the best glycanBox with the highest score. { bestLocalizedScore = currentLocalizationScore; localizationGraphs.Clear(); - localizationGraphs.Add(localizationGraph); + localizationGraphs.Add(localizationGraph); // we only keep the best glycanBox and its localizationgraph. } else if ((is_HCD_only_data || bestLocalizedScore > 0) && (currentLocalizationScore <= bestLocalizedScore + 0.00000001 && currentLocalizationScore >= bestLocalizedScore - 0.00000001)) { @@ -436,10 +469,10 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco //In theory, the peptide_localization shouldn't be null, but it is possible that the real score is smaller than indexed score. if (localizationGraphs.Count > 0) { - var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes); - var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath); + var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes); //Get the first path from the localization graph. 
+ var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath); //Get the route of the localization from the first path inforation - var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs); + var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs); //Create the glycoSpectralMatch if (psmGlyco.Score > scoreCutOff) { @@ -519,7 +552,7 @@ private void FindNGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco } } - + // Conduct the search and generate the gsms for N-glycan search private List FindNGlycopeptide(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff) { List possibleMatches = new List(); @@ -566,19 +599,34 @@ private List FindNGlycopeptide(Ms2ScanWithSpecificMass theSc } return possibleMatches; } + + + // Match the mass of the peptide candiate with the precursor mass, then try to generate the gsms object as output + /// + /// This is a general function for gsm generating. It was operated after the Modern Search. + /// Two Step: + /// (1) Match the mass of the peptide candiate with the precursor mass, then decide to go to which function to generate the gsms object. + /// (2) Catch the gsms object and store it into the possibleMatches then return. + /// + /// The MS2 Scan + /// The peptide candidate from the modern Search + /// + /// + /// The Gsms collection. private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff) { List possibleMatches = new List(); + for (int ind = 0; ind < idsOfPeptidesPossiblyObserved.Count; ind++) { - var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; + var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; // Get the peptide from the candidate list. 
- if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) + if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) // If the peptide mass is indentical to the precursor mass (or within the tolerance), we can directly search the glycopeptide. { FindSingle(theScan, scanIndex, scoreCutOff, theScanBestPeptide, ind, ref possibleMatches); } - else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //Filter out unknow non-glycan modifications. + else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //If not, we need to consider the glycan mass difference. { //Filter by glycanBoxes mass difference. var possibleGlycanMassLow = PrecusorSearchMode.GetMinimumValue(theScan.PrecursorMass) - theScanBestPeptide.MonoisotopicMass; @@ -587,7 +635,7 @@ private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificM if (possibleGlycanMassHigh < GlycanBox.OGlycanBoxes.First().Mass || possibleGlycanMassLow > GlycanBox.OGlycanBoxes.Last().Mass) { - continue; + continue; // if the glycan mass difference is out of the range of the glycan box, we can skip this peptide. } //Filter by OxoniumIon diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs index acc6db3be..ec29d613c 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs @@ -47,9 +47,15 @@ public GlycoSpectralMatch(PeptideWithSetModifications theBestPeptide, int notch, public double R138vs144 { get; set; } // The intensity ratio of this 138 and 144 could be a signature for O-glycan or N-glycan. public List> LocalizedGlycan { get; set; } // All seen glycans identified. 
- public LocalizationLevel LocalizationLevel { get; set; } + public LocalizationLevel LocalizationLevel { get; set; } //Motif should be writen with required form + /// + /// Try to get the ModSite in the right format. + /// + /// full peptide sequence ex. "PTLFKNVSLYK" + /// modificatino AA ex. "S","T" + /// int[], the Modpositon index list ex.[9,3] public static List GetPossibleModSites(PeptideWithSetModifications peptide, string[] motifs) { List possibleModSites = new List(); @@ -58,14 +64,14 @@ public static List GetPossibleModSites(PeptideWithSetModifications peptide, foreach (var mtf in motifs) { - if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif)) + if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif)) //Check if the motif is valid, and creat the motif object from the string. { - Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere."); + Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere."); modifications.Add(modWithMotif); } } - foreach (var modWithMotif in modifications) + foreach (var modWithMotif in modifications) //interate through all the modifications with motif. 
{ for (int r = 0; r < peptide.Length; r++) { @@ -113,7 +119,11 @@ public static bool MotifExist(string baseSeq, string[] motifs) return false; } - public static string GetTabSepHeaderSingle() + /// + /// Generate the peptide header, ex File name, Precursor m/z, Score… + /// + /// + public static string GetTabSepHeaderSingle() //Most complicate part in this class { var sb = new StringBuilder(); sb.Append("File Name" + '\t'); @@ -151,6 +161,10 @@ public static string GetTabSepHeaderSingle() return sb.ToString(); } + /// + /// Generate the glyco header ex Localization Score, Yion Score… + /// + /// public static string GetTabSeperatedHeaderGlyco() { var sb = new StringBuilder(); @@ -174,6 +188,10 @@ public static string GetTabSeperatedHeaderGlyco() return sb.ToString(); } + /// + /// Put the psm data into the corresponding columns. + /// + /// public string SingleToString() { var sb = new StringBuilder(); @@ -188,7 +206,7 @@ public string SingleToString() var proteinAccessionString = Accession ?? PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide.Parent.Accession), FullSequence).ResolvedString; sb.Append(proteinAccessionString + "\t"); sb.Append(Organism + "\t"); - sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t"); + sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t"); //protein name int _FirstOneBasedStartResidueInProtein = OneBasedStartResidue.HasValue ? OneBasedStartResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedStartResidue; int _FirstOneBasedEndResidueInProtein = OneBasedEndResidue.HasValue ? 
OneBasedEndResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedEndResidue; ; @@ -257,7 +275,10 @@ public string SingleToString() return sb.ToString(); } - //This should be appended to SingleToString + /// + /// Put the glycan data into the corresponding columns. + /// + /// public string GlycoToString() { var sb = new StringBuilder(); @@ -291,11 +312,11 @@ public string GlycoToString() for (int i = 0; i < glycanBox.NumberOfMods; i++) { glycans[i] = GlycanBox.GlobalOGlycans[glycanBox.ModIds[i]]; - } + } //Convert the glycanBox index into the real glycan object. ex. [H1N1, H2N2A1, H2N2A1F1] if (glycans.First().Struc != null) { - sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray())); + sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray())); //ex. (N(H)),(N(H(A))(N(H))),(N(H)(N(H(A))(F)) } sb.Append("\t"); @@ -357,38 +378,45 @@ public static Dictionary MatchedIonDataDictionary(List ; Input: List + + /// + /// Two function included: + /// (1) Analysis all pair, and evaluate any site is occured in all cases, if yes set a true on that. If not, set a false. + /// (2) Classify the localization level base on the localization. + /// + /// all case of the pair + /// level 1 to level 3 + /// A tuple, represent the pair and its confidience ex. 
[3,5,ture] means glycan 5 located on glycosite 3, and very confidience public static List> GetLocalizedGlycan(List OGlycanBoxLocalization, out LocalizationLevel localizationLevel) { List> localizedGlycan = new List>(); - //Dictionary: modsite-id, count - Dictionary seenModSite = new Dictionary(); + Dictionary modSiteSeenCount = new Dictionary(); // all possible glycan-sites pair, Dictionary: site-glycan pair, count - foreach (var ogl in OGlycanBoxLocalization) + foreach (var ogl in OGlycanBoxLocalization) // ogl means one case, there are three glycan located on the same peptide: (5,1,False),(9,8,Flase),(10,9,Ture) { - foreach (var og in ogl.Mods) + foreach (var og in ogl.Mods) // og means one glycan locaization, like (5,1,False) -> glycan 1 attached on postion5. { - var k = og.Item1.ToString() + "-" + og.Item2.ToString(); - if (seenModSite.ContainsKey(k)) + var k = og.Item1.ToString() + "-" + og.Item2.ToString(); // k = 5-1(glycosite-glycan) means the glycan-site pair + if (modSiteSeenCount.ContainsKey(k)) // accout the number of the same glycan-site pair { - seenModSite[k] += 1; + modSiteSeenCount[k] += 1; // this pair cpunt +1 } else { - seenModSite.Add(k, 1); + modSiteSeenCount.Add(k, 1); // If the pair is first time to seen, add it to the dictionary. } } } localizationLevel = LocalizationLevel.Level3; - if (OGlycanBoxLocalization.Count == 1) + if (OGlycanBoxLocalization.Count == 1) // we just have one situation(route), no other possibility { localizationLevel = LocalizationLevel.Level1; } else if (OGlycanBoxLocalization.Count > 1) { - if (seenModSite.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0) + if (modSiteSeenCount.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0) //If anyone of the glycan-site pair is localized in all the cases, then the localization level is 2. 
{ localizationLevel = LocalizationLevel.Level2; } @@ -398,9 +426,9 @@ public static List> GetLocalizedGlycan(List OGlycan } } - foreach (var seenMod in seenModSite) + foreach (var seenMod in modSiteSeenCount) { - if (seenMod.Value == OGlycanBoxLocalization.Count) + if (seenMod.Value == OGlycanBoxLocalization.Count) // Try to fine the glycan-site pair that always localized in all the cases. { localizedGlycan.Add(new Tuple(int.Parse(seenMod.Key.Split('-')[0]), int.Parse(seenMod.Key.Split('-')[1]), true)); } @@ -413,6 +441,11 @@ public static List> GetLocalizedGlycan(List OGlycan return localizedGlycan; } + /// + /// convert the Route information into the string format. + /// + /// Route collection ex. [(9,4),(8,4),(7,4)...], ModBoxId = 7 + /// string {@7[8-4]}{@7[7-4]}{@7[6-4]} means three case, glycan 4 located on glycosite 6, glycan 4 located on glycosite 7, glycan 4 located on glycosite 8 public static string AllLocalizationInfo(List OGlycanBoxLocalization) { string local = ""; @@ -433,7 +466,7 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization) { var ogl = OGlycanBoxLocalization[i]; local += "{@" + ogl.ModBoxId.ToString() + "["; - var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString())); + var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString())); //why we have to -1 here? local += g + "]}"; i++; } @@ -446,7 +479,15 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization) return local; } - //Correct Localization Level based on site specific probability. If LocalizationLevel = 1, and there are site probability lower than 0.75, Correct the level to 1b. + /// + /// Just for the case at Level1 and Level1b. 
+ /// + /// + /// + /// + /// + /// + /// level 1 or level 1b public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, LocalizationGraph localizationGraph, Route route, List> localizedGlycan, LocalizationLevel localizationLevel) { if (siteSpeciLocalProb == null || localizationLevel!=LocalizationLevel.Level1) @@ -468,7 +509,7 @@ public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local, ref string local_protein) + /// + /// Output the special localization information. String store in Local_peptide and Local_protein. ex. [9,H2N2A1F1,0.589] means glycan H2N2A1F1 located on glycosite 9 with 0.589 probability. + /// + /// site : (glycan, probility)[] ex. site2 : [(glycan1, 5%), (glycan2, 5%), (glycan3, 90%)] + /// [(6,4,false),(7,4,false),(7,2,true)], glycosite,glycan,confidience respectively + /// + /// + /// + public static void LocalizedSiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local_peptide, ref string local_protein) { if (siteSpeciLocalProb == null) { return; } - foreach (var loc in localizedGlycan.Where(p => p.Item3)) + foreach (var glycositePair in localizedGlycan.Where(p => p.Item3)) // get the most confidient glycosite-glycan pair, loc is a pair of glycosite and glycan. Item 1 is glycosite, Item 2 is glycanId. { - var x = siteSpeciLocalProb[loc.Item1].Where(p => p.Item1 == loc.Item2).First().Item2; - var peptide_site = loc.Item1 - 1; - local += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]"; + var site_glycanProb = siteSpeciLocalProb[glycositePair.Item1].Where(p => p.Item1 == glycositePair.Item2).First().Item2; // get the probability of the specfic glycan on the specific site. 
+ var peptide_site = glycositePair.Item1 - 1; + local_peptide += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]"; - var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + loc.Item1 - 2 : -1; - local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]"; + var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + glycositePair.Item1 - 2 : -1; + local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]"; } } + + /// + /// Generate the site specific localization information. + /// + /// + /// Site specific localization information. ex. {1[1,0.2][2,0.8]} means glycan 1 and 2 are located on glycosite 1 and 2 with 20% and 80% probability. public static string SiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb) { string local = ""; diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs index 3d56c5cd0..d68eeafd2 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs @@ -17,8 +17,8 @@ public class LocalizationGraph public ModBox ModBox { get; } public ModBox[] ChildModBoxes { get; set; } - public double NoLocalCost{get; set;} //Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here. - public double TotalScore { get; set; } //Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions. + public double NoLocalCost{get; set;} // Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here. 
+ public double TotalScore { get; set; } // Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions. public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, int id) { @@ -36,7 +36,13 @@ public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, in } //The modification problem is turned into a Directed Acyclic Graph. The Graph was build with matrix, and dynamic programming is used. - //The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode. + /// + /// The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode. + /// + /// The space to store the data + /// The MS2 scan + /// + /// public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List products) { var boxSatisfyBox = BoxSatisfyBox(localizationGraph.ChildModBoxes); @@ -44,17 +50,17 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW for (int i = 0; i < localizationGraph.ModPos.Length; i++) { //maxLength: the most mods we can have up to current mod pos; minlengtt: the least mods we can have up to current mod pos. - int maxLength = i + 1; - int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i); - + int maxLength = i + 1; //For the first node, the maxlength is 1. Means we max have one glycan in this positioin. + int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i); //In order to get min number, the min = number of glycan in the box - number of node from the last. + // Total 3 glycan in the box, end position is 7, then for position 5, the min = 3 - (7-5) = 1. 
for (int j = 0; j < localizationGraph.ChildModBoxes.Length; j++) { if (localizationGraph.ChildModBoxes[j].NumberOfMods <= maxLength && localizationGraph.ChildModBoxes[j].NumberOfMods >= minlength) { - AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]); + AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]); //chekc the num of glycan in this node is make sense. double cost = 0; - if (i != localizationGraph.ModPos.Length - 1) + if (i != localizationGraph.ModPos.Length - 1) // check the node is not the last one. { var fragments = GlycoPeptides.GetLocalFragment(products, localizationGraph.ModPos, i, localizationGraph.ModBox, localizationGraph.ChildModBoxes[j]); cost = CalculateCost(theScan, productTolerance, fragments); @@ -77,7 +83,7 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW { adjNode.AllSources.Add(prej); - var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost; + var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost; //Try to get the max cost from previous AdjNode. if (tempCost > maxCost) { adjNode.CummulativeSources.Clear(); @@ -110,7 +116,13 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW localizationGraph.TotalScore = localizationGraph.array[localizationGraph.ModPos.Length - 1][localizationGraph.ChildModBoxes.Length - 1].maxCost + noLocalScore; } - //Based on our implementation of Graph localization. We need to calculate cost between two nearby nodes (glycosites) + /// + /// Calculate the cost/Score of the Scan. + /// + /// + /// + /// + /// The Score public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List fragments) { double score = 0; @@ -128,7 +140,12 @@ public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance pr return score; } - //Check if array1 contains array2 with repeats numbers. 
+ /// + /// Check does the node1 contain everything in another node2? + /// + /// + /// + /// Ture, False private static bool TryGetLeft(int[] array1, int[] array2) { //Get compliment box @@ -148,9 +165,12 @@ private static bool TryGetLeft(int[] array1, int[] array2) return true; } - //The Directed Acyclic Graph is build from left to right. In the process, we need to know which node can linked to nodes from its left. - //Since node contains Childbox. We name this function as BoxSatisfyBox. - //The function defines how a childBox could be linked from all childBoxes. + + /// + /// Build a chart for the node connection rule. Used the chart to check if the next node could be linked to the previous node. + /// + /// + /// Chart (one column is previous, one column is current, the value is boolean) public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) { Dictionary boxIdBoxes = new Dictionary(); @@ -160,7 +180,7 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) for (int j = 0; j <= i; j++) { if (childBoxes[i].NumberOfMods <= childBoxes[j].NumberOfMods + 1 && (childBoxes[j].NumberOfMods ==0 || TryGetLeft(childBoxes[i].ModIds, childBoxes[j].ModIds))) - { + { //Check the next node could be the same or one more mod than the previous node. Besdies, the next node should contain all mods that the previous node has. idBoxes[j] = true; } } @@ -170,8 +190,13 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) return boxIdBoxes; } - //Get all path with hightest score of Directed Acyclic Graph by recursion. - //Start from the last AdjNode[row-1 ][col-1], go back to it Sources, which contains the previous AdjNode with the highest cost. + + /// + /// Try to ll the highest score path in the graph. Start from the last AdjNode[row-1 ][col-1], go back to it Sources, which contains the previous AdjNode with the highest cost. 
+ /// + /// + /// + /// The path (one or more) with the higgest Score public static List GetAllHighestScorePaths(AdjNode[][] array, ModBox[] boxes) { List allPaths = new List(); @@ -207,7 +232,12 @@ private static void GetAllHighestScorePathHelper(List allPaths, AdjNode[] } } - //Get one path of Directed Acyclic Graph by recursion. + /// + /// Get The toppest position path of in the localGraph by recursion Method. + /// + /// + /// + /// public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) { @@ -216,7 +246,7 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) int[] temp = new int[xlength]; - temp[xlength - 1] = ylength - 1; + temp[xlength - 1] = ylength - 1; // That is the last node in the graph, position is last one, and the childBpx is also the last one means the whole glycan. FirstPathHelper(array, xlength - 1, ylength - 1, temp); @@ -225,26 +255,29 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) private static void FirstPathHelper(AdjNode[][] array, int xind, int yind, int[] temp) { - if (xind == 0) + if (xind == 0) //xind = 0 means, there is just one glycosite. So the node must be the last one in the childBox = whole glycan. { - return; + return; // temp[0] = last one in the childBox = length-1. } - var pre = array[xind][yind].CummulativeSources.First(); + var pre = array[xind][yind].CummulativeSources.First(); // The first one in the CummulativeSources is the toppest previous node. xind--; yind = pre; temp[xind] = yind; FirstPathHelper(array, xind, yind, temp); } - //The original path we get is just an array of AdjNode positions. For example, path = [1, 1, 2, 2] means the best nodes are at array[0][1], array[1][1], array[2][2], array[3][2] - //This function here is to transfer the path into localized Route. Route contains each glycosite with glycanId. - //Basicly, any change from left to right of the path indicates a modification. 
For example, the path = [1, 1, 2, 2] which means there is a modification at ModPos[0] and ModPos[2] + /// + /// Convert the path inforation into Route object. + /// + /// + /// ex.[1,1,2,2,5] means the node in the localGraph, first node is ModBox1...last Node is modBox5 + /// Route object, present in glycosite-glycan pait format public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] path) { Route route = new Route(); - if (path.Length == 1) + if (path.Length == 1) //If there is only one number in the path, we will assined "the first glycan in the childBox" to the glycosite. { bool onlyOneLocalized = false; if (localizationGraph.TotalScore > 0) @@ -255,7 +288,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] return route; } - //Add first mod. If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox. + //Add first mod in the first glycosite. + //If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox. //Otherwise childBoxes[path[0]].ModIds.Count == 1 and childBoxes[path[0]].ModIds only contains one ModId. if (localizationGraph.ChildModBoxes[path[0]].ModIds.Count() != 0) { @@ -264,7 +298,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] for (int i = 1; i < path.Length; i++) { - //If there is a change of the path, get the difference between the two Adjnodes of the array. + // If there is a change of the path, get the difference between the two Adjnodes of the array. + // If the node is the same childBox as the previous node. That means there is no modification at this glycosite. We can move on to the next glycosite. 
if (path[i] != path[i - 1]) { var left = GetLeft(localizationGraph.array[i][path[i]].ModBox.ModIds, localizationGraph.array[i - 1][path[i - 1]].ModBox.ModIds).First(); @@ -277,7 +312,13 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] return route; } - //Get the difference between array 1 and array 2 with repeat numbers. + + /// + /// Get the difference in glycan between two node. + /// + /// The composition in this node. Ex. (0,0,1,2) means the cumulative glycoBox is composed of glycan0 + glycan0 + glycan 1 + glycan 2 + /// + /// The difference of the glycan composition between the two node. public static int[] GetLeft(int[] array1, int[] array2) { //Get compliment box @@ -340,13 +381,19 @@ private static void PathHelper_CalP(List allPaths, LocalizationGraph loca } //Dictionary>> is > + /// + /// Generate the localization probability chart for each glycosite. + /// + /// + /// + /// A dictionary represent the chart for glycosite Probility. Ex. key = 2 (ModPos), [(0,0.1),(1,0.3),(2,0.6)] means glycan 0 is 10 %, glycan 1 is 30%, glycan 2 is 60% public static Dictionary>> CalSiteSpecificLocalizationProbability(List routes, int[] modPos) { Dictionary>> probabilityMatrix = new Dictionary>>(); Tuple[][] matrix = new Tuple[modPos.Length][]; - for (int i = 0; i < modPos.Length; i++) + for (int i = 0; i < modPos.Length; i++) // There are all localization set in the route, we just try to sort the certain glycosite-glycan pairs into the corresponding glycosite. { matrix[i] = new Tuple[routes.Count]; for (int j = 0; j < routes.Count; j++) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs index 7113366bb..84f3c61df 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs @@ -1,6 +1,6 @@ namespace EngineLayer { - public class ModBox + public class ModBox //The superclass of GlycanBox { //One peptide can have several modifications. 
The combined modifications are grouped as a modification box. Used for localization. //ModBox -- a defined combination of modifications will be considered to modify on one peptide. The box means the combined group of modification. diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs index 92f915a3b..930e33d24 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs @@ -1,17 +1,11 @@  namespace EngineLayer { - + /// + /// The structure of the glycan + /// public class Node { - public Node(char v, Node l, Node r, Node m) - { - Value = v; - LeftChild = l; - RightChild = r; - MiddleChild = m; - Level = null; - } public Node(char v) { diff --git a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs index 137bc00fb..070bb129d 100644 --- a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs @@ -400,7 +400,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, double highestMassPeptideToLookFor, List peptideIndex, MassDiffAcceptor massDiffAcceptor, double maxMassThatFragmentIonScoreIsDoubled, DissociationType dissociationType) { // get all theoretical fragments this experimental fragment could be - for (int i = 0; i < binsToSearch.Count; i++) + for (int i = 0; i < binsToSearch.Count; i++) //binsToSearch is the list of fragment in Spectra { List peptideIdsInThisBin = FragmentIndex[binsToSearch[i]]; @@ -410,11 +410,11 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, // get index for highest mass allowed int highestPeptideMassIndex = peptideIdsInThisBin.Count - 1; - if (!Double.IsInfinity(highestMassPeptideToLookFor)) + if (!Double.IsInfinity(highestMassPeptideToLookFor)) //check if the highest mass is infinity { - highestPeptideMassIndex = 
BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex); + highestPeptideMassIndex = BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex); //get index for maximum monoisotopic allowed - for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++) + for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++) //find the highest peptide mass index { int nextId = peptideIdsInThisBin[j]; var nextPep = peptideIndex[nextId]; @@ -432,7 +432,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, if (dissociationType == DissociationType.LowCID) { // add intensity for each peptide candidate in the scoring table up to the maximum allowed precursor mass - for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) + for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) { int id = peptideIdsInThisBin[j]; @@ -447,14 +447,14 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, } } else - { - // add +1 score for each peptide candidate in the scoring table up to the maximum allowed precursor mass - for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) + { + // account the peptide index shown in the bin + for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) // iterate through the peptide index in the bin { int id = peptideIdsInThisBin[j]; scoringTable[id]++; - // add possible search results to the hashset of id's + // if the score of the peptide >3 (counts > 3 times), and the mass difference is accepted, add the peptide to the list of peptides possibly observed if (scoringTable[id] == byteScoreCutoff && massDiffAcceptor.Accepts(scanPrecursorMass, peptideIndex[id].MonoisotopicMass) >= 0) { idsOfPeptidesPossiblyObserved.Add(id); diff --git a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs index 3a539c073..145d5d163 100644 
--- a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs +++ b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs @@ -210,19 +210,31 @@ public PsmFromTsv(string line, char[] split, Dictionary parsedHeade BetaPeptideChildScanMatchedIons.Remove(Ms2ScanNumber); } - //For Glyco - GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture); - GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]]; - GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]]; - var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]]; - if (localizationLevel != null) + //For Glyco + try // Try is so that glyco and non-glyco psms can be read from the same file { - if (localizationLevel.Equals("NA")) - GlycanLocalizationLevel = null; - else - GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture); + GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]]; + GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]]; + var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? 
null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]]; + if (localizationLevel != null) + { + if (localizationLevel.Equals("NA")) + GlycanLocalizationLevel = null; + else + GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + } + LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]]; + + } + catch + { + GlycanMass = null; + GlycanComposition = null; + GlycanStructure = null; + GlycanLocalizationLevel = null; + LocalizedGlycan = null; } - LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]]; } /// diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs index 3d3e4ae0f..e3c7b0f6a 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs @@ -150,7 +150,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List(); //For each ms2scan, try to find the best candidate psm from the psms list. Do the localizaiton analysis. Add it into filteredAllPsms. 
- foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => p.ScanNumber)) + foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => (p.ScanNumber, p.FullFilePath))) { var glycos = RemoveSimilarSequenceDuplicates(gsmsPerScan.OrderByDescending(p=>p.Score).ToList()); diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 6578a1790..bc00fe9f8 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -31,6 +31,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List dbFilenameList, List currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List allPsms, CommonParameters commonParameters, GlycoSearchParameters glycoSearchParameters, List proteinList, List variableModifications, List fixedModifications, List localizeableModificationTypes, MyTaskResults MyTaskResults) { + List proteinGroups = null; + if (!Parameters.GlycoSearchParameters.WriteDecoys) { allPsms.RemoveAll(b => b.IsDecoy); @@ -44,7 +46,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li //This is all psms for all files including glyco- and non-glyco psms. 
SingleFDRAnalysis(allPSMs, commonParameters, new List { taskId }); - List filteredGsms = allPSMs.Where(p => p.FdrInfo.QValue < 0.01).ToList(); + List filteredPsms = allPSMs.Where(p => p.FdrInfo.QValue <= 0.01).ToList(); //write individual file results if (Parameters.GlycoSearchParameters.WriteIndividualFiles) @@ -55,28 +57,30 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li Directory.CreateDirectory(individualFileResults); } - foreach (var fileSpecificGSMs in filteredGsms.GroupBy(p => p.FullFilePath)) + + + foreach (var fileSpecificPSMs in filteredPsms.GroupBy(p => p.FullFilePath)) //group by file path; the path is the key of each group { - string individualFileFolder = Path.GetFileNameWithoutExtension(fileSpecificGSMs.Key); + string individualFileFolder = Path.GetFileNameWithoutExtension(fileSpecificPSMs.Key); //folder name. string individualFileFolderPath = Path.Combine(individualFileResults, individualFileFolder); if (!Directory.Exists(individualFileFolderPath)) { Directory.CreateDirectory(individualFileFolderPath); } - var fsgList = fileSpecificGSMs.ToList(); + var fspList = fileSpecificPSMs.ToList(); if (Parameters.GlycoSearchParameters.DoParsimony) { - GlycoProteinAnalysis(fsgList, individualFileFolderPath, individualFileFolder); + GlycoProteinAnalysis(fspList, individualFileFolderPath, individualFileFolder); //Create the proteinGroups file } - foreach (GlycoSpectralMatch gsm in fsgList) //maybe this needs to be the filterd list??? + foreach (GlycoSpectralMatch gsm in fspList) //maybe this needs to be the filtered list???
{ gsm.ResolveAllAmbiguities(); } var individualFilePsmsPath = Path.Combine(individualFileFolderPath, individualFileFolder + "_AllPSMs.psmtsv"); - WriteGlycoFile.WritePsmGlycoToTsv(fsgList, individualFilePsmsPath, false);//this is everything, glyco and non-glyco + WriteGlycoFile.WritePsmGlycoToTsv(fspList, individualFilePsmsPath, false);//this is everything, glyco and non-glyco //the individual file AllPSMs was just written. The next method writes only those PSMs that have a glyco mod - DivideGlycoPsmsIntoGroupsWriteToTsv(glycoSearchParameters.GlycoSearchType, fsgList, commonParameters, taskId, individualFileFolderPath, individualFileFolder); + DivideGlycoPsmsIntoGroupsWriteToTsv(glycoSearchParameters.GlycoSearchType, fspList, commonParameters, taskId, individualFileFolderPath, individualFileFolder); } } @@ -84,41 +88,44 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li switch (glycoSearchParameters.GlycoSearchType) { case GlycoSearchType.OGlycanSearch: - var allPsmsOgly = filteredGsms.Where(p => p.Routes != null).ToList(); - if (allPsmsOgly.Any()) + var OglyInAllPsms = filteredPsms.Where(p => p.Routes != null).ToList(); //Try to filter out the non-glyco psms + if (OglyInAllPsms.Any()) // Is there any gsms in the allPsms? 
{ - SingleFDRAnalysis(allPsmsOgly, commonParameters, new List { taskId }); + SingleFDRAnalysis(OglyInAllPsms, commonParameters, new List { taskId }); var writtenFileOGlyco = Path.Combine(OutputFolder + "\\oglyco" + ".psmtsv"); - var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(allPsmsOgly.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); - var seen_oglyco_localization_file = Path.Combine(OutputFolder + "\\seen_oglyco_localization" + ".tsv"); + var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(OglyInAllPsms.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); + var seen_oglyco_localization_file = Path.Combine(OutputFolder + "\\seen_oglyco_localization" + ".tsv"); //generate the localization file WriteGlycoFile.WriteSeenProteinGlycoLocalization(ProteinLevelLocalization, seen_oglyco_localization_file); var protein_oglyco_localization_file = Path.Combine(OutputFolder + "\\protein_oglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_oglyco_localization_file); - WriteGlycoFile.WritePsmGlycoToTsv(allPsmsOgly, writtenFileOGlyco, true); //we write this last so localization can be attempted + // Writing the oglyco results to a file and summary text + WriteGlycoFile.WritePsmGlycoToTsv(OglyInAllPsms, writtenFileOGlyco, true); //we write this last so localization can be attempted + + } break; case GlycoSearchType.NGlycanSearch: - var allPsmsNgly = filteredGsms.Where(p => p.GlycanScore > 0 && p.Routes == null).ToList(); - if (allPsmsNgly.Any()) + var NglyInAllPsms = filteredPsms.Where(p => p.GlycanScore > 0 && p.Routes == null).ToList(); + if (NglyInAllPsms.Any()) { - SingleFDRAnalysis(allPsmsNgly, commonParameters, new List { taskId }); + SingleFDRAnalysis(NglyInAllPsms, commonParameters, new List { taskId }); var writtenFileNGlyco = Path.Combine(OutputFolder + "\\nglyco" + ".psmtsv"); - var 
ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(allPsmsNgly.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); + var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(NglyInAllPsms.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); var seen_nglyco_localization_file = Path.Combine(OutputFolder + "\\seen_nglyco_localization" + ".tsv"); WriteGlycoFile.WriteSeenProteinGlycoLocalization(ProteinLevelLocalization, seen_nglyco_localization_file); var protein_nglyco_localization_file = Path.Combine(OutputFolder + "\\protein_nglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_nglyco_localization_file); - WriteGlycoFile.WritePsmGlycoToTsv(allPsmsNgly, writtenFileNGlyco, true); //we write this last so localization can be attempted + WriteGlycoFile.WritePsmGlycoToTsv(NglyInAllPsms, writtenFileNGlyco, true); //we write this last so localization can be attempted } break; case GlycoSearchType.N_O_GlycanSearch: default: - var allPsmsgly = filteredGsms.Where(p => p.GlycanScore > 0).ToList(); + var allPsmsgly = filteredPsms.Where(p => p.GlycanScore > 0).ToList(); if (allPsmsgly.Any()) { SingleFDRAnalysis(allPsmsgly, commonParameters, new List { taskId }); @@ -132,29 +139,32 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var protein_no_glyco_localization_file = Path.Combine(OutputFolder + "\\protein_no_glyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_no_glyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsgly, writtenFileNOGlyco, true); //we write this last so localization can be attempted + } break; } if (glycoSearchParameters.DoParsimony) { - GlycoProteinAnalysis(filteredGsms, OutputFolder);//Do the whole group last so inference is done on the whole group + GlycoProteinAnalysis(filteredPsms, OutputFolder, null, 
MyTaskResults);//Do the whole group last so inference is done on the whole group } else { - GlycoAccessionAnalysis(filteredGsms, OutputFolder);//Do the whole group last so inference is done on the whole group + GlycoAccessionAnalysis(filteredPsms, OutputFolder);//Do the whole group last so inference is done on the whole group } QuantificationAnalysis(); WriteQuantificationResults(); var writtenFileSingle = Path.Combine(OutputFolder, "AllPSMs.psmtsv"); - WriteGlycoFile.WritePsmGlycoToTsv(filteredGsms, writtenFileSingle, true); + WriteGlycoFile.WritePsmGlycoToTsv(filteredPsms, writtenFileSingle, true); + + if (Parameters.GlycoSearchParameters.WriteSpectrumLibrary) { List spectrumLibrary = new List(); - foreach (var gsm in filteredGsms) + foreach (var gsm in filteredPsms) { spectrumLibrary.Add(new LibrarySpectrum(gsm.FullSequence, gsm.ScanPrecursorMonoisotopicPeakMz, gsm.ScanPrecursorCharge, gsm.MatchedFragmentIons,gsm.ScanRetentionTime,gsm.IsDecoy)); } @@ -162,10 +172,52 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li } FinishedWritingFile(writtenFileSingle, new List { taskId }); + + WriteSummary(filteredPsms, glycoSearchParameters, MyTaskResults); return MyTaskResults; } + /// + /// Write the summary of the glyco search results to the results txt file + /// + /// + /// + /// + /// + private void WriteSummary(List targetPsms, GlycoSearchParameters glycoSearchParameters, MyTaskResults MyTaskResults) + { + var gsms = targetPsms.Where(p => p.Routes != null).ToList(); + var Level1gsms = gsms.Where(p => p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1).ToList(); + MyTaskResults.AddTaskSummaryText("All target PSMs within 1% FDR: " + (targetPsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + (ProteinGroups?. + Count(p => p.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ??
0)); + + switch (glycoSearchParameters.GlycoSearchType) + { + case GlycoSearchType.OGlycanSearch: + MyTaskResults.AddTaskSummaryText("All target O-Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 O-Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 0)); + break; + case GlycoSearchType.NGlycanSearch: + MyTaskResults.AddTaskSummaryText("All target N-Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 N-Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 0)); + break; + case GlycoSearchType.N_O_GlycanSearch: + MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 
0)); + break; + } + + } + private void DivideGlycoPsmsIntoGroupsWriteToTsv(GlycoSearchType glycoSearchType, List gsms, CommonParameters commonParameters, string taskId, string individualFileFolderPath, string individualFileFolder) @@ -173,8 +225,8 @@ private void DivideGlycoPsmsIntoGroupsWriteToTsv(GlycoSearchType glycoSearchType switch (glycoSearchType) { case GlycoSearchType.OGlycanSearch: - var allPsmsOgly = gsms.Where(p => p.Routes != null).ToList(); - if (allPsmsOgly.Any()) + var allPsmsOgly = gsms.Where(p => p.Routes != null).ToList(); + if (allPsmsOgly.Any()) //Do any of the gsms contain localization information (route)? { SingleFDRAnalysis(allPsmsOgly, commonParameters, new List { taskId }); var writtenFileOGlyco = Path.Combine(individualFileFolderPath, individualFileFolder + "oglyco" + ".psmtsv"); @@ -231,7 +283,7 @@ private void SingleFDRAnalysis(List items, CommonParameters new FdrAnalysisEngine(psms, 0, commonParameters, this.FileSpecificParameters, taskIds).Run(); } - private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null) + private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null ) { // convert gsms to psms List psmsForProteinParsimony = gsms.Select(p => p as SpectralMatch).ToList(); @@ -248,7 +300,8 @@ private void GlycoProteinAnalysis(List gsms, string outputFo ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups; Status("Done constructing protein groups!", Parameters.SearchTaskId); - WriteProteinResults(outputFolder, individualFileFolder); + WriteProteinResults(outputFolder, individualFileFolder, myTaskResults); + } private void GlycoAccessionAnalysis(List gsms, string individualFileFolderPath, string individualFileFolder = null) { @@ -285,13 +338,15 @@ private void GlycoAccessionAnalysis(List gsms, string indivi } } } - private void WriteProteinResults(string outputFolder, string
individualFileFolder = null) + private void WriteProteinResults(string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null) { double qValueCutoff_FORDEBUGONLY = 0.01; string fileName = "AllProteinGroups.tsv"; string writtenFile = Path.Combine(outputFolder, individualFileFolder + "_"+ fileName); WriteProteinGroupsToTsv(ProteinGroups, writtenFile, new List { Parameters.SearchTaskId }, qValueCutoff_FORDEBUGONLY); + + } private void WriteProteinGroupsToTsv(List proteinGroups, string filePath, List nestedIds, double qValueCutoff) { diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/WriteGlycoFile.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/WriteGlycoFile.cs index 81be1bccb..f7208ed5b 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/WriteGlycoFile.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/WriteGlycoFile.cs @@ -71,7 +71,11 @@ public static void WriteSeenProteinGlycoLocalization(Dictionary<(string proteinA } } - //The function is to summarize localized glycosylation of each protein site. + /// + /// To summarize localized glycosylation of each protein site. The filter parameter is MinQValue <= 0.01 and IsLocalized = true. 
+ /// + /// + /// public static void WriteProteinGlycoLocalization(Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony> glycoProteinParsimony, string outputPath) { if (glycoProteinParsimony.Count == 0) diff --git a/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml new file mode 100644 index 000000000..ba75b96b2 --- /dev/null +++ b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml @@ -0,0 +1,65 @@ +TaskType = "GlycoSearch" + +[_glycoSearchParameters] +OGlycanDatabasefile = "OGlycan.gdb" +NGlycanDatabasefile = "NGlycan.gdb" +GlycoSearchType = "NGlycanSearch" +OxoniumIonFilt = true +DecoyType = "Reverse" +GlycoSearchTopNum = 50 +MaximumOGlycanAllowed = 4 +DoParsimony = true +NoOneHitWonders = false +ModPeptidesAreDifferent = false +WriteIndividualFiles = false +WriteDecoys = true +WriteContaminants = true + +[CommonParameters] +TaskDescriptor = "GlycoSearchTask" +MaxThreadsToUsePerFile = 7 +ListOfModsFixed = "Common Fixed\tCarbamidomethyl on C\t\tCommon Fixed\tCarbamidomethyl on U" +ListOfModsVariable = "Common Variable\tOxidation on M" +DoPrecursorDeconvolution = true +UseProvidedPrecursorInfo = true +DeconvolutionIntensityRatio = 3.0 +DeconvolutionMaxAssumedChargeState = 12 +DeconvolutionMassTolerance = "±4.0000 PPM" +TotalPartitions = 1 +ProductMassTolerance = "±20.0000 PPM" +PrecursorMassTolerance = "±10.0000 PPM" +AddCompIons = false +ScoreCutoff = 3.0 +ReportAllAmbiguity = true +NumberOfPeaksToKeepPerWindow = 1000 +MinimumAllowedIntensityRatioToBasePeak = 0.01 +NormalizePeaksAccrossAllWindows = false +TrimMs1Peaks = false +TrimMsMsPeaks = false +UseDeltaScore = false +QValueOutputFilter = 1.0 +PepQValueOutputFilter = 1.0 +CustomIons = ["c", "zDot"] +AssumeOrphanPeaksAreZ1Fragments = true +MaxHeterozygousVariants = 4 +MinVariantDepth = 1 +AddTruncations = false +DissociationType = "EThcD" 
+SeparationType = "HPLC" +MS2ChildScanDissociationType = "Unknown" +MS3ChildScanDissociationType = "Unknown" + +[CommonParameters.DigestionParams] +MaxMissedCleavages = 5 +InitiatorMethionineBehavior = "Variable" +MinPeptideLength = 5 +MaxPeptideLength = 60 +MaxModificationIsoforms = 1024 +MaxModsForPeptide = 2 +Protease = "StcE-trypsin" +SearchModeType = "Full" +FragmentationTerminus = "Both" +SpecificProtease = "StcE-trypsin" +GeneratehUnlabeledProteinsForSilac = true +KeepNGlycopeptide = false +KeepOGlycopeptide = false diff --git a/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml new file mode 100644 index 000000000..01bd6c743 --- /dev/null +++ b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml @@ -0,0 +1,65 @@ +TaskType = "GlycoSearch" + +[_glycoSearchParameters] +OGlycanDatabasefile = "OGlycan.gdb" +NGlycanDatabasefile = "NGlycan.gdb" +GlycoSearchType = "N_O_GlycanSearch" +OxoniumIonFilt = true +DecoyType = "Reverse" +GlycoSearchTopNum = 50 +MaximumOGlycanAllowed = 4 +DoParsimony = true +NoOneHitWonders = false +ModPeptidesAreDifferent = false +WriteIndividualFiles = false +WriteDecoys = true +WriteContaminants = true + +[CommonParameters] +TaskDescriptor = "GlycoSearchTask" +MaxThreadsToUsePerFile = 7 +ListOfModsFixed = "Common Fixed\tCarbamidomethyl on C\t\tCommon Fixed\tCarbamidomethyl on U" +ListOfModsVariable = "Common Variable\tOxidation on M" +DoPrecursorDeconvolution = true +UseProvidedPrecursorInfo = true +DeconvolutionIntensityRatio = 3.0 +DeconvolutionMaxAssumedChargeState = 12 +DeconvolutionMassTolerance = "±4.0000 PPM" +TotalPartitions = 1 +ProductMassTolerance = "±20.0000 PPM" +PrecursorMassTolerance = "±10.0000 PPM" +AddCompIons = false +ScoreCutoff = 3.0 +ReportAllAmbiguity = true +NumberOfPeaksToKeepPerWindow = 1000 +MinimumAllowedIntensityRatioToBasePeak = 0.01 +NormalizePeaksAccrossAllWindows = false +TrimMs1Peaks 
= false +TrimMsMsPeaks = false +UseDeltaScore = false +QValueOutputFilter = 1.0 +PepQValueOutputFilter = 1.0 +CustomIons = ["c", "zDot"] +AssumeOrphanPeaksAreZ1Fragments = true +MaxHeterozygousVariants = 4 +MinVariantDepth = 1 +AddTruncations = false +DissociationType = "EThcD" +SeparationType = "HPLC" +MS2ChildScanDissociationType = "Unknown" +MS3ChildScanDissociationType = "Unknown" + +[CommonParameters.DigestionParams] +MaxMissedCleavages = 5 +InitiatorMethionineBehavior = "Variable" +MinPeptideLength = 5 +MaxPeptideLength = 60 +MaxModificationIsoforms = 1024 +MaxModsForPeptide = 2 +Protease = "StcE-trypsin" +SearchModeType = "Full" +FragmentationTerminus = "Both" +SpecificProtease = "StcE-trypsin" +GeneratehUnlabeledProteinsForSilac = true +KeepNGlycopeptide = false +KeepOGlycopeptide = false diff --git a/MetaMorpheus/Test/GlycoTestData/Glyco_Isobaric_testing.toml b/MetaMorpheus/Test/GlycoTestData/Glyco_Isobaric_testing.toml new file mode 100644 index 000000000..471d55375 --- /dev/null +++ b/MetaMorpheus/Test/GlycoTestData/Glyco_Isobaric_testing.toml @@ -0,0 +1,68 @@ +TaskType = "GlycoSearch" + +[_glycoSearchParameters] +OGlycanDatabasefile = "OGlycan_withIsobaric.gdb" +NGlycanDatabasefile = "NGlycan.gdb" +GlycoSearchType = "OGlycanSearch" +OxoniumIonFilt = true +DecoyType = "Reverse" +GlycoSearchTopNum = 50 +MaximumOGlycanAllowed = 4 +DoParsimony = true +NoOneHitWonders = false +ModPeptidesAreDifferent = true +DoQuantification = false +DoMbrAnalysis = true +QuantifyPpmTol = 5.0 +Normalize = false +WriteIndividualFiles = true +WriteDecoys = true +WriteContaminants = true +WriteSpectrumLibrary = false +DisposeOfFileWhenDone = true + +[CommonParameters] +TaskDescriptor = "Kdntest2" +MaxThreadsToUsePerFile = 7 +ListOfModsFixed = "Common Fixed\tCarbamidomethyl on C" +ListOfModsVariable = "Common Variable\tOxidation on M\t\tCommon Artifact\tDeamidation on N\t\tCommon Artifact\tDeamidation on Q" +DoPrecursorDeconvolution = true +UseProvidedPrecursorInfo = true 
+DeconvolutionMaxAssumedChargeState = 12 +TotalPartitions = 1 +ProductMassTolerance = "±20.0000 PPM" +PrecursorMassTolerance = "±10.0000 PPM" +AddCompIons = false +QValueThreshold = 0.01 +PepQValueThreshold = 1.0 +ScoreCutoff = 3.0 +ReportAllAmbiguity = true +NumberOfPeaksToKeepPerWindow = 1000 +MinimumAllowedIntensityRatioToBasePeak = 0.01 +NormalizePeaksAccrossAllWindows = false +TrimMs1Peaks = false +TrimMsMsPeaks = false +CustomIons = [] +AssumeOrphanPeaksAreZ1Fragments = true +MaxHeterozygousVariants = 4 +MinVariantDepth = 1 +AddTruncations = false +DissociationType = "HCD" +SeparationType = "HPLC" +MS2ChildScanDissociationType = "EThcD" +MS3ChildScanDissociationType = "Unknown" + +[CommonParameters.DigestionParams] +InitiatorMethionineBehavior = "Variable" +MaxMissedCleavages = 2 +MaxModificationIsoforms = 1024 +SearchModeType = "Full" +FragmentationTerminus = "Both" +SpecificProtease = "trypsin" +GeneratehUnlabeledProteinsForSilac = true +KeepNGlycopeptide = false +KeepOGlycopeptide = false +Protease = "trypsin" +MinPeptideLength = 4 +MaxPeptideLength = 60 +MaxModsForPeptide = 4 diff --git a/MetaMorpheus/Test/MetaDraw/MetaDrawSettingsAndViewsTest.cs b/MetaMorpheus/Test/MetaDraw/MetaDrawSettingsAndViewsTest.cs index b4cdb076f..35bd816e8 100644 --- a/MetaMorpheus/Test/MetaDraw/MetaDrawSettingsAndViewsTest.cs +++ b/MetaMorpheus/Test/MetaDraw/MetaDrawSettingsAndViewsTest.cs @@ -199,21 +199,24 @@ public static void TestOldMetaDrawSettingsFileDoesNotCrash() // Load in an outdated settings file and ensure no crashes occur string metaDrawSettingsPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "MetaDraw", @"105MetaDrawSettingsSaved.xml"); MetaDrawSettingsViewModel model = new MetaDrawSettingsViewModel(false); - MetaDrawSettingsViewModel.SettingsPath = metaDrawSettingsPath; + Assert.That(!model.HasDefaultSaved); + MetaDrawSettingsViewModel.SettingsPath = metaDrawSettingsPath; Assert.That(model.HasDefaultSaved); + model.LoadSettings(); // In this case 
(6/27/24), the product type, beta product type, and spectrum descriptors will fail - + // As of (7/11/24), new modifications were added and now the modification type will fail when loading into MetaDrawSettings + // This is okay and working as intended. + // check that failed settings loaded to default CollectionAssert.AreEqual(defaultColorValues, MetaDrawSettings.ProductTypeToColor.Values); CollectionAssert.AreEqual(defaultBetaColorValues, MetaDrawSettings.BetaProductTypeToColor.Values); CollectionAssert.AreEqual(defaultSpectrumDescriptionValues, MetaDrawSettings.SpectrumDescription.Values); + CollectionAssert.AreEqual(defaultModificationColorValues, MetaDrawSettings.ModificationTypeToColor.Values); // check successful settings loaded correctly, in this case they were set to aqua - Assert.That(defaultModificationColorValues.First(), Is.Not.EqualTo(MetaDrawSettings.ModificationTypeToColor.First().Value)); - Assert.That(MetaDrawSettings.ModificationTypeToColor.First().Value, Is.EqualTo(OxyColors.Aqua)); Assert.That(MetaDrawSettings.CoverageTypeToColor.Values.ElementAt(0), Is.EqualTo(OxyColors.Aqua)); Assert.That(MetaDrawSettings.CoverageTypeToColor.Values.ElementAt(0), Is.Not.EqualTo(defaultCoverageColors[0])); Assert.That(MetaDrawSettings.CoverageTypeToColor.Values.ElementAt(1), Is.EqualTo(OxyColors.Aqua)); diff --git a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs index 516b920d7..3c591066f 100644 --- a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs +++ b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs @@ -696,12 +696,13 @@ public static void MetaDraw_GlycoSearchTaskWithChildScansTest() string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"MetaDraw_GlycoSearchTaskTest"); string proteinDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\leukosialin.fasta"); string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\sliced_glyco_hcd_ethcd.raw"); - 
+ // run task CommonParameters commonParameters = new CommonParameters(dissociationType: DissociationType.HCD, ms2childScanDissociationType: DissociationType.EThcD); Directory.CreateDirectory(outputFolder); var glycoSearchTask = new GlycoSearchTask() { CommonParameters = commonParameters }; + glycoSearchTask._glycoSearchParameters.OxoniumIonFilt = false; //turn off the diagnostic filter, because the case we use have 272,294 oxonium ions but assigned the N1H1 to that. glycoSearchTask.RunTask(outputFolder, new List { new DbForTask(proteinDatabase, false) }, new List { spectraFile }, ""); var psmFile = Path.Combine(outputFolder, @"oglyco.psmtsv"); diff --git a/MetaMorpheus/Test/SpectralRecoveryTest.cs b/MetaMorpheus/Test/SpectralRecoveryTest.cs index 987fb8fc5..8380a4d38 100644 --- a/MetaMorpheus/Test/SpectralRecoveryTest.cs +++ b/MetaMorpheus/Test/SpectralRecoveryTest.cs @@ -14,6 +14,8 @@ using TaskLayer.MbrAnalysis; using Omics; using UsefulProteomicsDatabases; +using Nett; +using System.DirectoryServices; namespace Test { @@ -36,10 +38,15 @@ public void SpectralRecoveryTestSetup() { string input = 
"MPGGGPEMDDYMETLKDEEDALWENVECNRHMLSRYINPAKLTPYLRQCKVIDEQDEDEVLNAPMLPSKINRAGRLLDILHTKGQRGYVVFLESLEFYYPELYKLVTGKEPTRRFSTIVVEEGHEGLTHFLMNEVIKLQQQMKAKDLQRCELLARLRQLEDEKKQMTLTRVELLTFQERYYKMKEERDSYNDELVKVKDDNYNLAMRYAQLSEEKNMAVMRSRDLQLEIDQLKHRLNKMEEECKLERNQSLKLKNDIENRPKKEQVLELERENEMLKTKNQELQSIIQAGKRSLPDSDKAILDILEHDRKEALEDRQELVNRIYNLQEEARQAEELRDKYLEEKEDLELKCSTLGKDCEMYKHRMNTVMLQLEEVERERDQAFHSRDEAQTQYSQCLIEKDKYRKQIRELEEKNDEMRIEMVRREACIVNLESKLRRLSKDSNNLDQSLPRNLPVTIISQDFGDASPRTNGQEADDSSTSEESPEDSKYFLPYHPPQRRMNLKGIQLQRAKSPISLKRTSDFQAKGHEEEGTDASPSSCGSLPITNSFTKMQPPRSRSSIMSITAEPPGNDSIVRRYKEDAPHRSTVEEDNDSGGFDALDLDDDSHERYSFGPSSIHSSSSSHQSEGLDAYDLEQVNLMFRKFSLERPFRPSVTSVGHVRGPGPSVQHTTLNGDSLTSQLTLLGGNARGSFVHSVKPGSLAEKAGLREGHQLLLLEGCIRGERQSVPLDTCTKEEAHWTIQRCSGPVTLHYKVNHEGYRKLVKDMEDGLITSGDSFYIRLNLNISSQLDACTMSLKCDDVVHVRDTMYQDRHEWLCARVDPFTDHDLDMGTIPSYSRAQQLLLVKLQRLMHRGSREEVDGTHHTLRALRNTLQPEEALSTSDPRVSPRLSRASFLFGQLLQFVSRSENKYKRMNSNERVRIISGSPLGSLARSSLDATKLLTEKQEELDPESELGKNLSLIPYSLVRAFYCERRRPVLFTPTVLAKTLVQRLLNSGGAMEFTICKSDIVTRDEFLRRQKTETIIYSREKNPNAFECIAPANIEAVAAKNKHCLLEAGIGCTRDLIKSNIYPIVLFIRVCEKNIKRFRKLLPRPETEEEFLRVCRLKEKELEALPCLYATVEPDMWGSVEELLRVVKDKIG"; string reversed = new string(input.Reverse().ToArray()); - + outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSpectralRecoveryOutput"); + if (Directory.Exists(outputFolder)) //automatically clean up the output folder if it exists + { + Directory.Delete(outputFolder, true); + } + Directory.CreateDirectory(outputFolder); // This block of code converts from PsmFromTsv to SpectralMatch objects - + string psmtsvPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\AllPSMsTesting.psmtsv"); tsvPsms = PsmTsvReader.ReadTsv(psmtsvPath, out var warnings); psms = new List(); @@ -76,7 +83,6 @@ public void SpectralRecoveryTestSetup() proteinList.Add(protein); } - Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSpectralRecoveryOutput")); 
numSpectraPerFile = new Dictionary { { "K13_02ng_1min_frac1", new int[] { 8, 8 } }, { "K13_20ng_1min_frac1", new int[] { 8, 8 } } }; @@ -85,7 +91,7 @@ public void SpectralRecoveryTestSetup() Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\K13_20ng_1min_frac1.mzML") }; databaseList = new List() {new DbForTask( Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\HumanFastaSlice.fasta"), false) }; - outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSpectralRecoveryOutput"); + outputFolder = outputFolder; SearchTask searchTask = new SearchTask { @@ -269,6 +275,8 @@ public static void MiniClassicSearchEngineTest() } } + + [Test] public static void SpectralWriterTest() { @@ -308,7 +316,7 @@ public static void SpectralWriterTest() postSearchTask.Run(); - var path = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSpectralRecoveryOutput"); + var path = outputFolder; var list = Directory.GetFiles(path, "*.*", SearchOption.AllDirectories); string matchingvalue = list.Where(p => p.Contains("SpectralLibrary")).First().ToString(); var testLibraryWithoutDecoy = new SpectralLibrary(new List { Path.Combine(path, matchingvalue) }); @@ -370,6 +378,7 @@ public static void SpectralWriterTest() testLibraryWithoutDecoy.CloseConnections(); updatedLibraryWithoutDecoy.CloseConnections(); + } [Test] @@ -392,8 +401,7 @@ public static void SpectralRecoveryHeaderTest() [OneTimeTearDown] public static void SpectralRecoveryTeardown() { - string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSpectralRecoveryOutput"); - Directory.Delete(filePath, true); + Directory.Delete(outputFolder, true); } } } \ No newline at end of file diff --git a/MetaMorpheus/Test/Test.csproj b/MetaMorpheus/Test/Test.csproj index c50be0e8b..2725c3e1c 100644 --- a/MetaMorpheus/Test/Test.csproj +++ b/MetaMorpheus/Test/Test.csproj @@ -99,6 +99,12 @@ Always + + Always + + + Always + 
Always @@ -111,6 +117,9 @@ Always + + Always + PreserveNewest diff --git a/MetaMorpheus/Test/TestNGlyco.cs b/MetaMorpheus/Test/TestNGlyco.cs index 24e3c6612..9bff1f531 100644 --- a/MetaMorpheus/Test/TestNGlyco.cs +++ b/MetaMorpheus/Test/TestNGlyco.cs @@ -83,7 +83,7 @@ public static void TestNGlycoPsmsHeader() [Test] public static void GlyTest_GetKindString() { - byte[] kind = new byte[] {3, 4, 0, 0, 1, 0, 0, 0, 0, 0 }; + byte[] kind = new byte[] {3, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0 }; string kindString = Glycan.GetKindString(kind); Assert.AreEqual("H3N4F1", kindString); } @@ -291,19 +291,28 @@ public static void GlyTest_BinarySearch() [Test] public static void GlyTest_NGlycanCompositionFragments() { - var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)"); + var testKind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); + + var ions_NotFucExtended = GlycanDatabase.NGlycanCompositionFragments(testKind); + + var ions_fucExtended = GlycanDatabase.NGlycanCompositionFragments(testKind, true); - var ions = GlycanDatabase.NGlycanCompositionFragments(kind); + Assert.That(ions_fucExtended.Count >= ions_NotFucExtended.Count); + Assert.That(ions_NotFucExtended.Count == 35); + Assert.That(ions_fucExtended.Count == 43); + + + var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)"); Glycan glycan = Glycan.Struct2Glycan("(N(F)(N(H(H)(H(N(F)(H(A)))))))", 0); - var ionMass = ions.Select(p => p.IonMass).ToList(); + var ionMass = ions_NotFucExtended.Select(p => p.IonMass).ToList(); var glycanIonmass = glycan.Ions.Select(p => p.IonMass).ToList(); var overlap = glycanIonmass.Intersect(ionMass).Count(); - Assert.That(overlap == 13); + Assert.That(overlap == 15); } } diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index fdfbb6f32..7d8ded90a 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -20,6 +20,12 @@ using Readers; using System.Text; using Omics.Modifications; +using 
ThermoFisher.CommonCore.BackgroundSubtraction; +using Easy.Common.Extensions; +using iText.IO.Font.Otf; +using static Nett.TomlObjectFactory; +using Omics.SpectrumMatch; +using TopDownProteomics; namespace Test { @@ -43,6 +49,84 @@ public static void OGlycoTest_LoadGlycanBox() Assert.AreEqual(OGlycanBoxes.Count(), 454); } + [Test] + public static void OGlycanTest_GetGlycanBox_Decoy() + { + GlycanBox[] OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(3).ToArray(); + Assert.That(OGlycanBoxes.All(p => p.TargetDecoy = true)); + + GlycanBox[] OGlycanBoxes_withDecoys = GlycanBox.BuildOGlycanBoxes(3, true).ToArray(); + var group_target = OGlycanBoxes_withDecoys.GroupBy(p => p.TargetDecoy == true); + var group_decoy = OGlycanBoxes_withDecoys.GroupBy(p => p.TargetDecoy == false); + Assert.That(group_target.Count() == group_decoy.Count()); + + } + + [Test] + public static void GlycoTest_WritingSummary() // In order to test the writing function for different search types (e.g. O-search, N-search, N-O search), make sure we have the corresponding search result file.
+ { + string outputFolder_NSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder_NSearch); + + var glycoSearchTask_NSearch = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSearchTaskconfigNGlycoTest_Run.toml"), MetaMorpheusTask.tomlConfig); + + DbForTask db = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P16150.fasta"), false); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\2019_09_16_StcEmix_35trig_EThcD25_rep1_9906.mgf"); + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask_NSearch) }, new List { spectraFile }, new List { db }, outputFolder_NSearch).Run(); + + Directory.Delete(outputFolder_NSearch, true); + + + string outputFolder_NOSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder_NOSearch); + + var glycoSearchTask_NOSearch = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSearchTaskconfigN_OGlycoTest_Run.toml"), MetaMorpheusTask.tomlConfig); + + string spectraFile_NOSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\2019_09_16_StcEmix_35trig_EThcD25_rep1_9906.mgf"); + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask_NOSearch) }, new List { spectraFile_NOSearch }, new List { db }, outputFolder_NOSearch).Run(); + + Directory.Delete(outputFolder_NOSearch, true); + } + + + [Test] + public static void OGlycanTest_IsobaricCase() + { + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder); + + var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\Glyco_Isobaric_testing.toml"), MetaMorpheusTask.tomlConfig); + 
glycoSearchTask._glycoSearchParameters.DoParsimony = false; + glycoSearchTask._glycoSearchParameters.DoQuantification = true; + glycoSearchTask._glycoSearchParameters.OxoniumIonFilt = false; //turn off the diagnostic filter + + DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); + DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); + + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); + + string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + var glycanLevel_filterOFF = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and build the objects + .Where(p => p.Ms2ScanNumber == 161 && p.BaseSeq == "HTSVQTTSSGSGPFTDVR").ToList()[0].GlycanLocalizationLevel; + + + Assert.That(glycanLevel_filterOFF != EngineLayer.GlycoSearch.LocalizationLevel.Level1 && glycanLevel_filterOFF != EngineLayer.GlycoSearch.LocalizationLevel.Level1b); + Directory.Delete(outputFolder, true); + + glycoSearchTask._glycoSearchParameters.OxoniumIonFilt = true; + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); + var glycanLevel_filterON = PsmTsvReader.ReadTsv(oGlycoPath, out var error) //load the PSMs data from the "csv file" and build the objects + .Where(p => p.Ms2ScanNumber == 161 && p.BaseSeq == "HTSVQTTSSGSGPFTDVR").ToList()[0].GlycanLocalizationLevel; + + + Assert.That(glycanLevel_filterON == EngineLayer.GlycoSearch.LocalizationLevel.Level1); +
Directory.Delete(outputFolder, true); + + } + + + [Test] public static void GlycoSpectralHeader() { @@ -92,7 +176,10 @@ public static void OGlycoTest_GetK() [Test] public static void OGlycoTest_OGlycanChildIons() { - var glycan = GlycanBox.GlobalOGlycans[5]; + // Reload the glycan database to test the child ions. + GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => p.Contains("OGlycan.gdb")).First(), true, true).ToArray(); + + var glycan = GlycanBox.GlobalOGlycans[5]; // we use the glycan (N(H)(N(H))) Assert.That(glycan.Ions.Count == 5); @@ -104,6 +191,19 @@ public static void OGlycoTest_OGlycanChildIons() var coreIons = GlycanDatabase.OGlycanCompositionFragments(kind); Assert.That(coreIons.Count() == 6); + + //The following code exercises glycans with a complex structure, only to satisfy code coverage. + + var testKind = GlycanDatabase.String2Kind("HexNAc(2)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); + + var testGlycanIons = GlycanDatabase.OGlycanCompositionFragments(testKind); + + + var testKind_smallGlycan = GlycanDatabase.String2Kind("HexNAc(1)"); + + var testGlycanIons_smallGlycan = GlycanDatabase.OGlycanCompositionFragments(testKind_smallGlycan); + + } [Test] @@ -125,6 +225,21 @@ public static void OGlycoTest_StcE() Assert.That(alphaPeptide.Length == 8); Assert.That(alphaPeptide.First().BaseSequence == "MPLFKNTSV"); } + [Test] + public static void OGlycanTest_Kdn() + { + var kind = new byte[] { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2 }; + double mass = Glycan.GetMass(kind) / 1E5; + string name = Glycan.GetKindString(kind); + Assert.AreEqual(name, "H1N1K2"); + Assert.AreEqual(mass, 865.27013); + + string kdnGlycan = "HexNAc(2)Hex(2)Kdn(1)"; + string kdnGlycan2 = "N(H)H(N)K"; + var expectedKind = new byte[] { 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + Assert.AreEqual(GlycanDatabase.String2Kind(kdnGlycan), expectedKind); + Assert.AreEqual(Glycan.GetKind(kdnGlycan2), expectedKind); + } [Test] public static void GlycoTest_MotifExist() @@ -134,8
+249,8 @@ public static void GlycoTest_MotifExist() Assert.That(exist); } - [Test] - public static void OxoniumIonAnalysis() + [Test] // In this test, there are 272 and 294 oxonium ions in the scan, but the glycanBox doesn't contain these HexNAc ions. + public static void DiagonsticFilter() { Assert.That(Glycan.AllOxoniumIons[4] == 13805550); Assert.That(Glycan.AllOxoniumIons[5] == 14406607); @@ -156,8 +271,8 @@ public static void OxoniumIonAnalysis() //Get glycanBox var glycanBox = OGlycanBoxes[19]; - var satifyOxonium = GlycoPeptides.OxoniumIonsAnalysis(oxoniumIonIntensities, glycanBox); - Assert.That(satifyOxonium); + var satifyOxonium = GlycoPeptides.DiagonsticFilter(oxoniumIonIntensities, glycanBox); + Assert.That(!satifyOxonium); } @@ -192,13 +307,12 @@ public static void OGlycoTest_FragmentIons2() { //Get glycanBox var glycanBox = OGlycanBoxes[24]; - Protein protein = new Protein("TVYLGASK", ""); var peptide = protein.Digest(new DigestionParams(), new List(), new List()).First(); List modPos = new List { 2, 8 }; - var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(modPos.ToArray(), peptide, OGlycanBoxes[24]); + var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(modPos.ToArray(), peptide, glycanBox); Assert.That(peptideWithMod.FullSequence == "T[O-Glycosylation:H1N1 on X]VYLGAS[O-Glycosylation:H1N1A1 on X]K"); var fragments_etd = GlycoPeptides.OGlyGetTheoreticalFragments(DissociationType.ETD, new List(), peptide, peptideWithMod); @@ -438,7 +552,7 @@ public static void OGlycoTest_Run3() Directory.CreateDirectory(Path.Combine(Environment.CurrentDirectory, @"TESTGlycoData")); var task2 = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSearchTaskconfig_ETD_Run3.toml"), MetaMorpheusTask.tomlConfig); - task2._glycoSearchParameters.OxoniumIonFilt = true; + task2._glycoSearchParameters.OxoniumIonFilt = true; //turn on the diagnostic filter new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { 
("Task", task2) }, new List { spectraFile }, new List { db }, Path.Combine(Environment.CurrentDirectory, @"TESTGlycoData")).Run(); var resultsExist = File.Exists(Path.Combine(Environment.CurrentDirectory, @"TESTGlycoData\Task\oglyco.psmtsv")); Assert.That(!resultsExist); @@ -465,6 +579,7 @@ public static void OGlycoTest_Run4() Directory.Delete(outputFolder, true); } + [Test] public static void OGlycoTest_Run5() { @@ -477,11 +592,254 @@ public static void OGlycoTest_Run5() DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); - new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile }, + new List { targetDbForTask, contaminDbForTask }, + outputFolder).Run(); + + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs + var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = 
int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01).ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) && qVaule < 0.01); + + //For GlycoPSMs + string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects + .Where(p => p.QValue <= 0.01).ToList(); // the filtering (Q<0.01) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + 
Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + Directory.Delete(outputFolder, true); } + [Test] + public static void OGlycoTest_Run5_WriteContaminants() + { + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder); + + var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); + glycoSearchTask._glycoSearchParameters.WriteContaminants = true; // write contaminants to the output folder + + DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); + DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile }, + new List { targetDbForTask, contaminDbForTask }, + outputFolder).Run(); + + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to 
find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs + var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => 
double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" + && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); + + //For GlycoPSMs + string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<0.01) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + + + Directory.Delete(outputFolder, true); + } + + [Test] + public static void OGlycoTest_Run5_WriteDecoys() // Test writing decoys, and make sure we can filter the decoys PSMs + { + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder); + + var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); + glycoSearchTask._glycoSearchParameters.WriteContaminants = true; + glycoSearchTask._glycoSearchParameters.WriteDecoys = true; + glycoSearchTask._glycoSearchParameters.DecoyType = DecoyType.Reverse; + 
glycoSearchTask.CommonParameters = new CommonParameters(dissociationType: DissociationType.HCD, trimMsMsPeaks: false, + precursorMassTolerance: new PpmTolerance(6), productMassTolerance: new PpmTolerance(10), qValueThreshold: 1, + pepQValueThreshold: 1, scoreCutoff: 1); + + DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); + DbForTask dbContaminant = new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P02649.fasta"), true); + + List copiedSpectraFiles = new(); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); // in order to get enough PSMs to test the filtering, we copy the spectra file 19 times and then get one decoy PSM in the filtered PSMs (FDR <= 0.01, 1 decoy out of 200 targets) + for(int i = 0; i < 19 ; i++) + { + var copyPath = Path.Combine(TestContext.CurrentContext.TestDirectory, $@"GlycoTestData\Copy{i}GlycoPepMix_snip.mzML"); + if (!File.Exists(copyPath)) + File.Copy(spectraFile, copyPath); + copiedSpectraFiles.Add(copyPath); + } + + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile, copiedSpectraFiles}, + new List { targetDbForTask, dbContaminant}, + outputFolder).Run(); + + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs +
var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" //filter the contaminants + && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); // filter the decoys + + //For GlycoPSMs + 
string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and build the objects + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<=0.01, no decoys and no contaminants) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + + + copiedSpectraFiles.ForEach(p => File.Delete(p)); + Directory.Delete(outputFolder, true); + } + [Test] public static void OGlycoTest_Run6() { @@ -1180,7 +1538,7 @@ public static void GlycoQuantWithNoExperimentalDesignFileTest() CollectionAssert.AreEquivalent(expectedIndividualFileOutput, individualOutputs); string[] allProteinGroups = File.ReadAllLines(Path.Combine(outputFolderWithTask, "AllQuantifiedProteins.tsv")); - string[] proteinGroupFields = allProteinGroups[1].Split('\t'); + string[] proteinGroupFields = allProteinGroups[2].Split('\t'); Assert.AreEqual("Q9GZM5", proteinGroupFields[0]); @@ -1264,9 +1622,11 @@ public static void TestExperimentalDesignError() Directory.Delete(outputFolder, true); } [Test] - [TestCase(false, 2, 1, 1)] - [TestCase(true, 2, 3, 1)] - [TestCase(true, 2, 3, 2)] + [TestCase(false, 2, 1, 1)] // pre output: 1 intensity column, post output: 2 intensity column + [TestCase(true, 2, 3, 1)] // pre output: 1 intensity column, post
output: 6 intensity column + [TestCase(true, 2, 3, 2)] // pre output: 1 intensity column, post output: 12 intensity column + + public static void TestGlycoProteinQuantFileHeaders(bool hasDefinedExperimentalDesign, int bioreps, int fractions, int techreps) { string condition = hasDefinedExperimentalDesign ? "TestCondition" : ""; @@ -1316,8 +1676,8 @@ public static void TestGlycoProteinQuantFileHeaders(bool hasDefinedExperimentalD List splitHeader = lines[0].Split(new char[] { '\t' }).ToList(); List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList(); - Assert.That(intensityColumnHeaders.Count == 1); - + Assert.That(intensityColumnHeaders.Count == bioreps* fractions* techreps); // We changed the search function to allow getting the PSMs from the duplicate file. + // E.g. with 2 bioreps, 3 fractions and 1 techrep, we get 6 intensity columns Directory.Delete(outputFolder, true); } [Test]