diff --git a/MetaMorpheus/EngineLayer/EngineLayer.csproj b/MetaMorpheus/EngineLayer/EngineLayer.csproj
index 5b15cc792..d59bb2d6a 100644
--- a/MetaMorpheus/EngineLayer/EngineLayer.csproj
+++ b/MetaMorpheus/EngineLayer/EngineLayer.csproj
@@ -62,6 +62,9 @@
PreserveNewest
+
+ Always
+
PreserveNewest
diff --git a/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb b/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb
new file mode 100644
index 000000000..427e960d1
--- /dev/null
+++ b/MetaMorpheus/EngineLayer/Glycan_Mods/OGlycan/OGlycan_withIsobaric.gdb
@@ -0,0 +1,13 @@
+(N)
+(N(H))
+(N(A))
+(N(H)(N))
+(N(H(A)))
+(N(N(K)))
+(N(H)(N(H)))
+(N(H(A))(N))
+(N(H(A))(A))
+(N(H(A))(N(H)))
+(N(H)(N(H(A))(F)))
+(N(H(A))(N(H(A))))
+(N(H(A))(N(H(A))(F)))
\ No newline at end of file
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs
index 31843be86..d7d20b5db 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs
@@ -4,6 +4,7 @@
namespace EngineLayer.GlycoSearch
{
+ //the class is for localization graph matrix. Each node in the matrix is represented by AdjNode.
public class AdjNode
{
//AdjNode -> Adjactent node is used to build graph matrix for localizaiton. Each node in graph matrix contain Sources, max cost, current cost, etc.
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs
index 0ce4f1361..21b0654e9 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs
@@ -13,14 +13,14 @@ public class GlycanIon
{
public GlycanIon(string ionStruct, int ionMass, byte[] ionKind, int lossIonMass)
{
- IonStruct = ionStruct;
+ IonStruct = ionStruct; // Always set null, deprecated.
IonMass = ionMass;
IonKind = ionKind;
- LossIonMass = lossIonMass;
+ LossIonMass = lossIonMass; // Neutral loss mass = Glycan.Mass - IonMass
}
public string IonStruct { get; set; }
public int IonMass { get; set; }
- public int LossIonMass { get; set; }//Glycan.Mass - IonMass
+ public int LossIonMass { get; set; }
public byte[] IonKind { get; set; }
}
@@ -41,13 +41,13 @@ public Glycan(byte[] kind)
Mass = GetMass(kind);
}
- public int GlyId { get; set; }
- public string Struc { get; private set; }
+ public int GlyId { get; set; } // Glycan ID, which is the index of glycan in the glycan database.
+ public string Struc { get; private set; } // Glycan structure string represented the glycan structure and linkage. Ex. (N(H(A))(N(H(A))(F)))
public int Mass { get; private set; }
- //Glycans are composed of several different types of mono saccharides. In Kind, each number correspond to one type of mono saccharide in the same order as Glycan.CharMassDic.
- public byte[] Kind { get; private set; }
- public string Composition
+
+ public byte[] Kind { get; private set; } // Glycans are composed of several types of mono suagr. In Kind, each number correspond to one type (corresponded order as Glycan.CharMassDic).
+ public string Composition // Glycan composition string. Ex. H2N2A2F1.
{
get
{
@@ -57,18 +57,18 @@ public string Composition
public List Ions { get; set; }
public bool Decoy { get; private set; }
- public HashSet DiagnosticIons
+ public HashSet DiagnosticIons // B ions (the sugar fragment dropped from the glycopeptide), used for the N-glycan. There are more ions to set...
{
get
- {
+ {
HashSet diagnosticIons = new HashSet();
- if (Kind[0] >= 1)
+ if (Kind[0] >= 1) //if we have Hexose(the number more than one), then we have the corresponding diagonsitic ions as below.
{
diagnosticIons.Add(10902895 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(11503951 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(16306064 - hydrogenAtomMonoisotopicMass);
}
- if (Kind[1] >= 1)
+ if (Kind[1] >= 1) // if we have HexNAc(the number more than one), then we have the corresponding diagonsitic ions as below.
{
diagnosticIons.Add(12605550 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(13805550 - hydrogenAtomMonoisotopicMass);
@@ -77,16 +77,16 @@ public HashSet DiagnosticIons
diagnosticIons.Add(18607663 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(20408720 - hydrogenAtomMonoisotopicMass);
}
- if (Kind[1] >= 1 && Kind[0] >= 1)
+ if (Kind[1] >= 1 && Kind[0] >= 1) // if we have HexNAc and Hexose, then we have the corresponding diagonsitic ions as below.
{
diagnosticIons.Add(36614002 - hydrogenAtomMonoisotopicMass);
}
- if (Kind[2] >= 1)
+ if (Kind[2] >= 1) //If we have NeuNAc, then we have the corresponding diagonsitic ions as below.
{
diagnosticIons.Add(27409268 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(29210324 - hydrogenAtomMonoisotopicMass);
}
- if (Kind[3] >= 1)
+ if (Kind[3] >= 1) //If we have NeuNGc, then we have the corresponding diagonsitic ions as below.
{
diagnosticIons.Add(29008759 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(30809816 - hydrogenAtomMonoisotopicMass);
@@ -105,7 +105,6 @@ public HashSet DiagnosticIons
//H: C6O5H10 Hexose, N: C8O5NH13 HexNAc, A: C11O8NH17 Neu5Ac, G: C11H17NO9 Neu5Gc, F: C6O4H10 Fucose,
//P: PO3H Phosphate, S: SO3H Sulfo, Y: Na Sodium, C:Acetyl for Neu5Ac
//X: C5H10O5 Xylose
- //If add more monosacchrades here, please change GetMass, GetKind, GetKindString, GlycanBox constructor, search byte[].
private readonly static Dictionary CharMassDic = new Dictionary {
{ 'H', 16205282 },
{ 'N', 20307937 },
@@ -117,9 +116,10 @@ public HashSet DiagnosticIons
{ 'Y', 2298977 },
{ 'C', 4201056 },
{ 'X', 15005282 },
+ { 'K', 25006897 },
};
- //Compitable with Byonic, for loading glycan by Kind.
+ // Maps each monosaccharide name to its one-letter code and its index in the Kind array.
public readonly static Dictionary> NameCharDic = new Dictionary>
{
{"Hex", new Tuple('H', 0) },
@@ -131,16 +131,19 @@ public HashSet DiagnosticIons
{"Sulfo", new Tuple('S', 6) },
{"Na", new Tuple('Y', 7) },
{"Ac", new Tuple('C', 8) },
- {"Xylose", new Tuple('X', 9) }
+ {"Xylose", new Tuple('X', 9) },
+ {"Kdn", new Tuple('K',10)}
};
- public readonly static HashSet CommonOxoniumIons = new HashSet
+ //The same ions as described above in the diagnostic ions. They are only for the initial filtering of glycopeptide peaks. Not used now.
+ public readonly static HashSet CommonOxoniumIons = new HashSet
{13805550, 16806607, 18607663, 20408720, 36614002 };
- public readonly static int[] AllOxoniumIons = new int[]
+ //The same ions as described above in the diagnostic ions. Used for building the oxoniumIntensity list.
+ public readonly static int[] AllOxoniumIons = new int[]
{10902895, 11503951, 12605550, 12703952, 13805550, 14406607, 16306064, 16806607, 18607663, 20408720, 27409268, 29008759, 29210324, 30809816, 36614002, 65723544, 67323035};
- //TrimannosylCore is only useful for N-Glyco peptides.
+ //TrimannosylCore. Only useful for N-Glyco peptides.
public readonly static Dictionary TrimannosylCores = new Dictionary
{
//Each of the mass represent as a N-Glycan core.
@@ -160,21 +163,30 @@ public HashSet DiagnosticIons
#region Glycan Structure manipulation
- //There are two ways to represent a glycan in string, one only combination, the other structure.
- //The method generate a glycan by read in a glycan structure string from database.
+ //There are two ways to represent a glycan in string
+ //Composition: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1),
+ //Struct(Linkage): (N(H(A))(N(H(A))(F)))
+
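+ /// <summary>
+ /// Only for Gdb. This method generates a glycan object by reading the glycan structure string from the database.
+ /// </summary>
+ /// <param name="theGlycanStruct">Structure string, ex. (N(H(A))(N(H(A))(F)))</param>
+ /// <param name="id">Value assigned to the GlyId of the returned glycan.</param>
+ /// <param name="isOglycan">When false, an additional cross-ring fragment ion is added to the ion list.</param>
+ /// <returns>Glycan object</returns>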
public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglycan = false)
{
- Node node = Struct2Node(theGlycanStruct);
- List nodeIons = GetAllChildrenCombination(node);
- int mass = Glycan.GetMass(theGlycanStruct);
- byte[] kind = Glycan.GetKind(theGlycanStruct);
+ Node node = Struct2Node(theGlycanStruct); // String to tree structure.
+ List nodeIons = GetAllChildrenCombination(node); // Get all possible fragmentation & neutralLoss of a glycan.
+ int mass = Glycan.GetMass(theGlycanStruct); // Get glycan mass.
+ byte[] kind = Glycan.GetKind(theGlycanStruct); // Get glycan composition array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1].
List glycanIons = new List();
HashSet ionMasses = new HashSet();
foreach (var aNodeIon in nodeIons)
{
- var ionMass = Glycan.GetMass(Node2Struct(aNodeIon));
- if (!ionMasses.Contains(ionMass) && ionMass != mass)
- {
+ var ionMass = Glycan.GetMass(Node2Struct(aNodeIon)); // Get the ionMass
+ if (!ionMasses.Contains(ionMass) && ionMass != mass) // Avoid duplicate ions with the same mass. Ex. N(H)N and N(N(H)) have the same ionMass.
+ { // We also avoid the ionMass equals to the glycan mass. Because we won't assume the whole glycan is a fragment ion.
ionMasses.Add(ionMass);
var ionKind = Glycan.GetKind(Node2Struct(aNodeIon));
var lossIonMass = GetIonLossMass(kind, ionKind);
@@ -184,34 +196,39 @@ public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglyca
}
if (!isOglycan)
{
- glycanIons.Add(new GlycanIon(null, 8303819, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, mass - 8303819)); //Cross-ring mass
+ glycanIons.Add(new GlycanIon(null, 8303819, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, mass - 8303819)); //Cross-ring mass
}
- glycanIons.Add(new GlycanIon(null, 0, kind, mass));
+ glycanIons.Add(new GlycanIon(null, 0, kind, mass)); //That is Y0 ion. The whole glycan dropped from the glycopeptide. Like a netural loss.
Glycan glycan = new Glycan(theGlycanStruct, mass, kind, glycanIons.OrderBy(p => p.IonMass).ToList(), false);
glycan.GlyId = id;
return glycan;
}
- //Glycan are represented in tree structures composed of Node. The function here is to transfer a string into connected Node.
+
+ /// <summary>
+ /// Convert the glycan structure string to tree format.
+ /// </summary>
+ /// <param name="theGlycanStruct">Linkage information, ex. (N(H))</param>
+ /// <returns>Glycan tree node, ex. current node = Node(N, 0), left child = Node(H, 1)</returns>
public static Node Struct2Node(string theGlycanStruct)
{
int level = 0;
- Node curr = new Node(theGlycanStruct[1], level);
- for (int i = 2; i < theGlycanStruct.Length - 1; i++)
+ Node curr = new Node(theGlycanStruct[1], level); // The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root.
+ for (int i = 2; i < theGlycanStruct.Length - 1; i++) // Try to extract the following characters.
{
- if (theGlycanStruct[i] == '(')
+ if (theGlycanStruct[i] == '(') // Skip the '(' character.
{
continue;
}
- if (theGlycanStruct[i] == ')')
+ if (theGlycanStruct[i] == ')') // When we meet a ')', we need to go back to the parent node.
{
curr = curr.Father;
level--;
}
- else
+ else // While meeting a character, we need to decide where to put it in the tree. (putting priority: left -> right side -> middle)
{
- level++;
+ level++; // Move to the level.(Deeper/Child level)
if (curr.LeftChild == null)
{
curr.LeftChild = new Node(theGlycanStruct[i], level);
@@ -233,11 +250,16 @@ public static Node Struct2Node(string theGlycanStruct)
}
}
}
- return curr;
+ return curr;
+
}
- //The function is to generate all possible fragmentation/neutral loss of a glycan, which is a subset of glycan.
- //Node is tree structured glycan. subset of glycans are also represented by Node.
+
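+ /// <summary>
+ /// Generate all possible fragments (subsets) of a glycan. The fragments are also represented by a Node.
+ /// </summary>
+ /// <param name="node">The tree-structured glycan.</param>
+ /// <returns>All combinations of the glycan fragments, each represented by a Node</returns>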
private static List GetAllChildrenCombination(Node node)
{
List nodes = new List();
@@ -364,6 +386,7 @@ private static List GetAllChildrenCombination(Node node)
}
//Node structure to string structure.
+ // input: Node(N, 0) -> left Child = Node(H, 1), output: (N(H))
private static string Node2Struct(Node node)
{
string output = "";
@@ -374,7 +397,12 @@ private static string Node2Struct(Node node)
return output;
}
- //kind are compositions of glycan. The function here is to generate mass difference of two glycan.
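+ /// <summary>
+ /// Calculate the mass difference between two glycan kinds.
+ /// </summary>
+ /// <param name="Kind">Composition of the glycan</param>
+ /// <param name="ionKind">Composition of the glycanIon</param>
+ /// <returns>Mass difference between the glycan and its glycanIon</returns>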
public static int GetIonLossMass(byte[] Kind, byte[] ionKind)
{
byte[] lossKind = new byte[Kind.Length];
@@ -388,8 +416,12 @@ public static int GetIonLossMass(byte[] Kind, byte[] ionKind)
#endregion
#region Transfer information
-
- private static int GetMass(string structure)
+ /// <summary>
+ /// Get glycan mass by glycan structure string
+ /// </summary>
+ /// <param name="structure">ex. (N(H(A))(N(H(A))(F)))</param>
+ /// <returns>The glycan mass</returns>
+ private static int GetMass(string structure)
{
int y = CharMassDic['H'] * structure.Count(p => p == 'H') +
CharMassDic['N'] * structure.Count(p => p == 'N') +
@@ -400,12 +432,18 @@ private static int GetMass(string structure)
CharMassDic['S'] * structure.Count(p => p == 'S') +
CharMassDic['Y'] * structure.Count(p => p == 'Y') +
CharMassDic['C'] * structure.Count(p => p == 'C') +
- CharMassDic['X'] * structure.Count(p => p == 'X')
+ CharMassDic['X'] * structure.Count(p => p == 'X') +
+ CharMassDic['K'] * structure.Count(p => p == 'K')
;
return y;
}
- public static int GetMass(byte[] kind)
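+ /// <summary>
+ /// Get glycan mass by glycan composition
+ /// </summary>
+ /// <param name="kind">ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0] (one entry per monosaccharide type, 11 in total)</param>
+ /// <returns>The glycan mass</returns>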
+ public static int GetMass(byte[] kind)
{
int mass = CharMassDic['H'] * kind[0] +
CharMassDic['N'] * kind[1] +
@@ -416,13 +454,20 @@ public static int GetMass(byte[] kind)
CharMassDic['S'] * kind[6] +
CharMassDic['Y'] * kind[7] +
CharMassDic['C'] * kind[8] +
- CharMassDic['X'] * kind[9]
+ CharMassDic['X'] * kind[9] +
+ CharMassDic['K'] * kind[10]
;
return mass;
}
- public static byte[] GetKind(string structure)
+
+ /// <summary>
+ /// Get glycan composition by the structure string
+ /// </summary>
+ /// <param name="structure">Structure format: (N(H(A))(N(H(A))(F)))</param>
+ /// <returns>The kind array, ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0].</returns>
+ public static byte[] GetKind(string structure)
{
var kind = new byte[]
{ Convert.ToByte(structure.Count(p => p == 'H')),
@@ -435,10 +480,17 @@ public static byte[] GetKind(string structure)
Convert.ToByte(structure.Count(p => p == 'Y')),
Convert.ToByte(structure.Count(p => p == 'C')),
Convert.ToByte(structure.Count(p => p == 'X')),
+ Convert.ToByte(structure.Count(p => p == 'K'))
};
return kind;
}
+
+ /// <summary>
+ /// Get glycan composition text from the glycan kind[].
+ /// </summary>
+ /// <param name="Kind">ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0]</param>
+ /// <returns>The composition text, ex. H2N2A2F1</returns>
public static string GetKindString(byte[] Kind)
{
string H = Kind[0]==0 ? "" : "H" + Kind[0].ToString();
@@ -451,7 +503,8 @@ public static string GetKindString(byte[] Kind)
string Y = Kind[7] == 0 ? "" : "Y" + Kind[7].ToString();
string C = Kind[8] == 0 ? "" : "C" + Kind[8].ToString();
string X = Kind[9] == 0 ? "" : "X" + Kind[9].ToString();
- string kindString = H + N + A + G + F + P + S + Y + C + X;
+ string K = Kind[10] == 0 ? "" : "K" + Kind[10].ToString();
+ string kindString = H + N + A + G + F + P + S + Y + C + X + K;
return kindString;
}
@@ -459,6 +512,12 @@ public static string GetKindString(byte[] Kind)
//TO THINK: Is it reasonable to transfer Glycan to Modification the first time Glycan is read in? Which could save time.
//Use glycan index and modification index to reduce space.
+
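+ /// <summary>
+ /// Convert an N-glycan object into a modification object.
+ /// </summary>
+ /// <param name="glycan">The N-glycan to convert.</param>
+ /// <returns>The modification object</returns>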
public static Modification NGlycanToModification(Glycan glycan)
{
Dictionary> neutralLosses = new Dictionary>();
@@ -488,7 +547,12 @@ public static Modification NGlycanToModification(Glycan glycan)
return modification;
}
- public static Modification OGlycanToModification(Glycan glycan)
+ /// <summary>
+ /// Convert an O-glycan object into a modification object.
+ /// </summary>
+ /// <param name="glycan">The O-glycan to convert.</param>
+ /// <returns>The modification object</returns>
+ public static Modification OGlycanToModification(Glycan glycan) //try to transfer the glycan object to modification object.
{
//TO THINK: what the neutralLoss for O-Glyco?
Dictionary> neutralLosses = new Dictionary>();
@@ -522,9 +586,10 @@ public static Modification OGlycanToModification(Glycan glycan)
#region Combination or Permutation functions not directly related to glycan, use carefully these function don't deal duplicate elements.
+
public static IEnumerable> GetKCombs(IEnumerable list, int length) where T : IComparable
{
- if (length == 1) return list.Select(t => new T[] { t });
+ if (length == 1) return list.Select(t => new T[] { t }); // Return the list of the single element.
return GetKCombs(list, length - 1).SelectMany(t => list.Where(o => o.CompareTo(t.Last()) > 0), (t1, t2) => t1.Concat(new T[] { t2 }));
}
@@ -538,7 +603,7 @@ public static IEnumerable> GetPermutations(IEnumerable list
{
if (length == 1)
{
- return list.Select(t => new T[] { t });
+ return list.Select(t => new T[] { t });
}
return GetPermutations(list, length - 1).SelectMany(t => list.Where(o => !t.Contains(o)), (t1, t2) => t1.Concat(new T[] { t2 }));
}
@@ -553,6 +618,12 @@ public static IEnumerable> GetPermutationsWithRept(IEnumerable
#region Functions are not used now, could be useful in the future.
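+ /// <summary>
+ /// Test the equality of two glycan objects: both the glycan mass and the glycan ions should be identical.
+ /// </summary>
+ /// <param name="glycan1">First glycan to compare.</param>
+ /// <param name="glycan2">Second glycan to compare.</param>
+ /// <returns></returns>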
public static bool Equals(Glycan glycan1, Glycan glycan2)
{
if (glycan1.Mass == glycan2.Mass)
@@ -573,7 +644,7 @@ public static bool Equals(Glycan glycan1, Glycan glycan2)
return false;
}
- public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans)
+ public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans) //Build target-decoy glycans for testing.
{
List allGlycans = new List();
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs
index 9a1d0f5d2..b00776f18 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs
@@ -9,20 +9,25 @@
namespace EngineLayer
{
- //One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization.
- //GlycanBox -- A defined combination of glycans will be considered to modify on one peptide.
- //The GlycanBoxMass is the total mass of all glycans on the peptide
+
+ /// <summary>
+ /// A defined combination of glycans to modify on one peptide. Ex. if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans (glycanBox: [g1,g2,g3]).
+ /// </summary>
public class GlycanBox:ModBox
{
- public static Glycan[] GlobalOGlycans { get; set; }
+ public static Glycan[] GlobalOGlycans { get; set; } // The glycan list in the database file
+
+ public GlycanBox[] ChildGlycanBoxes { get; set; } // all possible glycan combinations in the glycanBox
public static Modification[] GlobalOGlycanModifications { get; set; }
- public static GlycanBox[] OGlycanBoxes { get; set; }
+ public static GlycanBox[] OGlycanBoxes { get; set; } // all possible glycan boxes
+
+ public byte[] Kind { get; private set; }
//TO DO: Decoy O-glycan can be created, but the results need to be reasoned.
//public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 };
- private readonly static int[] SugarShift = new int[]
+ private readonly static int[] SugarShift = new int[] //still unclear about the shift...
{
7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050,
11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330,
@@ -31,7 +36,11 @@ public class GlycanBox:ModBox
};
- //After O-glycans are read in from database, we build combinations of glycans into GlycanBox. The maxNum is maximum glycans allowed on one peptides.
+ /// <summary>
+ /// Use the glycans from the database to build all possible glycan combinations as GlycanBoxes.
+ /// </summary>
+ /// <param name="maxNum">The maximum number of glycans allowed on one peptide</param>
+ /// <returns>The glycanBox collection</returns>
public static IEnumerable BuildOGlycanBoxes(int maxNum)
{
return BuildOGlycanBoxes(maxNum, false);
@@ -51,7 +60,7 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec
if (buildDecoy)
{
- GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray());
+ GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray(),false); // decoy glycanBox
glycanBox_decoy.TargetDecoy = false;
glycanBox_decoy.ChildGlycanBoxes = BuildChildOGlycanBoxes(glycanBox_decoy.NumberOfMods, glycanBox_decoy.ModIds, glycanBox_decoy.TargetDecoy).ToArray();
yield return glycanBox_decoy;
@@ -60,8 +69,11 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec
}
}
- //After O-glycans are read in from database, we transfer the glycans into 'Modification' class type for MetaMorpheus to manipulate sequences.
- //In the future we may able to combine the two type together.
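+ /// <summary>
+ /// Convert the glycans into the Modification type for MetaMorpheus to manipulate sequences. In the future we may be able to combine the two types together.
+ /// </summary>
+ /// <param name="globalOGlycans">The glycans to convert.</param>
+ /// <returns>An array of modifications with the same length as globalOGlycans</returns>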
public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGlycans)
{
Modification[] globalOGlycanModifications = new Modification[globalOGlycans.Length];
@@ -73,20 +85,26 @@ public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGly
return globalOGlycanModifications;
}
- //The function here is to build GlycanBoxes used for LocalizationGraph.
- //In LocalizationGraph matrix, for each AdjNode, it represent a ChildOGlycanBox here at certain glycosite.
+
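+ /// <summary>
+ /// Generate all possible child/fragment boxes of the specific glycanBox. The child boxes are used for LocalizationGraph.
+ /// </summary>
+ /// <param name="maxNum">Combinations of size 1 to maxNum are drawn from positions 0 to maxNum-1 of glycanIds.</param>
+ /// <param name="glycanIds">The glycanBox, ex. [0,0,1] means glycan0 + glycan0 + glycan1</param>
+ /// <param name="targetDecoy">Target/decoy flag forwarded to the GlycanBox constructor.</param>
+ /// <returns>The ChildBox collection</returns>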
public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] glycanIds, bool targetDecoy = true)
{
yield return new GlycanBox(new int[0], targetDecoy);
HashSet seen = new HashSet();
for (int i = 1; i <= maxNum; i++)
{
- foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i))
- {
- List ids = new List();
- foreach (var id in idCombine)
+ foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) //get all combinations of glycans on the peptide, ex. we have three glycosite and three glycan maybe on that (A,B,C)
+ { //the combination of glycans on the peptide can be (A),(A+B),(A+C),(B+C),(A+B+C) totally six
+ List ids = new List();
+ foreach (var id in idCombine)
{
- ids.Add(glycanIds[id]);
+ ids.Add(glycanIds[id]);
}
if (!seen.Contains(string.Join(",", ids.Select(p => p.ToString()))))
@@ -102,19 +120,24 @@ public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] gl
}
}
- public GlycanBox(int[] ids, bool targetDecoy = true):base(ids)
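+ /// <summary>
+ /// Constructor of GlycanBox.
+ /// </summary>
+ /// <param name="ids">The glycanBox composition; each number represents one glycan index in the database</param>
+ /// <param name="Istarget">True for a target box; otherwise a SugarShift value is added to the mass.</param>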
+ public GlycanBox(int[] ids, bool Istarget = true):base(ids)
{
- byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- foreach (var id in ModIds)
+ byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ foreach (var id in ModIds) //ModIds is the same as ids.
{
- for (int i = 0; i < kind.Length; i++)
+ for (int i = 0; i < kind.Length; i++)
{
- kind[i] += GlobalOGlycans[id].Kind[i];
+ kind[i] += GlobalOGlycans[id].Kind[i]; //kind is the sum of all glycan Kind in the Box.
}
}
Kind = kind;
- if (targetDecoy)
+ if (Istarget)
{
Mass = (double)Glycan.GetMass(Kind) / 1E5;
}
@@ -125,18 +148,13 @@ public GlycanBox(int[] ids, bool targetDecoy = true):base(ids)
Mass = (double)(Glycan.GetMass(Kind) + SugarShift[shiftInd]) / 1E5;
}
}
-
- public GlycanBox[] ChildGlycanBoxes { get; set; }
-
- public string GlycanIdString
+
+ public string GlycanIdString // the composition of glycanBox. Example: [1,2,3] means glycan1 + glycan2 + glycan3 are on the peptide.
{
get
{
return string.Join(",", ModIds.Select(p => p.ToString()));
}
}
-
- public byte[] Kind{ get; private set; }
-
}
}
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs
index ddc64d7f6..4b3148b52 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs
@@ -6,10 +6,18 @@
namespace EngineLayer
{
-
- public static class GlycanDatabase
+ // in our database, the N-glycan.gdb should be correct to the new format
+ // This class loads the glycan database and then creates the glycan objects.
+ public static class GlycanDatabase
{
- //Load Glycan. Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions.
+
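+ /// <summary>
+ /// Load glycans from the database file. Generally, glycan ions should be generated for N-glycopeptides, which produce Y-ions; the MS method couldn't produce O-glycan ions.
+ /// </summary>
+ /// <param name="filePath">Database file path</param>
+ /// <param name="ToGenerateIons">Do we need to generate the glycanIon?</param>
+ /// <param name="IsOGlycanSearch">Forwarded to the format-specific loader.</param>
+ /// <returns>A glycan object collection</returns>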
public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch)
{
bool isKind = true;
@@ -18,7 +26,7 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon
while(lines.Peek() != -1)
{
string line = lines.ReadLine();
- if (!line.Contains("HexNAc"))
+ if (!line.Contains("HexNAc")) // use the first line to determine the format (kind / structure) of glycan database.
{
isKind = false;
}
@@ -28,15 +36,22 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon
if (isKind)
{
- return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch);
+ return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); // open the file of the kind format, example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1)
}
else
{
- return LoadStructureGlycan(filePath, IsOGlycanSearch);
+ return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A))
}
}
- //Load KindGlycan. Compatible with Byonic.
+
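+ /// <summary>
+ /// Load a composition-format glycan database, convert each entry to the kind format, then generate the glycan objects.
+ /// </summary>
+ /// <param name="filePath">Path of the database file to read.</param>
+ /// <param name="ToGenerateIons">Whether to generate the ions for each glycan.</param>
+ /// <param name="IsOGlycanSearch"></param>
+ /// <returns>The glycan collection</returns>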
public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch)
{
using (StreamReader lines = new StreamReader(filePath))
@@ -46,14 +61,14 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat
{
string line = lines.ReadLine().Split('\t').First();
- if (!(line.Contains("HexNAc") || line.Contains("Hex")))
+ if (!(line.Contains("HexNAc") || line.Contains("Hex"))) // Make sure the line is a glycan line. The line should contain HexNAc or Hex.
{
continue;
}
- var kind = String2Kind(line);
+ var kind = String2Kind(line); // Convert the database string to kind[] format (byte array).
- var glycan = new Glycan(kind);
+ var glycan = new Glycan(kind); // Use the kind[] to create a glycan object.
glycan.GlyId = id++;
if (ToGenerateIons)
{
@@ -71,9 +86,14 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat
}
}
- public static byte[] String2Kind(string line)
+ /// <summary>
+ /// Convert the glycan string to Kind array
+ /// </summary>
+ /// <param name="line">ex. HexNAc(2)Hex(5)NeuAc(1)Fuc(1)</param>
+ /// <returns>The glycan Kind array, ex. [5, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0]</returns>
+ public static byte[] String2Kind(string line)
{
- byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
var x = line.Split(new char[] { '(', ')' });
int i = 0;
while (i < x.Length - 1)
@@ -85,7 +105,12 @@ public static byte[] String2Kind(string line)
return kind;
}
- //Load structured Glycan database.
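+ /// <summary>
+ /// Load a structure-format glycan database and generate the glycan objects.
+ /// </summary>
+ /// <param name="filePath">Path of the database file; one structure string is read per line.</param>
+ /// <param name="IsOGlycan">Forwarded to Glycan.Struct2Glycan.</param>
+ /// <returns>The Glycan object collection</returns>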
public static IEnumerable LoadStructureGlycan(string filePath, bool IsOGlycan)
{
using (StreamReader glycans = new StreamReader(filePath))
@@ -93,8 +118,8 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG
int id = 1;
while (glycans.Peek() != -1)
{
- string line = glycans.ReadLine();
- yield return Glycan.Struct2Glycan(line, id++, IsOGlycan);
+ string line = glycans.ReadLine(); // Read the line from the database file. Ex. (N(H(A))(A))
+ yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); // Directly convert the string to Glycan object.
}
}
}
@@ -102,28 +127,33 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG
//This function build fragments based on the general core of NGlyco fragments.
//From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py#L408
//The fragment generation is not as good as structure based method. So it is better to use a structure based N-Glycan database.
- public static List NGlycanCompositionFragments(byte[] kind)
+ // The function is used to load the database from the different formats, but we don't use it now.
+ public static List NGlycanCompositionFragments(byte[] kind, bool isfucExtended = false)
{
int glycan_mass = Glycan.GetMass(kind);
- int core_count = 1;
+ // int core_count = 1;
int iteration_count = 0;
+ int hexnac_Core = 2;
+ int hexose_Core = 3;
bool extended = true;
- bool extended_fucosylation = false;
+ bool extended_fucosylation = isfucExtended;
int fuc_count = kind[4];
int xyl_count = kind[9];
- int hexnac_inaggregate = kind[0];
- int hexose_inaggregate = kind[1];
+ int hexnac_total = kind[1];
+ int hexose_total = kind[0];
List glycanIons = new List();
- int base_hexnac = Math.Min(hexnac_inaggregate + 1, 3);
- for (int hexnac_count = 0; hexnac_count < base_hexnac; hexnac_count++)
+ int base_hexnac = Math.Min(hexnac_total, hexnac_Core); // base_hexnac is the first priority hexnac count, they all come from the core.
+ for (int hexnac_count = 0; hexnac_count < base_hexnac + 1 ; hexnac_count++)
{
if (hexnac_count == 0)
{
- GlycanIon glycanIon = new GlycanIon(null, 8303819, new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, glycan_mass - 8303819);
+ byte[] startKind = new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ string glycanName = Glycan.GetKindString(startKind);
+ GlycanIon glycanIon = new GlycanIon(glycanName, 8303819, startKind, glycan_mass - 8303819);
glycanIons.Add(glycanIon);
}
else if (hexnac_count == 1)
@@ -165,7 +195,7 @@ public static List NGlycanCompositionFragments(byte[] kind)
for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++)
{
- GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass);
+ GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, 1, 0, glycan_mass);
glycanIons.Add(add_fuc_glycanIon);
}
@@ -183,22 +213,25 @@ public static List NGlycanCompositionFragments(byte[] kind)
}
- int min_hexose_inaggregate = Math.Min(hexose_inaggregate + 1, 4);
- for (int hexose_count = 1; hexose_count <= min_hexose_inaggregate; hexose_count++)
+ int base_hexose = Math.Min(hexose_total, hexose_Core); // base_hexose is the first priority hexose count, they all come from the core.
+ for (int hexose_count = 1; hexose_count <= base_hexose + 1; hexose_count++)
{
GlycanIon hexose_glycanIon = GenerateGlycanIon((byte)hexose_count, (byte)hexnac_count, 0, 0, glycan_mass);
glycanIons.Add(hexose_glycanIon);
if (!extended_fucosylation)
{
- GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass);
- glycanIons.Add(fuc_glycanIon);
-
- if (iteration_count < xyl_count)
+ if (iteration_count < fuc_count)
{
- GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass);
- glycanIons.Add(xyl_fuc_glycanIon);
- }
+ GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass);
+ glycanIons.Add(fuc_glycanIon);
+
+ if (iteration_count < xyl_count)
+ {
+ GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass);
+ glycanIons.Add(xyl_fuc_glycanIon);
+ }
+ }
}
else if (fuc_count > 0)
{
@@ -207,7 +240,7 @@ public static List NGlycanCompositionFragments(byte[] kind)
for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++)
{
- GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass);
+ GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass);
glycanIons.Add(add_fuc_glycanIon);
}
@@ -224,11 +257,11 @@ public static List NGlycanCompositionFragments(byte[] kind)
glycanIons.Add(xyl_glycanIon);
}
- if (hexose_count == 3 && hexnac_count >= 2 * core_count && extended)
+ if (hexose_count == hexose_Core && hexnac_count >= hexnac_Core && extended) //After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted.
{
- for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count++)
+ for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count++)
{
- if (extra_hexnac_count + hexnac_count > hexnac_inaggregate)
+ if (extra_hexnac_count + hexnac_count > hexnac_total) // this check doesn't make sense: given the loop bound, extra_hexnac_count + hexnac_count cannot be larger than hexnac_total
{
continue;
}
@@ -257,7 +290,7 @@ public static List NGlycanCompositionFragments(byte[] kind)
for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++)
{
- GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass);
+ GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass);
glycanIons.Add(add_fuc_glycanIon);
}
@@ -276,9 +309,9 @@ public static List NGlycanCompositionFragments(byte[] kind)
}
- for (int extra_hexose_count = 1; extra_hexose_count < hexose_inaggregate - hexose_count + 1; extra_hexose_count++)
+ for (int extra_hexose_count = 1; extra_hexose_count < hexose_total - hexose_Core + 1; extra_hexose_count++)
{
- if (extra_hexose_count + hexose_count > hexose_inaggregate)
+ if (extra_hexose_count + hexose_count > hexose_total) // this check looks redundant: the hexose count should not be larger than hexose_total - TODO confirm against the loop bound
{
continue;
}
@@ -305,7 +338,7 @@ public static List NGlycanCompositionFragments(byte[] kind)
for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++)
{
- GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass);
+ GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass);
glycanIons.Add(add_fuc_glycanIon);
}
@@ -337,11 +370,13 @@ public static List NGlycanCompositionFragments(byte[] kind)
private static GlycanIon GenerateGlycanIon(byte hexose_count, byte hexnac_count, byte fuc_count, byte xyl_count, int glycan_mass)
{
- byte[] ionKind = new byte[] { hexose_count, hexnac_count, 0, 0, fuc_count, 0, 0, 0, 0, xyl_count };
+ byte[] ionKind = new byte[] { hexose_count, hexnac_count, 0, 0, fuc_count, 0, 0, 0, 0, xyl_count,0 };
int ionMass = Glycan.GetMass(ionKind);
- GlycanIon glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass);
+ String glycanName = Glycan.GetKindString(ionKind);
+
+ GlycanIon glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass);
return glycanIon;
}
@@ -355,8 +390,9 @@ private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count,
ionKind[9] += xyl_count;
int ionMass = Glycan.GetMass(ionKind);
+ string glycanName = Glycan.GetKindString(ionKind);
- GlycanIon extend_glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass);
+ GlycanIon extend_glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass);
return extend_glycanIon;
}
@@ -364,6 +400,7 @@ private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count,
//This function build fragments based on the general core of OGlyco fragments.
//From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py
//The fragment generation is not as good as structure based method. So it is better to use a structure based O-Glycan database.
+ // This function is not used now; 'OGlycanCompositionCombinationChildIons' is used instead.
public static List OGlycanCompositionFragments(byte[] kind)
{
List glycanIons = new List();
@@ -374,12 +411,12 @@ public static List OGlycanCompositionFragments(byte[] kind)
bool extended = true;
int fuc_count = kind[4];
- int hexnac_inaggregate = kind[0];
- int hexose_inaggregate = kind[1];
+ int hexnac_total = kind[1];
+ int hexose_total = kind[0];
for (int hexnac_count = 0; hexnac_count < 3; hexnac_count++)
{
- if (hexnac_inaggregate < hexnac_count)
+ if (hexnac_total < hexnac_count)
{
continue;
}
@@ -400,7 +437,7 @@ public static List OGlycanCompositionFragments(byte[] kind)
for (int hexose_count = 0; hexose_count < 2; hexose_count++)
{
- if (hexose_inaggregate < hexose_count)
+ if (hexose_total < hexose_count)
{
continue;
}
@@ -420,9 +457,9 @@ public static List OGlycanCompositionFragments(byte[] kind)
// After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted.
- if (extended && hexnac_inaggregate - hexnac_count >= 0)
+ if (extended && hexnac_total - hexnac_count >= 0)
{
- for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count ++)
+ for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count ++)
{
if (extra_hexnac_count > 0)
{
@@ -440,9 +477,9 @@ public static List OGlycanCompositionFragments(byte[] kind)
}
- if (hexose_inaggregate > hexose_count && hexose_count > 0)
+ if (hexose_total > hexose_count && hexose_count > 0)
{
- for (int extra_hexose_count = 0; extra_hexose_count < hexose_inaggregate - hexose_count; extra_hexose_count++)
+ for (int extra_hexose_count = 0; extra_hexose_count < hexose_total - hexose_count; extra_hexose_count++)
{
if (extra_hexose_count > 0 && extra_hexose_count + hexose_count >0)
{
@@ -473,7 +510,11 @@ public static List OGlycanCompositionFragments(byte[] kind)
return glycanIons;
}
- //The OGlycanCompositionFragments just generate some core GlycanIons. We need a combination solution.
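+ /// <summary>
+ /// Generate child ions based on the kind array: build the combinations of the monosaccharides in the kind array, then filter them by the rules.
+ /// </summary>
+ /// <param name="kind">The glycan kind array</param>
+ /// <returns>The glycanIon collection, ordered by ion mass</returns>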
public static List OGlycanCompositionCombinationChildIons(byte[] kind)
{
List glycanIons = new List();
@@ -488,7 +529,7 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind
foreach (var k in _kinds)
{
- //Rules to build OGlycan child ions.
+ //Rules to build OGlycan child ions. Filter the kind array which doesn't meet the rules.
//At least one HexNAc
if (k[1] == 0)
{
@@ -515,15 +556,21 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind
return glycanIons.OrderBy(p=>p.IonMass).ToList();
}
- private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys)
- {
- if (kind.Sum(p=>p) == 0)
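+ /// <summary>
+ /// Try to create all possible combinations from the glycan kind array, and store each combination array in the _kinds list.
+ /// </summary>
+ /// <param name="kind">The glycan kind array, ex. [2,2,0]</param>
+ /// <param name="_kinds"></param>
+ /// <param name="_keys"></param>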
+ private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys)
+ {
+ if (kind.Sum(p=>p) == 0)
{
- return;
+ return; // if we don't have any monosaccharide, no need to generate the child ions.
}
else
{
- for (int i = 0; i < kind.Length; i++)
+ for (int i = 0; i < kind.Length; i++) //traverse the kind array
{
if (kind[i] >= 1)
{
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs
index b7557c83e..019c926e9 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs
@@ -10,8 +10,14 @@
namespace EngineLayer.GlycoSearch
{
- public static class GlycoPeptides
+ public static class GlycoPeptides
{
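+ /// <summary>
+ /// Generate a list of isotopic intensities of the oxonium ions
+ /// </summary>
+ /// <param name="theScan">The MS2 Scan</param>
+ /// <param name="massDiffAcceptor"></param>
+ /// <returns>double[], the intensity list (same length as Glycan.AllOxoniumIons)</returns>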
public static double[] ScanOxoniumIonFilter(Ms2ScanWithSpecificMass theScan, MassDiffAcceptor massDiffAcceptor)
{
double[] oxoniumIonsintensities = new double[Glycan.AllOxoniumIons.Length];
@@ -180,7 +186,7 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType,
return true;
}
- if (dissociationType == DissociationType.Custom )
+ if (dissociationType == DissociationType.Custom ) //Use the fragment type to determine the dissociation type.
{
if (customIons.Contains(ProductType.zDot) || customIons.Contains(ProductType.c))
{
@@ -192,13 +198,22 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType,
}
//TO THINK: filter reasonable fragments here. The final solution is to change mzLib.Proteomics.PeptideWithSetModifications.Fragment
+
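+ /// <summary>
+ /// Get the theoretical fragments of the peptide with the glycan modification. The fragment ions differ depending on the dissociation type.
+ /// </summary>
+ /// <param name="dissociationType"></param>
+ /// <param name="customIons"></param>
+ /// <param name="peptide"></param>
+ /// <param name="modPeptide"></param>
+ /// <returns>List of products: the theoretical fragments</returns>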
public static List OGlyGetTheoreticalFragments(DissociationType dissociationType, List customIons, PeptideWithSetModifications peptide, PeptideWithSetModifications modPeptide)
{
List theoreticalProducts = new List();
HashSet masses = new HashSet();
List products = new List();
- if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID)
+ if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID)
{
List diag = new List();
modPeptide.Fragment(dissociationType, FragmentationTerminus.Both, diag);
@@ -241,7 +256,7 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci
}
- foreach (var fragment in products)
+ foreach (var fragment in products) //this part keeps only the unique fragment ions (filters out fragments with the same NeutralMass)
{
if (!masses.Contains(fragment.NeutralMass))
{
@@ -253,23 +268,31 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci
return theoreticalProducts;
}
+
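+ /// <summary>
+ /// Generate the theoretical glycan-modified peptide from the glycanBox, the modification positions, and the peptide.
+ /// </summary>
+ /// <param name="theModPositions"></param>
+ /// <param name="peptide"></param>
+ /// <param name="glycanBox"></param>
+ /// <returns>A modified peptide.</returns>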
public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theModPositions, PeptideWithSetModifications peptide, GlycanBox glycanBox)
{
Modification[] modifications = new Modification[glycanBox.NumberOfMods];
for (int i = 0; i < glycanBox.NumberOfMods; i++)
{
- modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)];
+ modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)]; // transfer the glycanBox information to a new list.
}
Dictionary testMods = new Dictionary();
foreach (var mod in peptide.AllModsOneIsNterminus)
{
- testMods.Add(mod.Key, mod.Value);
+ testMods.Add(mod.Key, mod.Value); // transfer the AllMod information to a new list.
}
for (int i = 0; i < theModPositions.Count(); i++)
{
- testMods.Add(theModPositions.ElementAt(i), modifications[i]);
+ testMods.Add(theModPositions.ElementAt(i), modifications[i]); //combine the glycanBox information to the AllMod list
}
var testPeptide = new PeptideWithSetModifications(peptide.Protein, peptide.DigestionParams, peptide.OneBasedStartResidue,
@@ -278,6 +301,12 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theMod
return testPeptide;
}
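+ /// <summary>
+ /// Generate the theoretical glycan-modified peptide from the route and the peptide. The route contains the glycanBox and modPos information.
+ /// </summary>
+ /// <param name="theModPositions"></param>
+ /// <param name="peptide"></param>
+ /// <returns>A modified peptide</returns>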
public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theModPositions, PeptideWithSetModifications peptide)
{
Modification[] modifications = new Modification[theModPositions.Mods.Count];
@@ -303,16 +332,24 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theMod
return testPeptide;
}
- //The function here is to calculate permutation localization which could be used to compare with Graph-Localization.
+ //Should be revised for easier understanding.
+ /// <summary>
+ /// Generate all possible glycosite sets for the glycan set, assuming the glycans are put on the glycosites in sequence.
+ /// </summary>
+ /// <param name="allModPos">Ex. [3,5,2,7]</param>
+ /// <param name="glycanBoxId">Ex. [2,2,3] means id2 + id2 + id3</param>
+ /// <returns>A glycosite set collection. Ex. ([2,5,7],[3,5,7]...), where each list holds the glycosites for the glycanBox.
+ /// [2,5,7] means we will put the glycans on positions 2, 5, 7.
+ /// </returns>
public static List GetPermutations(List allModPos, int[] glycanBoxId)
{
var length = glycanBoxId.Length;
- var indexes = Enumerable.Range(0, length).ToArray();
+ var indexes = Enumerable.Range(0, length).ToArray(); // just the index for the glycanBoxId to keep the order.
int[] orderGlycan = new int[length];
- List permutateModPositions = new List();
+ List permutateModPositions = new List(); //The list to store all possible permutation localization.
- var combinations = Glycan.GetKCombs(allModPos, length);
+ var combinations = Glycan.GetKCombs(allModPos, length); //Get all possible combinations of the mod sites. ex. four site[1,2,3,4], length:3 -> combination [1,2,3], [1,2,4], [1,3,4], [2,3,4]
foreach (var com in combinations)
{
@@ -332,7 +369,7 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId
orderGlycan[i] = glycanBoxId[indexes[i]];
}
var key = string.Join(",", orderGlycan.Select(p => p.ToString()));
- if (!keys.Contains(key))
+ if (!keys.Contains(key)) //Remove the duplicate permutation localization.
{
keys.Add(key);
permutateModPositions.Add(per.ToArray());
@@ -343,25 +380,32 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId
return permutateModPositions;
}
- //The purpose of the funtion is to generate hash fragment ions without generate the PeptideWithMod. keyValuePair key:GlycanBoxId, Value:mod sites
+
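+ /// <summary>
+ /// Generate the new fragment list: add the glycan mass to the c ions and z ions from the peptide fragment list
+ /// </summary>
+ /// <param name="products"></param>
+ /// <param name="keyValuePair">Item1: GlycanBoxId, Item2: mod sites</param>
+ /// <param name="OGlycanBoxes"></param>
+ /// <param name="FragmentBinsPerDalton"></param>
+ /// <returns>int[], each fragment mass multiplied by FragmentBinsPerDalton and rounded</returns>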
public static int[] GetFragmentHash(List products, Tuple keyValuePair, GlycanBox[] OGlycanBoxes, int FragmentBinsPerDalton)
{
- double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray();
+ double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray(); // store the fragment mass in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3
var len = products.Count / 3;
if (keyValuePair.Item2!=null)
{
- for (int i = 0; i < keyValuePair.Item2.Length; i++)
- {
+ for (int i = 0; i < keyValuePair.Item2.Length; i++) // we want to add the glycan mass to the c ions and z ions that contain the glycan.
+ { // y ions didn't change in EThcD for O-glyco, so we just need to deal with c ions and z ions.
var j = keyValuePair.Item2[i];
- while (j <= len + 1)
+ while (j <= len + 1) // for c ions
{
newFragments[j - 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5;
j++;
}
- j = keyValuePair.Item2[i];
- while (j >= 3)
+ j = keyValuePair.Item2[i]; // reset the j to the position of the glycan
+ while (j >= 3) // for z ions
{
- //y ions didn't change in EThcD for O-glyco
newFragments[len * 3 - j + 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5;
j--;
}
@@ -369,7 +413,7 @@ public static int[] GetFragmentHash(List products, Tuple ke
}
- int[] fragmentHash = new int[products.Count];
+ int[] fragmentHash = new int[products.Count]; // store the fragment masses in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3, scaled by FragmentBinsPerDalton and rounded
for (int i = 0; i < products.Count; i++)
{
fragmentHash[i] = (int)Math.Round(newFragments[i] * FragmentBinsPerDalton);
@@ -377,8 +421,16 @@ public static int[] GetFragmentHash(List products, Tuple ke
return fragmentHash;
}
- //Find FragmentHash for current box at modInd.
- //y-ion didn't change for O-Glycopeptide.
+
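+ /// <summary>
+ /// Generate the fragment list with the specific childBox located on a specific modPos. Here, modInd is the index into modPoses. Not used in the current version.
+ /// </summary>
+ /// <param name="products"></param>
+ /// <param name="modPoses">ModPos list</param>
+ /// <param name="modInd">Specific ModPos, index in ModPos</param>
+ /// <param name="OGlycanBox">Whole glycanBox</param>
+ /// <param name="localOGlycanBox">Partial glycanBox, here the childBox</param>
+ /// <returns></returns>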
public static List GetLocalFragment(List products, int[] modPoses, int modInd, ModBox OGlycanBox, ModBox localOGlycanBox)
{
List newFragments = new List();
@@ -433,21 +485,38 @@ public static List GetUnlocalFragment(List products, int[] modP
}
- //The oxoniumIonIntensities is related with Glycan.AllOxoniumIons.
- //Rules are coded in the function.
- public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBox glycanBox)
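+ /// <summary>
+ /// Use the oxonium ions to test whether the glycanBox is consistent with the scan.
+ /// </summary>
+ /// <param name="oxoniumIonsintensities">From the Scan</param>
+ /// <param name="glycanBox">The glycanBox to be tested</param>
+ /// <returns>True: the glycanBox passes the filter. False: the glycanBox is rejected.</returns>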
+ public static bool DiagonsticFilter(double[] oxoniumIonsintensities, GlycanBox glycanBox)
{
+ double HexNAc_diagnostic = oxoniumIonsintensities[4];
+ double NeuAc_diagnostic1 = oxoniumIonsintensities[10];
+ double NeuAc_diagnostic2 = oxoniumIonsintensities[12];
+ double HexNAcPlusHex_diagnostic = oxoniumIonsintensities[14];
+
//If a glycopeptide spectrum does not have 292.1027 or 274.0921, then remove all glycans that have sialic acids from the search.
- if (oxoniumIonsintensities[10] <= 0 && oxoniumIonsintensities[12] <= 0)
+ if (NeuAc_diagnostic1 / HexNAc_diagnostic > 0.02 && NeuAc_diagnostic2 / HexNAc_diagnostic > 0.02)
+ {
+ if (glycanBox.Kind[2] == 0 )
+ {
+ return false;
+ }
+ }
+
+ if(NeuAc_diagnostic1 / HexNAc_diagnostic < 0.02 && NeuAc_diagnostic2 / HexNAc_diagnostic < 0.02)
{
- if (glycanBox.Kind[2] != 0 || glycanBox.Kind[3] != 0)
+ if (glycanBox.Kind[2] != 0)
{
return false;
}
}
//If a spectrum has 366.1395, remove glycans that do not have HexNAc(1)Hex(1) or more. Here use the total glycan of glycanBox to calculate.
- if (oxoniumIonsintensities[14] > 0)
+ else if (HexNAcPlusHex_diagnostic / HexNAc_diagnostic > 0.02)
{
if (glycanBox.Kind[0] < 1 && glycanBox.Kind[1] < 1)
{
@@ -458,6 +527,7 @@ public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBo
//Other rules:
//A spectrum needs to have 204.0867 to be considered as a glycopeptide.
//Ratio of 138.055 to 144.0655 can seperate O/N glycan.
+ // use some other oxonium ions to determine the glycan type.
return true;
}
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs
index b1e8bccf4..af9ba0e58 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs
@@ -15,13 +15,13 @@ namespace EngineLayer.GlycoSearch
public class GlycoSearchEngine : ModernSearchEngine
{
public static readonly double ToleranceForMassDifferentiation = 1e-9;
- private readonly int OxoniumIon204Index = 9; //Check Glycan.AllOxoniumIons
- protected readonly List[] GlobalCsms;
+ private readonly int OxoniumIon204Index = 9; // Check Glycan.AllOxoniumIons
+ protected readonly List[] GlobalGsms; // Per-scan lists of glyco spectral matches (renamed from GlobalCsms).
private GlycoSearchType GlycoSearchType;
- private readonly int TopN;
+ private readonly int TopN; // DDA top Peak number.
private readonly int _maxOGlycanNum;
- private readonly bool OxoniumIonFilter; //To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc).
+ private readonly bool OxoniumIonFilter; // To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc).
private readonly string _oglycanDatabase;
private readonly string _nglycanDatabase;
@@ -30,12 +30,13 @@ public class GlycoSearchEngine : ModernSearchEngine
private readonly List[] SecondFragmentIndex;
+ // The constructor for GlycoSearchEngine; it loads the search parameters such as mode, topN, maxOGlycanNum, oxoniumIonFilter, database, etc.
public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List peptideIndex,
List[] fragmentIndex, List[] secondFragmentIndex, int currentPartition, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters,
string oglycanDatabase, string nglycanDatabase, GlycoSearchType glycoSearchType, int glycoSearchTopNum, int maxOGlycanNum, bool oxoniumIonFilter, List nestedIds)
: base(null, listOfSortedms2Scans, peptideIndex, fragmentIndex, currentPartition, commonParameters, fileSpecificParameters, new OpenSearchMode(), 0, nestedIds)
{
- this.GlobalCsms = globalCsms;
+ this.GlobalGsms = globalCsms;
this.GlycoSearchType = glycoSearchType;
this.TopN = glycoSearchTopNum;
this._maxOGlycanNum = maxOGlycanNum;
@@ -48,19 +49,19 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci
ProductSearchMode = new SinglePpmAroundZeroSearchMode(20); //For Oxonium ion only
- if (glycoSearchType == GlycoSearchType.OGlycanSearch)
+ if (glycoSearchType == GlycoSearchType.OGlycanSearch) //if we do the O-glycan search, we need to load the O-glycan database and generate the glycoBox.
{
GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray();
GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans);
- GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray();
+ GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray(); //generate glycan box for O-glycan search
}
- else if (glycoSearchType == GlycoSearchType.NGlycanSearch)
+ else if (glycoSearchType == GlycoSearchType.NGlycanSearch) //because there is only one glycan in an N-glycopeptide, we don't need to build the N-glycanBox here.
{
NGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.NGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _nglycanDatabase).First(), true, false).OrderBy(p => p.Mass).ToArray();
//TO THINK: Glycan Decoy database.
//DecoyGlycans = Glycan.BuildTargetDecoyGlycans(NGlycans);
}
- else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch)
+ else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch) //searching both N-glycan and O-glycan is not yet completely built or tested.
{
GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray();
GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans);
@@ -76,6 +77,15 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci
private Glycan[] NGlycans { get; }
//private Glycan[] DecoyGlycans { get; }
+ /// <summary>
+ /// Run the glycoSearchEngine; this is the main function of the glycoSearchEngine.
+ /// Four steps:
+ /// (1) run a modern search engine to get the peptide candidates.
+ /// (2) match the peptide candidates with the precursor mass.
+ /// (3) use the mass shift to generate the route for the glycan localization.
+ /// (4) evaluate the highest score for the glycan localization and generate the glycoSpectralMatch.
+ /// </summary>
+ /// <returns>SearchResult</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
double progress = 0;
@@ -84,14 +94,14 @@ protected override MetaMorpheusEngineResults RunSpecific()
byte byteScoreCutoff = (byte)CommonParameters.ScoreCutoff;
- int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile;
- int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
+ int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; // MaxThreads = deafult is 7.
+ int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); // We can do the parallel search on different threads
Parallel.ForEach(threads, (scanIndex) =>
{
byte[] scoringTable = new byte[PeptideIndex.Count];
List idsOfPeptidesPossiblyObserved = new List();
- byte[] secondScoringTable = new byte[PeptideIndex.Count];
+ byte[] secondScoringTable = new byte[PeptideIndex.Count]; // We didn't use that right now.
List childIdsOfPeptidesPossiblyObserved = new List();
List idsOfPeptidesTopN = new List();
@@ -110,7 +120,7 @@ protected override MetaMorpheusEngineResults RunSpecific()
var scan = ListOfSortedMs2Scans[scanIndex];
- // get fragment bins for this scan
+ // get fragment bins for this scan
List allBinsToSearch = GetBinsToSearch(scan, FragmentIndex, CommonParameters.DissociationType);
//Limit the high bound limitation, here assume it is possible to has max 3 Da shift. This allows for correcting precursor in the future.
@@ -146,23 +156,23 @@ protected override MetaMorpheusEngineResults RunSpecific()
// }
//}
- // done with indexed scoring; refine scores and create PSMs
- if (idsOfPeptidesPossiblyObserved.Any())
+ // filter the peptide candidates by the score cutoff and limit them to the topN peptides.
+ if (idsOfPeptidesPossiblyObserved.Any())
{
scoreAtTopN = 0;
peptideCount = 0;
- foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p]))
+ foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p])) //from the highest score to the lowest score
{
- if (scoringTable[id] < (int)byteScoreCutoff)
+ if (scoringTable[id] < (int)byteScoreCutoff) //if the score is lower than the cutoff, we can skip this peptide.
{
continue;
}
peptideCount++;
if (peptideCount == TopN)
{
- scoreAtTopN = scoringTable[id];
+ scoreAtTopN = scoringTable[id]; //ScoreAtTopN = The score of the last peptide in the TopN list.
}
- if (scoringTable[id] < scoreAtTopN)
+ if (scoringTable[id] < scoreAtTopN)
{
break;
}
@@ -173,7 +183,7 @@ protected override MetaMorpheusEngineResults RunSpecific()
if (GlycoSearchType == GlycoSearchType.OGlycanSearch)
{
- gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff);
+ gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff); // Use the peptide candidate and the scan to generate the gsms.
}
else if(GlycoSearchType == GlycoSearchType.NGlycanSearch)
{
@@ -191,14 +201,14 @@ protected override MetaMorpheusEngineResults RunSpecific()
continue;
}
- if (GlobalCsms[scanIndex] == null)
+ if (GlobalGsms[scanIndex] == null)
{
- GlobalCsms[scanIndex] = new List();
+ GlobalGsms[scanIndex] = new List(); //the first finished task creates the new gsms list.
}
else
{
- gsms.AddRange(GlobalCsms[scanIndex]);
- GlobalCsms[scanIndex].Clear();
+ gsms.AddRange(GlobalGsms[scanIndex]);
+ GlobalGsms[scanIndex].Clear();
}
Add2GlobalGsms(ref gsms, scanIndex);
@@ -213,11 +223,11 @@ protected override MetaMorpheusEngineResults RunSpecific()
{
oldPercentProgress = percentProgress;
ReportProgress(new ProgressEventArgs(percentProgress, "Performing glyco search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds));
- }
+ } //percentProgress = 100, "Performing glyco search...1/1", NestedIds = 3.
}
});
- return new MetaMorpheusEngineResults(this);
+ return new MetaMorpheusEngineResults(this); //Storage the result information into the result class.
}
private void Add2GlobalGsms(ref List gsms, int scanIndex)
@@ -229,37 +239,37 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex)
foreach (var gsm in gsms.Where(p => p != null).OrderByDescending(p => p.Score).ThenBy(c => c.FullSequence))
{
- if (gsmsCount <= 10)
+ if (gsmsCount <= 10)
{
- gsm.ResolveAllAmbiguities();
+ gsm.ResolveAllAmbiguities(); //Try to resolve any case that have the same sequence in the PSM.
- if (gsmsCount == 1)
+ if (gsmsCount == 1) //If the gsms number is 1, we don't need to check the score and sequence.
{
preScore = gsm.Score;
preString = gsm.FullSequence;
- GlobalCsms[scanIndex].Add(gsm);
+ GlobalGsms[scanIndex].Add(gsm);
gsmsCount++;
}
- else
+ else
{
- if (gsm.Score - preScore < ToleranceForMassDifferentiation &&
+ if (gsm.Score - preScore < ToleranceForMassDifferentiation &&
gsm.Score - preScore > -ToleranceForMassDifferentiation)
{
string currentString = gsm.FullSequence;
- if (preString == currentString)
+ if (preString == currentString) //If peptides have the same sequence and their score is almost the same
{
- foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods)
- {
- GlobalCsms[scanIndex].Last().AddProteinMatch(bestMatchPeptide, gsm.BioPolymersWithSetModsToMatchingFragments[bestMatchPeptide.Peptide]);
+ foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods) // We should add the new ProteinMatch to the gsm,
+ { // because the identical sequence may come from a different protein.
+ GlobalGsms[scanIndex].Last().AddProteinMatch(bestMatchPeptide, gsm.BioPolymersWithSetModsToMatchingFragments[bestMatchPeptide.Peptide]);
}
}
else
{
preString = currentString;
- GlobalCsms[scanIndex].Add(gsm);
+ GlobalGsms[scanIndex].Add(gsm);
gsmsCount++;
}
}
@@ -272,7 +282,7 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex)
}
}
- //For FindOGlycan
+ //For FindOGlycan, generate the gsms for O-glycan search
private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIndex, int rank, PeptideWithSetModifications peptide, Route localization, double[] oxoniumIonIntensities, List localizationGraphs)
{
var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(localization, peptide);
@@ -348,12 +358,21 @@ private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIn
}
else
{
- psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5];
+ psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5]; // if the ratio is high, the glycan is more likely to be an N-glycan. Conversely, a small ratio means it is closer to an O-glycan.
}
return psmGlyco;
}
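+ /// <summary>
+ /// If the peptide mass matches the precursor mass exactly, we can directly generate the gsms for the peptide. Store the gsms in possibleMatches.
+ /// </summary>
+ /// <param name="theScan"></param>
+ /// <param name="scanIndex"></param>
+ /// <param name="scoreCutOff"></param>
+ /// <param name="theScanBestPeptide">The peptide candidate</param>
+ /// <param name="ind"></param>
+ /// <param name="possibleMatches">The space to store the gsms</param>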
private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, ref List possibleMatches)
{
List products = new List();
@@ -370,16 +389,30 @@ private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scor
}
}
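+ /// <summary>
+ /// Match the mass of the peptide candidate plus a glycanBox with the precursor mass. Try to generate the gsms for the scan. The gsms will be stored in possibleMatches.
+ /// </summary>
+ /// <param name="theScan"></param>
+ /// <param name="scanIndex"></param>
+ /// <param name="scoreCutOff"></param>
+ /// <param name="theScanBestPeptide">The peptide candidate</param>
+ /// <param name="ind"></param>
+ /// <param name="possibleGlycanMassLow">Lower bound of the possible glycan mass; used to find the starting index in GlycanBox.OGlycanBoxes</param>
+ /// <param name="oxoniumIonIntensities"></param>
+ /// <param name="possibleMatches">The space to store the gsms</param>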
private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, double possibleGlycanMassLow, double[] oxoniumIonIntensities, ref List possibleMatches)
{
- int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow);
+ // The glycanBoxes will be filtered by the oxonium ions. If the oxonium ions don't make sense, we will remove the glycanBox.
+
- int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray();
+ int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); // try to find the index of the glycanBox whose mass most closely matches "possibleGlycanMassLow"
+
+ int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray(); //list all of the possible glycosylation sites/positions
var localizationScan = theScan;
- List products = new List();
+ List products = new List(); // product list for the theoretical fragment ions
- //For HCD-pd-ETD or CD-pd-EThcD type of data
+ //For HCD-pd-ETD or HCD-pd-EThcD type of data, we generate different products.
if (theScan.ChildScans.Count > 0 && GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons))
{
localizationScan = theScan.ChildScans.First();
@@ -396,33 +429,33 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco
//No localization can be done with MS2-HCD spectrum
//TO THINK: there is a special situation. The HCD only scan from HCD-pd-EThcD data can be a glycopeptide, but there is no ETD, so there is no localization. What to do with this?
bool is_HCD_only_data = !GlycoPeptides.DissociationTypeContainETD(CommonParameters.DissociationType, CommonParameters.CustomIons) && !GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons);
- if (is_HCD_only_data)
+ if (is_HCD_only_data) // In the HCD, there is no Y ion, so we don't need to consider the modification here.
{
theScanBestPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products);
}
double bestLocalizedScore = 0;
- List localizationGraphs = new List();
+ List localizationGraphs = new List(); // if we also have ETD, then we will search the localization
- while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass)))
+ while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) // verify the glycan mass is invaild (within the range and match with mass shift)
{
- if (OxoniumIonFilter && !GlycoPeptides.OxoniumIonsAnalysis(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow]))
+ if (OxoniumIonFilter && !GlycoPeptides.DiagonsticFilter(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow])) // if the filter is turned on, we need to check does the oxoiums make sense.
{
- iDLow++;
+ iDLow++; // if the oxonium ions don't make sense (there is no 204, or without their diagnostic ion), we can skip this glycan.
continue;
}
- if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods)
+ if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods) // the glycosite number should be larger than the possible glycan number.
{
LocalizationGraph localizationGraph = new LocalizationGraph(modPos, GlycanBox.OGlycanBoxes[iDLow], GlycanBox.OGlycanBoxes[iDLow].ChildGlycanBoxes, iDLow);
- LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products);
+ LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products); //create the localization graph with the glycan mass and the possible glycosite.
double currentLocalizationScore = localizationGraph.TotalScore;
- if (currentLocalizationScore > bestLocalizedScore)
+ if (currentLocalizationScore > bestLocalizedScore) //Try to find the best glycanBox with the highest score.
{
bestLocalizedScore = currentLocalizationScore;
localizationGraphs.Clear();
- localizationGraphs.Add(localizationGraph);
+ localizationGraphs.Add(localizationGraph); // we only keep the best glycanBox and its localizationgraph.
}
else if ((is_HCD_only_data || bestLocalizedScore > 0) && (currentLocalizationScore <= bestLocalizedScore + 0.00000001 && currentLocalizationScore >= bestLocalizedScore - 0.00000001))
{
@@ -436,10 +469,10 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco
//In theory, the peptide_localization shouldn't be null, but it is possible that the real score is smaller than indexed score.
if (localizationGraphs.Count > 0)
{
- var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes);
- var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath);
+ var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes); //Get the first path from the localization graph.
+ var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath); //Get the route of the localization from the first path inforation
- var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs);
+ var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs); //Create the glycoSpectralMatch
if (psmGlyco.Score > scoreCutOff)
{
@@ -519,7 +552,7 @@ private void FindNGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco
}
}
-
+ // Conduct the search and generate the gsms for N-glycan search
private List FindNGlycopeptide(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff)
{
List possibleMatches = new List();
@@ -566,19 +599,34 @@ private List FindNGlycopeptide(Ms2ScanWithSpecificMass theSc
}
return possibleMatches;
}
+
+
+ // Match the mass of the peptide candidate with the precursor mass, then try to generate the GSM objects as output.
+ ///
+ /// This is a general function for generating GSMs. It is run after the Modern Search.
+ /// Two steps:
+ /// (1) Match the mass of the peptide candidate with the precursor mass, then decide which function to call to generate the GSM objects.
+ /// (2) Collect the GSM objects, store them in possibleMatches, and return them.
+ ///
+ /// The MS2 Scan
+ /// The peptide candidate from the modern Search
+ ///
+ ///
+ /// The Gsms collection.
private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff)
{
List possibleMatches = new List();
+
for (int ind = 0; ind < idsOfPeptidesPossiblyObserved.Count; ind++)
{
- var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]];
+ var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; // Get the peptide from the candidate list.
- if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass))
+ if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) // If the peptide mass is indentical to the precursor mass (or within the tolerance), we can directly search the glycopeptide.
{
FindSingle(theScan, scanIndex, scoreCutOff, theScanBestPeptide, ind, ref possibleMatches);
}
- else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //Filter out unknow non-glycan modifications.
+ else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //If not, we need to consider the glycan mass difference.
{
//Filter by glycanBoxes mass difference.
var possibleGlycanMassLow = PrecusorSearchMode.GetMinimumValue(theScan.PrecursorMass) - theScanBestPeptide.MonoisotopicMass;
@@ -587,7 +635,7 @@ private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificM
if (possibleGlycanMassHigh < GlycanBox.OGlycanBoxes.First().Mass || possibleGlycanMassLow > GlycanBox.OGlycanBoxes.Last().Mass)
{
- continue;
+ continue; // if the glycan mass difference is out of the range of the glycan box, we can skip this peptide.
}
//Filter by OxoniumIon
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs
index acc6db3be..ec29d613c 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs
@@ -47,9 +47,15 @@ public GlycoSpectralMatch(PeptideWithSetModifications theBestPeptide, int notch,
public double R138vs144 { get; set; } // The intensity ratio of this 138 and 144 could be a signature for O-glycan or N-glycan.
public List> LocalizedGlycan { get; set; } // All seen glycans identified.
- public LocalizationLevel LocalizationLevel { get; set; }
+ public LocalizationLevel LocalizationLevel { get; set; }
//Motif should be writen with required form
+ ///
+ /// Get the possible modification sites on the peptide for the given motifs.
+ ///
+ /// full peptide sequence ex. "PTLFKNVSLYK"
+ /// modification amino acids, ex. "S","T"
+ /// The list of modification position indices, ex. [9,3]
public static List GetPossibleModSites(PeptideWithSetModifications peptide, string[] motifs)
{
List possibleModSites = new List();
@@ -58,14 +64,14 @@ public static List GetPossibleModSites(PeptideWithSetModifications peptide,
foreach (var mtf in motifs)
{
- if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif))
+ if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif)) //Check if the motif is valid, and creat the motif object from the string.
{
- Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere.");
+ Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere.");
modifications.Add(modWithMotif);
}
}
- foreach (var modWithMotif in modifications)
+ foreach (var modWithMotif in modifications) //interate through all the modifications with motif.
{
for (int r = 0; r < peptide.Length; r++)
{
@@ -113,7 +119,11 @@ public static bool MotifExist(string baseSeq, string[] motifs)
return false;
}
- public static string GetTabSepHeaderSingle()
+ ///
+ /// Generate the peptide header, ex File name, Precursor m/z, Score…
+ ///
+ ///
+ public static string GetTabSepHeaderSingle() //Most complicate part in this class
{
var sb = new StringBuilder();
sb.Append("File Name" + '\t');
@@ -151,6 +161,10 @@ public static string GetTabSepHeaderSingle()
return sb.ToString();
}
+ ///
+ /// Generate the glyco header ex Localization Score, Yion Score…
+ ///
+ ///
public static string GetTabSeperatedHeaderGlyco()
{
var sb = new StringBuilder();
@@ -174,6 +188,10 @@ public static string GetTabSeperatedHeaderGlyco()
return sb.ToString();
}
+ ///
+ /// Put the psm data into the corresponding columns.
+ ///
+ ///
public string SingleToString()
{
var sb = new StringBuilder();
@@ -188,7 +206,7 @@ public string SingleToString()
var proteinAccessionString = Accession ?? PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide.Parent.Accession), FullSequence).ResolvedString;
sb.Append(proteinAccessionString + "\t");
sb.Append(Organism + "\t");
- sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t");
+ sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t"); //protein name
int _FirstOneBasedStartResidueInProtein = OneBasedStartResidue.HasValue ? OneBasedStartResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedStartResidue;
int _FirstOneBasedEndResidueInProtein = OneBasedEndResidue.HasValue ? OneBasedEndResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedEndResidue; ;
@@ -257,7 +275,10 @@ public string SingleToString()
return sb.ToString();
}
- //This should be appended to SingleToString
+ ///
+ /// Put the glycan data into the corresponding columns.
+ ///
+ ///
public string GlycoToString()
{
var sb = new StringBuilder();
@@ -291,11 +312,11 @@ public string GlycoToString()
for (int i = 0; i < glycanBox.NumberOfMods; i++)
{
glycans[i] = GlycanBox.GlobalOGlycans[glycanBox.ModIds[i]];
- }
+ } //Convert the glycanBox index into the real glycan object. ex. [H1N1, H2N2A1, H2N2A1F1]
if (glycans.First().Struc != null)
{
- sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray()));
+ sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray())); //ex. (N(H)),(N(H(A))(N(H))),(N(H)(N(H(A))(F))
}
sb.Append("\t");
@@ -357,38 +378,45 @@ public static Dictionary MatchedIonDataDictionary(List ; Input: List
+
+ ///
+ /// Two functions are included:
+ /// (1) Analyze all glycosite-glycan pairs and check whether a pair occurs in all cases. If yes, mark it as true; if not, mark it as false.
+ /// (2) Classify the localization level based on the localization results.
+ ///
+ /// all cases of the pairs
+ /// level 1 to level 3
+ /// A list of tuples representing each pair and its confidence, ex. [3,5,true] means glycan 5 is located on glycosite 3 with high confidence
public static List> GetLocalizedGlycan(List OGlycanBoxLocalization, out LocalizationLevel localizationLevel)
{
List> localizedGlycan = new List>();
- //Dictionary: modsite-id, count
- Dictionary seenModSite = new Dictionary();
+ Dictionary modSiteSeenCount = new Dictionary(); // all possible glycan-sites pair, Dictionary: site-glycan pair, count
- foreach (var ogl in OGlycanBoxLocalization)
+ foreach (var ogl in OGlycanBoxLocalization) // ogl means one case, there are three glycan located on the same peptide: (5,1,False),(9,8,Flase),(10,9,Ture)
{
- foreach (var og in ogl.Mods)
+ foreach (var og in ogl.Mods) // og means one glycan locaization, like (5,1,False) -> glycan 1 attached on postion5.
{
- var k = og.Item1.ToString() + "-" + og.Item2.ToString();
- if (seenModSite.ContainsKey(k))
+ var k = og.Item1.ToString() + "-" + og.Item2.ToString(); // k = 5-1(glycosite-glycan) means the glycan-site pair
+ if (modSiteSeenCount.ContainsKey(k)) // accout the number of the same glycan-site pair
{
- seenModSite[k] += 1;
+ modSiteSeenCount[k] += 1; // this pair cpunt +1
}
else
{
- seenModSite.Add(k, 1);
+ modSiteSeenCount.Add(k, 1); // If the pair is first time to seen, add it to the dictionary.
}
}
}
localizationLevel = LocalizationLevel.Level3;
- if (OGlycanBoxLocalization.Count == 1)
+ if (OGlycanBoxLocalization.Count == 1) // we just have one situation(route), no other possibility
{
localizationLevel = LocalizationLevel.Level1;
}
else if (OGlycanBoxLocalization.Count > 1)
{
- if (seenModSite.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0)
+ if (modSiteSeenCount.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0) //If anyone of the glycan-site pair is localized in all the cases, then the localization level is 2.
{
localizationLevel = LocalizationLevel.Level2;
}
@@ -398,9 +426,9 @@ public static List> GetLocalizedGlycan(List OGlycan
}
}
- foreach (var seenMod in seenModSite)
+ foreach (var seenMod in modSiteSeenCount)
{
- if (seenMod.Value == OGlycanBoxLocalization.Count)
+ if (seenMod.Value == OGlycanBoxLocalization.Count) // Try to fine the glycan-site pair that always localized in all the cases.
{
localizedGlycan.Add(new Tuple(int.Parse(seenMod.Key.Split('-')[0]), int.Parse(seenMod.Key.Split('-')[1]), true));
}
@@ -413,6 +441,11 @@ public static List> GetLocalizedGlycan(List OGlycan
return localizedGlycan;
}
+ ///
+ /// convert the Route information into the string format.
+ ///
+ /// Route collection ex. [(9,4),(8,4),(7,4)...], ModBoxId = 7
+ /// string {@7[8-4]}{@7[7-4]}{@7[6-4]} means three cases: glycan 4 located on glycosite 6, glycan 4 located on glycosite 7, and glycan 4 located on glycosite 8
public static string AllLocalizationInfo(List OGlycanBoxLocalization)
{
string local = "";
@@ -433,7 +466,7 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization)
{
var ogl = OGlycanBoxLocalization[i];
local += "{@" + ogl.ModBoxId.ToString() + "[";
- var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString()));
+ var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString())); //why we have to -1 here?
local += g + "]}";
i++;
}
@@ -446,7 +479,15 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization)
return local;
}
- //Correct Localization Level based on site specific probability. If LocalizationLevel = 1, and there are site probability lower than 0.75, Correct the level to 1b.
+ ///
+ /// Just for the case at Level1 and Level1b.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// level 1 or level 1b
public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, LocalizationGraph localizationGraph, Route route, List> localizedGlycan, LocalizationLevel localizationLevel)
{
if (siteSpeciLocalProb == null || localizationLevel!=LocalizationLevel.Level1)
@@ -468,7 +509,7 @@ public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local, ref string local_protein)
+ ///
+ /// Output the site-specific localization information. The strings are stored in local_peptide and local_protein. ex. [9,H2N2A1F1,0.589] means glycan H2N2A1F1 is located on glycosite 9 with a probability of 0.589.
+ ///
+ /// site : (glycan, probability)[] ex. site2 : [(glycan1, 5%), (glycan2, 5%), (glycan3, 90%)]
+ /// [(6,4,false),(7,4,false),(7,2,true)], glycosite, glycan, confidence respectively
+ ///
+ ///
+ ///
+ public static void LocalizedSiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local_peptide, ref string local_protein)
{
if (siteSpeciLocalProb == null)
{
return;
}
- foreach (var loc in localizedGlycan.Where(p => p.Item3))
+ foreach (var glycositePair in localizedGlycan.Where(p => p.Item3)) // get the most confidient glycosite-glycan pair, loc is a pair of glycosite and glycan. Item 1 is glycosite, Item 2 is glycanId.
{
- var x = siteSpeciLocalProb[loc.Item1].Where(p => p.Item1 == loc.Item2).First().Item2;
- var peptide_site = loc.Item1 - 1;
- local += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]";
+ var site_glycanProb = siteSpeciLocalProb[glycositePair.Item1].Where(p => p.Item1 == glycositePair.Item2).First().Item2; // get the probability of the specfic glycan on the specific site.
+ var peptide_site = glycositePair.Item1 - 1;
+ local_peptide += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]";
- var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + loc.Item1 - 2 : -1;
- local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]";
+ var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + glycositePair.Item1 - 2 : -1;
+ local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]";
}
}
+
+ ///
+ /// Generate the site specific localization information.
+ ///
+ ///
+ /// Site specific localization information. ex. {1[1,0.2][2,0.8]} means glycan 1 and 2 are located on glycosite 1 and 2 with 20% and 80% probability.
public static string SiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb)
{
string local = "";
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs
index 3d56c5cd0..d68eeafd2 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs
@@ -17,8 +17,8 @@ public class LocalizationGraph
public ModBox ModBox { get; }
public ModBox[] ChildModBoxes { get; set; }
- public double NoLocalCost{get; set;} //Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here.
- public double TotalScore { get; set; } //Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions.
+ public double NoLocalCost{get; set;} // Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here.
+ public double TotalScore { get; set; } // Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions.
public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, int id)
{
@@ -36,7 +36,13 @@ public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, in
}
//The modification problem is turned into a Directed Acyclic Graph. The Graph was build with matrix, and dynamic programming is used.
- //The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode.
+ ///
+ /// The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode.
+ ///
+ /// The space to store the data
+ /// The MS2 scan
+ ///
+ ///
public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List products)
{
var boxSatisfyBox = BoxSatisfyBox(localizationGraph.ChildModBoxes);
@@ -44,17 +50,17 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW
for (int i = 0; i < localizationGraph.ModPos.Length; i++)
{
//maxLength: the most mods we can have up to current mod pos; minlengtt: the least mods we can have up to current mod pos.
- int maxLength = i + 1;
- int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i);
-
+ int maxLength = i + 1; //For the first node, the maxlength is 1. Means we max have one glycan in this positioin.
+ int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i); //In order to get min number, the min = number of glycan in the box - number of node from the last.
+ // Total 3 glycan in the box, end position is 7, then for position 5, the min = 3 - (7-5) = 1.
for (int j = 0; j < localizationGraph.ChildModBoxes.Length; j++)
{
if (localizationGraph.ChildModBoxes[j].NumberOfMods <= maxLength && localizationGraph.ChildModBoxes[j].NumberOfMods >= minlength)
{
- AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]);
+ AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]); //chekc the num of glycan in this node is make sense.
double cost = 0;
- if (i != localizationGraph.ModPos.Length - 1)
+ if (i != localizationGraph.ModPos.Length - 1) // check the node is not the last one.
{
var fragments = GlycoPeptides.GetLocalFragment(products, localizationGraph.ModPos, i, localizationGraph.ModBox, localizationGraph.ChildModBoxes[j]);
cost = CalculateCost(theScan, productTolerance, fragments);
@@ -77,7 +83,7 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW
{
adjNode.AllSources.Add(prej);
- var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost;
+ var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost; //Try to get the max cost from previous AdjNode.
if (tempCost > maxCost)
{
adjNode.CummulativeSources.Clear();
@@ -110,7 +116,13 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW
localizationGraph.TotalScore = localizationGraph.array[localizationGraph.ModPos.Length - 1][localizationGraph.ChildModBoxes.Length - 1].maxCost + noLocalScore;
}
- //Based on our implementation of Graph localization. We need to calculate cost between two nearby nodes (glycosites)
+ ///
+ /// Calculate the cost/Score of the Scan.
+ ///
+ ///
+ ///
+ ///
+ /// The Score
public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List fragments)
{
double score = 0;
@@ -128,7 +140,12 @@ public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance pr
return score;
}
- //Check if array1 contains array2 with repeats numbers.
+ ///
+ /// Check whether array1 contains everything in array2 (including repeated numbers).
+ ///
+ ///
+ ///
+ /// True or false
private static bool TryGetLeft(int[] array1, int[] array2)
{
//Get compliment box
@@ -148,9 +165,12 @@ private static bool TryGetLeft(int[] array1, int[] array2)
return true;
}
- //The Directed Acyclic Graph is build from left to right. In the process, we need to know which node can linked to nodes from its left.
- //Since node contains Childbox. We name this function as BoxSatisfyBox.
- //The function defines how a childBox could be linked from all childBoxes.
+
+ ///
+ /// Build a chart for the node connection rule. Used the chart to check if the next node could be linked to the previous node.
+ ///
+ ///
+ /// Chart (one column is previous, one column is current, the value is boolean)
public static Dictionary BoxSatisfyBox(ModBox[] childBoxes)
{
Dictionary boxIdBoxes = new Dictionary();
@@ -160,7 +180,7 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes)
for (int j = 0; j <= i; j++)
{
if (childBoxes[i].NumberOfMods <= childBoxes[j].NumberOfMods + 1 && (childBoxes[j].NumberOfMods ==0 || TryGetLeft(childBoxes[i].ModIds, childBoxes[j].ModIds)))
- {
+ { //Check the next node could be the same or one more mod than the previous node. Besdies, the next node should contain all mods that the previous node has.
idBoxes[j] = true;
}
}
@@ -170,8 +190,13 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes)
return boxIdBoxes;
}
- //Get all path with hightest score of Directed Acyclic Graph by recursion.
- //Start from the last AdjNode[row-1 ][col-1], go back to it Sources, which contains the previous AdjNode with the highest cost.
+
+ ///
+ /// Get all the highest-scoring paths in the graph. Start from the last AdjNode[row-1][col-1], then go back to its Sources, which contain the previous AdjNode with the highest cost.
+ ///
+ ///
+ ///
+ /// The path(s) (one or more) with the highest score
public static List GetAllHighestScorePaths(AdjNode[][] array, ModBox[] boxes)
{
List allPaths = new List();
@@ -207,7 +232,12 @@ private static void GetAllHighestScorePathHelper(List allPaths, AdjNode[]
}
}
- //Get one path of Directed Acyclic Graph by recursion.
+ ///
+ /// Get one path (the topmost one) through the localization graph by recursion.
+ ///
+ ///
+ ///
+ ///
public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes)
{
@@ -216,7 +246,7 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes)
int[] temp = new int[xlength];
- temp[xlength - 1] = ylength - 1;
+ temp[xlength - 1] = ylength - 1; // That is the last node in the graph, position is last one, and the childBpx is also the last one means the whole glycan.
FirstPathHelper(array, xlength - 1, ylength - 1, temp);
@@ -225,26 +255,29 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes)
private static void FirstPathHelper(AdjNode[][] array, int xind, int yind, int[] temp)
{
- if (xind == 0)
+ if (xind == 0) //xind = 0 means, there is just one glycosite. So the node must be the last one in the childBox = whole glycan.
{
- return;
+ return; // temp[0] = last one in the childBox = length-1.
}
- var pre = array[xind][yind].CummulativeSources.First();
+ var pre = array[xind][yind].CummulativeSources.First(); // The first one in the CummulativeSources is the toppest previous node.
xind--;
yind = pre;
temp[xind] = yind;
FirstPathHelper(array, xind, yind, temp);
}
- //The original path we get is just an array of AdjNode positions. For example, path = [1, 1, 2, 2] means the best nodes are at array[0][1], array[1][1], array[2][2], array[3][2]
- //This function here is to transfer the path into localized Route. Route contains each glycosite with glycanId.
- //Basicly, any change from left to right of the path indicates a modification. For example, the path = [1, 1, 2, 2] which means there is a modification at ModPos[0] and ModPos[2]
+ ///
+ /// Convert the path information into a Route object.
+ ///
+ ///
+ /// ex. [1,1,2,2,5] means the nodes in the localization graph: the first node is ModBox1 ... the last node is ModBox5
+ /// Route object, presented in glycosite-glycan pair format
public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] path)
{
Route route = new Route();
- if (path.Length == 1)
+ if (path.Length == 1) //If there is only one number in the path, we will assined "the first glycan in the childBox" to the glycosite.
{
bool onlyOneLocalized = false;
if (localizationGraph.TotalScore > 0)
@@ -255,7 +288,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[]
return route;
}
- //Add first mod. If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox.
+ //Add first mod in the first glycosite.
+ //If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox.
//Otherwise childBoxes[path[0]].ModIds.Count == 1 and childBoxes[path[0]].ModIds only contains one ModId.
if (localizationGraph.ChildModBoxes[path[0]].ModIds.Count() != 0)
{
@@ -264,7 +298,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[]
for (int i = 1; i < path.Length; i++)
{
- //If there is a change of the path, get the difference between the two Adjnodes of the array.
+ // If there is a change of the path, get the difference between the two Adjnodes of the array.
+ // If the node is the same childBox as the previous node. That means there is no modification at this glycosite. We can move on to the next glycosite.
if (path[i] != path[i - 1])
{
var left = GetLeft(localizationGraph.array[i][path[i]].ModBox.ModIds, localizationGraph.array[i - 1][path[i - 1]].ModBox.ModIds).First();
@@ -277,7 +312,13 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[]
return route;
}
- //Get the difference between array 1 and array 2 with repeat numbers.
+
+ ///
+ /// Get the difference in glycan between two node.
+ ///
+ /// The composition in this node. Ex. (0,0,1,2) means the cumulative glycoBox is composed of glycan0 + glycan0 + glycan 1 + glycan 2
+ ///
+ /// The difference of the glycan composition between the two node.
public static int[] GetLeft(int[] array1, int[] array2)
{
//Get compliment box
@@ -340,13 +381,19 @@ private static void PathHelper_CalP(List allPaths, LocalizationGraph loca
}
//Dictionary>> is >
+ ///
+ /// Generate the localization probability chart for each glycosite.
+ ///
+ ///
+ ///
+ /// A dictionary representing the probability chart for each glycosite. Ex. key = 2 (ModPos), [(0,0.1),(1,0.3),(2,0.6)] means glycan 0 is 10%, glycan 1 is 30%, glycan 2 is 60%
public static Dictionary>> CalSiteSpecificLocalizationProbability(List routes, int[] modPos)
{
Dictionary>> probabilityMatrix = new Dictionary>>();
Tuple[][] matrix = new Tuple[modPos.Length][];
- for (int i = 0; i < modPos.Length; i++)
+ for (int i = 0; i < modPos.Length; i++) // There are all localization set in the route, we just try to sort the certain glycosite-glycan pairs into the corresponding glycosite.
{
matrix[i] = new Tuple[routes.Count];
for (int j = 0; j < routes.Count; j++)
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs
index 7113366bb..84f3c61df 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs
@@ -1,6 +1,6 @@
namespace EngineLayer
{
- public class ModBox
+ public class ModBox //The superclass of GlycanBox
{
//One peptide can have several modifications. The combined modifications are grouped as a modification box. Used for localization.
//ModBox -- a defined combination of modifications will be considered to modify on one peptide. The box means the combined group of modification.
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs
index 92f915a3b..930e33d24 100644
--- a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs
+++ b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs
@@ -1,17 +1,11 @@
namespace EngineLayer
{
-
+ ///
+ /// The structure of the glycan
+ ///
public class Node
{
- public Node(char v, Node l, Node r, Node m)
- {
- Value = v;
- LeftChild = l;
- RightChild = r;
- MiddleChild = m;
- Level = null;
- }
public Node(char v)
{
diff --git a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs
index 137bc00fb..070bb129d 100644
--- a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs
+++ b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs
@@ -400,7 +400,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch,
double highestMassPeptideToLookFor, List peptideIndex, MassDiffAcceptor massDiffAcceptor, double maxMassThatFragmentIonScoreIsDoubled, DissociationType dissociationType)
{
// get all theoretical fragments this experimental fragment could be
- for (int i = 0; i < binsToSearch.Count; i++)
+ for (int i = 0; i < binsToSearch.Count; i++) //binsToSearch is the list of fragment bins from the spectrum
{
List peptideIdsInThisBin = FragmentIndex[binsToSearch[i]];
@@ -410,11 +410,11 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch,
// get index for highest mass allowed
int highestPeptideMassIndex = peptideIdsInThisBin.Count - 1;
- if (!Double.IsInfinity(highestMassPeptideToLookFor))
+ if (!Double.IsInfinity(highestMassPeptideToLookFor)) //only look up an upper index when the highest mass is not infinity
{
- highestPeptideMassIndex = BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex);
+ highestPeptideMassIndex = BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex); //get index for the maximum monoisotopic mass allowed
- for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++)
+ for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++) //find the highest peptide mass index
{
int nextId = peptideIdsInThisBin[j];
var nextPep = peptideIndex[nextId];
@@ -432,7 +432,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch,
if (dissociationType == DissociationType.LowCID)
{
// add intensity for each peptide candidate in the scoring table up to the maximum allowed precursor mass
- for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++)
+ for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++)
{
int id = peptideIdsInThisBin[j];
@@ -447,14 +447,14 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch,
}
}
else
- {
- // add +1 score for each peptide candidate in the scoring table up to the maximum allowed precursor mass
- for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++)
+ {
+ // count +1 in the scoring table for each peptide candidate in the bin, up to the maximum allowed precursor mass
+ for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) // iterate through the peptide index in the bin
{
int id = peptideIdsInThisBin[j];
scoringTable[id]++;
- // add possible search results to the hashset of id's
+ // once the peptide's count reaches byteScoreCutoff and the precursor mass difference is accepted, add its id to the set of peptides possibly observed
if (scoringTable[id] == byteScoreCutoff && massDiffAcceptor.Accepts(scanPrecursorMass, peptideIndex[id].MonoisotopicMass) >= 0)
{
idsOfPeptidesPossiblyObserved.Add(id);
diff --git a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs
index 3a539c073..145d5d163 100644
--- a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs
+++ b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs
@@ -210,19 +210,31 @@ public PsmFromTsv(string line, char[] split, Dictionary parsedHeade
BetaPeptideChildScanMatchedIons.Remove(Ms2ScanNumber);
}
- //For Glyco
- GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture);
- GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]];
- GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]];
- var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]];
- if (localizationLevel != null)
+ //For Glyco
+ try // Try is so that glyco and non-glyco psms can be read from the same file
{
- if (localizationLevel.Equals("NA"))
- GlycanLocalizationLevel = null;
- else
- GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel);
+ GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture);
+ GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]];
+ GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]];
+ var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]];
+ if (localizationLevel != null)
+ {
+ if (localizationLevel.Equals("NA"))
+ GlycanLocalizationLevel = null;
+ else
+ GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel);
+ }
+ LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]];
+
+ }
+ catch
+ {
+ GlycanMass = null;
+ GlycanComposition = null;
+ GlycanStructure = null;
+ GlycanLocalizationLevel = null;
+ LocalizedGlycan = null;
}
- LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]];
}
///
diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs
index 3d3e4ae0f..e3c7b0f6a 100644
--- a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs
+++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs
@@ -150,7 +150,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List();
//For each ms2scan, try to find the best candidate psm from the psms list. Do the localizaiton analysis. Add it into filteredAllPsms.
- foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => p.ScanNumber))
+ foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => (p.ScanNumber, p.FullFilePath)))
{
var glycos = RemoveSimilarSequenceDuplicates(gsmsPerScan.OrderByDescending(p=>p.Score).ToList());
diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs
index 6578a1790..bc00fe9f8 100644
--- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs
+++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs
@@ -31,6 +31,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List dbFilenameList, List currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List