Skip to content

Commit

Permalink
Library reader change to read pdeep and neutral loss (#2056)
Browse files Browse the repository at this point in the history
* ReaderChangeToReadPdeepAndNloss

* ChangesToMakeItWorkOnPdeepAndNLoss

* DidChangesAboutPdeepReaderAndRelatedTests

* smallChanges

Co-authored-by: Yuling1996 <dai59@wisc.edu>
Co-authored-by: Anthony <cesnik@wisc.edu>
  • Loading branch information
YulingDai and Anthony authored Jun 24, 2021
1 parent 5ef29e5 commit 4944681
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 3 deletions.
8 changes: 7 additions & 1 deletion EngineLayer/SpectralLibrarySearch/LibrarySpectrum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,16 @@ public override string ToString()
{
double intensityFraction = matchedIon.Intensity / maxIntensity;

string neutralLoss = null;
if (matchedIon.NeutralTheoreticalProduct.NeutralLoss != 0)
{
neutralLoss = "-" + matchedIon.NeutralTheoreticalProduct.NeutralLoss;
}

spectrum.Append("\n"+matchedIon.Mz + "\t" + intensityFraction + "\t" + "\"" +
matchedIon.NeutralTheoreticalProduct.ProductType.ToString() +
matchedIon.NeutralTheoreticalProduct.FragmentNumber.ToString() + "^" +
matchedIon.Charge + "/" + 0 + "ppm" + "\"");
matchedIon.Charge + neutralLoss + "/" + 0 + "ppm" + "\"");
}

return spectrum.ToString();
Expand Down
147 changes: 145 additions & 2 deletions EngineLayer/SpectralLibrarySearch/SpectralLibrary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ public class SpectralLibrary
{ "Carbamidomethyl", "[Common Fixed:Carbamidomethyl on C]" }
};

// Lookup from the modification name found in a pDeep library "Name" line
// to the bracketed MetaMorpheus modification annotation that is spliced into the sequence.
private static Dictionary<string, string> pDeepToMetaMorpheusModDictionary = new Dictionary<string, string>
{
    ["Oxidation"] = "[Common Variable:Oxidation on M]",
    ["CAM"] = "[Common Fixed:Carbamidomethyl on C]"
};

public SpectralLibrary(List<string> pathsToLibraries)
{
LibraryPaths = pathsToLibraries;
Expand Down Expand Up @@ -149,7 +155,14 @@ private LibrarySpectrum ReadSpectrumFromLibraryFile(string path, long byteOffset
reader.DiscardBufferedData();

// return the library spectrum
return ReadLibrarySpectrum(reader);
if (path.Contains("pdeep"))
{
return ReadLibrarySpectrum_pDeep(reader);
}
else
{
return ReadLibrarySpectrum(reader);
}
}

private LibrarySpectrum ReadLibrarySpectrum(StreamReader reader, bool onlyReadHeader = false)
Expand All @@ -159,6 +172,7 @@ private LibrarySpectrum ReadLibrarySpectrum(StreamReader reader, bool onlyReadHe
char[] commentSplit = new char[] { ' ', ':', '=' };
char[] modSplit = new char[] { '=', '/' };
char[] fragmentSplit = new char[] { '\t', '\"', ')', '/' };
char[] neutralLossSplit = new char[] { '-' };

bool readingPeaks = false;
string sequence = null;
Expand Down Expand Up @@ -316,6 +330,13 @@ private LibrarySpectrum ReadLibrarySpectrum(StreamReader reader, bool onlyReadHe
// read fragment type, number
Match regexMatchResult = IonParserRegex.Match(split[2]);

double neutralLoss = 0;
if (split[2].Contains("-"))
{
String[] neutralLossInformation = split[2].Split(neutralLossSplit, StringSplitOptions.RemoveEmptyEntries).ToArray();
neutralLoss = double.Parse(neutralLossInformation[1]);
}

string fragmentType = regexMatchResult.Groups[1].Value;
int fragmentNumber = int.Parse(regexMatchResult.Groups[2].Value);
int fragmentCharge = 1;
Expand All @@ -330,6 +351,120 @@ private LibrarySpectrum ReadLibrarySpectrum(StreamReader reader, bool onlyReadHe
//TODO: figure out terminus
FragmentationTerminus terminus = (FragmentationTerminus)Enum.Parse(typeof(FragmentationTerminus), "None", true);

//TODO: figure out amino acid position
var product = new Product(peakProductType, terminus, experMz, fragmentNumber, 0, neutralLoss);

matchedFragmentIons.Add(new MatchedFragmentIon(ref product, experMz, experIntensity, fragmentCharge));
}
}

return new LibrarySpectrum(sequence, precursorMz, z, matchedFragmentIons, rt);
}

private LibrarySpectrum ReadLibrarySpectrum_pDeep(StreamReader reader, bool onlyReadHeader = false)
{
char[] nameSplit = new char[] { '/', '_' };
char[] mwSplit = new char[] { ':' };
char[] commentSplit = new char[] { ' ', ':', '=' };
char[] modSplit = new char[] { '/', '(', ')' };
char[] fragmentSplit = new char[] { '\t', '/' };

bool readingPeaks = false;
string sequence = null;
int z = 2;
double precursorMz = 0;
double rt = 0;
List<MatchedFragmentIon> matchedFragmentIons = new List<MatchedFragmentIon>();

while (reader.Peek() > 0)
{
string line = reader.ReadLine();
string[] split;

if (line.StartsWith("Name", StringComparison.InvariantCultureIgnoreCase))
{
if (sequence != null)
{
return new LibrarySpectrum(sequence, precursorMz, z, matchedFragmentIons, rt);
}

split = line.Split(nameSplit);

// get sequence
sequence = split[0].Replace("Name:", string.Empty).Trim();

// get charge
z = int.Parse(split[1].Trim());

string[] mods = split[2].Split(modSplit, StringSplitOptions.RemoveEmptyEntries);
for (int i = mods.Length - 1; i > 0; i--)
{
string[] modInfo = mods[i].Split(',');
int index = Convert.ToInt32(modInfo[0]);
string mod = modInfo[2];
string metaMorpheusMod = pDeepToMetaMorpheusModDictionary[mod];
//add the mod into the sequence
string leftSeq = sequence.Substring(0, index + 1);
string rightSeq = sequence.Substring(index + 1);
sequence = leftSeq + metaMorpheusMod + rightSeq;
}

}
else if (line.StartsWith("Comment", StringComparison.InvariantCultureIgnoreCase))
{
split = line.Split(commentSplit);

// get precursor m/z in comment
int indOfParent = Array.IndexOf(split, "Parent");
if (indOfParent > 0)
{
precursorMz = double.Parse(split[indOfParent + 1]);
}

// get RT
int indOfRt = Array.IndexOf(split, "RTInSeconds");
if (indOfRt > 0)
{
rt = double.Parse(split[indOfRt + 1]);
}
}
else if (line.StartsWith("Num peaks", StringComparison.InvariantCultureIgnoreCase))
{
if (onlyReadHeader)
{
return new LibrarySpectrum(sequence, precursorMz, z, matchedFragmentIons, rt);
}

// this assumes that the peaks are listed after the "Num peaks" line
readingPeaks = true;
}
else if (readingPeaks && line != "")
{
split = line.Split(fragmentSplit, StringSplitOptions.RemoveEmptyEntries);

// read fragment m/z
var experMz = double.Parse(split[0], CultureInfo.InvariantCulture);

// read fragment intensity
var experIntensity = double.Parse(split[1], CultureInfo.InvariantCulture);

// read fragment type, number

string fragmentType = split[2].ToCharArray()[0].ToString();
int fragmentNumber = int.Parse(new string(split[2].Split(new char[] { '^' })[0].Where(Char.IsDigit).ToArray()));
int fragmentCharge = 1;


if (split[2].Contains('^'))
{
fragmentCharge = int.Parse(split[2].Split('^')[1]);
}

ProductType peakProductType = (ProductType)Enum.Parse(typeof(ProductType), fragmentType, true);

//TODO: figure out terminus
FragmentationTerminus terminus = (FragmentationTerminus)Enum.Parse(typeof(FragmentationTerminus), "None", true);

//TODO: figure out amino acid position
var product = new Product(peakProductType, terminus, experMz, fragmentNumber, 0, 0);

Expand Down Expand Up @@ -360,7 +495,15 @@ private void IndexSpectralLibrary(string path)
reader.DiscardBufferedData();

// parse the header
var libraryItem = ReadLibrarySpectrum(reader, onlyReadHeader: true);
LibrarySpectrum libraryItem;
if (path.Contains("pdeep"))
{
libraryItem = ReadLibrarySpectrum_pDeep(reader, onlyReadHeader: true);
}
else
{
libraryItem = ReadLibrarySpectrum(reader, onlyReadHeader: true);
}

// add the spectrum to the index
SequenceToFileAndLocation.TryAdd(libraryItem.Name, (path, byteOffset));
Expand Down
Loading

0 comments on commit 4944681

Please sign in to comment.