From 9a1023af8c5cdf71ad7c2a48225d272f548a180c Mon Sep 17 00:00:00 2001 From: jalew188 Date: Fri, 28 Jun 2024 17:02:36 +0200 Subject: [PATCH 1/2] replace space with _ in NC-term PTMs --- README.md | 18 +- nbs_tests/model/ccs.ipynb | 122 ++++----- nbs_tests/model/featurize.ipynb | 4 +- nbs_tests/model/ms2.ipynb | 82 +++--- nbs_tests/model/rt.ipynb | 2 +- nbs_tests/protein/fasta.ipynb | 244 +++++++++--------- .../maxquant_frag_reader.ipynb | 16 +- nbs_tests/spec_lib/predict_lib.ipynb | 40 +-- nbs_tests/spec_lib/test_translate_tsv.ipynb | 2 +- nbs_trials/precursor_table.csv | 2 +- nbs_trials/speclib_from_dda.ipynb | 2 +- nbs_trials/test_argparse.ipynb | 4 +- peptdeep/constants/default_settings.yaml | 2 +- peptdeep/model/featurize.py | 2 - peptdeep/protein/fasta.py | 6 +- peptdeep/psm_frag_reader/psmlabel_reader.py | 2 +- peptdeep/settings.py | 2 +- peptdeep/webui/library_ui.py | 2 +- 18 files changed, 276 insertions(+), 278 deletions(-) diff --git a/README.md b/README.md index 09ec9372..73b30de1 100644 --- a/README.md +++ b/README.md @@ -381,12 +381,12 @@ common: user_defined_modifications: {} # For example, # user_defined_modifications: - # "Dimethyl2@Any N-term": + # "Dimethyl2@Any_N-term": # composition: "H(2)2H(2)C(2)" # modloss_composition: "H(0)" # can be without if no modloss # "Dimethyl2@K": # composition: "H(2)2H(2)C(2)" - # "Dimethyl6@Any N-term": + # "Dimethyl6@Any_N-term": # composition: "2H(4)13C(2)" # "Dimethyl6@K": # composition: "2H(4)13C(2)" @@ -496,7 +496,7 @@ library: fix_mods: - Carbamidomethyl@C var_mods: - - Acetyl@Protein N-term + - Acetyl@Protein_N-term - Oxidation@M special_mods: [] # normally for Phospho or GlyGly@K special_mods_cannot_modify_pep_n_term: False @@ -504,8 +504,8 @@ library: labeling_channels: {} # For example, # labeling_channels: - # 0: ['Dimethyl@Any N-term','Dimethyl@K'] - # 4: ['Dimethyl:2H(2)@Any N-term','Dimethyl:2H(2)@K'] + # 0: ['Dimethyl@Any_N-term','Dimethyl@K'] + # 4: ['Dimethyl:2H(2)@Any_N-term','Dimethyl:2H(2)@K'] # 
8: [...] min_var_mod_num: 0 max_var_mod_num: 2 @@ -565,7 +565,7 @@ See examples: import pandas as pd df = pd.DataFrame({ 'sequence': ['ACDEFGHIK','LMNPQRSTVK','WYVSTR'], - 'mods': ['Carbamidomethyl@C','Acetyl@Protein N-term;Phospho@S',''], + 'mods': ['Carbamidomethyl@C','Acetyl@Protein_N-term;Phospho@S',''], 'mod_sites': ['2','0;7',''], 'charge': [2,3,1], }) @@ -593,7 +593,7 @@ df[['sequence','mods','mod_sites']] | | sequence | mods | mod_sites | | --- | --- | --- | --- | | 0 | ACDEFGHIK | Carbamidomethyl@C | 2 | -| 1 | LMNPQRSTVK | Acetyl@Protein N-term;Phospho@S | 0;7 | +| 1 | LMNPQRSTVK | Acetyl@Protein_N-term;Phospho@S | 0;7 | | 2 | WYVSTR | | | ##### precursor_table @@ -605,7 +605,7 @@ df | | sequence | mods | mod_sites | charge | | --- | --- | --- | --- | --- | | 0 | ACDEFGHIK | Carbamidomethyl@C | 2 | 2 | -| 1 | LMNPQRSTVK | Acetyl@Protein N-term;Phospho@S | 0;7 | 3 | +| 1 | LMNPQRSTVK | Acetyl@Protein_N-term;Phospho@S | 0;7 | 3 | | 2 | WYVSTR | | | 1 | > Columns of `proteins` and `genes` are optional for these txt/tsv/csv @@ -686,7 +686,7 @@ model_mgr: # alphabase modification to modifications of other search engines # For example, # psm_modification_mapping: - # Dimethyl@Any N-term: + # Dimethyl@Any_N-term: # - _(Dimethyl-n-0) # - _(Dimethyl) # Dimethyl:2H(2)@K: diff --git a/nbs_tests/model/ccs.ipynb b/nbs_tests/model/ccs.ipynb index a9f808fa..3a2859ab 100644 --- a/nbs_tests/model/ccs.ipynb +++ b/nbs_tests/model/ccs.ipynb @@ -201,7 +201,7 @@ " \n", " 0\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -210,7 +210,7 @@ " \n", " 1\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -219,7 +219,7 @@ " \n", " 2\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " 
Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -228,7 +228,7 @@ " \n", " 3\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -237,7 +237,7 @@ " \n", " 4\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -246,7 +246,7 @@ " \n", " 5\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -255,7 +255,7 @@ " \n", " 6\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -264,7 +264,7 @@ " \n", " 7\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -273,7 +273,7 @@ " \n", " 8\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -282,7 +282,7 @@ " \n", " 9\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -294,16 +294,16 @@ ], "text/plain": [ " sequence mods mod_sites \\\n", - "0 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "1 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "2 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "3 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "4 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "5 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", - "6 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "7 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "8 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "9 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "0 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "1 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "9 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", "\n", " nAA charge ccs \n", "0 11 2 1 \n", @@ -327,7 +327,7 @@ "repeat = 10\n", "precursor_df = pd.DataFrame({\n", " 'sequence': ['AGHCEWQMKYR']*repeat,\n", - " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n", + " 'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n", " 'mod_sites': ['0;4;8']*repeat,\n", " 'nAA': [11]*repeat,\n", " 'charge': [2]*repeat,\n", @@ -384,7 +384,7 @@ " \n", " 0\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -394,7 +394,7 @@ " \n", " 1\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -404,7 +404,7 @@ " \n", " 2\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -414,7 +414,7 @@ " \n", " 3\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -424,7 +424,7 @@ " \n", " 4\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -434,7 +434,7 @@ " \n", " 5\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -444,7 +444,7 @@ " \n", " 6\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -454,7 +454,7 @@ " \n", " 7\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 
2\n", @@ -464,7 +464,7 @@ " \n", " 8\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -474,7 +474,7 @@ " \n", " 9\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -487,16 +487,16 @@ ], "text/plain": [ " sequence mods mod_sites \\\n", - "0 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "1 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "2 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "3 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "4 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "5 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "6 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "7 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "8 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "9 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "0 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "1 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", + "9 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", "\n", " nAA charge ccs ccs_pred \n", "0 11 2 1 0.032652 \n", @@ -561,7 +561,7 @@ " \n", " 0\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -573,7 +573,7 @@ " \n", " 1\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -585,7 +585,7 @@ " \n", " 2\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -597,7 +597,7 @@ " \n", " 3\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -609,7 +609,7 @@ " \n", " 4\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -621,7 +621,7 @@ " \n", " 5\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -633,7 +633,7 @@ " \n", " 6\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -645,7 +645,7 @@ " \n", " 7\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -657,7 +657,7 @@ " \n", " 8\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -669,7 +669,7 @@ " \n", " 9\n", " AGHCEWQMKYR\n", - " 
Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 2\n", @@ -684,16 +684,16 @@ ], "text/plain": [ " sequence mods mod_sites \\\n", - "0 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "1 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "2 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "3 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "4 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "5 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "6 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "7 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "8 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "9 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "0 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "1 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "9 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", "\n", " nAA charge ccs ccs_pred precursor_mz mobility_pred \n", "0 11 2 1 0.032652 762.329553 0.000081 \n", diff --git a/nbs_tests/model/featurize.ipynb b/nbs_tests/model/featurize.ipynb index 7eafdd2b..8adb958b 100644 --- a/nbs_tests/model/featurize.ipynb +++ b/nbs_tests/model/featurize.ipynb @@ -66,10 +66,10 @@ "outputs": [], "source": [ "#| hide\n", - "x = parse_mod_feature(5, ['Acetyl@Protein N-term','Phospho@S','Oxidation@M'], [0,-1,1])\n", + "x = parse_mod_feature(5, ['Acetyl@Protein_N-term','Phospho@S','Oxidation@M'], [0,-1,1])\n", "assert x.shape == (7, mod_feature_size)\n", "assert np.all(x[1,:]==MOD_TO_FEATURE['Oxidation@M'])\n", - "assert np.all(x[0,:]==MOD_TO_FEATURE['Acetyl@Protein N-term'])\n", + "assert np.all(x[0,:]==MOD_TO_FEATURE['Acetyl@Protein_N-term'])\n", "assert np.all(x[-1,:]==MOD_TO_FEATURE['Phospho@S'])\n", "assert np.all(x[(2,3,4,5),:]==0)" ] diff --git a/nbs_tests/model/ms2.ipynb b/nbs_tests/model/ms2.ipynb index 9fe774e6..301c4caa 100644 --- a/nbs_tests/model/ms2.ipynb +++ b/nbs_tests/model/ms2.ipynb @@ -236,7 +236,7 @@ " \n", " 0\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -248,7 +248,7 @@ " \n", " 1\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -260,7 +260,7 @@ " \n", " 2\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -272,7 +272,7 @@ " \n", " 3\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -284,7 +284,7 @@ " \n", " 4\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " 
Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -296,7 +296,7 @@ " \n", " 5\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -308,7 +308,7 @@ " \n", " 6\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -320,7 +320,7 @@ " \n", " 7\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -332,7 +332,7 @@ " \n", " 8\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -344,7 +344,7 @@ " \n", " 9\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -359,16 +359,16 @@ ], "text/plain": [ " sequence mods mod_sites \\\n", - "0 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "1 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "2 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "3 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "4 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "5 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "6 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "7 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "8 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "9 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "0 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", + "1 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "9 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", "\n", " nAA nce instrument charge frag_start_idx frag_stop_idx \n", "0 11 20 QE 1 0 10 \n", @@ -396,7 +396,7 @@ "repeat = 10\n", "precursor_df = pd.DataFrame({\n", " 'sequence': ['AGHCEWQMKYR']*repeat,\n", - " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n", + " 'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n", " 'mod_sites': ['0;4;8']*repeat,\n", " 'nAA': [11]*repeat,\n", " 'nce': [20]*repeat,\n", @@ -643,7 +643,7 @@ " \n", " 0\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -659,7 +659,7 @@ " \n", " 1\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -675,7 +675,7 @@ " \n", " 2\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -691,7 +691,7 @@ " \n", " 3\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -707,7 +707,7 @@ " \n", " 4\n", 
" AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -723,7 +723,7 @@ " \n", " 5\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -739,7 +739,7 @@ " \n", " 6\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -755,7 +755,7 @@ " \n", " 7\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -771,7 +771,7 @@ " \n", " 8\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -787,7 +787,7 @@ " \n", " 9\n", " AGHCEWQMKYR\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat...\n", " 0;4;8\n", " 11\n", " 20\n", @@ -806,16 +806,16 @@ ], "text/plain": [ " sequence mods mod_sites \\\n", - "0 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "1 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "2 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "3 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "4 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "5 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "6 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "7 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", - "8 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 
0;4;8 \n", - "9 AGHCEWQMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "0 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "1 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "9 AGHCEWQMKYR Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", "\n", " nAA nce instrument charge frag_start_idx frag_stop_idx PCC \\\n", "0 11 20 QE 1 0 10 0.248708 \n", diff --git a/nbs_tests/model/rt.ipynb b/nbs_tests/model/rt.ipynb index 9bf8803e..ed952b40 100644 --- a/nbs_tests/model/rt.ipynb +++ b/nbs_tests/model/rt.ipynb @@ -135,7 +135,7 @@ "def create_test_dataframe_with_identical_rows(nrows = 10):\n", " precursor_df = pd.DataFrame({\n", " 'sequence': ['AGHCEWQMKYR']*nrows,\n", - " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*nrows,\n", + " 'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*nrows,\n", " 'mod_sites': ['0;4;8']*nrows,\n", " 'nAA': [11]*nrows,\n", " 'rt_norm': [0.6]*nrows\n", diff --git a/nbs_tests/protein/fasta.ipynb b/nbs_tests/protein/fasta.ipynb index 1256ad16..2ae2a562 100644 --- a/nbs_tests/protein/fasta.ipynb +++ b/nbs_tests/protein/fasta.ipynb @@ -584,7 +584,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 7\n", " xx\n", @@ -597,7 +597,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein 
N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 7\n", " xx\n", @@ -675,7 +675,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;3\n", " 12\n", " xx\n", @@ -714,7 +714,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 13\n", " xx\n", @@ -727,7 +727,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 13\n", " xx\n", @@ -766,7 +766,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;8\n", " 13\n", " xx;yy\n", @@ -779,7 +779,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 13\n", " xx;yy\n", @@ -844,7 +844,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;14;3\n", " 19\n", " xx\n", @@ -857,7 +857,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;3\n", " 19\n", " xx\n", @@ -922,7 +922,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 20\n", " xx\n", @@ -935,7 +935,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;15;4\n", " 20\n", " xx\n", @@ -948,7 +948,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...\n", + " Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...\n", " 0;1;15;4\n", " 20\n", " xx\n", @@ -961,7 +961,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein 
N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 20\n", " xx\n", @@ -1009,36 +1009,36 @@ " is_prot_cterm mods \\\n", "0 False Oxidation@M;Carbamidomethyl@C \n", "1 False Carbamidomethyl@C \n", - "2 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "3 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "2 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "3 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "4 True Oxidation@M \n", "5 True \n", "6 True Oxidation@M \n", "7 True \n", "8 False Carbamidomethyl@C \n", - "9 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "9 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "10 False Oxidation@M;Carbamidomethyl@C \n", "11 False Carbamidomethyl@C \n", - "12 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "13 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "12 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "13 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "14 True Oxidation@M \n", "15 True \n", - "16 True Acetyl@Protein N-term;Oxidation@M \n", - "17 True Acetyl@Protein N-term \n", + "16 True Acetyl@Protein_N-term;Oxidation@M \n", + "17 True Acetyl@Protein_N-term \n", "18 True Oxidation@M \n", "19 True \n", "20 False Oxidation@M;Carbamidomethyl@C \n", "21 False Carbamidomethyl@C \n", - "22 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "23 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "22 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "23 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "24 False Oxidation@M;Carbamidomethyl@C \n", "25 False Oxidation@M;Carbamidomethyl@C \n", "26 False Oxidation@M;Oxidation@M;Carbamidomethyl@C \n", "27 False Carbamidomethyl@C \n", - "28 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "29 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "30 False Acetyl@Protein N-term;Oxidation@M;Oxidation@M;... 
\n", - "31 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "28 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "29 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "30 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n", + "31 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "\n", " mod_sites nAA proteins genes \n", "0 1;4 7 xx \n", @@ -1102,15 +1102,15 @@ " assert 'Carbamidomethyl@C' in mods\n", " else:\n", " assert 'Carbamidomethyl@C' not in mods\n", - " # test Acetyl@Protein N-term\n", - " if 'Acetyl@Protein N-term' in mods:\n", + " # test Acetyl@Protein_N-term\n", + " if 'Acetyl@Protein_N-term' in mods:\n", " assert _lib.precursor_df.is_prot_nterm[i]\n", " assert '0' in sites\n", " if '0' in mods:\n", " assert _lib.precursor_df.is_prot_nterm[i]\n", - " assert 'Acetyl@Protein N-term' in mods\n", + " assert 'Acetyl@Protein_N-term' in mods\n", " if not _lib.precursor_df.is_prot_nterm[i]:\n", - " assert 'Acetyl@Protein N-term' not in mods\n", + " assert 'Acetyl@Protein_N-term' not in mods\n", " # test Oxidation@M\n", " if 'Oxidation@M' in mods:\n", " assert 'M' in seq\n", @@ -1121,9 +1121,9 @@ " if sites == '':\n", " assert mods == ''\n", "df = _lib.precursor_df\n", - "# at least one nterm peptide does not contain Acetyl@Protein N-term\n", + "# at least one nterm peptide does not contain Acetyl@Protein_N-term\n", "assert not df[df.is_prot_nterm].mod_sites.str.contains('0').all()\n", - "# at least one nterm peptide contains Acetyl@Protein N-term\n", + "# at least one nterm peptide contains Acetyl@Protein_N-term\n", "assert df[df.is_prot_nterm].mod_sites.str.contains('0').any()\n", "# test var mod Oxidation@M\n", "assert not df[df.sequence.str.contains('M')].mods.str.contains('Oxidation@M').all()\n", @@ -1203,7 +1203,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 7\n", " xx\n", @@ -1216,7 +1216,7 @@ " 0\n", 
" True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 7\n", " xx\n", @@ -1346,7 +1346,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;3\n", " 12\n", " xx\n", @@ -1385,7 +1385,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 13\n", " xx\n", @@ -1398,7 +1398,7 @@ " 1\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 13\n", " xx\n", @@ -1437,7 +1437,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;8\n", " 13\n", " xx;yy\n", @@ -1450,7 +1450,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 13\n", " xx;yy\n", @@ -1567,7 +1567,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;14;3\n", " 19\n", " xx\n", @@ -1580,7 +1580,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;3\n", " 19\n", " xx\n", @@ -1645,7 +1645,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 20\n", " xx\n", @@ -1658,7 +1658,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;15;4\n", " 20\n", " xx\n", @@ -1671,7 +1671,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...\n", + " Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...\n", " 0;1;15;4\n", " 20\n", " xx\n", @@ -1684,7 
+1684,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 20\n", " xx\n", @@ -1740,8 +1740,8 @@ " is_prot_cterm mods \\\n", "0 False Oxidation@M;Carbamidomethyl@C \n", "1 False Carbamidomethyl@C \n", - "2 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "3 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "2 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "3 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "4 True Oxidation@M \n", "5 True \n", "6 True Oxidation@M;Phospho@S \n", @@ -1751,15 +1751,15 @@ "10 True Phospho@T \n", "11 True \n", "12 False Carbamidomethyl@C \n", - "13 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "13 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "14 False Oxidation@M;Carbamidomethyl@C \n", "15 False Carbamidomethyl@C \n", - "16 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "17 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "16 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "17 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "18 True Oxidation@M \n", "19 True \n", - "20 True Acetyl@Protein N-term;Oxidation@M \n", - "21 True Acetyl@Protein N-term \n", + "20 True Acetyl@Protein_N-term;Oxidation@M \n", + "21 True Acetyl@Protein_N-term \n", "22 True Oxidation@M;Phospho@S \n", "23 True Oxidation@M;Phospho@T \n", "24 True Oxidation@M \n", @@ -1768,16 +1768,16 @@ "27 True \n", "28 False Oxidation@M;Carbamidomethyl@C \n", "29 False Carbamidomethyl@C \n", - "30 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "31 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "30 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 
\n", + "31 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "32 False Oxidation@M;Carbamidomethyl@C \n", "33 False Oxidation@M;Carbamidomethyl@C \n", "34 False Oxidation@M;Oxidation@M;Carbamidomethyl@C \n", "35 False Carbamidomethyl@C \n", - "36 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "37 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "38 False Acetyl@Protein N-term;Oxidation@M;Oxidation@M;... \n", - "39 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "36 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "37 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "38 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n", + "39 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "\n", " mod_sites nAA proteins genes \n", "0 1;4 7 xx \n", @@ -1909,7 +1909,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4\n", " 7\n", " xx\n", @@ -1923,7 +1923,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;4\n", " 7\n", " xx\n", @@ -1979,7 +1979,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;4;7;13\n", " 20\n", " xx\n", @@ -1993,7 +1993,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;15;4;7;13\n", " 20\n", " xx\n", @@ -2007,7 +2007,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...\n", + " Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...\n", " 0;1;15;4;7;13\n", " 20\n", " xx\n", @@ -2021,7 +2021,7 @@ " 2\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth...\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth...\n", " 0;4;7;13\n", " 20\n", 
" xx\n", @@ -2050,15 +2050,15 @@ " is_prot_cterm mods \\\n", "0 False Oxidation@M;Carbamidomethyl@C \n", "1 False Carbamidomethyl@C \n", - "2 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "3 False Acetyl@Protein N-term;Carbamidomethyl@C \n", + "2 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "3 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "4 True Oxidation@M \n", ".. ... ... \n", "115 False Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t... \n", - "116 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "117 False Acetyl@Protein N-term;Oxidation@M;Carbamidomet... \n", - "118 False Acetyl@Protein N-term;Oxidation@M;Oxidation@M;... \n", - "119 False Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth... \n", + "116 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "117 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "118 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n", + "119 False Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... 
\n", "\n", " mod_sites nAA proteins genes labeling_channel \n", "0 1;4 7 xx none \n", @@ -2085,8 +2085,8 @@ "#| hide\n", "_lib.add_peptide_labeling({\n", " 'none': [], # not labelled for reference\n", - " 'light': ['Dimethyl@Any N-term','Dimethyl@K'],\n", - " 'heavy': ['Dimethyl:2H(6)13C(2)@Any N-term','Dimethyl:2H(6)13C(2)@K'],\n", + " 'light': ['Dimethyl@Any_N-term','Dimethyl@K'],\n", + " 'heavy': ['Dimethyl:2H(6)13C(2)@Any_N-term','Dimethyl:2H(6)13C(2)@K'],\n", "})\n", "_lib.precursor_df" ] @@ -2229,7 +2229,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;2\n", " 8\n", " 0\n", @@ -2325,7 +2325,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;6\n", " 8\n", " 1\n", @@ -2397,7 +2397,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;1;3\n", " 9\n", " 0\n", @@ -2421,7 +2421,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;3\n", " 9\n", " 0\n", @@ -2493,7 +2493,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Oxidation@M;Carbamidomet...\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", " 0;8;6\n", " 9\n", " 1\n", @@ -2517,7 +2517,7 @@ " 0\n", " True\n", " False\n", - " Acetyl@Protein N-term;Carbamidomethyl@C\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", " 0;6\n", " 9\n", " 1\n", @@ -2637,7 +2637,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;7\n", " 11\n", " 0\n", @@ -2661,7 +2661,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;7\n", " 11\n", " 0\n", @@ -2685,7 +2685,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein 
N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 11\n", " 0\n", @@ -2709,7 +2709,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 11\n", " 0\n", @@ -2829,7 +2829,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;4\n", " 11\n", " 1\n", @@ -2853,7 +2853,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term;Oxidation@M\n", + " Acetyl@Protein_N-term;Oxidation@M\n", " 0;4\n", " 11\n", " 1\n", @@ -2877,7 +2877,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 11\n", " 1\n", @@ -2901,7 +2901,7 @@ " 1\n", " True\n", " True\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 11\n", " 1\n", @@ -3162,35 +3162,35 @@ "0 Oxidation@M 2 8 0 \n", "1 8 0 \n", "2 Carbamidomethyl@C 2 8 0 \n", - "3 Acetyl@Protein N-term;Carbamidomethyl@C 0;2 8 0 \n", + "3 Acetyl@Protein_N-term;Carbamidomethyl@C 0;2 8 0 \n", "4 Oxidation@M 6 8 1 \n", "5 8 1 \n", "6 Carbamidomethyl@C 6 8 1 \n", - "7 Acetyl@Protein N-term;Carbamidomethyl@C 0;6 8 1 \n", + "7 Acetyl@Protein_N-term;Carbamidomethyl@C 0;6 8 1 \n", "8 Oxidation@M;Carbamidomethyl@C 1;3 9 0 \n", "9 Carbamidomethyl@C 3 9 0 \n", - "10 Acetyl@Protein N-term;Oxidation@M;Carbamidomet... 0;1;3 9 0 \n", - "11 Acetyl@Protein N-term;Carbamidomethyl@C 0;3 9 0 \n", + "10 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;1;3 9 0 \n", + "11 Acetyl@Protein_N-term;Carbamidomethyl@C 0;3 9 0 \n", "12 Oxidation@M;Carbamidomethyl@C 8;6 9 1 \n", "13 Carbamidomethyl@C 6 9 1 \n", - "14 Acetyl@Protein N-term;Oxidation@M;Carbamidomet... 0;8;6 9 1 \n", - "15 Acetyl@Protein N-term;Carbamidomethyl@C 0;6 9 1 \n", + "14 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 
0;8;6 9 1 \n", + "15 Acetyl@Protein_N-term;Carbamidomethyl@C 0;6 9 1 \n", "16 Oxidation@M 7 11 0 \n", "17 Oxidation@M 7 11 0 \n", "18 11 0 \n", "19 11 0 \n", - "20 Acetyl@Protein N-term;Oxidation@M 0;7 11 0 \n", - "21 Acetyl@Protein N-term;Oxidation@M 0;7 11 0 \n", - "22 Acetyl@Protein N-term 0 11 0 \n", - "23 Acetyl@Protein N-term 0 11 0 \n", + "20 Acetyl@Protein_N-term;Oxidation@M 0;7 11 0 \n", + "21 Acetyl@Protein_N-term;Oxidation@M 0;7 11 0 \n", + "22 Acetyl@Protein_N-term 0 11 0 \n", + "23 Acetyl@Protein_N-term 0 11 0 \n", "24 Oxidation@M 4 11 1 \n", "25 Oxidation@M 4 11 1 \n", "26 11 1 \n", "27 11 1 \n", - "28 Acetyl@Protein N-term;Oxidation@M 0;4 11 1 \n", - "29 Acetyl@Protein N-term;Oxidation@M 0;4 11 1 \n", - "30 Acetyl@Protein N-term 0 11 1 \n", - "31 Acetyl@Protein N-term 0 11 1 \n", + "28 Acetyl@Protein_N-term;Oxidation@M 0;4 11 1 \n", + "29 Acetyl@Protein_N-term;Oxidation@M 0;4 11 1 \n", + "30 Acetyl@Protein_N-term 0 11 1 \n", + "31 Acetyl@Protein_N-term 0 11 1 \n", "32 Oxidation@M 6 13 1 \n", "33 Oxidation@M 6 13 1 \n", "34 13 1 \n", @@ -3381,7 +3381,7 @@ " 1\n", " False\n", " True\n", - " Oxidation@M;Dimethyl@Any N-term\n", + " Oxidation@M;Dimethyl@Any_N-term\n", " 2;0\n", " 8\n", " 0\n", @@ -3405,7 +3405,7 @@ " 1\n", " False\n", " True\n", - " Dimethyl:2H(6)13C(2)@Any N-term\n", + " Dimethyl:2H(6)13C(2)@Any_N-term\n", " 0\n", " 8\n", " 0\n", @@ -3429,7 +3429,7 @@ " 1\n", " False\n", " True\n", - " Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term\n", + " Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term\n", " 2;0\n", " 8\n", " 0\n", @@ -3453,7 +3453,7 @@ " 1\n", " False\n", " True\n", - " Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term\n", + " Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term\n", " 6;0\n", " 8\n", " 1\n", @@ -3477,7 +3477,7 @@ " 1\n", " False\n", " True\n", - " Dimethyl:2H(6)13C(2)@Any N-term\n", + " Dimethyl:2H(6)13C(2)@Any_N-term\n", " 0\n", " 8\n", " 1\n", @@ -3525,7 +3525,7 @@ " 2\n", " False\n", " True\n", - " Dimethyl@Any N-term;Dimethyl@K\n", + " 
Dimethyl@Any_N-term;Dimethyl@K\n", " 0;8\n", " 13\n", " 1\n", @@ -3549,7 +3549,7 @@ " 2\n", " False\n", " True\n", - " Oxidation@M;Dimethyl@Any N-term;Dimethyl@K\n", + " Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K\n", " 6;0;8\n", " 13\n", " 1\n", @@ -3573,7 +3573,7 @@ " 2\n", " False\n", " True\n", - " Oxidation@M;Dimethyl@Any N-term;Dimethyl@K\n", + " Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K\n", " 6;0;8\n", " 13\n", " 1\n", @@ -3597,7 +3597,7 @@ " 2\n", " False\n", " True\n", - " Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...\n", + " Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...\n", " 0;5\n", " 13\n", " 0\n", @@ -3621,7 +3621,7 @@ " 2\n", " False\n", " True\n", - " Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...\n", + " Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...\n", " 0;5\n", " 13\n", " 0\n", @@ -3658,17 +3658,17 @@ "79 FGHIKLMNPQRST 0 2 False True \n", "\n", " mods mod_sites nAA decoy \\\n", - "0 Oxidation@M;Dimethyl@Any N-term 2;0 8 0 \n", - "1 Dimethyl:2H(6)13C(2)@Any N-term 0 8 0 \n", - "2 Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term 2;0 8 0 \n", - "3 Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term 6;0 8 1 \n", - "4 Dimethyl:2H(6)13C(2)@Any N-term 0 8 1 \n", + "0 Oxidation@M;Dimethyl@Any_N-term 2;0 8 0 \n", + "1 Dimethyl:2H(6)13C(2)@Any_N-term 0 8 0 \n", + "2 Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term 2;0 8 0 \n", + "3 Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term 6;0 8 1 \n", + "4 Dimethyl:2H(6)13C(2)@Any_N-term 0 8 1 \n", ".. ... ... ... ... \n", - "75 Dimethyl@Any N-term;Dimethyl@K 0;8 13 1 \n", - "76 Oxidation@M;Dimethyl@Any N-term;Dimethyl@K 6;0;8 13 1 \n", - "77 Oxidation@M;Dimethyl@Any N-term;Dimethyl@K 6;0;8 13 1 \n", - "78 Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)... 0;5 13 0 \n", - "79 Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)... 
0;5 13 0 \n", + "75 Dimethyl@Any_N-term;Dimethyl@K 0;8 13 1 \n", + "76 Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K 6;0;8 13 1 \n", + "77 Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K 6;0;8 13 1 \n", + "78 Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)... 0;5 13 0 \n", + "79 Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)... 0;5 13 0 \n", "\n", " charge ... i_5 mono_isotope_idx rt_pred rt_norm_pred \\\n", "0 2 ... 0.001352 0 0.242660 0.242660 \n", @@ -3707,8 +3707,8 @@ "source": [ "_lib.import_and_process_protein_dict(protein_dict)\n", "_lib.add_peptide_labeling({\n", - " 'light': ['Dimethyl@Any N-term','Dimethyl@K'],\n", - " 'heavy': ['Dimethyl:2H(6)13C(2)@Any N-term','Dimethyl:2H(6)13C(2)@K'],\n", + " 'light': ['Dimethyl@Any_N-term','Dimethyl@K'],\n", + " 'heavy': ['Dimethyl:2H(6)13C(2)@Any_N-term','Dimethyl:2H(6)13C(2)@K'],\n", "})\n", "_lib.predict_all()\n", "assert (_lib.precursor_df.decoy==1).any()\n", diff --git a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb index 80dda52d..7fe53447 100644 --- a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb +++ b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb @@ -53,11 +53,11 @@ "outputs": [], "source": [ "#| hide\n", - "mq_str = '''Raw file\tScan number\tScan index\tSequence\tLength\tMissed cleavages\tModifications\tModified sequence\tPhospho (STY) Probabilities\tPhospho (STY) Score Diffs\tAcetyl (Protein N-term)\tPhospho (STY)\tProteins\tGene Names\tProtein Names\tCharge\tFragmentation\tMass analyzer\tType\tScan event number\tIsotope index\tm/z\tMass\tMass Error [ppm]\tSimple Mass Error [ppm]\tRetention time\tPEP\tScore\tDelta score\tScore diff\tLocalization prob\tCombinatorics\tPIF\tFraction of total spectrum\tBase peak fraction\tPrecursor Full ScanNumber\tPrecursor Intensity\tPrecursor Apex Fraction\tPrecursor Apex Offset\tPrecursor Apex Offset Time\tDiagnostic peak Phospho (STY) Y\tMatches\tIntensities\tMass Deviations [Da]\tMass Deviations 
[ppm]\tMasses\tNumber of Matches\tIntensity coverage\tPeak coverage\tNeutral loss level\tETD identification type\tReverse\tAll scores\tAll sequences\tAll modified sequences\tid\tProtein group IDs\tPeptide ID\tMod. peptide ID\tEvidence ID\tPhospho (STY) site IDs\n", + "mq_str = '''Raw file\tScan number\tScan index\tSequence\tLength\tMissed cleavages\tModifications\tModified sequence\tPhospho (STY) Probabilities\tPhospho (STY) Score Diffs\tAcetyl (Protein_N-term)\tPhospho (STY)\tProteins\tGene Names\tProtein Names\tCharge\tFragmentation\tMass analyzer\tType\tScan event number\tIsotope index\tm/z\tMass\tMass Error [ppm]\tSimple Mass Error [ppm]\tRetention time\tPEP\tScore\tDelta score\tScore diff\tLocalization prob\tCombinatorics\tPIF\tFraction of total spectrum\tBase peak fraction\tPrecursor Full ScanNumber\tPrecursor Intensity\tPrecursor Apex Fraction\tPrecursor Apex Offset\tPrecursor Apex Offset Time\tDiagnostic peak Phospho (STY) Y\tMatches\tIntensities\tMass Deviations [Da]\tMass Deviations [ppm]\tMasses\tNumber of Matches\tIntensity coverage\tPeak coverage\tNeutral loss level\tETD identification type\tReverse\tAll scores\tAll sequences\tAll modified sequences\tid\tProtein group IDs\tPeptide ID\tMod. 
peptide ID\tEvidence ID\tPhospho (STY) site IDs\n", "200123_SAX_SPAC3_1\t17556\t14082\tAAAADILPVLLK\t12\t0\tUnmodified\t_AAAADILPVLLK_\t\t\t0\t0\tO13864\tkap95\tImportin subunit beta-1\t2\tHCD\tFTMS\tMULTI-MSMS\t7\t0\t597.87646\t1193.7384\t0.17929\t0.9109951\t69.506\t0.00016307\t99.539\t92.608\tNaN\tNaN\t1\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty1;y2;y3;y5;y6;y7;y8;y9;y10;y11;y7(2+);y8(2+);y9(2+);y10(2+);b2;b3;b4;b5;b5-H2O;b6;b6-H2O;b7;b10\t1691;1737.9;2746.5;24815.1;26944.9;16823.3;45272.1;30384.9;18895.4;1119.1;604.5;957.2;966.6;2035.4;15718.7;24674.9;6395.5;7923;1537.9;4618.8;887.6;2099.3;860.1\t-0.0001719036;0.0005485709;-0.0004704565;-5.074493E-05;-3.217469E-05;-0.0006849912;-0.0001701818;-0.0001047339;0.0009983118;0.004176553;0.0008543315;0.001996746;0.001480154;-0.0003792124;5.262618E-05;8.755656E-05;0.0001835221;0.0004847084;0.002249124;0.000259138;-0.001699591;0.001376341;1.408576E-05\t-1.168514;2.108296;-1.260327;-0.08911967;-0.04714336;-0.8610058;-0.1868903;-0.1066933;0.9483612;3.716772;2.14501;4.380748;3.01261;-0.719787;0.3678058;0.4089163;0.6435859;1.211219;5.885142;0.5048801;-3.43173;2.197401;0.01505603\t147.112976074219;260.196319580078;373.281402587891;569.402160644531;682.486206054688;795.570922851563;910.597351074219;981.634399414063;1052.67041015625;1123.70434570313;398.287902832031;455.800231933594;491.319305419922;526.839721679688;143.081451416016;214.118530273438;285.155548095703;400.182189941406;382.169860839844;513.266479492188;495.257873535156;626.349426269531;935.556030273438\t23\t0.7687168\t0.3026316\tNone\tUnknown\t\t99.53857;6.931045;5.725131\tAAAADILPVLLK;TLWHRLKLK;HIRTLSARIK\t_AAAADILPVLLK_;_TLWHRLKLK_;_HIRTLSARIK_\t0\t204\t0\t0\t0\t\n", - "200123_SAX_SPAC3_1\t10089\t7082\tAAARPTVSIYNK\t12\t1\tAcetyl (Protein N-term)\t_(ac)AAARPTVSIYNK_\t\t\t1\t0\tQ9P784\trpl4b\t60S ribosomal protein 
L4-B\t2\tHCD\tFTMS\tMULTI-MSMS\t1\t0\t666.86715\t1331.7197\t-0.51651\t2.4762469\t48.013\t0.00068485\t73.082\t56.42\tNaN\tNaN\t1\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty1;y2;y3;y4;y5;y8;y1-NH3;y2-NH3;y3-NH3;b2;b4;b7;b8;b8-H2O;b9;b9-H2O;b10;b11\t164590.5;494033.8;411033.4;49476.1;87580.2;144666.2;23841.3;130096.5;83541.1;32851.9;97649.7;149771.2;286369.7;168714.4;1080879;100917.2;927992.6;748799.6\t0.0001790485;0.0002110847;-0.0005136858;-0.001807504;-0.006095012;0.0005297848;-0.0001859709;0.0003373015;0.0005249706;0.0005159942;-0.001426426;-0.001423944;0.003513171;0.0002991892;-0.0009556375;-0.007416335;0.001034187;0.00577125\t1.217085;0.8082718;-1.210896;-3.364019;-9.762309;0.5749135;-1.429595;1.381653;1.289246;2.787778;-3.460254;-2.007249;4.411161;0.3843544;-1.05071;-8.318828;0.9642072;4.863622\t147.11262512207;261.155520533145;424.219573841893;537.304931640625;624.341247558594;921.503479003906;130.086441040039;244.128845214844;407.191986083984;185.091552734375;412.231719970703;709.400573730469;796.42766502565;778.420314321062;909.51619781447;891.512093825909;1072.57753652828;1186.61572691238\t18\t0.1805881\t0.2168675\tNone\tUnknown\t\t73.082;16.6623;7.865682\tAAARPTVSIYNK;QGLLGTPERYAK;NRSLFTLQPEK\t_(ac)AAARPTVSIYNK_;_QGLLGTPERYAK_;_NRSLFTLQPEK_\t107\t2604\t15\t16\t131\t\n", + "200123_SAX_SPAC3_1\t10089\t7082\tAAARPTVSIYNK\t12\t1\tAcetyl (Protein_N-term)\t_(ac)AAARPTVSIYNK_\t\t\t1\t0\tQ9P784\trpl4b\t60S ribosomal protein 
L4-B\t2\tHCD\tFTMS\tMULTI-MSMS\t1\t0\t666.86715\t1331.7197\t-0.51651\t2.4762469\t48.013\t0.00068485\t73.082\t56.42\tNaN\tNaN\t1\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty1;y2;y3;y4;y5;y8;y1-NH3;y2-NH3;y3-NH3;b2;b4;b7;b8;b8-H2O;b9;b9-H2O;b10;b11\t164590.5;494033.8;411033.4;49476.1;87580.2;144666.2;23841.3;130096.5;83541.1;32851.9;97649.7;149771.2;286369.7;168714.4;1080879;100917.2;927992.6;748799.6\t0.0001790485;0.0002110847;-0.0005136858;-0.001807504;-0.006095012;0.0005297848;-0.0001859709;0.0003373015;0.0005249706;0.0005159942;-0.001426426;-0.001423944;0.003513171;0.0002991892;-0.0009556375;-0.007416335;0.001034187;0.00577125\t1.217085;0.8082718;-1.210896;-3.364019;-9.762309;0.5749135;-1.429595;1.381653;1.289246;2.787778;-3.460254;-2.007249;4.411161;0.3843544;-1.05071;-8.318828;0.9642072;4.863622\t147.11262512207;261.155520533145;424.219573841893;537.304931640625;624.341247558594;921.503479003906;130.086441040039;244.128845214844;407.191986083984;185.091552734375;412.231719970703;709.400573730469;796.42766502565;778.420314321062;909.51619781447;891.512093825909;1072.57753652828;1186.61572691238\t18\t0.1805881\t0.2168675\tNone\tUnknown\t\t73.082;16.6623;7.865682\tAAARPTVSIYNK;QGLLGTPERYAK;NRSLFTLQPEK\t_(ac)AAARPTVSIYNK_;_QGLLGTPERYAK_;_NRSLFTLQPEK_\t107\t2604\t15\t16\t131\t\n", "200115_SPAC1_3\t3236\t1204\tAAAGPSNSSSGTSTPR\t16\t0\tPhospho (STY)\t_AAAGPSNSSSGTST(ph)PR_\tAAAGPSNS(0.006)S(0.018)S(0.012)GT(0.037)S(0.166)T(0.761)PR\tAAAGPS(-50.91)NS(-21.04)S(-16.23)S(-17.92)GT(-13.13)S(-6.61)T(6.61)PR\t0\t1\tO74883\trpc37\tDNA-directed RNA polymerase III subunit 
rpc5\t2\tHCD\tFTMS\tMULTI-MSMS\t5\t0\t764.32539\t1526.6362\t0.040372\t2.5009131\t27.058\t3.09E-07\t85.563\t85.563\t6.6132\t0.7607\t7\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty1;y2;y3;y4;y5;y6;y7;y8;y9;y10;y11;y12;y13;y14;y15(2+);b2;b3;b4;b5;b6;b6-H2O;b7;b7-H2O;b7-NH3;b8\t311.7;19064.6;1579.2;2432.6;1493.8;3766.8;4935.3;8933.7;10552.5;5357.6;2967.5;36079.4;27868.7;24033;2864.2;2566.3;2115;4843.2;1061.5;1544.9;775.2;1551.7;378.6;1400;2564\t-0.001180265;-0.0001376866;-0.001495543;-0.000752534;-0.0009866576;3.028873E-05;-0.001571673;-0.0003049813;-0.0001979581;-0.0006054719;-0.003855382;-0.001239357;-0.001152213;-0.002655332;-0.003468621;-0.0002525496;-6.503133E-05;9.742274E-05;-0.004145561;-0.003275599;-0.004807082;-0.002462409;-0.0008810994;0.0026047;-0.002904702\t-6.739747;-0.5058812;-3.300056;-1.393018;-1.538608;0.04337576;-2.001313;-0.3496084;-0.206339;-0.5640553;-3.322283;-0.9855627;-0.8765184;-1.916415;-4.759291;-1.765072;-0.3037163;0.3593079;-11.25908;-7.19551;-10.99467;-4.325554;-1.598343;4.71662;-4.425858\t175.120132446289;272.171853719647;453.187220458853;540.218505859375;641.266418457031;698.286865234375;785.320495605469;872.351257324219;959.383178710938;1073.42651367188;1160.46179199219;1257.51193981881;1314.5333163981;1385.57193330488;728.810302734375;143.081756591797;214.118682861328;271.139984130859;368.196990966797;455.228149414063;437.219116210938;569.270263671875;551.258117675781;552.238647460938;656.302734375\t25\t0.5122163\t0.2066116\tNone\tUnknown\t\t85.56325\tAAAGPSNSSSGTSTPR\t_AAAGPSNSSSGTST(ph)PR_\t48\t1130\t7\t7\t56\t3562;3563;3564;9844;9845\n", - "200116_SPAC2_4\t12669\t10068\tMDSVSNVSVNEQGK\t14\t0\tAcetyl (Protein N-term),2 Phospho (STY)\t_(ac)MDS(ph)VSNVS(ph)VNEQGK_\tMDS(0.995)VS(0.005)NVS(1)VNEQGK\tMDS(23.35)VS(-23.35)NVS(33.01)VNEQGK\t1\t2\tO60113\tSPBC15C4.04c\tUncharacterized amino-acid permease 
C15C4.04c\t2\tHCD\tFTMS\tMULTI-MSMS\t2\t0\t856.31773\t1710.6209\t0.080964\t3.4697316\t59.682\t0.00014247\t62.739\t61.368\t33.011\t0.9995\t3\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty2;y3;y4;y5;y6;y7;y9;y10;y5-H2O;y7*;y7-NH3;y9*;y10*;y10-NH3;b2;b5;b7;b7-H2O;b3*;b4*;b5*;b5-H2O;b6*;b7*\t2599.4;1474.7;1583.7;3379;965;7366.5;2061.5;5733.5;401.4;2281.2;787.3;1643.1;4011;491.2;1048;500.2;5472.9;641.4;3808.4;6472.9;1034.5;904.9;775.4;1457.6\t9.736198E-05;0.0001574173;-0.003392065;0.001205305;-0.005576092;0.001632824;0.0003032891;-0.002946621;-0.006063483;-0.0008970362;-0.01182114;0.004487296;-0.001570232;-0.009076364;0.001234439;-0.01120099;-0.001971442;-0.01070507;-0.0004951972;-0.001295148;-0.004636611;0.0008509484;-0.008095883;-0.01268001\t0.4769509;0.4738735;-7.354248;2.095172;-8.26881;1.940734;0.2876261;-2.581378;-10.88062;-1.206717;-16.27463;4.691493;-1.504755;-8.8421;4.046292;-17.01786;-2.262665;-12.54564;-1.323695;-2.737165;-8.276608;1.569463;-12.00719;-16.39672\t204.134170532227;332.192687988281;461.238830566406;575.277160644531;674.352355957031;841.343505859375;1054.45617675781;1141.49145507813;557.273864746094;743.369140625;726.353515625;956.47509765625;1043.51318359375;1026.494140625;305.078948974609;658.190185546875;871.292297363281;853.290466308594;374.102142333984;473.171356201172;560.206726074219;542.190673828125;674.253112792969;773.326110839844\t24\t0.482179\t0.3076923\tOnce\tUnknown\t\t62.73941;1.371648;0.8473398\tMDSVSNVSVNEQGK;KTNRYYNDELR;DSQECILTETEAR\t_(ac)MDS(ph)VSNVS(ph)VNEQGK_;_KT(ph)NRY(ph)Y(ph)NDELR_;_DS(ph)QECILT(ph)ETEAR_\t114258\t812\t12946\t14424\t109552\t2514;2515;2516\n", + "200116_SPAC2_4\t12669\t10068\tMDSVSNVSVNEQGK\t14\t0\tAcetyl (Protein_N-term),2 Phospho (STY)\t_(ac)MDS(ph)VSNVS(ph)VNEQGK_\tMDS(0.995)VS(0.005)NVS(1)VNEQGK\tMDS(23.35)VS(-23.35)NVS(33.01)VNEQGK\t1\t2\tO60113\tSPBC15C4.04c\tUncharacterized amino-acid permease 
C15C4.04c\t2\tHCD\tFTMS\tMULTI-MSMS\t2\t0\t856.31773\t1710.6209\t0.080964\t3.4697316\t59.682\t0.00014247\t62.739\t61.368\t33.011\t0.9995\t3\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty2;y3;y4;y5;y6;y7;y9;y10;y5-H2O;y7*;y7-NH3;y9*;y10*;y10-NH3;b2;b5;b7;b7-H2O;b3*;b4*;b5*;b5-H2O;b6*;b7*\t2599.4;1474.7;1583.7;3379;965;7366.5;2061.5;5733.5;401.4;2281.2;787.3;1643.1;4011;491.2;1048;500.2;5472.9;641.4;3808.4;6472.9;1034.5;904.9;775.4;1457.6\t9.736198E-05;0.0001574173;-0.003392065;0.001205305;-0.005576092;0.001632824;0.0003032891;-0.002946621;-0.006063483;-0.0008970362;-0.01182114;0.004487296;-0.001570232;-0.009076364;0.001234439;-0.01120099;-0.001971442;-0.01070507;-0.0004951972;-0.001295148;-0.004636611;0.0008509484;-0.008095883;-0.01268001\t0.4769509;0.4738735;-7.354248;2.095172;-8.26881;1.940734;0.2876261;-2.581378;-10.88062;-1.206717;-16.27463;4.691493;-1.504755;-8.8421;4.046292;-17.01786;-2.262665;-12.54564;-1.323695;-2.737165;-8.276608;1.569463;-12.00719;-16.39672\t204.134170532227;332.192687988281;461.238830566406;575.277160644531;674.352355957031;841.343505859375;1054.45617675781;1141.49145507813;557.273864746094;743.369140625;726.353515625;956.47509765625;1043.51318359375;1026.494140625;305.078948974609;658.190185546875;871.292297363281;853.290466308594;374.102142333984;473.171356201172;560.206726074219;542.190673828125;674.253112792969;773.326110839844\t24\t0.482179\t0.3076923\tOnce\tUnknown\t\t62.73941;1.371648;0.8473398\tMDSVSNVSVNEQGK;KTNRYYNDELR;DSQECILTETEAR\t_(ac)MDS(ph)VSNVS(ph)VNEQGK_;_KT(ph)NRY(ph)Y(ph)NDELR_;_DS(ph)QECILT(ph)ETEAR_\t114258\t812\t12946\t14424\t109552\t2514;2515;2516\n", "200116_SPAC2_4\t14769\t12037\tELQTSPIVSPTTSPK\t15\t0\t3 Phospho (STY)\t_ELQTS(ph)PIVS(ph)PTTS(ph)PK_\tELQT(0.111)S(0.889)PIVS(0.916)PT(0.103)T(0.093)S(0.888)PK\tELQT(-9.09)S(9.09)PIVS(13.02)PT(-11.33)T(-12.18)S(11.33)PK\t0\t3\tQ9UUJ6\tned1\tNuclear elongation and deformation protein 
1\t2\tHCD\tFTMS\tMULTI-MSMS\t8\t1\t912.8771\t1823.7396\t0.069888\t3.8401561\t67.673\t8.03E-07\t97.271\t96.717\t13.022\t0.91604\t20\t0\t0\t0\t-1\t0\t0\t0\t0\t\ty2;y3;y5;y6;y7;y8;y10;y3*;y7*;y10*;y12*;y12-H2O;y10(2+);a2;b2;b2-H2O;b3;b3-H2O;b4;b4-H2O;b5*;b5-H2O\t1051;1613.3;816.6;2865.2;5194.7;2482.5;6341.7;412.7;1335.7;1653.1;1492.5;1954.2;435.7;845.1;596.3;367.1;1818.9;1685.7;953.5;841.8;1028.8;1682.9\t0.0002695607;0.0005662452;-0.006646387;0.0007683442;-0.001605259;0.00540729;-0.001560841;8.106261E-05;0.009048474;0.003963074;-0.02187429;-0.02075101;0.001452572;0.001079468;-0.0005421648;0.00135958;0.0003723827;-0.0005792665;-0.003462815;-0.002735998;-0.00305622;-0.003916317\t1.104009;1.377178;-10.83769;1.081701;-1.829747;5.538137;-1.315482;0.2588314;11.61066;3.640742;-16.12427;-15.50208;2.446395;5.017562;-2.229897;6.0393;1.003207;-1.640133;-7.332687;-6.023344;-5.646443;-7.484528\t244.165298461914;411.163360595703;613.265930175781;710.311279296875;877.31201171875;976.373413085938;1186.51720904948;313.186950683594;779.324462890625;1088.53479003906;1356.60666469299;1338.59497672386;593.760009765625;215.137939453125;243.134475708008;225.122009277344;371.192138671875;353.182525634766;472.24365234375;454.232360839844;541.264709472656;523.255004882813\t22\t0.3637594\t0.247191\tOnce\tUnknown\t\t97.27094;0.5537065\tELQTSPIVSPTTSPK;EERVENDWFETYK\t_ELQTS(ph)PIVS(ph)PTTS(ph)PK_;_EERVENDWFET(ph)YK_\t41087\t3091\t4421\t4973\t38407\t8662;8663;8664;11068;11069;11070\n", "'''\n", "raw_df = pd.read_table(io.StringIO(mq_str))\n", @@ -69,7 +69,7 @@ "assert 'frag_stop_idx' in mq_reader.psm_df.columns\n", "assert mq_reader.psm_df.mods.values[0] == ''\n", "assert mq_reader.psm_df.mod_sites.values[0] == ''\n", - "assert mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein N-term'\n", + "assert mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein_N-term'\n", "assert mq_reader.psm_df.mod_sites.values[1] == '0'\n", "seq = 'AAAGPSNSSSGTSTPR'\n", "frag_types = 
raw_df[raw_df['Sequence']==seq]['Matches'].values[0].split(';')\n", @@ -188,7 +188,7 @@ " \n", " \n", " 10088\n", - " Acetyl@Protein N-term\n", + " Acetyl@Protein_N-term\n", " 0\n", " 12\n", " 0.690775\n", @@ -232,7 +232,7 @@ " 0.995;1\n", " 3;8\n", " 12668\n", - " Acetyl@Protein N-term;Phospho@S;Phospho@S\n", + " Acetyl@Protein_N-term;Phospho@S;Phospho@S\n", " 0;3;8\n", " 14\n", " 0.881917\n", @@ -282,9 +282,9 @@ "\n", " phos_sites spec_idx mods mod_sites \\\n", "0 17555 \n", - "1 10088 Acetyl@Protein N-term 0 \n", + "1 10088 Acetyl@Protein_N-term 0 \n", "2 14 3235 Phospho@T 14 \n", - "3 3;8 12668 Acetyl@Protein N-term;Phospho@S;Phospho@S 0;3;8 \n", + "3 3;8 12668 Acetyl@Protein_N-term;Phospho@S;Phospho@S 0;3;8 \n", "4 5;9;13 14768 Phospho@S;Phospho@S;Phospho@S 5;9;13 \n", "\n", " nAA rt_norm frag_start_idx frag_stop_idx \n", diff --git a/nbs_tests/spec_lib/predict_lib.ipynb b/nbs_tests/spec_lib/predict_lib.ipynb index 7fa38264..940feac8 100644 --- a/nbs_tests/spec_lib/predict_lib.ipynb +++ b/nbs_tests/spec_lib/predict_lib.ipynb @@ -176,7 +176,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", " 13;0;15\n", " 16\n", " 1\n", @@ -189,7 +189,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", " 13;0;15\n", " 16\n", " 1\n", @@ -202,7 +202,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -215,7 +215,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -228,7 +228,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -260,11 +260,11 @@ "3 False 
\n", "4 False \n", ".. ... ... \n", - "169 False Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat... \n", - "170 False Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat... \n", - "171 False Carbamidomethyl@C;Acetyl@Protein N-term \n", - "172 False Carbamidomethyl@C;Acetyl@Protein N-term \n", - "173 False Carbamidomethyl@C;Acetyl@Protein N-term \n", + "169 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", + "170 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", + "171 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "172 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "173 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", "\n", " mod_sites nAA decoy charge \n", "0 7 0 2 \n", @@ -500,7 +500,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", " 13;0;15\n", " 16\n", " 1\n", @@ -522,7 +522,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", " 13;0;15\n", " 16\n", " 1\n", @@ -544,7 +544,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -566,7 +566,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -588,7 +588,7 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein N-term\n", + " Carbamidomethyl@C;Acetyl@Protein_N-term\n", " 13;0\n", " 16\n", " 1\n", @@ -629,11 +629,11 @@ "3 False \n", "4 False \n", ".. ... ... \n", - "169 False Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat... \n", - "170 False Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat... 
\n", - "171 False Carbamidomethyl@C;Acetyl@Protein N-term \n", - "172 False Carbamidomethyl@C;Acetyl@Protein N-term \n", - "173 False Carbamidomethyl@C;Acetyl@Protein N-term \n", + "169 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", + "170 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", + "171 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "172 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "173 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", "\n", " mod_sites nAA decoy charge precursor_mz rt_pred rt_norm_pred \\\n", "0 7 0 2 400.742505 0.029719 0.029719 \n", diff --git a/nbs_tests/spec_lib/test_translate_tsv.ipynb b/nbs_tests/spec_lib/test_translate_tsv.ipynb index 514e2c9b..b9658a39 100644 --- a/nbs_tests/spec_lib/test_translate_tsv.ipynb +++ b/nbs_tests/spec_lib/test_translate_tsv.ipynb @@ -138,7 +138,7 @@ "charged_frag_types = ['b_z1','y_z1','y_modloss_z1']\n", "precursor_df = pd.DataFrame({\n", " 'sequence': ['ASGHCEWMKYR']*repeat+['ASGHCEWMAAR'],\n", - " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n", + " 'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n", " 'mod_sites': ['0;4;8']*repeat+[''],\n", " 'nAA': 11,\n", " 'NCE': 20,\n", diff --git a/nbs_trials/precursor_table.csv b/nbs_trials/precursor_table.csv index efab1151..6110d3d8 100644 --- a/nbs_trials/precursor_table.csv +++ b/nbs_trials/precursor_table.csv @@ -1,4 +1,4 @@ sequence,mods,mod_sites,charge ACDEFGHIK,Carbamidomethyl@C,2,2 -LMNPQRSTVK,Acetyl@Protein N-term;Phospho@S,0;7,3 +LMNPQRSTVK,Acetyl@Protein_N-term;Phospho@S,0;7,3 WYVSTR,,,1 diff --git a/nbs_trials/speclib_from_dda.ipynb b/nbs_trials/speclib_from_dda.ipynb index 6555e5ae..306b3099 100644 --- a/nbs_trials/speclib_from_dda.ipynb +++ b/nbs_trials/speclib_from_dda.ipynb @@ -352,7 +352,7 @@ " 'Phospho@S': 'pS',\n", " 'Phospho@T': 'pT',\n", " 'Phospho@Y': 'pY',\n", - " 'Acetyl@Protein N-term': 'a',\n", + " 'Acetyl@Protein_N-term': 'a',\n", 
"}\n", "\n", "reader = psm_reader_provider.get_reader('alphapept', modification_mapping=modification_mapping)\n", diff --git a/nbs_trials/test_argparse.ipynb b/nbs_trials/test_argparse.ipynb index 3a703b14..36639f2a 100644 --- a/nbs_trials/test_argparse.ipynb +++ b/nbs_trials/test_argparse.ipynb @@ -112,7 +112,7 @@ " 'psm_num_per_mod_to_train_rt_ccs': 50,\n", " 'psm_num_to_test_rt_ccs': 0,\n", " 'top_n_mods_to_train': 10,\n", - " 'other_modification_mapping': {'Dimethyl@Any N-term': ['_(Dimethyl-n-0)',\n", + " 'other_modification_mapping': {'Dimethyl@Any_N-term': ['_(Dimethyl-n-0)',\n", " '_(Dimethyl)']}}},\n", " 'library': {'infile_type': 'fasta',\n", " 'infile_type_choices': ['fasta',\n", @@ -180,7 +180,7 @@ " \"--model--frag_types\", \"b\", \"y\", \n", " \"--library--labeling_channels\", \"0:Ox@M;a@B\", \"4:1@C;2@D\",\n", " \"--common--user_defined_modifications\", \"Dimethyl2@Any_N-term:H(2)2H(2)C(2);H(0)\",\n", - " \"--model_mgr--transfer--psm_modification_mapping\", \"Dimethyl@Any N-term:_(Dimethyl-n-0);_(Dimethyl)\"\n", + " \"--model_mgr--transfer--psm_modification_mapping\", \"Dimethyl@Any_N-term:_(Dimethyl-n-0);_(Dimethyl)\"\n", " ]\n", ")\n", "global_settings" diff --git a/peptdeep/constants/default_settings.yaml b/peptdeep/constants/default_settings.yaml index 29ddb833..934e3962 100644 --- a/peptdeep/constants/default_settings.yaml +++ b/peptdeep/constants/default_settings.yaml @@ -151,7 +151,7 @@ model_mgr: psm_modification_mapping: {} # alphabase modifications to modifications of other engine PSMs # Example (note that `X(UniMod:id)` format can directly be recognized by alphabase), # psm_modification_mapping: - # Dimethyl@Any N-term: + # Dimethyl@Any_N-term: # - _(Dimethyl-n-0) # - _(Dimethyl) # Dimethyl:2H(2)@K: diff --git a/peptdeep/model/featurize.py b/peptdeep/model/featurize.py index a8b7f1ce..fc729cad 100644 --- a/peptdeep/model/featurize.py +++ b/peptdeep/model/featurize.py @@ -4,8 +4,6 @@ from peptdeep.settings import ( model_const, 
mod_feature_size, MOD_TO_FEATURE, - mod_elements, mod_elem_to_idx, - _parse_mod_formula, update_all_mod_features, ) def parse_mod_feature( diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py index dcbfe898..f55c885c 100644 --- a/peptdeep/protein/fasta.py +++ b/peptdeep/protein/fasta.py @@ -19,7 +19,7 @@ def __init__(self, precursor_charge_max:int = 4, precursor_mz_min:float = 400.0, precursor_mz_max:float = 1800.0, - var_mods:list = ['Acetyl@Protein N-term','Oxidation@M'], + var_mods:list = ['Acetyl@Protein_N-term','Oxidation@M'], min_var_mod_num:int = 0, max_var_mod_num:int = 2, fix_mods:list = ['Carbamidomethyl@C'], @@ -73,7 +73,7 @@ def __init__(self, var_mods : list, optional list of variable modifications, - by default ['Acetyl@Protein N-term','Oxidation@M'] + by default ['Acetyl@Protein_N-term','Oxidation@M'] max_var_mod_num : int, optional Minimal number of variable modifications on a peptide sequence, @@ -112,7 +112,7 @@ def __init__(self, Defaults to False. special_mods_cannot_modify_pep_n_term : bool, optional - Similar to `special_mods_cannot_modify_pep_c_term`, but at N-term. + Similar to `special_mods_cannot_modify_pep_c_term`, but at N-term. Defaults to False. decoy : str, optional diff --git a/peptdeep/psm_frag_reader/psmlabel_reader.py b/peptdeep/psm_frag_reader/psmlabel_reader.py index 60a9b58d..738fc8bd 100644 --- a/peptdeep/psm_frag_reader/psmlabel_reader.py +++ b/peptdeep/psm_frag_reader/psmlabel_reader.py @@ -157,7 +157,7 @@ def load_psmlabel_list( "Phospho@S", "Phospho@T", "Phospho@Y", - "Acetyl@Protein N-term", + "Acetyl@Protein_N-term", ], ): psm_df_list = [] diff --git a/peptdeep/settings.py b/peptdeep/settings.py index 2d089310..ef89a190 100644 --- a/peptdeep/settings.py +++ b/peptdeep/settings.py @@ -63,7 +63,7 @@ def add_user_defined_modifications( Example: ``` { - "Dimethyl2@Any N-term": { + "Dimethyl2@Any_N-term": { "composition": "H(2)2H(2)C(2)", "modloss_composition": "" }, ... 
diff --git a/peptdeep/webui/library_ui.py b/peptdeep/webui/library_ui.py index a427c1ff..5416870a 100644 --- a/peptdeep/webui/library_ui.py +++ b/peptdeep/webui/library_ui.py @@ -284,7 +284,7 @@ def show(): if infile_type != 'fasta': df = pd.DataFrame({ 'sequence': ['ACDEFGHIK','LMNPQRSTVK','WYVSTR'], - 'mods': ['Carbamidomethyl@C','Acetyl@Protein N-term;Phospho@S',''], + 'mods': ['Carbamidomethyl@C','Acetyl@Protein_N-term;Phospho@S',''], 'mod_sites': ['2','0;7',''], 'charge': [2,3,1], }) From cf4a3af81a28eae2d6a6e977f3e5afb6bf5405ff Mon Sep 17 00:00:00 2001 From: jalew188 Date: Fri, 28 Jun 2024 17:22:20 +0200 Subject: [PATCH 2/2] patch fix maxquant reader --- .../maxquant_frag_reader.ipynb | 35 ++++++++++++------- .../psm_frag_reader/maxquant_frag_reader.py | 4 +-- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb index 7fe53447..7abbd1e4 100644 --- a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb +++ b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb @@ -29,7 +29,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" + ] + } + ], "source": [ "from peptdeep.psm_frag_reader.maxquant_frag_reader import *" ] @@ -69,7 +77,10 @@ "assert 'frag_stop_idx' in mq_reader.psm_df.columns\n", "assert mq_reader.psm_df.mods.values[0] == ''\n", "assert mq_reader.psm_df.mod_sites.values[0] == ''\n", - "assert mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein_N-term'\n", + "assert (\n", + " mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein_N-term' or\n", + " mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein N-term'\n", + ")\n", "assert mq_reader.psm_df.mod_sites.values[1] == '0'\n", "seq = 'AAAGPSNSSSGTSTPR'\n", "frag_types = 
raw_df[raw_df['Sequence']==seq]['Matches'].values[0].split(';')\n", @@ -188,7 +199,7 @@ " \n", " \n", " 10088\n", - " Acetyl@Protein_N-term\n", + " Acetyl@Protein N-term\n", " 0\n", " 12\n", " 0.690775\n", @@ -232,7 +243,7 @@ " 0.995;1\n", " 3;8\n", " 12668\n", - " Acetyl@Protein_N-term;Phospho@S;Phospho@S\n", + " Acetyl@Protein N-term;Phospho@S;Phospho@S\n", " 0;3;8\n", " 14\n", " 0.881917\n", @@ -282,17 +293,17 @@ "\n", " phos_sites spec_idx mods mod_sites \\\n", "0 17555 \n", - "1 10088 Acetyl@Protein_N-term 0 \n", + "1 10088 Acetyl@Protein N-term 0 \n", "2 14 3235 Phospho@T 14 \n", - "3 3;8 12668 Acetyl@Protein_N-term;Phospho@S;Phospho@S 0;3;8 \n", + "3 3;8 12668 Acetyl@Protein N-term;Phospho@S;Phospho@S 0;3;8 \n", "4 5;9;13 14768 Phospho@S;Phospho@S;Phospho@S 5;9;13 \n", "\n", " nAA rt_norm frag_start_idx frag_stop_idx \n", - "0 12 1.000000 0 11 \n", - "1 12 0.690775 11 22 \n", - "2 16 1.000000 22 37 \n", - "3 14 0.881917 37 50 \n", - "4 15 1.000000 50 64 " + "0 12 1.000000 0 11 \n", + "1 12 0.690775 11 22 \n", + "2 16 1.000000 22 37 \n", + "3 14 0.881917 37 50 \n", + "4 15 1.000000 50 64 " ] }, "execution_count": null, @@ -493,7 +504,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.3 ('base')", + "display_name": "python3", "language": "python", "name": "python3" } diff --git a/peptdeep/psm_frag_reader/maxquant_frag_reader.py b/peptdeep/psm_frag_reader/maxquant_frag_reader.py index b3f8d943..12c45cca 100644 --- a/peptdeep/psm_frag_reader/maxquant_frag_reader.py +++ b/peptdeep/psm_frag_reader/maxquant_frag_reader.py @@ -47,7 +47,7 @@ def filter_phos(mq_df, prob): (mq_df["PhosProbs"], mq_df["PhosSites"]) = zip( *mq_df[["Modifications", "Phospho (STY) Probabilities"]].apply( - lambda x: parse_phos_probs(x[0], x[1], prob), axis=1 + lambda x: parse_phos_probs(x.iloc[0], x.iloc[1], prob), axis=1 ) ) return mq_df[mq_df["PhosProbs"] != "x"] @@ -138,7 +138,7 @@ def _post_process(self, mq_df): if np.any(intens > 0): intens /= np.max(intens) - 
self._fragment_intensity_df.iloc[start:end, :] = intens + self._fragment_intensity_df.iloc[start:end, :] = np.float32(intens) self._psm_df[["frag_start_idx", "frag_stop_idx"]] = mq_df[ ["frag_start_idx", "frag_stop_idx"]