@@ -1631,15 +1631,15 @@ def get_sequence_for_fasta(template, MHC_class, chain):
1631
1631
1632
1632
1633
1633
def generate_mhcseq_database (data_dir = PANDORA .PANDORA_data + '/mhcseqs' ,
1634
- HLA_out = 'Human_MHC_data .fasta' ,
1635
- nonHLA_out = 'NonHuman_MHC_data .fasta' ):
1636
- """generate_mhcseq_database(data_dir=PANDORA.PANDORA_data, HLA_out='Human_MHC_data .fasta', nonHLA_out='NonHuman_MHC_data .fasta')
1634
+ HLA_out = 'HLA_cleaned .fasta' ,
1635
+ nonHLA_out = 'MHC_cleaned .fasta' ):
1636
+ """generate_mhcseq_database(data_dir=PANDORA.PANDORA_data, HLA_out='HLA_cleaned .fasta', nonHLA_out='MHC_cleaned .fasta')
1637
1637
Downloads and parses HLA and other MHC sequences to compile reference fastas
1638
1638
1639
1639
Args:
1640
1640
data_dir (str, optional): Data directory. Defaults to PANDORA.PANDORA_data.
1641
- HLA_out (str, optional): Output file for HLA sequences. Defaults to 'Human_MHC_data .fasta'.
1642
- nonHLA_out (str, optional): Output file for non human MHCs. Defaults to 'NonHuman_MHC_data .fasta'.
1641
+ HLA_out (str, optional): Output file for HLA sequences. Defaults to 'HLA_cleaned .fasta'.
1642
+ nonHLA_out (str, optional): Output file for non human MHCs. Defaults to 'MHC_cleaned .fasta'.
1643
1643
1644
1644
Returns:
1645
1645
None.
@@ -1665,12 +1665,12 @@ def generate_mhcseq_database(data_dir = PANDORA.PANDORA_data + '/mhcseqs',
1665
1665
return ref_MHCI_sequences
1666
1666
1667
1667
1668
- def generate_hla_database (data_dir , HLA_out = 'Human_MHC_data .fasta' ):
1668
+ def generate_hla_database (data_dir , HLA_out = 'HLA_cleaned .fasta' ):
1669
1669
"""
1670
1670
Downloads and parses HLA sequences
1671
1671
1672
1672
Args:
1673
- HLA_out (str, optional): Output file for HLA sequences. Defaults to 'Human_MHC_data .fasta'.
1673
+ HLA_out (str, optional): Output file for HLA sequences. Defaults to 'HLA_cleaned .fasta'.
1674
1674
1675
1675
Returns:
1676
1676
None.
@@ -1681,21 +1681,21 @@ def generate_hla_database(data_dir, HLA_out = 'Human_MHC_data.fasta'):
1681
1681
###
1682
1682
# Rename pre-existing raw file
1683
1683
try :
1684
- os .system ('mv %s/hla_prot .fasta %s/OLD_hla_prot .fasta' % (data_dir , data_dir ))
1684
+ os .system ('mv %s/HLA_raw .fasta %s/OLD_HLA_raw .fasta' % (data_dir , data_dir ))
1685
1685
except :
1686
1686
pass
1687
1687
1688
1688
# Download Human data
1689
1689
url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/hla_prot.fasta'
1690
- command = (' ' ).join (['wget' , url , '-P ' , data_dir ])
1690
+ command = (' ' ).join (['wget' , url , '-O ' , f' { data_dir } /HLA_raw.fasta' ])
1691
1691
proc = subprocess .Popen (command , executable = '/bin/bash' ,
1692
1692
shell = True , stdout = subprocess .PIPE )
1693
1693
print (proc .stdout .read ())
1694
1694
1695
1695
HLAs = {}
1696
1696
to_write = {}
1697
1697
#Parse the fasta files
1698
- for seq_record in SeqIO .parse (data_dir + '/hla_prot .fasta' , "fasta" ):
1698
+ for seq_record in SeqIO .parse (f' { data_dir } /HLA_raw .fasta' , "fasta" ):
1699
1699
allele_fullname = seq_record .description .split (' ' )[1 ]
1700
1700
#allele_significant = allele_fullname[:8]
1701
1701
#Take only up to the allele identifier, ignore the silent mutations
@@ -1743,18 +1743,18 @@ def generate_hla_database(data_dir, HLA_out = 'Human_MHC_data.fasta'):
1743
1743
1744
1744
# Remove pre-existing raw file
1745
1745
try :
1746
- os .system ('rm %s/OLD_hla_prot .fasta' % data_dir )
1746
+ os .system (f 'rm { data_dir } /OLD_HLA_raw .fasta' )
1747
1747
except :
1748
1748
pass
1749
1749
1750
1750
return to_write
1751
1751
1752
- def generate_nonhla_database (data_dir , nonHLA_out = 'NonHuman_MHC_data .fasta' ):
1752
+ def generate_nonhla_database (data_dir , nonHLA_out = 'MHC_cleaned .fasta' ):
1753
1753
"""
1754
1754
Downloads and parses non-human MHC sequences
1755
1755
1756
1756
Args:
1757
- nonHLA_out (str, optional): Output file for non human MHCs. Defaults to 'NonHuman_MHC_data .fasta'.
1757
+ nonHLA_out (str, optional): Output file for non human MHCs. Defaults to 'MHC_cleaned .fasta'.
1758
1758
1759
1759
Returns:
1760
1760
None.
@@ -1765,22 +1765,22 @@ def generate_nonhla_database(data_dir, nonHLA_out = 'NonHuman_MHC_data.fasta'):
1765
1765
###
1766
1766
# Rename pre-existing raw file
1767
1767
try :
1768
- os .system ('mv %s/MHC_prot .fasta %s/OLD_MHC_prot .fasta' % ( data_dir , data_dir ) )
1768
+ os .system (f 'mv { data_dir } /MHC_raw .fasta { data_dir } /OLD_MHC_raw .fasta' )
1769
1769
except :
1770
1770
pass
1771
1771
1772
1772
# Download other animals data
1773
1773
#os.system('wget https://raw.githubusercontent.com/ANHIG/IPDMHC/Latest/MHC_prot.fasta')
1774
1774
url = 'https://raw.githubusercontent.com/ANHIG/IPDMHC/Latest/MHC_prot.fasta'
1775
- command = (' ' ).join (['wget' , url , '-P ' , data_dir ])
1775
+ command = (' ' ).join (['wget' , url , '-O ' , f' { data_dir } /MHC_raw.fasta' ])
1776
1776
proc = subprocess .Popen (command , executable = '/bin/bash' ,
1777
1777
shell = True , stdout = subprocess .PIPE )
1778
1778
print (proc .stdout .read ())
1779
1779
1780
1780
MHCs = {}
1781
1781
to_write = {}
1782
1782
#Parse the fasta file
1783
- fasta = f'{ data_dir } /MHC_prot .fasta'
1783
+ fasta = f'{ data_dir } /MHC_raw .fasta'
1784
1784
for seq_record in SeqIO .parse (fasta , "fasta" ):
1785
1785
allele_fullname = seq_record .description .split (' ' )[1 ]
1786
1786
#allele_significant = allele_fullname[:8]
@@ -1827,7 +1827,7 @@ def generate_nonhla_database(data_dir, nonHLA_out = 'NonHuman_MHC_data.fasta'):
1827
1827
1828
1828
# Remove pre-existing raw file
1829
1829
try :
1830
- os .system ('rm %s/OLD_MHC_prot .fasta' % data_dir )
1830
+ os .system (f 'rm { data_dir } /OLD_MHC_raw .fasta' )
1831
1831
except :
1832
1832
pass
1833
1833
0 commit comments