Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

T007/22: Switch to fingerprint generator #348

Merged
merged 13 commits into from
May 16, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ In this talktorial, we get familiar with different approaches to encode (descrip

### References

* Review on "Molecular similarity in medicinal chemistry" ([<i>J. Med. Chem.</i> (2014), <b>57</b>, 3186-3204](https://pubmed.ncbi.nlm.nih.gov/24151987/))
* Review on "Molecular similarity in medicinal chemistry" ([<i>J. Med. Chem.</i> (2014), <b>57</b>, 3186-3204](https://pubmed.ncbi.nlm.nih.gov/24151987))
* [Morgan fingerprints](http://www.rdkit.org/docs/GettingStartedInPython.html#morgan-fingerprints-circular-fingerprints) with `rdkit`
* Description of the extended-connectivity fingerprint ECFP ([<i>J. Chem. Inf. Model.</i> (2010), <b>50</b>,742-754](https://pubs.acs.org/doi/abs/10.1021/ci100050t))
* What is the chemical space?
Expand Down
5,625 changes: 2,827 additions & 2,798 deletions teachopencadd/talktorials/T004_compound_similarity/talktorial.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -228,8 +228,7 @@
"from sklearn.metrics import roc_curve, roc_auc_score\n",
"import matplotlib.pyplot as plt\n",
"from rdkit import Chem\n",
"from rdkit.Chem import MACCSkeys\n",
"from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect\n",
"from rdkit.Chem import MACCSkeys, rdFingerprintGenerator\n",
"\n",
"from teachopencadd.utils import seed_everything\n",
"\n",
Expand All @@ -242,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -266,7 +265,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 34,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -380,8 +379,8 @@
"</div>"
],
"text/plain": [
" molecule_chembl_id IC50 units smiles \n",
"0 CHEMBL63786 0.003 nM Brc1cccc(Nc2ncnc3cc4ccccc4cc23)c1 \\\n",
" molecule_chembl_id IC50 units smiles \\\n",
"0 CHEMBL63786 0.003 nM Brc1cccc(Nc2ncnc3cc4ccccc4cc23)c1 \n",
"1 CHEMBL35820 0.006 nM CCOc1cc2ncnc(Nc3cccc(Br)c3)c2cc1OCC \n",
"2 CHEMBL53711 0.006 nM CN(C)c1cc2c(Nc3cccc(Br)c3)ncnc2cn1 \n",
"3 CHEMBL66031 0.008 nM Brc1cccc(Nc2ncnc3cc4[nH]cnc4cc23)c1 \n",
Expand All @@ -395,7 +394,7 @@
"4 11.096910 329.027607 5 2 3.5726 True "
]
},
"execution_count": 3,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -415,7 +414,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 35,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -488,7 +487,7 @@
"4 CHEMBL53753 CNc1cc2c(Nc3cccc(Br)c3)ncnc2cn1 11.096910"
]
},
"execution_count": 4,
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -523,7 +522,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 36,
"metadata": {},
"outputs": [
{
Expand All @@ -549,7 +548,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 37,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -628,7 +627,7 @@
"4 CHEMBL53753 CNc1cc2c(Nc3cccc(Br)c3)ncnc2cn1 11.096910 1.0"
]
},
"execution_count": 6,
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -653,7 +652,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -685,9 +684,11 @@
" if method == \"maccs\":\n",
" return np.array(MACCSkeys.GenMACCSKeys(mol))\n",
" if method == \"morgan2\":\n",
" return np.array(GetMorganFingerprintAsBitVect(mol, 2, nBits=n_bits))\n",
" fpg = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=n_bits)\n",
" return np.array(fpg.GetCountFingerprint(mol))\n",
" if method == \"morgan3\":\n",
" return np.array(GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits))\n",
" fpg = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=n_bits)\n",
" return np.array(fpg.GetCountFingerprint(mol))\n",
" else:\n",
" # NBVAL_CHECK_OUTPUT\n",
" print(f\"Warning: Wrong method specified: {method}. Default will be used instead.\")\n",
Expand All @@ -696,7 +697,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -705,7 +706,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 40,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -766,8 +767,8 @@
"</div>"
],
"text/plain": [
" molecule_chembl_id smiles pIC50 active \n",
"0 CHEMBL63786 Brc1cccc(Nc2ncnc3cc4ccccc4cc23)c1 11.522879 1.0 \\\n",
" molecule_chembl_id smiles pIC50 active \\\n",
"0 CHEMBL63786 Brc1cccc(Nc2ncnc3cc4ccccc4cc23)c1 11.522879 1.0 \n",
"1 CHEMBL35820 CCOc1cc2ncnc(Nc3cccc(Br)c3)c2cc1OCC 11.221849 1.0 \n",
"2 CHEMBL53711 CN(C)c1cc2c(Nc3cccc(Br)c3)ncnc2cn1 11.221849 1.0 \n",
"\n",
Expand All @@ -777,7 +778,7 @@
"2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... "
]
},
"execution_count": 9,
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,7 @@
}
],
"source": [
"client.Information.get_kinase_names?"
"?client.Information.get_kinase_names"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1476,7 +1476,7 @@
}
],
"source": [
"view.shape.add_sphere?"
"?view.shape.add_sphere"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,10 @@
"source": [
"try:\n",
" import google.colab\n",
"\n",
" !pip install condacolab\n",
" import condacolab\n",
"\n",
" condacolab.install()\n",
"except ModuleNotFoundError:\n",
" pass"
Expand All @@ -315,14 +317,16 @@
" import condacolab\n",
" from google.colab import files\n",
" from IPython.display import clear_output\n",
"\n",
" condacolab.check()\n",
" !conda install -q -y -c conda-forge mdtraj openmm openmmforcefields openff-toolkit pdbfixer pypdb rdkit\n",
"except ModuleNotFoundError:\n",
" on_colab = False\n",
"else:\n",
" #check if installation was succesful\n",
" # check if installation was succesful\n",
" try:\n",
" import rdkit\n",
"\n",
" on_colab = True\n",
" clear_output() # clear the excessive installation outputs\n",
" print(\"Dependencies successfully installed!\")\n",
Expand Down Expand Up @@ -352,6 +356,7 @@
],
"source": [
"import sys\n",
"\n",
"if not on_colab and sys.platform.startswith((\"linux\", \"darwin\")):\n",
" !mamba install -q -y -c conda-forge openmmforcefields\n",
" # Notes:\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -375,8 +375,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"from rdkit import Chem\n",
"from rdkit.Chem import MACCSkeys, Draw\n",
"from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect\n",
"from rdkit.Chem import MACCSkeys, Draw, rdFingerprintGenerator\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import metrics\n",
Expand Down Expand Up @@ -690,9 +689,11 @@
" if method == \"maccs\":\n",
" return np.array(MACCSkeys.GenMACCSKeys(mol))\n",
" if method == \"morgan2\":\n",
" return np.array(GetMorganFingerprintAsBitVect(mol, 2, nBits=n_bits))\n",
" fpg = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=n_bits)\n",
" return np.array(fpg.GetCountFingerprint(mol))\n",
" if method == \"morgan3\":\n",
" return np.array(GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits))\n",
" fpg = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=n_bits)\n",
" return np.array(fpg.GetCountFingerprint(mol))\n",
" else:\n",
" print(f\"Warning: Wrong method specified: {method}.\" \" Default will be used instead.\")\n",
" return np.array(MACCSkeys.GenMACCSKeys(mol))"
Expand Down Expand Up @@ -1921,7 +1922,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.7.8"
},
"toc-autonumbering": true,
"widgets": {
Expand Down