Skip to content

Commit 375216c

Browse files
committed
Merge pull request #19 from bioinf-jku/dev
Fix numerical issues and efficiency issues
2 parents b4bcc22 + 2c45937 commit 375216c

File tree

10 files changed

+428
-105
lines changed

10 files changed

+428
-105
lines changed

.github/workflows/test_dev.yml

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3+
4+
name: Tests (dev)
5+
6+
on:
7+
push:
8+
branches: [ "dev" ]
9+
pull_request:
10+
branches: [ "dev" ]
11+
12+
jobs:
13+
build:
14+
15+
runs-on: ubuntu-latest
16+
strategy:
17+
fail-fast: false
18+
matrix:
19+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
20+
21+
steps:
22+
- uses: actions/checkout@v4
23+
- name: Set up Python ${{ matrix.python-version }}
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: ${{ matrix.python-version }}
27+
- name: Install dependencies
28+
run: |
29+
python -m pip install --upgrade pip
30+
python -m pip install flake8 pytest
31+
python -m pip install -e .
32+
- name: Lint with flake8
33+
run: |
34+
# stop the build if there are Python syntax errors or undefined names
35+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38+
- name: Test with pytest
39+
run: |
40+
pytest

.github/workflows/test_master.yml

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3+
4+
name: Tests (master)
5+
6+
on:
7+
push:
8+
branches: [ "master"]
9+
pull_request:
10+
branches: [ "master"]
11+
12+
jobs:
13+
build:
14+
15+
runs-on: ubuntu-latest
16+
strategy:
17+
fail-fast: false
18+
matrix:
19+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
20+
21+
steps:
22+
- uses: actions/checkout@v4
23+
- name: Set up Python ${{ matrix.python-version }}
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: ${{ matrix.python-version }}
27+
- name: Install dependencies
28+
run: |
29+
python -m pip install --upgrade pip
30+
python -m pip install flake8 pytest
31+
python -m pip install -e .
32+
- name: Lint with flake8
33+
run: |
34+
# stop the build if there are Python syntax errors or undefined names
35+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38+
- name: Test with pytest
39+
run: |
40+
pytest

README.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,27 @@
11
# Fréchet ChemNet Distance
2+
![PyPI](https://img.shields.io/pypi/v/fcd)
3+
![Tests (master)](https://github.com/bioinf-jku/fcd/actions/workflows/test_master.yml/badge.svg?branch=master)
4+
![Tests (dev)](https://github.com/bioinf-jku/fcd/actions/workflows/test_dev.yml/badge.svg?branch=dev)
5+
![PyPI - Downloads](https://img.shields.io/pypi/dm/fcd)
6+
![GitHub release (latest by date)](https://img.shields.io/github/v/release/bioinf-jku/fcd)
7+
![GitHub release date](https://img.shields.io/github/release-date/bioinf-jku/fcd)
8+
![GitHub](https://img.shields.io/github/license/bioinf-jku/fcd)
9+
210

311
Code for the paper "Fréchet ChemNet Distance: A Metric for Generative Models for Molecules in Drug Discovery"
412
[JCIM](https://pubs.acs.org/doi/10.1021/acs.jcim.8b00234) /
513
[ArXiv](https://arxiv.org/abs/1803.09518)
614

715

816
## Installation
9-
You can install the FCD using
17+
You can install FCD using
1018
```
1119
pip install fcd
1220
```
21+
or run the example notebook on Google Colab <a href="https://colab.research.google.com/github/bioinf-jku/FCD/blob/master/example.ipynb">
22+
<img src="https://colab.research.google.com/assets/colab-badge.svg">
23+
</a>.
24+
1325

1426
# Requirements
1527
```

example.ipynb

+61-24
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,58 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 22,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"%%capture\n",
10+
"!pip install fcd"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 23,
16+
"metadata": {},
17+
"outputs": [
18+
{
19+
"name": "stdout",
20+
"output_type": "stream",
21+
"text": [
22+
"--2024-04-01 18:53:15-- https://raw.githubusercontent.com/bioinf-jku/FCD/master/generated_smiles/LSTM_Segler.smi\n",
23+
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n",
24+
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
25+
"HTTP request sent, awaiting response... 200 OK\n",
26+
"Length: 22730454 (22M) [text/plain]\n",
27+
"Saving to: ‘generated_smiles/LSTM_Segler.smi’\n",
28+
"\n",
29+
"generated_smiles/LS 100%[===================>] 21.68M 3.07MB/s in 7.3s \n",
30+
"\n",
31+
"2024-04-01 18:53:22 (2.98 MB/s) - ‘generated_smiles/LSTM_Segler.smi’ saved [22730454/22730454]\n",
32+
"\n"
33+
]
34+
}
35+
],
36+
"source": [
37+
"!mkdir generated_smiles -p\n",
38+
"!wget https://raw.githubusercontent.com/bioinf-jku/FCD/master/generated_smiles/LSTM_Segler.smi -O generated_smiles/LSTM_Segler.smi"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 24,
644
"metadata": {},
745
"outputs": [],
846
"source": [
947
"import os\n",
10-
"from rdkit import RDLogger \n",
48+
"from rdkit import RDLogger\n",
1149
"import numpy as np\n",
12-
"import pandas as pd\n",
13-
"from fcd import get_fcd, load_ref_model,canonical_smiles, get_predictions, calculate_frechet_distance\n",
1450
"\n",
15-
"RDLogger.DisableLog('rdApp.*')\n",
51+
"from fcd import get_fcd, load_ref_model, canonical_smiles, get_predictions, calculate_frechet_distance\n",
52+
"\n",
53+
"RDLogger.DisableLog(\"rdApp.*\")\n",
1654
"\n",
1755
"np.random.seed(0)\n",
18-
"os.environ[\"CUDA_VISIBLE_DEVICES\"]= '0' #set gpu"
56+
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\" # set gpu"
1957
]
2058
},
2159
{
@@ -31,7 +69,7 @@
3169
},
3270
{
3371
"cell_type": "code",
34-
"execution_count": 2,
72+
"execution_count": 25,
3573
"metadata": {
3674
"ExecuteTime": {
3775
"end_time": "2020-04-23T09:13:50.403933Z",
@@ -44,8 +82,11 @@
4482
"model = load_ref_model()\n",
4583
"\n",
4684
"# Load generated molecules\n",
47-
"gen_mol_file = \"generated_smiles/LSTM_Segler.smi\" #input file which contains one generated SMILES per line\n",
48-
"gen_mol = pd.read_csv(gen_mol_file,header=None)[0] #IMPORTANT: take at least 10000 molecules as FCD can vary with sample size \n",
85+
"gen_mol_file = \"generated_smiles/LSTM_Segler.smi\" # input file which contains one generated SMILES per line\n",
86+
"with open(gen_mol_file) as f:\n",
87+
" gen_mol = f.read().split(\"\\n\")\n",
88+
"\n",
89+
"# IMPORTANT: take at least 10000 molecules as FCD can vary with sample size\n",
4990
"sample1 = np.random.choice(gen_mol, 10000, replace=False)\n",
5091
"sample2 = np.random.choice(gen_mol, 10000, replace=False)\n",
5192
"\n",
@@ -65,7 +106,7 @@
65106
},
66107
{
67108
"cell_type": "code",
68-
"execution_count": 3,
109+
"execution_count": 26,
69110
"metadata": {
70111
"ExecuteTime": {
71112
"end_time": "2020-04-23T09:11:27.207953Z",
@@ -77,12 +118,12 @@
77118
"name": "stdout",
78119
"output_type": "stream",
79120
"text": [
80-
"FCD: 0.333862289051325\n"
121+
"FCD: 0.3298386855756661\n"
81122
]
82123
}
83124
],
84125
"source": [
85-
"#get CHEBMLNET activations of generated molecules \n",
126+
"# get CHEMBLNET activations of generated molecules\n",
86127
"act1 = get_predictions(model, can_sample1)\n",
87128
"act2 = get_predictions(model, can_sample2)\n",
88129
"\n",
@@ -92,18 +133,14 @@
92133
"mu2 = np.mean(act2, axis=0)\n",
93134
"sigma2 = np.cov(act2.T)\n",
94135
"\n",
95-
"fcd_score = calculate_frechet_distance(\n",
96-
" mu1=mu1,\n",
97-
" mu2=mu2, \n",
98-
" sigma1=sigma1,\n",
99-
" sigma2=sigma2)\n",
136+
"fcd_score = calculate_frechet_distance(mu1=mu1, mu2=mu2, sigma1=sigma1, sigma2=sigma2)\n",
100137
"\n",
101-
"print('FCD: ',fcd_score)"
138+
"print(\"FCD: \", fcd_score)"
102139
]
103140
},
104141
{
105142
"cell_type": "code",
106-
"execution_count": 4,
143+
"execution_count": 27,
107144
"metadata": {
108145
"ExecuteTime": {
109146
"end_time": "2020-04-23T09:11:38.873496Z",
@@ -115,20 +152,20 @@
115152
"name": "stdout",
116153
"output_type": "stream",
117154
"text": [
118-
"FCD: 0.333862289051325\n"
155+
"FCD: 0.3298386855756661\n"
119156
]
120157
}
121158
],
122159
"source": [
123160
"\"\"\"if you don't need to store the activations you can also take a shortcut.\"\"\"\n",
124161
"fcd_score = get_fcd(can_sample1, can_sample2, model)\n",
125162
"\n",
126-
"print('FCD: ',fcd_score)"
163+
"print(\"FCD: \", fcd_score)"
127164
]
128165
},
129166
{
130167
"cell_type": "code",
131-
"execution_count": 5,
168+
"execution_count": 28,
132169
"metadata": {
133170
"ExecuteTime": {
134171
"end_time": "2020-04-23T09:11:49.760022Z",
@@ -140,14 +177,14 @@
140177
"name": "stdout",
141178
"output_type": "stream",
142179
"text": [
143-
"FCD: 25.635578193222216\n"
180+
"FCD: 25.552174526889033\n"
144181
]
145182
}
146183
],
147184
"source": [
148185
"\"\"\"This is what happens if you do not canonicalize the smiles\"\"\"\n",
149186
"fcd_score = get_fcd(can_sample1, sample2, model)\n",
150-
"print('FCD: ',fcd_score)"
187+
"print(\"FCD: \", fcd_score)"
151188
]
152189
}
153190
],

fcd/__init__.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
from .fcd import get_fcd, get_predictions, load_ref_model
2-
from .utils import calculate_frechet_distance, canonical_smiles
1+
# ruff: noqa: F401
32

4-
__version__ = "1.2"
3+
from fcd.fcd import get_fcd, get_predictions, load_ref_model
4+
from fcd.utils import calculate_frechet_distance, canonical_smiles
5+
6+
__all__ = [
7+
"get_fcd",
8+
"get_predictions",
9+
"load_ref_model",
10+
"calculate_frechet_distance",
11+
"canonical_smiles",
12+
]
13+
14+
__version__ = "1.2.2"

0 commit comments

Comments
 (0)