@@ -34,12 +34,8 @@ library(reticulate)
#reticulate::py_discover_config(required_module = "keras")
## For setup on a renku environment:
- # reticulate::use_virtualenv("/opt/conda")
+ reticulate::use_virtualenv("/opt/conda")
- ## Only for xenon6 setup:
- #reticulate::use_virtualenv("/tungstenfs/groups/gbioinfo/sharedSoft/virtualenvs/r-reticulate-keras-2.3.0-tensorflow-2.0.0-gpu/") # Use TF2
- #reticulate::use_virtualenv("/tungstenfs/groups/gbioinfo/sharedSoft/virtualenvs/r-reticulate-keras-2.2.5-tensorflow-1.14.0-gpu") # Use TF1
- #Sys.setenv("CUDA_VISIBLE_DEVICES" = "3" )
reticulate::py_config()
library(keras)
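The order of these calls matters: reticulate binds to a Python interpreter the first time one is needed, so `use_virtualenv()` has to run before `library(keras)` triggers the binding. A minimal sketch of checking the binding on the renku setup, assuming the `/opt/conda` virtualenv from the hunk above exists (`required = TRUE` is an assumption added to fail fast):

```r
library(reticulate)
use_virtualenv("/opt/conda", required = TRUE)  # error out if the env is missing
py_config()                                    # shows which Python was bound
py_module_available("tensorflow")              # TRUE if keras can find its backend
library(keras)
```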
@@ -73,12 +69,13 @@ composition of the dataset:
``` {r Download}
## Download the data and set row names to gene symbols whenever possible
- sce <- readRDS(gzcon(url("https://github.com/fmicompbio/adv_scrnaseq_2020/blob/master/DGNs/data/SCE_MammaryGland.rds?raw=true")))
+ #sce <- readRDS(gzcon(url("https://github.com/fmicompbio/adv_scrnaseq_2020/blob/master/DGNs/data/SCE_MammaryGland.rds?raw=true")))
+ sce <- readRDS("/work/adv_scrnaseq_2020/data/dgn/SCE_MammaryGland.rds")
#Subsample cells to speed up processing and, most importantly, later model training:
- set.seed(42)
- n=4000
+ # set.seed(42)
+ # n=4000
#sce <- sce[, sample(1:ncol(sce), n ) ]
- ## Dataset compostion per cell type and study:
+ ## Dataset composition per cell type and study:
table(colData(sce)$study , colData(sce)$cell.class)
```
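If the speed-up is wanted after all, re-enabling the disabled subsampling is just a matter of uncommenting; a reproducible sketch, assuming `sce` is the `SingleCellExperiment` loaded above with the `SingleCellExperiment` package attached for `colData()`:

```r
set.seed(42)                                  # fix the random draw
n <- 4000                                     # number of cells to keep
sce <- sce[, sample(seq_len(ncol(sce)), n)]   # columns of an SCE are cells
table(colData(sce)$study, colData(sce)$cell.class)
```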
@@ -143,7 +140,7 @@ rm(M)
# Define the variational autoencoder model
- ![ ] ( figures/VAE_schematic.png ) { width=40% }
+ ![ ] ( /work/adv_scrnaseq_2020/adv_scrnaseq_2020/DGNs/figures/VAE_schematic.png ) { width=40% }
``` {r}
##### Sparse variational autoencoder model
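Only the schematic's path changes here, but since this hunk sits where the model is defined, a compressed sketch of the reparameterization trick at the heart of any VAE may help. Layer sizes are illustrative rather than the course architecture; only the input name `gene_input` is taken from the code further down:

```r
library(keras)
latent_dim <- 2L   # illustrative, not the course setting

# Reparameterization: z = mu + sigma * eps keeps the sampling step differentiable
sampling <- function(arg) {
  z_mean    <- arg[, 1:latent_dim]
  z_log_var <- arg[, (latent_dim + 1):(2 * latent_dim)]
  eps <- k_random_normal(shape = k_shape(z_mean))
  z_mean + k_exp(z_log_var / 2) * eps
}

gene_input <- layer_input(shape = 1000L, name = "gene_input")
h          <- gene_input %>% layer_dense(units = 128L, activation = "relu")
z_mean     <- h %>% layer_dense(units = latent_dim)
z_log_var  <- h %>% layer_dense(units = latent_dim)
z          <- layer_concatenate(list(z_mean, z_log_var)) %>% layer_lambda(sampling)
encoder    <- keras_model(gene_input, z_mean)  # deterministic embedding used later
```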
@@ -329,7 +326,6 @@ early_stopping <- callback_early_stopping(monitor = "val_loss", min_delta = 0,
``` {r eval=FALSE}
##### Tensorboard callback (do not execute):
- #indir <- "/tungstenfs/groups/gbioinfo/papapana/DEEP_LEARNING/Autoencoders/BatchCor_paper/" # When running from xenon
indir <- "/Users/papapana/Desktop/XENON/papapana/DEEP_LEARNING/Autoencoders/BatchCor_paper" # When running locally
log_dir <- paste0(indir,"/logs/run_exercise_DGNs/")
system(paste0 ("rm -rf ", log_dir, "/*") )
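The hard-coded machine paths are the only non-portable part of this chunk. A hypothetical local equivalent writes logs somewhere writable and launches the viewer from R (port 6006 is TensorBoard's default, assumed here):

```r
log_dir <- file.path(tempdir(), "logs", "run_exercise_DGNs")
dir.create(log_dir, recursive = TRUE, showWarnings = FALSE)
tnsrb <- callback_tensorboard(log_dir)  # hand this to fit() below
tensorboard(log_dir, port = 6006)       # opens the TensorBoard UI in a browser
```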
@@ -345,7 +341,7 @@ tnsrb <- callback_tensorboard( log_dir )
``` {r eval=FALSE}
nepochs=1000 #
- ###### Fit the model using also our specified callbacks for scheduling and ealry stopping:
+ ###### Fit the model using also our specified callbacks for scheduling and early stopping:
history <- vae %>% fit(
x=sc_train_x,
y=sc_train_x,
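The hunk cuts the `fit()` call short. A plausible shape for the remaining arguments, where everything except `early_stopping` and `tnsrb` (both defined above) is an assumption rather than the course's actual settings; in particular, `callback_reduce_lr_on_plateau()` stands in for whatever scheduler the materials use:

```r
lr_schedule <- callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.5)

history <- vae %>% fit(
  x = sc_train_x,
  y = sc_train_x,            # an autoencoder reconstructs its own input
  epochs = nepochs,
  batch_size = 128,          # assumed value
  validation_split = 0.2,    # gives the callbacks a val_loss to monitor
  callbacks = list(lr_schedule, early_stopping, tnsrb)
)
```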
@@ -362,7 +358,7 @@ history <- vae %>% fit(
```
``` {r}
- history <- readRDS("data/MG_complete_VAE_history.rds")
+ history <- readRDS("/work/adv_scrnaseq_2020/data/dgn/MG_complete_VAE_history.rds")
plot(history)
```
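Loading a pre-computed history keeps the notebook runnable without a training session. Anyone who does retrain can cache the object with the mirror-image call, reusing the path from the chunk above:

```r
# Cache a freshly trained history so later sessions can just readRDS() it
saveRDS(history, "/work/adv_scrnaseq_2020/data/dgn/MG_complete_VAE_history.rds")
```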
@@ -389,11 +385,9 @@ We now have in our hands a trained VAE for our dataset. What is it good for?
``` {r fig.width=7, fig.height=7}
########################################################################################################
- vae %>% load_model_weights_hdf5("trained_models/MG_complete_VAE_weights.hdf5")
- #vae %>% load_model_weights_hdf5("data/dgn/MG_complete_VAE_weights.hdf5")
+ vae %>% load_model_weights_hdf5("/work/adv_scrnaseq_2020/data/dgn/MG_complete_VAE_weights.hdf5")
- palettes <- readRDS("data/distinct_palettes.rds")
- #palettes <- readRDS("data/dgn/distinct_palettes.rds")
+ palettes <- readRDS("/work/adv_scrnaseq_2020/data/dgn/distinct_palettes.rds")
##### Run on the combined select dataset (train + validation):
study_annot <- sce$study
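With the weights restored, the natural next step is to embed every cell and color by study. A sketch, using a base-R palette instead of the `palettes` object (whose structure the diff does not show) and assuming `encoder` and `sc_x` as used elsewhere in the file:

```r
latent <- predict(encoder, list(gene_input = sc_x))  # cells x latent dims
study  <- factor(study_annot)
cols   <- rainbow(nlevels(study))

plot(latent[, 1], latent[, 2], col = cols[as.integer(study)],
     pch = 16, cex = 0.4, xlab = "latent 1", ylab = "latent 2")
legend("topright", legend = levels(study), col = cols, pch = 16, bty = "n")
```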
@@ -498,7 +492,7 @@ legend("topright",legend = c("poisson","loess.fit"),lty=2,lwd=2,bty="n",col=c("d
Here we will show a first example of using our learned latent space for inference. Specifically, we will correct for batch-specific
effects by decomposing the variance in the latent space:
- ![ ] ( figures/LatentArithm_BC.png ) { width=50% }
+ ![ ] ( /work/adv_scrnaseq_2020/adv_scrnaseq_2020/DGNs/figures/LatentArithm_BC.png ) { width=50% }
``` {r fig.width=10, fig.height=7}
########## Latent arithmetic operations to correct for batch:
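The figure's idea in code: each study contributes an additive offset in latent space, so shifting every cell by (global centroid minus its study centroid) and decoding yields batch-corrected profiles. A minimal sketch, assuming a trained `decoder` model, which the diff does not show:

```r
latent    <- predict(encoder, list(gene_input = sc_x))
batch     <- factor(study_annot)
global_mu <- colMeans(latent)

corrected <- latent
for (b in levels(batch)) {
  idx   <- batch == b
  shift <- global_mu - colMeans(latent[idx, , drop = FALSE])
  corrected[idx, ] <- sweep(latent[idx, , drop = FALSE], 2, shift, "+")
}
decoded <- predict(decoder, corrected)  # back to expression space
```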
@@ -573,12 +567,11 @@ Now we implement a second example of latent arithmetic-based inference. In this
in one of the three studies (wal). Following the same rationale of decomposing variance sources in latent space, we will then predict the profile of the missing
cell type in the target study:
- ![ ] ( figures/LatentArithm_OoS.png ) { width=50% }
+ ![ ] ( /work/adv_scrnaseq_2020/adv_scrnaseq_2020/DGNs/figures/LatentArithm_OoS.png ) { width=50% }
``` {r fig.width=9, fig.height=3}
#We will use a model that was trained on data that never saw wal luminal progenitor cells:
- #vae %>% load_model_weights_hdf5("data/dgn/MG_leavout_wallp_VAE_weights.hdf5")
- vae %>% load_model_weights_hdf5("trained_models/MG_leavout_wallp_VAE_weights.hdf5")
+ vae %>% load_model_weights_hdf5("/work/adv_scrnaseq_2020/data/dgn/MG_leavout_wallp_VAE_weights.hdf5")
latent_output <- predict(encoder, list(gene_input=sc_x))
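The prediction itself is one line of vector arithmetic: take the missing cell type's centroid in a study that has it, then translate it by the between-study offset. A sketch in which the cell-type label and the choice of source study are assumptions (the diff only says the held-out cells are wal luminal progenitors):

```r
cell_annot <- sce$cell.class
src <- setdiff(unique(study_annot), "wal")[1]  # any study that kept LP cells

lp_src <- colMeans(latent_output[cell_annot == "luminal progenitor" &
                                 study_annot == src, , drop = FALSE])
wal_mu <- colMeans(latent_output[study_annot == "wal", , drop = FALSE])
src_mu <- colMeans(latent_output[study_annot == src, , drop = FALSE])

lp_wal_pred <- lp_src + (wal_mu - src_mu)      # shift the LP centroid into wal
profile <- predict(decoder, matrix(lp_wal_pred, nrow = 1))
```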