|
| 1 | +library(tidyverse) |
| 2 | +library(gganimate) |
| 3 | +library(ggthemes) |
| 4 | + |
# Simulate 200 draws of X and Y (independent normals centred on 1), then build
# the collider C, which is 1 whenever X + Y plus some noise exceeds 2.
# No RNG seed is fixed, so each run produces a new sample: re-run until the
# raw X-Y correlation printed below looks nice and low.
df <- data.frame(
  X = rnorm(200, mean = 1),
  Y = rnorm(200, mean = 1),
  time = "1"
) %>%
  mutate(C = as.integer(X + Y + rnorm(200) / 2 > 2)) %>%
  group_by(C) %>%
  # Attach the mean of X and of Y within each C group to every row.
  mutate(mean_X = mean(X), mean_Y = mean(Y)) %>%
  ungroup()

# Raw correlation between X and Y, ignoring C.
with(df, cor(X, Y))
| 13 | + |
# Captions for steps 1 and 7 of the animation, each embedding the relevant
# correlation rounded to three decimals.

# Correlation between X and Y in the raw data, with C ignored.
before_cor <- paste0(
  "1. Start with raw data, ignoring C. Correlation between X and Y: ",
  round(cor(df$X, df$Y), 3)
)

# Correlation after subtracting each C group's mean from X and from Y.
after_cor <- paste0(
  "7. Analyze what's left! Correlation between X and Y controlling for C: ",
  round(cor(df$X - df$mean_X, df$Y - df$mean_Y), 3)
)
| 17 | + |
| 18 | + |
| 19 | + |
| 20 | + |
# Stack seven copies of the data, one per animation state. `time` holds the
# caption of each state. mean_X / mean_Y feed the vertical / horizontal
# group-mean lines of the plot; they are set to NA in states where the
# corresponding line should not appear.
dffull <- rbind(
  # Step 1: raw data, every point assigned C = 0, no mean lines
  mutate(df, mean_X = NA, mean_Y = NA, C = 0, time = before_cor),
  # Step 2: raw data with the real C values, still no mean lines
  mutate(
    df,
    mean_X = NA, mean_Y = NA,
    time = "2. Separate data by the values of C."
  ),
  # Step 3: keep the group means of X (vertical lines), no Y lines
  mutate(
    df,
    mean_Y = NA,
    time = "3. Figure out what differences in X are explained by C"
  ),
  # Step 4: X de-meaned within C; the X lines move to 0
  mutate(
    df,
    X = X - mean_X, mean_X = 0, mean_Y = NA,
    time = "4. Remove differences in X explained by C"
  ),
  # Step 5: drop the X lines, keep the group means of Y (horizontal lines)
  mutate(
    df,
    X = X - mean_X, mean_X = NA,
    time = "5. Figure out what differences in Y are explained by C"
  ),
  # Step 6: Y de-meaned within C as well; the Y lines move to 0
  mutate(
    df,
    X = X - mean_X, Y = Y - mean_Y, mean_X = NA, mean_Y = 0,
    time = "6. Remove differences in Y explained by C"
  ),
  # Step 7: de-meaned X and Y only, no mean lines
  mutate(
    df,
    X = X - mean_X, Y = Y - mean_Y, mean_X = NA, mean_Y = NA,
    time = after_cor
  )
)
| 37 | + |
# Scatter of Y against X coloured by C, with mean_X drawn as vertical lines and
# mean_Y as horizontal lines, animated across the states stored in `time`.
p <- ggplot(dffull, aes(x = X, y = Y, color = as.factor(C))) +
  geom_point() +
  # mean_X / mean_Y are deliberately NA in the states where a line should be
  # hidden; na.rm = TRUE drops those rows silently instead of warning.
  # The colour mapping is restated here because geom_vline() / geom_hline()
  # do not inherit the plot-level aesthetics.
  geom_vline(aes(xintercept = mean_X, color = as.factor(C)), na.rm = TRUE) +
  geom_hline(aes(yintercept = mean_Y, color = as.factor(C)), na.rm = TRUE) +
  guides(color = guide_legend(title = "C")) +
  scale_color_colorblind() +
  # {next_state} is substituted by transition_states() with a `time` caption.
  labs(title = 'Inventing a Correlation Between X and Y by Controlling for Collider C \n{next_state}') +
  # One transition_length / state_length entry per state, in state order.
  transition_states(
    time,
    transition_length = c(1, 12, 32, 12, 32, 12, 12),
    state_length = c(160, 125, 100, 75, 100, 75, 160),
    wrap = FALSE
  ) +
  ease_aes("sine-in-out") +
  exit_fade() +
  enter_fade()

# Render the animation using 200 frames.
animate(p, nframes = 200)
0 commit comments