Simulation study of a gene-environment interaction effect. The main effect delta
is set to have 95% power. The interaction effect theta
is set using phi
which is relative to the main effect ranging from {0..4}x
as described in Brookes et al (2004).
library("varGWASR")
library("broom")
library("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library("ggplot2")
set.seed(1234)
<- 1000
n_obs <- 1000
n_sim <- 0.4
maf
# main effect size of x on y detectable ~ 95% power
<- 0.17
delta
# simulation
<- data.frame()
results for (phi in seq(0, 4, .5)){
# size of interaction relative to main effect (Brookes S, et al 2004)
<- delta * phi
theta
for (i in 1:n_sim) {
# simulate GxE interaction effect
<- rbinom(n_obs, 2, maf)
x <- rnorm(n_obs)
u <- x * delta + x*u * theta + rnorm(n_obs)
y
# test/estimate variance effect
<- varGWASR::model(data.frame(x, y), "x", "y")
fit
# add true variance difference
$v1 <- var(y[x==1]) - var(y[x==0])
fit$v2 <- var(y[x==2]) - var(y[x==0])
fit
# store result
$phi <- phi
fit<- rbind(results, fit)
results
}
}
Power/T1E of the Brown-Forsythe test using LAD regression
<- results %>%
tbl ::group_by(phi) %>%
dplyr::summarize(tidy(binom.test(sum(phi_p < 0.05), n_sim))) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::rename(power="estimate", ci_95l="conf.low", ci_95h="conf.high")
dplyr::kable(tbl, digits=3) knitr
phi | power | ci_95l | ci_95h |
---|---|---|---|
0.0 | 0.054 | 0.041 | 0.070 |
0.5 | 0.043 | 0.031 | 0.057 |
1.0 | 0.104 | 0.086 | 0.125 |
1.5 | 0.295 | 0.267 | 0.324 |
2.0 | 0.671 | 0.641 | 0.700 |
2.5 | 0.928 | 0.910 | 0.943 |
3.0 | 0.996 | 0.990 | 0.999 |
3.5 | 1.000 | 0.996 | 1.000 |
4.0 | 1.000 | 0.996 | 1.000 |
Effect size of the Brown-Forsythe test using LAD regression
# estimate mean and 95% CI
<- results %>%
v1 ::group_by(phi) %>%
dplyr::summarize(t.test(phi_x1) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
dplyr<- results %>%
v2 ::group_by(phi) %>%
dplyr::summarize(t.test(phi_x2) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
dplyr<- results %>%
v1_mean ::group_by(phi) %>%
dplyr::summarize(x=mean(v1))
dplyr<- results %>%
v2_mean ::group_by(phi) %>%
dplyr::summarize(x=mean(v2))
dplyr<- merge(v1, v1_mean, "phi")
v1 $wd <- 0.01
v1<- merge(v2, v2_mean, "phi")
v2 $wd <- 0.05
v2<- rbind(v1, v2)
ci $phi <- as.factor(ci$phi)
ci$genotype <- as.factor(ci$genotype) ci
ggplot(data=ci, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, linetype="dashed", color="grey") +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Estimated difference in variance compared with SNP=0 (95% CI)") +
facet_wrap(method ~ genotype, scales="free") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)
Confidence interval coverage of the Brown-Forsythe test using LAD regression
<- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
results names(results)[10] <- "v1_mean"
names(results)[11] <- "v2_mean"
$phi_x1_lci <- results$phi_x1 - (1.96 * results$se_x1)
results$phi_x1_uci <- results$phi_x1 + (1.96 * results$se_x1)
results$phi_x2_lci <- results$phi_x2 - (1.96 * results$se_x2)
results$phi_x2_uci <- results$phi_x2 + (1.96 * results$se_x2)
results
<- results %>%
coverage1 ::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x1_lci <= v1_mean & phi_x1_uci >= v1_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=1", method="LAD-BF")
dplyr<- results %>%
coverage2 ::group_by(phi) %>%
dplyr::summarize(binom.test(sum(phi_x2_lci <= v2_mean & phi_x2_uci >= v2_mean), n()) %>% tidy) %>%
dplyr::select(phi, estimate, conf.low, conf.high) %>%
dplyr::mutate(genotype="SNP=2", method="LAD-BF")
dplyr
<- merge(coverage1, v1_mean, "phi")
coverage1 $wd <- 0.01
coverage1<- merge(coverage2, v2_mean, "phi")
coverage2 $wd <- 0.05
coverage2<- rbind(coverage1, coverage2)
coverage $phi <- as.factor(coverage$phi)
coverage$genotype <- as.factor(coverage$genotype) coverage
ggplot(data=coverage, aes(x=x, y=estimate, ymin=conf.low, ymax=conf.high)) +
geom_point() +
geom_errorbar(aes(width=wd)) +
theme_classic() +
xlab("Difference in variance compared with SNP=0") +
ylab("Coverage of 95% CI (95% CI)") +
geom_hline(yintercept = 0.95, linetype = "dashed", color = "grey") +
facet_grid(method ~ genotype, scales="free_x") +
labs(shape="Genotype") +
scale_y_continuous(limits = c(0, 1), breaks = scales::pretty_breaks(n = 5)) +
theme(
strip.background = element_blank(),
legend.title.align=0.5
)