Import to CAMERA from local data
import-local.Rmd
library(CAMeRa)
You can supply data to CAMERA either from text files or from OpenGWAS. A mixture of the two is currently not supported, though you can download the studies from OpenGWAS and use them as raw files.
Generating the data manually
You need to supply the following data:
- `instrument_raw` = a data frame of pooled instruments across all ancestries, extracted from each ancestry's study for the exposure traits. Optionally also provide the same for the outcome traits.
- `instrument_outcome` = the instruments in `instrument_raw`, extracted from the outcome datasets.
- `instrument_regions` = a named list whose length is the number of unique instruments in `instrument_raw`. The names of the items are the instruments. Each item is itself a list of data frames, one regional extract around the instrument from each population's exposure study.
- `instrument_outcome_regions` = as above, but for the outcome datasets.
Examples of these datasets can be seen in the following file:
load(system.file(package="CAMeRa", "extdata/example-local.rdata"))
instrument_raw
#> # A tibble: 45 × 14
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122285218 0.224 0.0204 0.00583 4.69e- 4 C CT 3:122… LDL AFR
#> 2 3 122285218 0.438 0.0198 0.00542 2.52e- 4 C CT 3:122… LDL EAS
#> 3 3 122285218 0.302 0.0190 0.00166 2.66e-30 C CT 3:122… LDL EUR
#> 4 3 122285218 0.237 0.0139 0.00808 8.45e- 2 C CT 3:122… LDL AMR
#> 5 3 122285218 0.296 0.0154 0.00778 4.75e- 2 C CT 3:122… LDL SAS
#> 6 5 139567696 0.244 -0.0127 0.0172 4.62e- 1 G T 5:139… LDL AFR
#> 7 5 139567696 0.213 -0.0130 0.00192 1.10e-11 G T 5:139… LDL EUR
#> 8 6 27067657 0.989 0.0172 0.0222 4.37e- 1 A T 6:270… LDL AFR
#> 9 6 27067657 0.924 0.0325 0.00268 7.70e-34 A T 6:270… LDL EUR
#> 10 6 27067657 0.978 0.0558 0.0242 2.12e- 2 A T 6:270… LDL AMR
#> # ℹ 35 more rows
#> # ℹ 3 more variables: id <chr>, nstudies <int>, target_trait <chr>
instrument_outcome
#> # A tibble: 15 × 14
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 5 139567696 0.230 0.0069 0.008 0.387 G T 5:1395… Stro… EUR
#> 2 5 139567696 0.247 -0.0483 0.0385 0.209 G T 5:1395… Stro… AFR
#> 3 5 139567696 0.262 0.0183 0.0831 0.826 G T 5:1395… Stro… AMR
#> 4 6 27067657 0.928 -0.0011 0.0132 0.934 A T 6:2706… Stro… EUR
#> 5 6 27067657 0.975 0.212 0.219 0.333 A T 6:2706… Stro… AMR
#> 6 6 161010118 0.936 -0.0427 0.0136 0.00177 A G 6:1610… Stro… EUR
#> 7 6 161010118 0.963 -0.0934 0.171 0.584 A G 6:1610… Stro… AMR
#> 8 7 137562744 0.0201 0.0259 0.0602 0.668 C G 7:1375… Stro… EAS
#> 9 7 137562744 0.132 0.0945 0.0417 0.0234 C G 7:1375… Stro… AFR
#> 10 7 143092269 0.0635 0.0158 0.014 0.257 A G 7:1430… Stro… EUR
#> 11 7 143092269 0.0781 0.025 0.0765 0.743 A G 7:1430… Stro… SAS
#> 12 16 72912880 0.957 0.0117 0.0194 0.547 A G 16:729… Stro… EUR
#> 13 16 72912880 0.982 -0.225 0.183 0.219 A G 16:729… Stro… SAS
#> 14 19 33864260 0.0307 0.0046 0.0192 0.813 A G 19:338… Stro… EUR
#> 15 19 33864260 0.0521 -0.0669 0.0924 0.469 A G 19:338… Stro… SAS
#> # ℹ 3 more variables: id <chr>, nstudies <int>, target_trait <chr>
instrument_regions[[1]]
#> $`LDL AFR`
#> # A tibble: 2,827 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 121981372 0.984 0.0232 0.0204 0.256 C T 3:1219… LDL AFR
#> 2 3 121981609 0.0346 0.0189 0.0134 0.158 A G 3:1219… LDL AFR
#> 3 3 121981619 0.146 0.00208 0.00690 0.763 G T 3:1219… LDL AFR
#> 4 3 121981629 0.978 -0.00525 0.0167 0.754 A G 3:1219… LDL AFR
#> 5 3 121981835 0.823 -0.00165 0.00633 0.794 C G 3:1219… LDL AFR
#> 6 3 121981836 0.172 0.00212 0.00645 0.742 A G 3:1219… LDL AFR
#> 7 3 121982002 0.987 -0.00833 0.0215 0.699 C T 3:1219… LDL AFR
#> 8 3 121982338 0.178 0.00148 0.00632 0.814 G T 3:1219… LDL AFR
#> 9 3 121983322 0.818 -0.00143 0.00626 0.819 C T 3:1219… LDL AFR
#> 10 3 121983551 0.941 -0.00517 0.0108 0.631 C T 3:1219… LDL AFR
#> # ℹ 2,817 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`LDL AMR`
#> # A tibble: 2,084 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 121981215 0.855 0.0303 0.0108 0.00521 A G 3:12… LDL AMR
#> 2 3 121981609 0.106 0.0160 0.0111 0.15 A G 3:12… LDL AMR
#> 3 3 121981619 0.2 0.0148 0.00988 0.135 G T 3:12… LDL AMR
#> 4 3 121981835 0.969 -0.0206 0.0235 0.381 C G 3:12… LDL AMR
#> 5 3 121981836 0.203 0.0119 0.00983 0.227 A G 3:12… LDL AMR
#> 6 3 121982338 0.0316 0.0184 0.0234 0.432 G T 3:12… LDL AMR
#> 7 3 121983322 0.968 -0.0192 0.0232 0.408 C T 3:12… LDL AMR
#> 8 3 121983607 0.76 0.0192 0.00919 0.0371 C T 3:12… LDL AMR
#> 9 3 121983805 0.795 -0.0150 0.00850 0.0775 C T 3:12… LDL AMR
#> 10 3 121984020 0.977 0.00998 0.0233 0.669 C T 3:12… LDL AMR
#> # ℹ 2,074 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`LDL EAS`
#> # A tibble: 1,646 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 121981609 0.0257 -0.0237 0.0160 0.14 A G 3:121… LDL EAS
#> 2 3 121981835 0.983 -0.0426 0.0195 0.0294 C G 3:121… LDL EAS
#> 3 3 121982338 0.017 0.0426 0.0195 0.0289 G T 3:121… LDL EAS
#> 4 3 121983322 0.983 -0.0425 0.0195 0.0288 C T 3:121… LDL EAS
#> 5 3 121983607 0.975 0.0293 0.0163 0.0727 C T 3:121… LDL EAS
#> 6 3 121984021 0.417 0.0154 0.00512 0.00261 A G 3:121… LDL EAS
#> 7 3 121984792 0.0253 -0.0281 0.0158 0.0758 A G 3:121… LDL EAS
#> 8 3 121985083 0.0253 -0.0282 0.0158 0.0748 A AG 3:121… LDL EAS
#> 9 3 121985131 0.984 -0.0435 0.0200 0.0296 A G 3:121… LDL EAS
#> 10 3 121985345 0.539 -0.0150 0.00503 0.0029 C T 3:121… LDL EAS
#> # ℹ 1,636 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`LDL EUR`
#> # A tibble: 2,233 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 121981609 0.129 6.03e-3 0.00208 3.71e-3 A G 3:12… LDL EUR
#> 2 3 121981619 0.356 6.02e-4 0.00146 6.81e-1 G T 3:12… LDL EUR
#> 3 3 121981835 0.955 6.87e-3 0.00342 4.46e-2 C G 3:12… LDL EUR
#> 4 3 121981836 0.356 6.09e-4 0.00146 6.78e-1 A G 3:12… LDL EUR
#> 5 3 121982338 0.0448 -6.78e-3 0.00342 4.75e-2 G T 3:12… LDL EUR
#> 6 3 121983322 0.955 6.80e-3 0.00342 4.65e-2 C T 3:12… LDL EUR
#> 7 3 121983607 0.86 7.87e-3 0.00202 1 e-4 C T 3:12… LDL EUR
#> 8 3 121983674 0.972 -1.62e-2 0.00440 2.38e-4 A G 3:12… LDL EUR
#> 9 3 121983805 0.644 -6.39e-4 0.00146 6.62e-1 C T 3:12… LDL EUR
#> 10 3 121984020 0.976 -1.46e-3 0.00464 7.53e-1 C T 3:12… LDL EUR
#> # ℹ 2,223 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`LDL SAS`
#> # A tibble: 1,875 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 121981609 0.193 -0.000939 0.00906 0.917 A G 3:12… LDL SAS
#> 2 3 121981619 0.234 0.00438 0.00837 0.601 G T 3:12… LDL SAS
#> 3 3 121981835 0.952 0.0186 0.0166 0.26 C G 3:12… LDL SAS
#> 4 3 121981836 0.234 0.00416 0.00836 0.619 A G 3:12… LDL SAS
#> 5 3 121982338 0.0479 -0.0177 0.0164 0.28 G T 3:12… LDL SAS
#> 6 3 121983322 0.952 0.0177 0.0164 0.281 C T 3:12… LDL SAS
#> 7 3 121983607 0.85 0.00333 0.00985 0.735 C T 3:12… LDL SAS
#> 8 3 121983805 0.766 -0.00409 0.00835 0.624 C T 3:12… LDL SAS
#> 9 3 121984020 0.985 0.0239 0.0293 0.415 C T 3:12… LDL SAS
#> 10 3 121984021 0.0591 0.0321 0.0150 0.0316 A G 3:12… LDL SAS
#> # ℹ 1,865 more rows
#> # ℹ 1 more variable: id <chr>
instrument_outcome_regions[[1]]
#> $`Stroke African American or Afro-Caribbean`
#> # A tibble: 1,935 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122289921 0.387 -0.0073 0.0302 0.808 C T 3:12228… Stro… AFR
#> 2 3 122108718 0.345 -0.0224 0.0297 0.451 G T 3:12210… Stro… AFR
#> 3 3 122356077 0.153 0.0322 0.0422 0.445 G T 3:12235… Stro… AFR
#> 4 3 122125052 0.309 0.0319 0.0306 0.297 A G 3:12212… Stro… AFR
#> 5 3 122467637 0.0267 0.176 0.103 0.0864 C T 3:12246… Stro… AFR
#> 6 3 122297742 0.323 -0.0286 0.0303 0.344 A G 3:12229… Stro… AFR
#> 7 3 122365670 0.885 -0.0087 0.0479 0.855 C G 3:12236… Stro… AFR
#> 8 3 122214612 0.0976 -0.0716 0.0484 0.139 C T 3:12221… Stro… AFR
#> 9 3 122201610 0.0948 -0.0072 0.0483 0.882 C G 3:12220… Stro… AFR
#> 10 3 122376850 0.691 0.0424 0.0305 0.165 A G 3:12237… Stro… AFR
#> # ℹ 1,925 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`Stroke Hispanic or Latin American`
#> # A tibble: 1,566 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122289921 0.382 -0.101 0.0794 0.204 C T 3:12228… Stro… AMR
#> 2 3 122108718 0.174 0.0473 0.0737 0.521 G T 3:12210… Stro… AMR
#> 3 3 122125052 0.295 -0.0998 0.0586 0.0887 A G 3:12212… Stro… AMR
#> 4 3 122297742 0.316 0.125 0.0642 0.0522 A G 3:12229… Stro… AMR
#> 5 3 122365670 0.835 0.0308 0.0833 0.711 C G 3:12236… Stro… AMR
#> 6 3 122214612 0.0209 0.111 0.256 0.664 C T 3:12221… Stro… AMR
#> 7 3 122201610 0.132 -0.0104 0.0806 0.897 C G 3:12220… Stro… AMR
#> 8 3 122034854 0.426 -0.0195 0.0563 0.729 A G 3:12203… Stro… AMR
#> 9 3 122268506 0.118 -0.0509 0.0873 0.560 C T 3:12226… Stro… AMR
#> 10 3 122273816 0.516 -0.0404 0.0756 0.593 A G 3:12227… Stro… AMR
#> # ℹ 1,556 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`Stroke East Asian`
#> # A tibble: 1,351 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122289921 0.601 -0.0027 0.011 0.807 C T 3:1222… Stro… EAS
#> 2 3 122024691 0.0338 -0.0287 0.03 0.338 A T 3:1220… Stro… EAS
#> 3 3 122108718 0.0281 -0.0972 0.0327 0.00299 G T 3:1221… Stro… EAS
#> 4 3 122125052 0.588 0.0146 0.0096 0.130 A G 3:1221… Stro… EAS
#> 5 3 122297742 0.250 -0.0051 0.011 0.644 A G 3:1222… Stro… EAS
#> 6 3 122201610 0.0788 -0.0304 0.0205 0.138 C G 3:1222… Stro… EAS
#> 7 3 122376850 0.346 0.0151 0.0101 0.135 A G 3:1223… Stro… EAS
#> 8 3 122034854 0.099 0.0019 0.016 0.905 A G 3:1220… Stro… EAS
#> 9 3 122268506 0.439 0.0193 0.0097 0.0479 C T 3:1222… Stro… EAS
#> 10 3 122273816 0.353 -0.0072 0.0101 0.472 A G 3:1222… Stro… EAS
#> # ℹ 1,341 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`Stroke European`
#> # A tibble: 1,489 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122289921 0.286 0.0039 0.0081 0.629 C T 3:122289… Stro… EUR
#> 2 3 122108718 0.143 -0.0035 0.0094 0.709 G T 3:122108… Stro… EUR
#> 3 3 122125052 0.437 -0.0122 0.0065 0.059 A G 3:122125… Stro… EUR
#> 4 3 122297742 0.24 -0.0084 0.0079 0.286 A G 3:122297… Stro… EUR
#> 5 3 122365670 0.757 -0.0168 0.0075 0.0264 C G 3:122365… Stro… EUR
#> 6 3 122201610 0.138 -0.0005 0.0096 0.960 C G 3:122201… Stro… EUR
#> 7 3 122376850 0.748 0.0103 0.0076 0.175 A G 3:122376… Stro… EUR
#> 8 3 122034854 0.281 0.0026 0.0077 0.739 A G 3:122034… Stro… EUR
#> 9 3 122268506 0.165 -0.0018 0.0086 0.837 C T 3:122268… Stro… EUR
#> 10 3 122273816 0.430 0.0097 0.0066 0.145 A G 3:122273… Stro… EUR
#> # ℹ 1,479 more rows
#> # ℹ 1 more variable: id <chr>
#>
#> $`Stroke South Asian`
#> # A tibble: 561 × 12
#> chr position eaf beta se p ea nea rsid trait pop
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 3 122289921 0.305 -0.066 0.0504 0.190 C T 3:12228… Stro… SAS
#> 2 3 122356077 0.0455 0.0271 0.109 0.804 G T 3:12235… Stro… SAS
#> 3 3 122297445 0.802 -0.0272 0.0512 0.595 A T 3:12229… Stro… SAS
#> 4 3 122292451 0.0652 0.135 0.102 0.185 A G 3:12229… Stro… SAS
#> 5 3 122321980 0.0313 0.256 0.120 0.0339 A G 3:12232… Stro… SAS
#> 6 3 122177260 0.953 0.0188 0.103 0.855 C T 3:12217… Stro… SAS
#> 7 3 122147542 0.0795 0.0231 0.0749 0.757 A G 3:12214… Stro… SAS
#> 8 3 122201061 0.0566 -0.0094 0.0958 0.922 A C 3:12220… Stro… SAS
#> 9 3 122089025 0.927 -0.019 0.0774 0.807 A G 3:12208… Stro… SAS
#> 10 3 122408098 0.0196 0.0389 0.147 0.791 C T 3:12240… Stro… SAS
#> # ℹ 551 more rows
#> # ℹ 1 more variable: id <chr>
For example scripts showing how these data were generated, see https://github.com/yoonsucho/CAMERA_analysis/tree/main/scripts/ldl_stroke_analysis
Generating the data using CAMERA_local
We have developed a separate set of functions to organise data from text files to generate the data above.
metadata <- readRDS(system.file(package="CAMeRa", "extdata/example-metadata.rds"))
metadata
#> # A tibble: 10 × 16
#> what pop trait id n rsid_col chr_col pos_col eaf_col beta_col
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 exposure AFR LDL LDL A… 9.11e4 1 2 3 8 9
#> 2 exposure EAS LDL LDL E… 7.98e4 1 2 3 8 9
#> 3 exposure EUR LDL LDL E… 9.00e5 1 2 3 8 9
#> 4 exposure AMR LDL LDL A… 4.48e4 1 2 3 8 9
#> 5 exposure SAS LDL LDL S… 3.04e4 1 2 3 8 9
#> 6 outcome EUR Stroke Strok… 1.31e6 NA 1 2 3 4
#> 7 outcome EAS Stroke Strok… 2.65e5 NA 1 2 3 4
#> 8 outcome AFR Stroke Strok… 2.40e4 NA 1 2 3 4
#> 9 outcome AMR Stroke Strok… 5.66e3 NA 1 2 3 4
#> 10 outcome SAS Stroke Strok… 1.13e4 NA 1 2 3 4
#> # ℹ 6 more variables: se_col <dbl>, pval_col <dbl>, ea_col <dbl>, oa_col <dbl>,
#> # fn <chr>, units <chr>
ld_ref <- dplyr::tibble(
pop = unique(metadata$pop),
bfile = file.path("path/to/plink_files/", pop)
)
ld_ref
#> # A tibble: 5 × 2
#> pop bfile
#> <chr> <chr>
#> 1 AFR path/to/plink_files//AFR
#> 2 EAS path/to/plink_files//EAS
#> 3 EUR path/to/plink_files//EUR
#> 4 AMR path/to/plink_files//AMR
#> 5 SAS path/to/plink_files//SAS
localdata <- CAMERA_local$new(metadata = metadata, ld_ref = ld_ref, plink_bin = "path/to/plink")
localdata$organise()
This will read the files specified in the metadata and attempt to arrange the data as described above, generating `localdata$instrument_raw`, `localdata$instrument_outcome`, etc.
You can then generate the CAMERA object, e.g.
l <- CAMERA$new()
l$import_from_local(
instrument_raw=instrument_raw,
instrument_outcome=instrument_outcome,
instrument_regions=instrument_regions,
instrument_outcome_regions=instrument_outcome_regions,
exposure_ids=unique(instrument_raw$id),
outcome_ids=unique(names(instrument_outcome_regions[[1]])),
pops=c("AFR", "EAS", "EUR", "AMR", "SAS")
)
#> list()
l$instrument_heterogeneity()
#> # A tibble: 14 × 9
#> Reference Replication nsnp agreement se pval Q Q_pval
#> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 LDL AFR LDL EAS 3 1.06 0.371 4.27e- 3 14.5 6.99e- 4
#> 2 LDL AFR LDL EUR 3 0.942 0.0207 0 0.168 9.19e- 1
#> 3 LDL AFR LDL AMR 4 0.901 0.138 7.04e-11 1.77 6.22e- 1
#> 4 LDL AFR LDL SAS 2 1.06 0.261 4.93e- 5 1.10 2.94e- 1
#> 5 LDL EAS LDL AFR 3 0.656 0.125 1.41e- 7 2.11 3.49e- 1
#> 6 LDL EAS LDL EUR 3 0.692 0.151 4.65e- 6 41.3 1.10e- 9
#> 7 LDL EAS LDL AMR 3 0.793 0.175 6.20e- 6 0.399 8.19e- 1
#> 8 LDL EAS LDL SAS 3 0.726 0.156 3.47e- 6 0.216 8.98e- 1
#> 9 LDL EUR LDL AFR 8 0.990 0.117 2.62e-17 2.27 9.43e- 1
#> 10 LDL EUR LDL EAS 5 1.07 0.235 5.06e- 6 14.3 6.36e- 3
#> 11 LDL EUR LDL AMR 10 0.883 0.146 1.63e- 9 11.6 2.40e- 1
#> 12 LDL EUR LDL SAS 8 0.835 0.189 1.02e- 5 9.85 1.97e- 1
#> 13 LDL AMR LDL AFR 2 0.931 0.198 2.68e- 6 2.18 1.39e- 1
#> 14 LDL AMR LDL EUR 2 1.02 0.148 5.49e-12 40.1 2.47e-10
#> # ℹ 1 more variable: I2 <dbl>