
Import Nightingale Metabolomic Data
nightingale.Rmd
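Nightingale data is supplied as an Excel workbook containing the biomarker measurements together with sample-level and feature-level annotation. This article shows how to import such a file, create a Metaboprep object from it, and run the quality-control pipeline.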
Import Nightingale data
library(metaboprep)

# Locate the example Nightingale workbook that ships with the package.
# mustWork = TRUE makes a missing file an immediate error here; without it
# system.file() returns "" and the failure only shows up inside the reader.
filepath <- system.file(
  "extdata", "nightingale_v1_example.xlsx",
  package = "metaboprep",
  mustWork = TRUE
)

# Import the workbook. As the structure printed below shows, the result is a
# list with three elements: `data` (numeric matrix, samples x features),
# `samples` (data.frame) and `features` (data.frame).
dat <- read_nightingale(filepath)

# Inspect the structure of the imported object
str(dat)
#> List of 3
#> $ data : num [1:50, 1:12] 3.85 NA 5.2 3.01 2.68 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#> .. ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> $ samples :'data.frame': 50 obs. of 25 variables:
#> ..$ sample_id : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#> ..$ informed_sample_type : chr [1:50] "Serum" "Serum" "Serum" "Serum" ...
#> ..$ sample_excluded : chr [1:50] NA NA NA NA ...
#> ..$ sample_notes : chr [1:50] NA NA NA NA ...
#> ..$ edta_plasma : chr [1:50] "0" "1" "0" "0" ...
#> ..$ citrate_plasma : chr [1:50] "0" "0" "0" "0" ...
#> ..$ low_ethanol : chr [1:50] "0" "0" "0" "0" ...
#> ..$ medium_ethanol : chr [1:50] "0" "1" "0" "0" ...
#> ..$ high_ethanol : chr [1:50] "0" "0" "0" "0" ...
#> ..$ isopropyl_alcohol : chr [1:50] "0" "0" "0" "0" ...
#> ..$ 1methyl2pyrrolidone : chr [1:50] "0" "0" "0" "0" ...
#> ..$ polysaccharides : chr [1:50] "0" "0" "0" "0" ...
#> ..$ aminocaproic_acid : chr [1:50] "0" "0" "0" "0" ...
#> ..$ low_glucose : chr [1:50] "0" "0" "0" "0" ...
#> ..$ high_lactate : chr [1:50] "0" "0" "0" "0" ...
#> ..$ high_pyruvate : chr [1:50] "0" "0" "0" "0" ...
#> ..$ low_glutamine__high_glutamate : chr [1:50] "0" "0" "0" "0" ...
#> ..$ gluconolactone : chr [1:50] "0" "0" "0" "0" ...
#> ..$ low_protein : chr [1:50] "1" "1" "0" "0" ...
#> ..$ unexpected_amino_acid_signals : chr [1:50] "0" "0" "0" "0" ...
#> ..$ unidentified_macromolecules : chr [1:50] "0" "0" "0" "0" ...
#> ..$ unidentified_small_molecule (a): chr [1:50] "0" "0" "0" "0" ...
#> ..$ unidentified_small_molecule (b): chr [1:50] "0" "0" "0" "0" ...
#> ..$ unidentified_small_molecule (c): chr [1:50] "0" "0" "0" "0" ...
#> ..$ below_limit_of_quantification : chr [1:50] "1" "1" "1" "1" ...
#> $ features:'data.frame': 12 obs. of 6 variables:
#> ..$ feature_id : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> ..$ csv_column_name: chr [1:12] "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#> ..$ biomarker_name : chr [1:12] "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#> ..$ unit : chr [1:12] "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#> ..$ group : chr [1:12] "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#> ..$ subgroup : chr [1:12] NA NA NA NA ...
Create Metaboprep object
Once imported, we pass the data matrix, the feature table and the sample table to the `Metaboprep` class constructor to create a Metaboprep object.
# Assemble the Metaboprep object from the three components returned by the
# import step: the measurement matrix plus the sample and feature tables.
m <- Metaboprep(
  data     = dat[["data"]],
  samples  = dat[["samples"]],
  features = dat[["features"]]
)

# Display the freshly created object
m
#> <metaboprep::Metaboprep>
#> @ data : num [1:50, 1:12, 1] 3.85 NA 5.2 3.01 2.68 ...
#> .. - attr(*, "dimnames")=List of 3
#> .. ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#> .. ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. ..$ : chr "input"
#> @ samples :'data.frame': 50 obs. of 25 variables:
#> .. $ sample_id : chr "ind1" "ind2" "ind3" "ind4" ...
#> .. $ informed_sample_type : chr "Serum" "Serum" "Serum" "Serum" ...
#> .. $ sample_excluded : chr NA NA NA NA ...
#> .. $ sample_notes : chr NA NA NA NA ...
#> .. $ edta_plasma : chr "0" "1" "0" "0" ...
#> .. $ citrate_plasma : chr "0" "0" "0" "0" ...
#> .. $ low_ethanol : chr "0" "0" "0" "0" ...
#> .. $ medium_ethanol : chr "0" "1" "0" "0" ...
#> .. $ high_ethanol : chr "0" "0" "0" "0" ...
#> .. $ isopropyl_alcohol : chr "0" "0" "0" "0" ...
#> .. $ 1methyl2pyrrolidone : chr "0" "0" "0" "0" ...
#> .. $ polysaccharides : chr "0" "0" "0" "0" ...
#> .. $ aminocaproic_acid : chr "0" "0" "0" "0" ...
#> .. $ low_glucose : chr "0" "0" "0" "0" ...
#> .. $ high_lactate : chr "0" "0" "0" "0" ...
#> .. $ high_pyruvate : chr "0" "0" "0" "0" ...
#> .. $ low_glutamine__high_glutamate : chr "0" "0" "0" "0" ...
#> .. $ gluconolactone : chr "0" "0" "0" "0" ...
#> .. $ low_protein : chr "1" "1" "0" "0" ...
#> .. $ unexpected_amino_acid_signals : chr "0" "0" "0" "0" ...
#> .. $ unidentified_macromolecules : chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (a): chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (b): chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (c): chr "0" "0" "0" "0" ...
#> .. $ below_limit_of_quantification : chr "1" "1" "1" "1" ...
#> @ features :'data.frame': 12 obs. of 6 variables:
#> .. $ feature_id : chr "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. $ csv_column_name: chr "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#> .. $ biomarker_name : chr "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#> .. $ unit : chr "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#> .. $ group : chr "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#> .. $ subgroup : chr NA NA NA NA ...
#> @ exclusions :List of 2
#> .. $ samples :List of 5
#> .. ..$ user_excluded : chr(0)
#> .. ..$ extreme_sample_missingness : chr(0)
#> .. ..$ user_defined_sample_missingness : chr(0)
#> .. ..$ user_defined_sample_totalpeakarea: chr(0)
#> .. ..$ user_defined_sample_pca_outlier : chr(0)
#> .. $ features:List of 3
#> .. ..$ user_excluded : chr(0)
#> .. ..$ extreme_feature_missingness : chr(0)
#> .. ..$ user_defined_feature_missingness: chr(0)
#> @ feature_summary: num[0 , 0 , 0 ]
#> @ sample_summary : num[0 , 0 , 0 ]
QC Nightingale
# Run the QC pipeline on the "input" layer and store the result back in `m`.
# Per the log printed below, the thresholds are applied as:
#   sample_missingness  = 0.5 -> samples with >= 50% missingness are excluded
#   feature_missingness = 0.3 -> features with >= 30% missingness are excluded
#   total_peak_area_sd  = 5   -> total peak abundance outliers at +/- 5 SD
#   pc_outlier_sd       = 5   -> sample PCA outliers at +/- 5 SD
m <- quality_control(
  m,
  source_layer        = "input",
  sample_missingness  = 0.5,
  feature_missingness = 0.3,
  total_peak_area_sd  = 5,
  outlier_udist       = 5,
  outlier_treatment   = "leave_be",
  winsorize_quantile  = 1.0,
  tree_cut_height     = 0.5,
  pc_outlier_sd       = 5
)
#>
#> ── Starting Metabolite QC Process ──────────────────────────────────────────────
#> ℹ Validating input parameters
#> ✔ Validating input parameters [7ms]
#>
#> ℹ Sample & Feature Summary Statistics for raw data
#> ✔ Sample & Feature Summary Statistics for raw data [138ms]
#>
#> ℹ Copying input data to new 'qc' data layer
#> ✔ Copying input data to new 'qc' data layer [22ms]
#>
#> ℹ Assessing for extreme sample missingness >=80% - excluding 0 sample(s)
#> ✔ Assessing for extreme sample missingness >=80% - excluding 2 sample(s) [16ms]
#>
#> ℹ Assessing for extreme feature missingness >=80% - excluding 0 feature(s)
#> ✔ Assessing for extreme feature missingness >=80% - excluding 0 feature(s) [16m…
#>
#> ℹ Assessing for sample missingness at specified level of >=50% - excluding 0 sa…
#> ✔ Assessing for sample missingness at specified level of >=50% - excluding 0 sa…
#>
#> ℹ Assessing for feature missingness at specified level of >=30% - excluding 0 f…
#> ✔ Assessing for feature missingness at specified level of >=30% - excluding 0 f…
#>
#> ℹ Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#> ✔ Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#>
#> ℹ Running sample data PCA outlier analysis at +/- 5 Sdev
#> ✔ Running sample data PCA outlier analysis at +/- 5 Sdev [16ms]
#>
#> ℹ Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#> [1] 2
#> [1] 48 10
#> ✔ Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#>
#> ℹ Creating final QC dataset...
#> ✔ Creating final QC dataset... [101ms]
#>
#> ℹ Metabolite QC Process Completed
#> ✔ Metabolite QC Process Completed [13ms]
#>
# Display the object after QC: it now carries a second "qc" data layer, the
# recorded exclusions, and the populated feature / sample summaries.
print(m)
#> <metaboprep::Metaboprep>
#> @ data : num [1:50, 1:12, 1:2] 3.85 NA 5.2 3.01 2.68 ...
#> .. - attr(*, "dimnames")=List of 3
#> .. ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#> .. ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. ..$ : chr [1:2] "input" "qc"
#> .. - attr(*, "qc_sample_missingness")= num 0.5
#> .. - attr(*, "qc_feature_missingness")= num 0.3
#> .. - attr(*, "qc_total_peak_area_sd")= num 5
#> .. - attr(*, "qc_outlier_udist")= num 5
#> .. - attr(*, "qc_outlier_treatment")= chr "leave_be"
#> .. - attr(*, "qc_winsorize_quantile")= num 1
#> .. - attr(*, "qc_tree_cut_height")= num 0.5
#> .. - attr(*, "qc_pc_outlier_sd")= num 5
#> .. - attr(*, "qc_features_exclude_but_keep")= chr(0)
#> @ samples :'data.frame': 50 obs. of 27 variables:
#> .. $ sample_id : chr "ind1" "ind2" "ind3" "ind4" ...
#> .. $ informed_sample_type : chr "Serum" "Serum" "Serum" "Serum" ...
#> .. $ sample_excluded : chr NA NA NA NA ...
#> .. $ sample_notes : chr NA NA NA NA ...
#> .. $ edta_plasma : chr "0" "1" "0" "0" ...
#> .. $ citrate_plasma : chr "0" "0" "0" "0" ...
#> .. $ low_ethanol : chr "0" "0" "0" "0" ...
#> .. $ medium_ethanol : chr "0" "1" "0" "0" ...
#> .. $ high_ethanol : chr "0" "0" "0" "0" ...
#> .. $ isopropyl_alcohol : chr "0" "0" "0" "0" ...
#> .. $ 1methyl2pyrrolidone : chr "0" "0" "0" "0" ...
#> .. $ polysaccharides : chr "0" "0" "0" "0" ...
#> .. $ aminocaproic_acid : chr "0" "0" "0" "0" ...
#> .. $ low_glucose : chr "0" "0" "0" "0" ...
#> .. $ high_lactate : chr "0" "0" "0" "0" ...
#> .. $ high_pyruvate : chr "0" "0" "0" "0" ...
#> .. $ low_glutamine__high_glutamate : chr "0" "0" "0" "0" ...
#> .. $ gluconolactone : chr "0" "0" "0" "0" ...
#> .. $ low_protein : chr "1" "1" "0" "0" ...
#> .. $ unexpected_amino_acid_signals : chr "0" "0" "0" "0" ...
#> .. $ unidentified_macromolecules : chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (a): chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (b): chr "0" "0" "0" "0" ...
#> .. $ unidentified_small_molecule (c): chr "0" "0" "0" "0" ...
#> .. $ below_limit_of_quantification : chr "1" "1" "1" "1" ...
#> .. $ reason_excluded : chr NA NA NA NA ...
#> .. $ excluded : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
#> @ features :'data.frame': 12 obs. of 8 variables:
#> .. $ feature_id : chr "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. $ csv_column_name: chr "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#> .. $ biomarker_name : chr "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#> .. $ unit : chr "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#> .. $ group : chr "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#> .. $ subgroup : chr NA NA NA NA ...
#> .. $ reason_excluded: chr NA NA NA NA ...
#> .. $ excluded : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
#> @ exclusions :List of 2
#> .. $ samples :List of 5
#> .. ..$ user_excluded : chr(0)
#> .. ..$ extreme_sample_missingness : chr [1:2] "ind15" "ind35"
#> .. ..$ user_defined_sample_missingness : chr(0)
#> .. ..$ user_defined_sample_totalpeakarea: chr(0)
#> .. ..$ user_defined_sample_pca_outlier : chr(0)
#> .. $ features:List of 3
#> .. ..$ user_excluded : chr(0)
#> .. ..$ extreme_feature_missingness : chr(0)
#> .. ..$ user_defined_feature_missingness: chr(0)
#> @ feature_summary: num [1:20, 1:12, 1:2] 0.12 0 44 3.8 1.06 ...
#> .. - attr(*, "dimnames")=List of 3
#> .. ..$ : chr [1:20] "missingness" "outlier_count" "n" "mean" ...
#> .. ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. ..$ : chr [1:2] "input" "qc"
#> .. - attr(*, "qc_tree")=List of 7
#> .. ..$ merge : int [1:11, 1:2] -10 -7 -1 -2 -4 -6 3 -5 6 2 ...
#> .. ..$ height : num [1:11] 0.602 0.624 0.634 0.69 0.714 ...
#> .. ..$ order : int [1:12] 5 4 10 12 7 8 6 11 1 3 ...
#> .. ..$ labels : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#> .. ..$ method : chr "complete"
#> .. ..$ call : language stats::hclust(d = dist_matrix, method = "complete")
#> .. ..$ dist.method: NULL
#> .. ..- attr(*, "class")= chr "hclust"
#> .. - attr(*, "qc_outlier_udist")= num 5
#> .. - attr(*, "qc_tree_cut_height")= num 0.5
#> @ sample_summary : num [1:50, 1:20, 1:2] 0.0833 0.25 0.1667 0 0.0833 ...
#> .. - attr(*, "dimnames")=List of 3
#> .. ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#> .. ..$ : chr [1:20] "missingness" "tpa_total" "tpa_complete_features" "outlier_count" ...
#> .. ..$ : chr [1:2] "input" "qc"
#> .. - attr(*, "qc_varexp")= Named num [1:12] 0.1904 0.1516 0.1185 0.1105 0.0973 ...
#> .. ..- attr(*, "names")= chr [1:12] "PC1" "PC2" "PC3" "PC4" ...
#> .. - attr(*, "qc_num_pcs_scree")= num 2
#> .. - attr(*, "qc_num_pcs_parallel")= int 6
#> .. - attr(*, "qc_outlier_udist")= num 5