Skip to contents

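Nightingale data is supplied here as an Excel workbook (.xlsx). read_nightingale() imports it as a list with three elements: the measurement matrix (data), the sample annotation (samples) and the feature annotation (features).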

Import Nightingale data

library(metaboprep)

# locate the example Nightingale workbook shipped with the package
filepath <- system.file(
  "extdata",
  "nightingale_v1_example.xlsx",
  package = "metaboprep"
)

# read the workbook into a list
dat <- read_nightingale(filepath)

# inspect what was imported
str(dat)
#> List of 3
#>  $ data    : num [1:50, 1:12] 3.85 NA 5.2 3.01 2.68 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>   .. ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  $ samples :'data.frame':    50 obs. of  25 variables:
#>   ..$ sample_id                      : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>   ..$ informed_sample_type           : chr [1:50] "Serum" "Serum" "Serum" "Serum" ...
#>   ..$ sample_excluded                : chr [1:50] NA NA NA NA ...
#>   ..$ sample_notes                   : chr [1:50] NA NA NA NA ...
#>   ..$ edta_plasma                    : chr [1:50] "0" "1" "0" "0" ...
#>   ..$ citrate_plasma                 : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ low_ethanol                    : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ medium_ethanol                 : chr [1:50] "0" "1" "0" "0" ...
#>   ..$ high_ethanol                   : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ isopropyl_alcohol              : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ 1methyl2pyrrolidone            : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ polysaccharides                : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ aminocaproic_acid              : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ low_glucose                    : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ high_lactate                   : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ high_pyruvate                  : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ low_glutamine__high_glutamate  : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ gluconolactone                 : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ low_protein                    : chr [1:50] "1" "1" "0" "0" ...
#>   ..$ unexpected_amino_acid_signals  : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ unidentified_macromolecules    : chr [1:50] "0" "0" "0" "0" ...
#>   ..$ unidentified_small_molecule (a): chr [1:50] "0" "0" "0" "0" ...
#>   ..$ unidentified_small_molecule (b): chr [1:50] "0" "0" "0" "0" ...
#>   ..$ unidentified_small_molecule (c): chr [1:50] "0" "0" "0" "0" ...
#>   ..$ below_limit_of_quantification  : chr [1:50] "1" "1" "1" "1" ...
#>  $ features:'data.frame':    12 obs. of  6 variables:
#>   ..$ feature_id     : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>   ..$ csv_column_name: chr [1:12] "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#>   ..$ biomarker_name : chr [1:12] "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#>   ..$ unit           : chr [1:12] "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#>   ..$ group          : chr [1:12] "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#>   ..$ subgroup       : chr [1:12] NA NA NA NA ...

Create Metaboprep object

Once imported, we pass the data matrix together with the sample and feature tables to the Metaboprep() constructor to create a Metaboprep object.

# build the Metaboprep object from the three imported components
m <- Metaboprep(
  data     = dat[["data"]],
  features = dat[["features"]],
  samples  = dat[["samples"]]
)

# print the object
m
#> <metaboprep::Metaboprep>
#>  @ data           : num [1:50, 1:12, 1] 3.85 NA 5.2 3.01 2.68 ...
#>  .. - attr(*, "dimnames")=List of 3
#>  ..  ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>  ..  ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  ..  ..$ : chr "input"
#>  @ samples        :'data.frame': 50 obs. of  25 variables:
#>  .. $ sample_id                      : chr  "ind1" "ind2" "ind3" "ind4" ...
#>  .. $ informed_sample_type           : chr  "Serum" "Serum" "Serum" "Serum" ...
#>  .. $ sample_excluded                : chr  NA NA NA NA ...
#>  .. $ sample_notes                   : chr  NA NA NA NA ...
#>  .. $ edta_plasma                    : chr  "0" "1" "0" "0" ...
#>  .. $ citrate_plasma                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_ethanol                    : chr  "0" "0" "0" "0" ...
#>  .. $ medium_ethanol                 : chr  "0" "1" "0" "0" ...
#>  .. $ high_ethanol                   : chr  "0" "0" "0" "0" ...
#>  .. $ isopropyl_alcohol              : chr  "0" "0" "0" "0" ...
#>  .. $ 1methyl2pyrrolidone            : chr  "0" "0" "0" "0" ...
#>  .. $ polysaccharides                : chr  "0" "0" "0" "0" ...
#>  .. $ aminocaproic_acid              : chr  "0" "0" "0" "0" ...
#>  .. $ low_glucose                    : chr  "0" "0" "0" "0" ...
#>  .. $ high_lactate                   : chr  "0" "0" "0" "0" ...
#>  .. $ high_pyruvate                  : chr  "0" "0" "0" "0" ...
#>  .. $ low_glutamine__high_glutamate  : chr  "0" "0" "0" "0" ...
#>  .. $ gluconolactone                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_protein                    : chr  "1" "1" "0" "0" ...
#>  .. $ unexpected_amino_acid_signals  : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_macromolecules    : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (a): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (b): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (c): chr  "0" "0" "0" "0" ...
#>  .. $ below_limit_of_quantification  : chr  "1" "1" "1" "1" ...
#>  @ features       :'data.frame': 12 obs. of  6 variables:
#>  .. $ feature_id     : chr  "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  .. $ csv_column_name: chr  "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#>  .. $ biomarker_name : chr  "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#>  .. $ unit           : chr  "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#>  .. $ group          : chr  "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#>  .. $ subgroup       : chr  NA NA NA NA ...
#>  @ exclusions     :List of 2
#>  .. $ samples :List of 5
#>  ..  ..$ user_excluded                    : chr(0) 
#>  ..  ..$ extreme_sample_missingness       : chr(0) 
#>  ..  ..$ user_defined_sample_missingness  : chr(0) 
#>  ..  ..$ user_defined_sample_totalpeakarea: chr(0) 
#>  ..  ..$ user_defined_sample_pca_outlier  : chr(0) 
#>  .. $ features:List of 3
#>  ..  ..$ user_excluded                   : chr(0) 
#>  ..  ..$ extreme_feature_missingness     : chr(0) 
#>  ..  ..$ user_defined_feature_missingness: chr(0) 
#>  @ feature_summary: num[0 , 0 , 0 ] 
#>  @ sample_summary : num[0 , 0 , 0 ]

QC Nightingale

# run the QC pipeline on the "input" layer; the settings used are stored
# as qc_* attributes on the returned object
m <- quality_control(
  m,
  source_layer        = "input",
  sample_missingness  = 0.5,
  feature_missingness = 0.3,
  total_peak_area_sd  = 5,
  outlier_udist       = 5,
  outlier_treatment   = "leave_be",
  winsorize_quantile  = 1.0,
  tree_cut_height     = 0.5,
  pc_outlier_sd       = 5
)
#> 
#> ── Starting Metabolite QC Process ──────────────────────────────────────────────
#>  Validating input parameters
#>  Validating input parameters [7ms]
#> 
#>  Sample & Feature Summary Statistics for raw data
#>  Sample & Feature Summary Statistics for raw data [138ms]
#> 
#>  Copying input data to new 'qc' data layer
#>  Copying input data to new 'qc' data layer [22ms]
#> 
#>  Assessing for extreme sample missingness >=80% - excluding 0 sample(s)
#>  Assessing for extreme sample missingness >=80% - excluding 2 sample(s) [16ms]
#> 
#>  Assessing for extreme feature missingness >=80% - excluding 0 feature(s)
#>  Assessing for extreme feature missingness >=80% - excluding 0 feature(s) [16m
#> 
#>  Assessing for sample missingness at specified level of >=50% - excluding 0 sa…
#>  Assessing for sample missingness at specified level of >=50% - excluding 0 sa…
#> 
#>  Assessing for feature missingness at specified level of >=30% - excluding 0 f…
#>  Assessing for feature missingness at specified level of >=30% - excluding 0 f…
#> 
#>  Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#>  Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#> 
#>  Running sample data PCA outlier analysis at +/- 5 Sdev
#>  Running sample data PCA outlier analysis at +/- 5 Sdev [16ms]
#> 
#>  Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#> [1] 2
#> [1] 48 10
#>  Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#> 
#>  Creating final QC dataset...
#>  Creating final QC dataset... [101ms]
#> 
#>  Metabolite QC Process Completed
#>  Metabolite QC Process Completed [13ms]
#> 

# print the updated object, which now carries the "qc" layer and QC summaries
m
#> <metaboprep::Metaboprep>
#>  @ data           : num [1:50, 1:12, 1:2] 3.85 NA 5.2 3.01 2.68 ...
#>  .. - attr(*, "dimnames")=List of 3
#>  ..  ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>  ..  ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  ..  ..$ : chr [1:2] "input" "qc"
#>  .. - attr(*, "qc_sample_missingness")= num 0.5
#>  .. - attr(*, "qc_feature_missingness")= num 0.3
#>  .. - attr(*, "qc_total_peak_area_sd")= num 5
#>  .. - attr(*, "qc_outlier_udist")= num 5
#>  .. - attr(*, "qc_outlier_treatment")= chr "leave_be"
#>  .. - attr(*, "qc_winsorize_quantile")= num 1
#>  .. - attr(*, "qc_tree_cut_height")= num 0.5
#>  .. - attr(*, "qc_pc_outlier_sd")= num 5
#>  .. - attr(*, "qc_features_exclude_but_keep")= chr(0) 
#>  @ samples        :'data.frame': 50 obs. of  27 variables:
#>  .. $ sample_id                      : chr  "ind1" "ind2" "ind3" "ind4" ...
#>  .. $ informed_sample_type           : chr  "Serum" "Serum" "Serum" "Serum" ...
#>  .. $ sample_excluded                : chr  NA NA NA NA ...
#>  .. $ sample_notes                   : chr  NA NA NA NA ...
#>  .. $ edta_plasma                    : chr  "0" "1" "0" "0" ...
#>  .. $ citrate_plasma                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_ethanol                    : chr  "0" "0" "0" "0" ...
#>  .. $ medium_ethanol                 : chr  "0" "1" "0" "0" ...
#>  .. $ high_ethanol                   : chr  "0" "0" "0" "0" ...
#>  .. $ isopropyl_alcohol              : chr  "0" "0" "0" "0" ...
#>  .. $ 1methyl2pyrrolidone            : chr  "0" "0" "0" "0" ...
#>  .. $ polysaccharides                : chr  "0" "0" "0" "0" ...
#>  .. $ aminocaproic_acid              : chr  "0" "0" "0" "0" ...
#>  .. $ low_glucose                    : chr  "0" "0" "0" "0" ...
#>  .. $ high_lactate                   : chr  "0" "0" "0" "0" ...
#>  .. $ high_pyruvate                  : chr  "0" "0" "0" "0" ...
#>  .. $ low_glutamine__high_glutamate  : chr  "0" "0" "0" "0" ...
#>  .. $ gluconolactone                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_protein                    : chr  "1" "1" "0" "0" ...
#>  .. $ unexpected_amino_acid_signals  : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_macromolecules    : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (a): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (b): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (c): chr  "0" "0" "0" "0" ...
#>  .. $ below_limit_of_quantification  : chr  "1" "1" "1" "1" ...
#>  .. $ reason_excluded                : chr  NA NA NA NA ...
#>  .. $ excluded                       : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
#>  @ features       :'data.frame': 12 obs. of  8 variables:
#>  .. $ feature_id     : chr  "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  .. $ csv_column_name: chr  "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#>  .. $ biomarker_name : chr  "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#>  .. $ unit           : chr  "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#>  .. $ group          : chr  "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#>  .. $ subgroup       : chr  NA NA NA NA ...
#>  .. $ reason_excluded: chr  NA NA NA NA ...
#>  .. $ excluded       : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
#>  @ exclusions     :List of 2
#>  .. $ samples :List of 5
#>  ..  ..$ user_excluded                    : chr(0) 
#>  ..  ..$ extreme_sample_missingness       : chr [1:2] "ind15" "ind35"
#>  ..  ..$ user_defined_sample_missingness  : chr(0) 
#>  ..  ..$ user_defined_sample_totalpeakarea: chr(0) 
#>  ..  ..$ user_defined_sample_pca_outlier  : chr(0) 
#>  .. $ features:List of 3
#>  ..  ..$ user_excluded                   : chr(0) 
#>  ..  ..$ extreme_feature_missingness     : chr(0) 
#>  ..  ..$ user_defined_feature_missingness: chr(0) 
#>  @ feature_summary: num [1:20, 1:12, 1:2] 0.12 0 44 3.8 1.06 ...
#>  .. - attr(*, "dimnames")=List of 3
#>  ..  ..$ : chr [1:20] "missingness" "outlier_count" "n" "mean" ...
#>  ..  ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  ..  ..$ : chr [1:2] "input" "qc"
#>  .. - attr(*, "qc_tree")=List of 7
#>  ..  ..$ merge      : int [1:11, 1:2] -10 -7 -1 -2 -4 -6 3 -5 6 2 ...
#>  ..  ..$ height     : num [1:11] 0.602 0.624 0.634 0.69 0.714 ...
#>  ..  ..$ order      : int [1:12] 5 4 10 12 7 8 6 11 1 3 ...
#>  ..  ..$ labels     : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  ..  ..$ method     : chr "complete"
#>  ..  ..$ call       : language stats::hclust(d = dist_matrix, method = "complete")
#>  ..  ..$ dist.method: NULL
#>  ..  ..- attr(*, "class")= chr "hclust"
#>  .. - attr(*, "qc_outlier_udist")= num 5
#>  .. - attr(*, "qc_tree_cut_height")= num 0.5
#>  @ sample_summary : num [1:50, 1:20, 1:2] 0.0833 0.25 0.1667 0 0.0833 ...
#>  .. - attr(*, "dimnames")=List of 3
#>  ..  ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>  ..  ..$ : chr [1:20] "missingness" "tpa_total" "tpa_complete_features" "outlier_count" ...
#>  ..  ..$ : chr [1:2] "input" "qc"
#>  .. - attr(*, "qc_varexp")= Named num [1:12] 0.1904 0.1516 0.1185 0.1105 0.0973 ...
#>  ..  ..- attr(*, "names")= chr [1:12] "PC1" "PC2" "PC3" "PC4" ...
#>  .. - attr(*, "qc_num_pcs_scree")= num 2
#>  .. - attr(*, "qc_num_pcs_parallel")= int 6
#>  .. - attr(*, "qc_outlier_udist")= num 5