Skip to contents

Import Nightingale data directly into a Metaboprep object

Read in the Nightingale Health data using the `read_nightingale()` function. Here we read the example data provided with the package and convert it directly into a Metaboprep S7 object.

library(metaboprep)

# Locate the example Nightingale workbook bundled in the package's extdata/
filepath <- system.file(
  "extdata",
  "nightingale_v1_example.xlsx",
  package = "metaboprep"
)

# Read the workbook; return_Metaboprep = TRUE requests a Metaboprep object
mydata <- read_nightingale(filepath, return_Metaboprep = TRUE)

Quick look at the structure of the imported data

# Show the object's properties: data array, sample and feature annotation,
# exclusion lists, and the (still empty) feature/sample summaries
str(mydata)
#> <metaboprep::Metaboprep>
#>  @ data           : num [1:50, 1:12, 1] 3.85 NA 5.2 3.01 2.68 ...
#>  .. - attr(*, "dimnames")=List of 3
#>  ..  ..$ : chr [1:50] "ind1" "ind2" "ind3" "ind4" ...
#>  ..  ..$ : chr [1:12] "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  ..  ..$ : chr "input"
#>  @ samples        :'data.frame': 50 obs. of  37 variables:
#>  .. $ sample_id                      : chr  "ind1" "ind2" "ind3" "ind4" ...
#>  .. $ informed_sample_type           : chr  "Serum" "Serum" "Serum" "Serum" ...
#>  .. $ sample_excluded                : chr  NA NA NA NA ...
#>  .. $ sample_notes                   : chr  NA NA NA NA ...
#>  .. $ edta_plasma                    : chr  "0" "1" "0" "0" ...
#>  .. $ citrate_plasma                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_ethanol                    : chr  "0" "0" "0" "0" ...
#>  .. $ medium_ethanol                 : chr  "0" "1" "0" "0" ...
#>  .. $ high_ethanol                   : chr  "0" "0" "0" "0" ...
#>  .. $ isopropyl_alcohol              : chr  "0" "0" "0" "0" ...
#>  .. $ 1methyl2pyrrolidone            : chr  "0" "0" "0" "0" ...
#>  .. $ polysaccharides                : chr  "0" "0" "0" "0" ...
#>  .. $ aminocaproic_acid              : chr  "0" "0" "0" "0" ...
#>  .. $ low_glucose                    : chr  "0" "0" "0" "0" ...
#>  .. $ high_lactate                   : chr  "0" "0" "0" "0" ...
#>  .. $ high_pyruvate                  : chr  "0" "0" "0" "0" ...
#>  .. $ low_glutamine__high_glutamate  : chr  "0" "0" "0" "0" ...
#>  .. $ gluconolactone                 : chr  "0" "0" "0" "0" ...
#>  .. $ low_protein                    : chr  "1" "1" "0" "0" ...
#>  .. $ unexpected_amino_acid_signals  : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_macromolecules    : chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (a): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (b): chr  "0" "0" "0" "0" ...
#>  .. $ unidentified_small_molecule (c): chr  "0" "0" "0" "0" ...
#>  .. $ below_limit_of_quantification  : chr  "1" "1" "1" "1" ...
#>  .. $ totalc                         : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA 1 NA NA NA NA NA 1 NA NA ...
#>  .. $ nonhdlc                        : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA 1 NA NA 1 NA NA NA 1 NA ...
#>  .. $ remnantc                       : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA NA NA NA NA NA ...
#>  .. $ vldlc                          : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA 1 NA NA NA NA NA NA NA ...
#>  .. $ clinical_ldlc                  : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA NA NA NA NA NA ...
#>  .. $ ldlc                           : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA 1 NA NA NA NA ...
#>  .. $ hdlc                           : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA NA NA NA NA NA ...
#>  .. $ totaltg                        : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA NA NA NA NA NA ...
#>  .. $ vldltg                         : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA 1 NA NA NA NA NA NA NA NA ...
#>  .. $ ldltg                          : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA 1 NA NA NA NA NA NA NA ...
#>  .. $ hdltg                          : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": 1 NA NA NA NA NA NA NA NA NA ...
#>  .. $ omega3_pct                     : Factor w/ 1 level "Value was rejected by automatic sample and measurement quality control": NA NA NA NA NA NA NA NA NA NA ...
#>  @ features       :'data.frame': 12 obs. of  6 variables:
#>  .. $ feature_id     : chr  "Total-C" "non-HDL-C" "Remnant-C" "VLDL-C" ...
#>  .. $ csv_column_name: chr  "Total_C" "non_HDL_C" "Remnant_C" "VLDL_C" ...
#>  .. $ biomarker_name : chr  "Total cholesterol" "Total cholesterol minus HDL-C" "Remnant cholesterol (non-HDL, non-LDL -cholesterol)" "VLDL cholesterol" ...
#>  .. $ unit           : chr  "mmol/l" "mmol/l" "mmol/l" "mmol/l" ...
#>  .. $ group          : chr  "Cholesterol" "Cholesterol" "Cholesterol" "Cholesterol" ...
#>  .. $ subgroup       : chr  NA NA NA NA ...
#>  @ exclusions     :List of 2
#>  .. $ samples :List of 5
#>  ..  ..$ user_excluded                    : chr(0) 
#>  ..  ..$ extreme_sample_missingness       : chr(0) 
#>  ..  ..$ user_defined_sample_missingness  : chr(0) 
#>  ..  ..$ user_defined_sample_totalpeakarea: chr(0) 
#>  ..  ..$ user_defined_sample_pca_outlier  : chr(0) 
#>  .. $ features:List of 3
#>  ..  ..$ user_excluded                   : chr(0) 
#>  ..  ..$ extreme_feature_missingness     : chr(0) 
#>  ..  ..$ user_defined_feature_missingness: chr(0) 
#>  @ feature_summary: num[0 , 0 , 0 ] 
#>  @ sample_summary : num[0 , 0 , 0 ]

QC Nightingale

Perform the QC steps using the quality_control function.

# Run the QC pipeline on the "input" layer and store the result back in
# `mydata`. The log printed below echoes the thresholds set here
# (missingness >= 20 %, total peak area and PCA outliers at +/- 5 SD).
mydata <- quality_control(
  mydata,
  source_layer              = "input",
  sample_missingness        = 0.2,
  feature_missingness       = 0.2,
  total_peak_area_sd        = 5,
  outlier_udist             = 5,
  outlier_treatment         = "leave_be",
  winsorize_quantile        = 1.0,
  tree_cut_height           = 0.5,
  pc_outlier_sd             = 5,
  feature_selection         = "max_var_exp",
  features_exclude_but_keep = NULL
)
#> 
#> ── Starting Metabolite QC Process ──────────────────────────────────────────────
#>  Validating input parameters
#>  Validating input parameters [7ms]
#> 
#>  Sample & Feature Summary Statistics for raw data
#>  Sample & Feature Summary Statistics for raw data [137ms]
#> 
#>  Copying input data to new 'qc' data layer
#>  Copying input data to new 'qc' data layer [22ms]
#> 
#>  Assessing for extreme sample missingness >=80% - excluding 0 sample(s)
#>  Assessing for extreme sample missingness >=80% - excluding 2 sample(s) [16ms]
#> 
#>  Assessing for extreme feature missingness >=80% - excluding 0 feature(s)
#>  Assessing for extreme feature missingness >=80% - excluding 0 feature(s) [20m
#> 
#>  Assessing for sample missingness at specified level of >=20% - excluding 0 sa…
#>  Assessing for sample missingness at specified level of >=20% - excluding 1 sa…
#> 
#>  Assessing for feature missingness at specified level of >=20% - excluding 0 f…
#>  Assessing for feature missingness at specified level of >=20% - excluding 0 f…
#> 
#>  Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#>  Calculating total peak abundance outliers at +/- 5 Sdev - excluding 0 sample(…
#> 
#>  Running sample data PCA outlier analysis at +/- 5 Sdev
#>  Running sample data PCA outlier analysis at +/- 5 Sdev [20ms]
#> 
#>  Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#> ! The stated max PCs [max_num_pcs=10] to use in PCA outlier assessment is greater than the number of available informative PCs [2]
#>  Sample PCA outlier analysis - re-identify feature independence and PC outlier… Sample PCA outlier analysis - re-identify feature independence and PC outlier…
#> 
#>  Creating final QC dataset...
#>  Creating final QC dataset... [104ms]
#> 
#>  Metabolite QC Process Completed
#>  Metabolite QC Process Completed [13ms]

Quick summary of the Metaboprep object following QC

# Print an overview of the object after QC: sample/feature counts, data
# layers, annotation columns, and per-reason exclusion counts
summary(mydata)
#> Metaboprep Object Summary
#> --------------------------
#> Samples      : 50
#> Features     : 12
#> Data Layers  : 2
#> Layer Names  : input, qc
#> 
#> Sample Summary Layers : input, qc
#> Feature Summary Layers: input, qc
#> 
#> Sample Annotation (metadata):
#>   Columns: 39
#>   Names  : sample_id, informed_sample_type, sample_excluded, sample_notes, edta_plasma, citrate_plasma, low_ethanol, medium_ethanol, high_ethanol, isopropyl_alcohol, 1methyl2pyrrolidone, polysaccharides, aminocaproic_acid, low_glucose, high_lactate, high_pyruvate, low_glutamine__high_glutamate, gluconolactone, low_protein, unexpected_amino_acid_signals, unidentified_macromolecules, unidentified_small_molecule (a), unidentified_small_molecule (b), unidentified_small_molecule (c), below_limit_of_quantification, totalc, nonhdlc, remnantc, vldlc, clinical_ldlc, ldlc, hdlc, totaltg, vldltg, ldltg, hdltg, omega3_pct, reason_excluded, excluded
#> 
#> Feature Annotation (metadata):
#>   Columns: 8
#>   Names  : feature_id, csv_column_name, biomarker_name, unit, group, subgroup, reason_excluded, excluded
#> 
#> Exclusion Codes Summary:
#> 
#>   Sample Exclusions:
#> Exclusion | Count
#> -----------------
#> user_excluded                     | 0
#> extreme_sample_missingness        | 2
#> user_defined_sample_missingness   | 1
#> user_defined_sample_totalpeakarea | 0
#> user_defined_sample_pca_outlier   | 0
#> 
#>   Feature Exclusions:
#> Exclusion | Count
#> -----------------
#> user_excluded                    | 0
#> extreme_feature_missingness      | 0
#> user_defined_feature_missingness | 0