Module pygwasvcf.variant_record_gwas_funs
Class to provide helper functions on the pysam.VariantRecord object
Expand source code
"""
Class to provide helper functions on the pysam.VariantRecord object
"""
# subclassing pysam.VariantRecord does not seem to work (I think this must be done in Cython)
class VariantRecordGwasFuns:
    """
    Static helper functions for reading GWAS association statistics from a
    pysam.VariantRecord object (one VCF row).

    Every method takes the record explicitly; the class holds no state and is
    used purely as a namespace.
    """

    @staticmethod
    def transform_logpval(p):
        """
        Transforms -log10 P back to P value
        :param p: -log10 P value
        :return P value in the range 0,1
        """
        return 10 ** -p

    @staticmethod
    def get_pval(variant_record, trait):
        """
        Getter for the variant-trait association P value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return P value in the range 0,1
        """
        p = variant_record.samples[trait]['LP'][0]
        if p == 0:
            # -log10 P of exactly 0 is returned as the integer 1
            return 1
        if p == 999:
            # a stored value of 999 is mapped to a P value of 0
            return 0
        return VariantRecordGwasFuns.transform_logpval(p)

    @staticmethod
    def get_se(variant_record, trait):
        """
        Getter for the variant-trait association standard error value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return Association standard error
        """
        return variant_record.samples[trait]['SE'][0]

    @staticmethod
    def get_beta(variant_record, trait):
        """
        Getter for the variant-trait beta value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return effect size coefficient
        """
        return variant_record.samples[trait]['ES'][0]

    @staticmethod
    def get_af(variant_record, trait):
        """
        Getter for the variant-trait allele-frequency in the study
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return alternative (effect) allele frequency
        """
        return variant_record.samples[trait]['AF'][0]

    @staticmethod
    def get_id(variant_record, trait, create_if_missing=False):
        """
        Getter for the variant-trait variant ID
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param create_if_missing: Create ID in the format chrom-pos-ref-alt if no ID is available
        :return Variant/marker identifier
        :raises KeyError: if no ID is stored and create_if_missing is False
        """
        sample = variant_record.samples[trait]
        if "ID" in sample and sample['ID'] is not None:
            return sample['ID']
        if create_if_missing:
            # only the first alternative allele is used to build the identifier
            return "-".join(
                (variant_record.chrom, str(variant_record.pos), variant_record.ref, variant_record.alts[0])
            )
        raise KeyError("No ID available for this record")

    @staticmethod
    def get_ss(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait sample size used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant sample size is missing then it is taken from this
            trait-keyed metadata ('TotalControls' plus 'TotalCases' when present) (optional)
        :return Total sample size used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides a sample size
        """
        sample = variant_record.samples[trait]
        if 'SS' in sample:
            return sample['SS'][0]
        if metadata is not None and 'TotalControls' in metadata[trait]:
            trait_metadata = metadata[trait]
            total = int(trait_metadata['TotalControls'])
            if 'TotalCases' in trait_metadata:
                total += int(trait_metadata['TotalCases'])
            return total
        raise KeyError("No sample size available")

    @staticmethod
    def get_nc(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait number of cases used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant number of cases is missing then it is taken from the
            'TotalCases' entry of this trait-keyed metadata (optional)
        :return Number of cases used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides the number of cases
        """
        sample = variant_record.samples[trait]
        if 'NC' in sample:
            return sample['NC'][0]
        if metadata is not None and 'TotalCases' in metadata[trait]:
            return int(metadata[trait]['TotalCases'])
        raise KeyError("No number of cases available")

    @staticmethod
    def check_biallelic(variant_record):
        """
        Check the VCF record only contains a bi-allelic change; multi-allelic changes should be on separate rows
        :param variant_record: pysam.VariantRecord object for the VCF row
        :raises AssertionError: if the record does not have exactly one alternative allele
        """
        alts = variant_record.alts
        # Raised explicitly (not via an assert statement) so the check still runs under
        # `python -O`; alts may be None when the record carries no alternative allele.
        n_alts = 0 if alts is None else len(alts)
        if n_alts != 1:
            raise AssertionError(
                "Expected exactly one alternative allele but found {}".format(n_alts)
            )
Classes
class VariantRecordGwasFuns
-
Transforms -log10 P back to P value :param p: -log10 P value :return P value in the range 0,1
Expand source code
class VariantRecordGwasFuns:
    """
    Static helper functions for reading GWAS association statistics from a
    pysam.VariantRecord object (one VCF row).

    Every method takes the record explicitly; the class holds no state and is
    used purely as a namespace.
    """

    @staticmethod
    def transform_logpval(p):
        """
        Transforms -log10 P back to P value
        :param p: -log10 P value
        :return P value in the range 0,1
        """
        return 10 ** -p

    @staticmethod
    def get_pval(variant_record, trait):
        """
        Getter for the variant-trait association P value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return P value in the range 0,1
        """
        p = variant_record.samples[trait]['LP'][0]
        if p == 0:
            # -log10 P of exactly 0 is returned as the integer 1
            return 1
        if p == 999:
            # a stored value of 999 is mapped to a P value of 0
            return 0
        return VariantRecordGwasFuns.transform_logpval(p)

    @staticmethod
    def get_se(variant_record, trait):
        """
        Getter for the variant-trait association standard error value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return Association standard error
        """
        return variant_record.samples[trait]['SE'][0]

    @staticmethod
    def get_beta(variant_record, trait):
        """
        Getter for the variant-trait beta value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return effect size coefficient
        """
        return variant_record.samples[trait]['ES'][0]

    @staticmethod
    def get_af(variant_record, trait):
        """
        Getter for the variant-trait allele-frequency in the study
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return alternative (effect) allele frequency
        """
        return variant_record.samples[trait]['AF'][0]

    @staticmethod
    def get_id(variant_record, trait, create_if_missing=False):
        """
        Getter for the variant-trait variant ID
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param create_if_missing: Create ID in the format chrom-pos-ref-alt if no ID is available
        :return Variant/marker identifier
        :raises KeyError: if no ID is stored and create_if_missing is False
        """
        sample = variant_record.samples[trait]
        if "ID" in sample and sample['ID'] is not None:
            return sample['ID']
        if create_if_missing:
            # only the first alternative allele is used to build the identifier
            return "-".join(
                (variant_record.chrom, str(variant_record.pos), variant_record.ref, variant_record.alts[0])
            )
        raise KeyError("No ID available for this record")

    @staticmethod
    def get_ss(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait sample size used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant sample size is missing then it is taken from this
            trait-keyed metadata ('TotalControls' plus 'TotalCases' when present) (optional)
        :return Total sample size used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides a sample size
        """
        sample = variant_record.samples[trait]
        if 'SS' in sample:
            return sample['SS'][0]
        if metadata is not None and 'TotalControls' in metadata[trait]:
            trait_metadata = metadata[trait]
            total = int(trait_metadata['TotalControls'])
            if 'TotalCases' in trait_metadata:
                total += int(trait_metadata['TotalCases'])
            return total
        raise KeyError("No sample size available")

    @staticmethod
    def get_nc(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait number of cases used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant number of cases is missing then it is taken from the
            'TotalCases' entry of this trait-keyed metadata (optional)
        :return Number of cases used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides the number of cases
        """
        sample = variant_record.samples[trait]
        if 'NC' in sample:
            return sample['NC'][0]
        if metadata is not None and 'TotalCases' in metadata[trait]:
            return int(metadata[trait]['TotalCases'])
        raise KeyError("No number of cases available")

    @staticmethod
    def check_biallelic(variant_record):
        """
        Check the VCF record only contains a bi-allelic change; multi-allelic changes should be on separate rows
        :param variant_record: pysam.VariantRecord object for the VCF row
        :raises AssertionError: if the record does not have exactly one alternative allele
        """
        alts = variant_record.alts
        # Raised explicitly (not via an assert statement) so the check still runs under
        # `python -O`; alts may be None when the record carries no alternative allele.
        n_alts = 0 if alts is None else len(alts)
        if n_alts != 1:
            raise AssertionError(
                "Expected exactly one alternative allele but found {}".format(n_alts)
            )
Static methods
def check_biallelic(variant_record)
-
Expand source code
@staticmethod def check_biallelic(variant_record): assert len(variant_record.alts) == 1
def get_af(variant_record, trait)
-
Expand source code
@staticmethod def get_af(variant_record, trait): return variant_record.samples[trait]['AF'][0]
def get_beta(variant_record, trait)
-
Expand source code
@staticmethod def get_beta(variant_record, trait): return variant_record.samples[trait]['ES'][0]
def get_id(variant_record, trait, create_if_missing=False)
-
Expand source code
@staticmethod def get_id(variant_record, trait, create_if_missing=False): if "ID" in variant_record.samples[trait] and variant_record.samples[trait]['ID'] is not None: return variant_record.samples[trait]['ID'] elif create_if_missing: return variant_record.chrom + "-" + str(variant_record.pos) + "-" + variant_record.ref + "-" + \ variant_record.alts[0] else: raise KeyError("No ID available for this record")
def get_nc(variant_record, trait, metadata=None)
-
Expand source code
@staticmethod def get_nc(variant_record, trait, metadata=None): if 'NC' in variant_record.samples[trait]: return variant_record.samples[trait]['NC'][0] elif metadata is not None and 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalCases']) else: raise KeyError("No sample size available")
def get_pval(variant_record, trait)
-
Expand source code
@staticmethod def get_pval(variant_record, trait): p = variant_record.samples[trait]['LP'][0] if p == 0: return 1 elif p == 999: return 0 else: return VariantRecordGwasFuns.transform_logpval(p)
def get_se(variant_record, trait)
-
Expand source code
@staticmethod def get_se(variant_record, trait): return variant_record.samples[trait]['SE'][0]
def get_ss(variant_record, trait, metadata=None)
-
Expand source code
@staticmethod def get_ss(variant_record, trait, metadata=None): if 'SS' in variant_record.samples[trait]: return variant_record.samples[trait]['SS'][0] elif metadata is not None and 'TotalControls' in metadata[trait]: if 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalControls']) + int(metadata[trait]['TotalCases']) else: return int(metadata[trait]['TotalControls']) else: raise KeyError("No sample size available")
def transform_logpval(p)
-
Expand source code
@staticmethod def transform_logpval(p): return 10 ** -p