Module pygwasvcf.variant_record_gwas_funs
Class to provide helper functions on the pysam.VariantRecord object
Expand source code
"""
Class to provide helper functions on the pysam.VariantRecord object
"""
# subclassing pysam.VariantRecord does not seem to work (I think this must be done in Cython)
class VariantRecordGwasFuns:
    """
    Static helper functions for reading GWAS association statistics from a pysam.VariantRecord object.

    Each getter takes the record for one VCF row and the name of the trait; the trait name is used as the key
    into variant_record.samples.
    """

    @staticmethod
    def transform_logpval(p):
        """
        Transforms -log10 P back to P value
        :param p: -log10 P value
        :return: P value (10 ** -p); in the range 0,1 for non-negative p
        """
        return 10 ** -p

    @staticmethod
    def get_pval(variant_record, trait):
        """
        Getter for the variant-trait association P value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return: P value in the range 0,1 (always a float)
        """
        p = variant_record.samples[trait]['LP'][0]
        # The stored values 0 and 999 are special-cased and mapped straight to the two extremes
        # instead of being passed through the power transform.
        if p == 0:
            return 1.0
        if p == 999:
            return 0.0
        return VariantRecordGwasFuns.transform_logpval(p)

    @staticmethod
    def get_se(variant_record, trait):
        """
        Getter for the variant-trait association standard error value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return: Association standard error
        """
        return variant_record.samples[trait]['SE'][0]

    @staticmethod
    def get_beta(variant_record, trait):
        """
        Getter for the variant-trait beta value
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return: effect size coefficient
        """
        return variant_record.samples[trait]['ES'][0]

    @staticmethod
    def get_af(variant_record, trait):
        """
        Getter for the variant-trait allele-frequency in the study
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :return: alternative (effect) allele frequency
        """
        return variant_record.samples[trait]['AF'][0]

    @staticmethod
    def get_id(variant_record, trait, create_if_missing=False):
        """
        Getter for the variant-trait variant ID
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param create_if_missing: Create ID in the format chrom-pos-ref-alt if no ID is available
        :return: Variant/marker identifier
        :raises KeyError: if no ID is stored for the trait and create_if_missing is False
        """
        sample = variant_record.samples[trait]
        if "ID" in sample and sample['ID'] is not None:
            return sample['ID']
        if create_if_missing:
            # Only the first alternative allele is used when building the fallback identifier.
            return "-".join(
                (variant_record.chrom, str(variant_record.pos), variant_record.ref, variant_record.alts[0])
            )
        raise KeyError("No ID available for this record")

    @staticmethod
    def get_ss(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait sample size used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant sample size is missing then taken from global metadata (optional)
        :return: Total sample size used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides a sample size
        """
        sample = variant_record.samples[trait]
        if 'SS' in sample:
            return sample['SS'][0]
        if metadata is not None and 'TotalControls' in metadata[trait]:
            trait_metadata = metadata[trait]
            # TotalControls alone is used as the total; TotalCases is added on top when it is present.
            total = int(trait_metadata['TotalControls'])
            if 'TotalCases' in trait_metadata:
                total += int(trait_metadata['TotalCases'])
            return total
        raise KeyError("No sample size available")

    @staticmethod
    def get_nc(variant_record, trait, metadata=None):
        """
        Getter for the variant-trait number of cases used to estimate the effect
        :param variant_record: pysam.VariantRecord object for the VCF row
        :param trait: Name of the trait
        :param metadata: If the per-variant number of cases is missing then taken from global metadata (optional)
        :return: Number of cases used to estimate the association effect size
        :raises KeyError: if neither the record nor the metadata provides a number of cases
        """
        sample = variant_record.samples[trait]
        if 'NC' in sample:
            return sample['NC'][0]
        if metadata is not None and 'TotalCases' in metadata[trait]:
            return int(metadata[trait]['TotalCases'])
        raise KeyError("No number of cases available")

    @staticmethod
    def check_biallelic(variant_record):
        """
        Check the VCF record only contains a bi-allelic change; multi-allelic changes should be on separate rows
        :param variant_record: pysam.VariantRecord object for the VCF row
        :raises AssertionError: if the record does not have exactly one alternative allele
        """
        alts = variant_record.alts
        # Raised explicitly rather than through an assert statement so the check is not stripped when
        # Python runs with -O. A record whose alts is None (no alternative allele) fails the same check
        # instead of surfacing as a TypeError from len(None).
        if alts is None or len(alts) != 1:
            raise AssertionError("Record is not bi-allelic: expected exactly one alternative allele")
Classes
class VariantRecordGwasFuns- 
Transforms -log10 P back to P value :param p: -log10 P value :return P value in the range 0,1
Expand source code
class VariantRecordGwasFuns: """ Transforms -log10 P back to P value :param p: -log10 P value :return P value in the range 0,1 """ @staticmethod def transform_logpval(p): return 10 ** -p """ Getter for the variant-trait association P value :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :return P value in the range 0,1 """ @staticmethod def get_pval(variant_record, trait): p = variant_record.samples[trait]['LP'][0] if p == 0: return 1 elif p == 999: return 0 else: return VariantRecordGwasFuns.transform_logpval(p) """ Getter for the variant-trait association standard error value :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :return Association standard error """ @staticmethod def get_se(variant_record, trait): return variant_record.samples[trait]['SE'][0] """ Getter for the variant-trait beta value :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :return effect size coefficient """ @staticmethod def get_beta(variant_record, trait): return variant_record.samples[trait]['ES'][0] """ Getter for the variant-trait allele-frequency in the study :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :return alternative (effect) allele frequency """ @staticmethod def get_af(variant_record, trait): return variant_record.samples[trait]['AF'][0] """ Getter for the variant-trait variant ID :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :param create_if_missing: Create ID in the format chrom-pos-ref-alt if no ID is available :return Variant/marker identifier """ @staticmethod def get_id(variant_record, trait, create_if_missing=False): if "ID" in variant_record.samples[trait] and variant_record.samples[trait]['ID'] is not None: return variant_record.samples[trait]['ID'] elif create_if_missing: return variant_record.chrom + "-" + 
str(variant_record.pos) + "-" + variant_record.ref + "-" + \ variant_record.alts[0] else: raise KeyError("No ID available for this record") """ Getter for the variant-trait sample size used to estimate the effect :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :param metadata: If the per-variant sample size if missing then taken from global metadata (optional) :return Total sample size used to estimate the association effect size """ @staticmethod def get_ss(variant_record, trait, metadata=None): if 'SS' in variant_record.samples[trait]: return variant_record.samples[trait]['SS'][0] elif metadata is not None and 'TotalControls' in metadata[trait]: if 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalControls']) + int(metadata[trait]['TotalCases']) else: return int(metadata[trait]['TotalControls']) else: raise KeyError("No sample size available") """ Getter for the variant-trait number of cases size used to estimate the effect :param variant_record: pysam.VariantRecord object for the VCF row :param trait: Name of the trait :param metadata: If the per-variant sample size if missing then taken from global metadata (optional) :return Number of cases used to estimate the association effect size """ @staticmethod def get_nc(variant_record, trait, metadata=None): if 'NC' in variant_record.samples[trait]: return variant_record.samples[trait]['NC'][0] elif metadata is not None and 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalCases']) else: raise KeyError("No sample size available") """ Check the VCF record only contains a bi-allelic change; multi-allelic changes should be on separate rows :param variant_record: pysam.VariantRecord object for the VCF row """ @staticmethod def check_biallelic(variant_record): assert len(variant_record.alts) == 1Static methods
def check_biallelic(variant_record)- 
Expand source code
@staticmethod def check_biallelic(variant_record): assert len(variant_record.alts) == 1 def get_af(variant_record, trait)- 
Expand source code
@staticmethod def get_af(variant_record, trait): return variant_record.samples[trait]['AF'][0] def get_beta(variant_record, trait)- 
Expand source code
@staticmethod def get_beta(variant_record, trait): return variant_record.samples[trait]['ES'][0] def get_id(variant_record, trait, create_if_missing=False)- 
Expand source code
@staticmethod def get_id(variant_record, trait, create_if_missing=False): if "ID" in variant_record.samples[trait] and variant_record.samples[trait]['ID'] is not None: return variant_record.samples[trait]['ID'] elif create_if_missing: return variant_record.chrom + "-" + str(variant_record.pos) + "-" + variant_record.ref + "-" + \ variant_record.alts[0] else: raise KeyError("No ID available for this record") def get_nc(variant_record, trait, metadata=None)- 
Expand source code
@staticmethod def get_nc(variant_record, trait, metadata=None): if 'NC' in variant_record.samples[trait]: return variant_record.samples[trait]['NC'][0] elif metadata is not None and 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalCases']) else: raise KeyError("No sample size available") def get_pval(variant_record, trait)- 
Expand source code
@staticmethod def get_pval(variant_record, trait): p = variant_record.samples[trait]['LP'][0] if p == 0: return 1 elif p == 999: return 0 else: return VariantRecordGwasFuns.transform_logpval(p) def get_se(variant_record, trait)- 
Expand source code
@staticmethod def get_se(variant_record, trait): return variant_record.samples[trait]['SE'][0] def get_ss(variant_record, trait, metadata=None)- 
Expand source code
@staticmethod def get_ss(variant_record, trait, metadata=None): if 'SS' in variant_record.samples[trait]: return variant_record.samples[trait]['SS'][0] elif metadata is not None and 'TotalControls' in metadata[trait]: if 'TotalCases' in metadata[trait]: return int(metadata[trait]['TotalControls']) + int(metadata[trait]['TotalCases']) else: return int(metadata[trait]['TotalControls']) else: raise KeyError("No sample size available") def transform_logpval(p)- 
Expand source code
@staticmethod def transform_logpval(p): return 10 ** -p