Module ieugwaspy.query
API query functions
Expand source code
"""API query functions
"""
import datetime
import time
import requests
from retry import retry
from urllib.parse import urljoin
import ieugwaspy.config as config
import ieugwaspy.variants as variants
import ieugwaspy.backwards as backwards
def _check_allowance_reset_timestamp():
    """Enforce the allowance reset time stored in config.allowance_reset_timestamp.

    A stored value of 0 means no reset time is recorded and the call returns
    immediately. If the current time is past the stored timestamp, the
    timestamp is cleared back to 0. Otherwise the request must be halted.

    Raises:
        Exception: when the stored reset time has not passed yet; the message
            contains that time formatted in the local timezone.
    """
    reset_at = config.allowance_reset_timestamp
    if reset_at == 0:
        return

    if int(time.time()) <= reset_at:
        # Still inside the waiting period: report when the caller may retry.
        local_reset = datetime.datetime.fromtimestamp(reset_at).astimezone()
        retry_after = local_reset.strftime('%Y-%m-%d %H:%M %Z')
        raise Exception("You have run out of allowance. Please retry after {}".format(retry_after))

    # The waiting period is over, so forget the recorded reset time.
    config.allowance_reset_timestamp = 0
def _save_allowance_reset_timestamp(headers):
"""Calculate and save the retry timestamp when there is no remaining allowance
Args:
headers: response headers
"""
if headers.get('X-Allowance-Remaining', None) == "0":
config.allowance_reset_timestamp = int(time.time()) + int(headers.get('Retry-After', 0))
@retry(tries=5, delay=2, backoff=2)
def _api_query(path, method="GET", data=[]):
    """Query the API and return the resulting data in JSON format.

    This is a general-purpose function called by the other functions in this
    module.

    Args:
        path: the path component of the URL, joined onto config.env["base_url"]
        method: the HTTP method, "GET" or "POST"
        data: the form data, sent only with POST requests

    Returns:
        data: JSON object as returned by API. If the recorded allowance has
            run out and its reset time has not passed yet, no request is sent
            and the "run out of allowance" message is returned as a string.

    Raises:
        Exception: if method is neither "GET" nor "POST", or if the response
            could not be processed as JSON.
    """
    url = urljoin(config.env["base_url"], path)

    headers = {}
    if config.env["jwt"]:
        headers["Authorization"] = "Bearer " + config.env["jwt"]
    if config.env["test_mode_key"]:
        headers["X-TEST-MODE-KEY"] = config.env["test_mode_key"]

    if method not in ("GET", "POST"):
        raise Exception("Invalid API method")

    # Check the allowance BEFORE contacting the server, so that an exhausted
    # allowance really halts the request instead of spending another call.
    # The message is returned rather than raised so that the retry decorator
    # does not keep re-running a call that cannot succeed yet.
    try:
        _check_allowance_reset_timestamp()
    except Exception as e:
        return str(e)

    if method == "GET":
        response = requests.get(url, headers=headers)
    else:
        response = requests.post(url, headers=headers, data=data)

    try:
        # Remember the reset time if this response used up the allowance.
        _save_allowance_reset_timestamp(response.headers)
        return response.json()
    except Exception as e:
        # Chain the cause so the original traceback stays available.
        raise Exception("Unable to parse the response. " + str(e)) from e
def status():
    """Query the API's "status" endpoint.

    Returns:
        result: JSON object as returned by API
    """
    result = _api_query("status")
    return result
def user():
    """Query the API's "user" endpoint for user information.

    Returns:
        result: JSON object as returned by API
    """
    result = _api_query("user")
    return result
def gwasinfo(id=[]):
    """Return GWAS study meta-data for the studies selected by id.

    Args:
        id (list): OpenGWAS IDs of specific studies, e.g. ["ieu-a-1239", "ieu-a-2"].
            A single ID may also be given as a plain string. When empty or
            None, the "gwasinfo" endpoint is queried without any IDs.

    Returns:
        result: JSON object as returned by API
    """
    # Any empty value (default [], None, (), "") means "no specific studies".
    if not id:
        return _api_query("gwasinfo")

    # A bare string would otherwise be joined character by character
    # ("ieu-a-2" -> "i,e,u,-,a,-,2"), so treat it as a one-element list.
    if isinstance(id, str):
        id = [id]

    return _api_query("gwasinfo/{}".format(",".join(id)))
def associations(
    variant,
    id,
    proxies=1,
    r2=0.8,
    align_alleles=1,
    palindromes=1,
    maf_threshold=0.3
):
    """Look up associations for the given variant/study combinations.

    The IDs are first passed through backwards.legacy_ids, then all options
    are sent as form data in a POST request to the "associations" endpoint.

    Args:
        variant (list): list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
        id (list): list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
        proxies (int): `1` or `0`, whether to look for proxies (1, default) or not (0)
        r2 (float): minimum proxy LD rsq value. Default = 0.8
        align_alleles (int): `1` or `0`, whether to try to align tag alleles to target alleles (1, default) or not (0). Only active when proxies = 1
        palindromes (int): `1` or `0`, whether to allow palindromic SNPs (1, default) or not (0). Only active when proxies = 1
        maf_threshold (float): MAF threshold to try to infer palindromic SNPs. Default = 0.3

    Returns:
        result: JSON object as returned by API
    """
    study_ids = backwards.legacy_ids(id)
    payload = dict(
        variant=variant,
        id=study_ids,
        proxies=proxies,
        r2=r2,
        align_alleles=align_alleles,
        palindromes=palindromes,
        maf_threshold=maf_threshold,
    )
    return _api_query("associations", method="POST", data=payload)
def phewas(variant, pval=1e-3, batch=[]):
    """Run a phenome-wide association analysis (PheWAS) of variant(s) across all traits in the IEU GWAS database.

    The variants are converted with variants.variants_to_rsid and sent as one
    comma-separated string in a POST request to the "phewas" endpoint.

    Args:
        variant (list): list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
        pval (float): p-value threshold
        batch (list): OpenGWAS data batches, e.g. ["ieu-a"]. Leave empty to search across all batches

    Returns:
        result: JSON object as returned by API
    """
    rsids = variants.variants_to_rsid(variant)
    payload = dict(
        variant=",".join(rsids),
        pval=pval,
        index_list=batch,
    )
    return _api_query("phewas", method="POST", data=payload)
def tophits(
    id,
    pval=5e-8,
    preclumped=1,
    clump=1,
    bychr=0,
    r2=0.001,
    kb=10000,
    pop="EUR",
    force_server=False
):
    """Retrieve the top hits (associations passing a p-value threshold, optionally clumped) for specific studies.

    Args:
        id (list): list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
        pval (float): use this p-value threshold. Default = 5e-8
        preclumped (int): `1` or `0`, whether to use pre-clumped hits (1, default) or not (0). Pre-clumped hits are only requested when clump, pval, r2 and kb are all left at their defaults and force_server is False
        clump (int): `1` or `0`, whether to clump (1, default) or not (0)
        bychr (int): `0` or `1`, whether to extract all at once (0, default) or by chromosome (1). There is a limit on query results so bychr might be required for some well-powered datasets
        r2 (float): use this clumping r2 threshold. Default = 0.001 (very strict)
        kb (int): use this clumping kb window. Default = 10000 (very strict)
        pop (str): `EUR`, `SAS`, `EAS`, `AFR`, `AMR` or `legacy`, indicating the population. Default = EUR
        force_server (bool): `True` or `False`. If False (default) it will return preclumped hits with pval = 5e-8, r2 = 0.001 and kb = 10000 and using only SNPs with MAF > 0.01 in the European (EUR) samples in 1000 genomes. If True then it will recompute using server side LD reference panel

    Returns:
        result: JSON object as returned by API
    """
    id = backwards.legacy_ids(id)

    # Pre-clumped results exist only for the default clumping settings.
    default_clumping = clump == 1 and pval == 5e-8 and r2 == 0.001 and kb == 10000

    # Honour an explicit preclumped=0 instead of silently overwriting it;
    # any other value keeps the previous behaviour.
    if preclumped != 0 and default_clumping and not force_server:
        preclumped = 1
    else:
        preclumped = 0

    data = {
        "id": id,
        "pval": pval,
        "preclumped": preclumped,
        "clump": clump,
        "bychr": bychr,
        "r2": r2,
        "kb": kb,
        "pop": pop
    }
    return _api_query("tophits", method="POST", data=data)
Functions
def associations(variant, id, proxies=1, r2=0.8, align_alleles=1, palindromes=1, maf_threshold=0.3)
-
Retrieve associations for specific SNP/study combinations
Args
variant
:list
- list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
id
:list
- list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
proxies
:int
- `1` or `0`, whether to look for proxies (1, default) or not (0)
r2
:float
- minimum proxy LD rsq value. Default = 0.8
align_alleles
:int
- `1` or `0`, whether to try to align tag alleles to target alleles (1, default) or not (0). Only active when proxies = 1
palindromes
:int
- `1` or `0`, whether to allow palindromic SNPs (1, default) or not (0). Only active when proxies = 1
maf_threshold
:float
- MAF threshold to try to infer palindromic SNPs. Default = 0.3
Returns
result
- JSON object as returned by API
Expand source code
def associations(
    variant,
    id,
    proxies=1,
    r2=0.8,
    align_alleles=1,
    palindromes=1,
    maf_threshold=0.3
):
    """Look up associations for the given variant/study combinations.

    The IDs are first passed through backwards.legacy_ids, then all options
    are sent as form data in a POST request to the "associations" endpoint.

    Args:
        variant (list): list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
        id (list): list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
        proxies (int): `1` or `0`, whether to look for proxies (1, default) or not (0)
        r2 (float): minimum proxy LD rsq value. Default = 0.8
        align_alleles (int): `1` or `0`, whether to try to align tag alleles to target alleles (1, default) or not (0). Only active when proxies = 1
        palindromes (int): `1` or `0`, whether to allow palindromic SNPs (1, default) or not (0). Only active when proxies = 1
        maf_threshold (float): MAF threshold to try to infer palindromic SNPs. Default = 0.3

    Returns:
        result: JSON object as returned by API
    """
    study_ids = backwards.legacy_ids(id)
    payload = dict(
        variant=variant,
        id=study_ids,
        proxies=proxies,
        r2=r2,
        align_alleles=align_alleles,
        palindromes=palindromes,
        maf_threshold=maf_threshold,
    )
    return _api_query("associations", method="POST", data=payload)
def gwasinfo(id=[])
-
This function will return GWAS study meta-data describing the specific studies selected by id
Args
id
:list
- OpenGWAS IDs of specific studies, e.g. ["ieu-a-1239", "ieu-a-2"]
Returns
result
- JSON object as returned by API
Expand source code
def gwasinfo(id=[]):
    """Return GWAS study meta-data for the studies selected by id.

    Args:
        id (list): OpenGWAS IDs of specific studies, e.g. ["ieu-a-1239", "ieu-a-2"].
            A single ID may also be given as a plain string. When empty or
            None, the "gwasinfo" endpoint is queried without any IDs.

    Returns:
        result: JSON object as returned by API
    """
    # Any empty value (default [], None, (), "") means "no specific studies".
    if not id:
        return _api_query("gwasinfo")

    # A bare string would otherwise be joined character by character
    # ("ieu-a-2" -> "i,e,u,-,a,-,2"), so treat it as a one-element list.
    if isinstance(id, str):
        id = [id]

    return _api_query("gwasinfo/{}".format(",".join(id)))
def phewas(variant, pval=0.001, batch=[])
-
Perform phenome-wide association analysis (PheWAS) of variant(s) across all traits in the IEU GWAS database
Args
variant
:list
- list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
pval
:float
- p-value threshold
batch
:list
- OpenGWAS data batches, e.g. ["ieu-a"]. Leave empty to search across all batches
Returns
result
- JSON object as returned by API
Expand source code
def phewas(variant, pval=1e-3, batch=[]):
    """Run a phenome-wide association analysis (PheWAS) of variant(s) across all traits in the IEU GWAS database.

    The variants are converted with variants.variants_to_rsid and sent as one
    comma-separated string in a POST request to the "phewas" endpoint.

    Args:
        variant (list): list of variants, e.g. ["rs1205", "7:105561135", "7:105561135-105563135"]
        pval (float): p-value threshold
        batch (list): OpenGWAS data batches, e.g. ["ieu-a"]. Leave empty to search across all batches

    Returns:
        result: JSON object as returned by API
    """
    rsids = variants.variants_to_rsid(variant)
    payload = dict(
        variant=",".join(rsids),
        pval=pval,
        index_list=batch,
    )
    return _api_query("phewas", method="POST", data=payload)
def status()
-
Check API status
Returns
result
- JSON object as returned by API
Expand source code
def status():
    """Query the API's "status" endpoint.

    Returns:
        result: JSON object as returned by API
    """
    result = _api_query("status")
    return result
def tophits(id, pval=5e-08, preclumped=1, clump=1, bychr=0, r2=0.001, kb=10000, pop='EUR', force_server=False)
-
Retrieve association data for specific SNP/study combinations
Args
id
:list
- list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
pval
:float
- use this p-value threshold. Default = 5e-8
preclumped
:int
- `1` or `0`, whether to use pre-clumped hits (1, default) or not (0)
clump
:int
- `1` or `0`, whether to clump (1, default) or not (0)
bychr
:int
- `0` or `1`, whether to extract all at once (0, default) or by chromosome (1). There is a limit on query results so bychr might be required for some well-powered datasets
r2
:float
- use this clumping r2 threshold. Default = 0.001 (very strict)
kb
:int
- use this clumping kb window. Default = 10000 (very strict)
pop
:str
- `EUR`, `SAS`, `EAS`, `AFR`, `AMR` or `legacy`, indicating the population. Default = EUR
force_server
:bool
- `True` or `False`. If False (default) it will return preclumped hits with pval = 5e-8, r2 = 0.001 and kb = 10000 and using only SNPs with MAF > 0.01 in the European (EUR) samples in 1000 genomes. If True then it will recompute using server side LD reference panel
Returns
result
- JSON object as returned by API
Expand source code
def tophits(
    id,
    pval=5e-8,
    preclumped=1,
    clump=1,
    bychr=0,
    r2=0.001,
    kb=10000,
    pop="EUR",
    force_server=False
):
    """Retrieve the top hits (associations passing a p-value threshold, optionally clumped) for specific studies.

    Args:
        id (list): list of OpenGWAS IDs, e.g. ["ieu-a-1239", "ieu-a-2"]
        pval (float): use this p-value threshold. Default = 5e-8
        preclumped (int): `1` or `0`, whether to use pre-clumped hits (1, default) or not (0). Pre-clumped hits are only requested when clump, pval, r2 and kb are all left at their defaults and force_server is False
        clump (int): `1` or `0`, whether to clump (1, default) or not (0)
        bychr (int): `0` or `1`, whether to extract all at once (0, default) or by chromosome (1). There is a limit on query results so bychr might be required for some well-powered datasets
        r2 (float): use this clumping r2 threshold. Default = 0.001 (very strict)
        kb (int): use this clumping kb window. Default = 10000 (very strict)
        pop (str): `EUR`, `SAS`, `EAS`, `AFR`, `AMR` or `legacy`, indicating the population. Default = EUR
        force_server (bool): `True` or `False`. If False (default) it will return preclumped hits with pval = 5e-8, r2 = 0.001 and kb = 10000 and using only SNPs with MAF > 0.01 in the European (EUR) samples in 1000 genomes. If True then it will recompute using server side LD reference panel

    Returns:
        result: JSON object as returned by API
    """
    id = backwards.legacy_ids(id)

    # Pre-clumped results exist only for the default clumping settings.
    default_clumping = clump == 1 and pval == 5e-8 and r2 == 0.001 and kb == 10000

    # Honour an explicit preclumped=0 instead of silently overwriting it;
    # any other value keeps the previous behaviour.
    if preclumped != 0 and default_clumping and not force_server:
        preclumped = 1
    else:
        preclumped = 0

    data = {
        "id": id,
        "pval": pval,
        "preclumped": preclumped,
        "clump": clump,
        "bychr": bychr,
        "r2": r2,
        "kb": kb,
        "pop": pop
    }
    return _api_query("tophits", method="POST", data=data)
def user()
-
Get user information
Returns
result
- JSON object as returned by API
Expand source code
def user():
    """Query the API's "user" endpoint for user information.

    Returns:
        result: JSON object as returned by API
    """
    result = _api_query("user")
    return result