-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from tansey-lab/jw_pip_package
Initial commits to add CLI interface to query HGNC or UniProt for gene names
- Loading branch information
Showing
7 changed files
with
366 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import ast | ||
import logging | ||
from abc import ABC, abstractmethod | ||
|
||
import requests | ||
|
||
from nf_rnaseq import requests_wrapper | ||
|
||
|
||
class APIClient(ABC):
    """Abstract base class for clients that query a REST API.

    Subclasses are expected to set ``url_query`` (via ``create_query_url``)
    before calling ``query_api``. They may define an optional ``header``
    attribute holding either a dict or the string representation of a dict.
    """

    def _get_request_headers(self):
        """Return the request headers as a dict, or None if the client defines none.

        The ``header`` attribute is preferred; ``headers`` is accepted as a
        fallback so either spelling on a subclass is honoured.
        """
        raw = getattr(self, "header", None)
        if raw is None:
            raw = getattr(self, "headers", None)
        if raw is None:
            return None
        if isinstance(raw, str):
            # Clients may store the header as the repr of a dict; literal_eval
            # only evaluates Python literals, never arbitrary code.
            return ast.literal_eval(raw)
        return dict(raw)

    def query_api(self):
        """Query ``self.url_query`` and store the response on the instance.

        The body is stored in ``self.json`` when it can be decoded as JSON;
        otherwise ``self.json`` stays None and the raw body is stored in
        ``self.text``. HTTP and decoding errors are logged, not raised.
        """
        log = logging.getLogger(__name__)
        session = requests_wrapper.get_cached_session()
        headers = self._get_request_headers()
        if headers is None:
            response = session.get(self.url_query)
        else:
            # Must be passed by keyword: the second positional argument of
            # ``get`` is not the headers mapping.
            response = session.get(self.url_query, headers=headers)

        # Guarantee both attributes exist whatever happens below.
        self.json = None
        self.text = None

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            log.error("HTTP error while querying %s", self.url_query, exc_info=e)

        try:
            self.json = response.json()
        except requests.exceptions.JSONDecodeError as e:
            log.error("Response from %s is not valid JSON", self.url_query, exc_info=e)
            self.text = response.text

    @abstractmethod
    def create_query_url(self):
        """Create the URL to query the API (e.g., add search term or ID)."""
        ...

    @abstractmethod
    def maybe_set_attr_from_json(self):
        """Set attributes in the object from the json response."""
        ...

    @abstractmethod
    def maybe_get_hgnc_gene_name(self):
        """Get the HGNC gene name from the json response."""
        ...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
|
||
from nf_rnaseq import config, hgnc, uniprot | ||
|
||
|
||
def parsearg_utils(argv=None):
    """
    Argparser to get HGNC gene name from string input.

    Parameters
    ----------
    argv : list[str] | None
        Arguments to parse; when None, argparse reads them from ``sys.argv``.

    Returns
    -------
    args: argparse.Namespace
        Namespace object with the attributes ``cachePath``, ``input``,
        ``searchTerm``, ``tsv`` and ``uniProt``
    """
    parser = argparse.ArgumentParser(description="Parser for get_hgnc_gene_name.py.")

    parser.add_argument(
        "-c",
        "--cachePath",
        help="Path to requests cache (type: str, default: '')",
        type=str,
        default="",
    )

    # ``type`` must be the callable ``str``; the string "str" makes
    # add_argument raise ValueError.
    parser.add_argument(
        "-i",
        "--input",
        help="Input string (type: str)",
        type=str,
        required=True,
    )

    parser.add_argument(
        "-s",
        "--searchTerm",
        help="Search term for HGNC Fetch; if UniProt, not in use (type: str)",
        type=str,
        default="mane_select",
    )

    parser.add_argument(
        "-t",
        "--tsv",
        help="If flag included tsv format out otherwise csv",
        action="store_true",
    )

    parser.add_argument(
        "-u",
        "--uniProt",
        help="If flag included UniProt should be queried otherwise HGNC database used",
        action="store_true",
    )

    return parser.parse_args(argv)
|
||
|
||
def main():
    """Look up the HGNC gene name for the input string and print one result row.

    Queries UniProt when ``--uniProt`` is given and HGNC otherwise, then
    prints ``input, gene name(s), source`` separated by tabs (``--tsv``) or
    commas.
    """
    args = parsearg_utils()

    if args.cachePath != "":
        config.set_request_cache(args.cachePath)

    # Both client classes build the URL, query the API and extract the gene
    # names in __post_init__, so constructing the object is all that is needed.
    if args.uniProt:
        source = "UniProt"
        uniprot_obj = uniprot.UniProt(uniprot_id=args.input)
        id_out = uniprot_obj.hgnc_gene_name
    else:
        source = "HGNC"
        hgnc_obj = hgnc.HGNC(search_id=args.input, search_term=args.searchTerm)
        id_out = hgnc_obj.hgnc_gene_name

    str1 = f"{args.input.ljust(20)}"
    str2 = f"{str(id_out).ljust(20)}"
    str3 = f"{source}"

    if args.tsv:
        print(f"{str1}\t{str2}\t{str3}")
    else:
        print(f"{str1},{str2},{str3}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os | ||
|
||
REQUESTS_CACHE_VAR = "REQUESTS_CACHE" | ||
"""str: Environment variable for request cache file prefix.""" | ||
|
||
|
||
def set_request_cache(val: str) -> None:
    """Store the request cache path in the process environment.

    Parameters
    ----------
    val : str
        Request cache path

    Returns
    -------
    None
    """
    os.environ.update({REQUESTS_CACHE_VAR: val})
|
||
|
||
def maybe_get_request_cache() -> str | None:
    """Look up the request cache path in the process environment.

    Returns
    -------
    str | None
        Request cache path if the environment variable is set, otherwise None
    """
    return os.environ.get(REQUESTS_CACHE_VAR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import logging | ||
import os | ||
from dataclasses import dataclass | ||
|
||
from nf_rnaseq.api_schema import APIClient | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@dataclass
class HGNC(APIClient):
    """Class to interact with HGNC API.

    Instantiating the class builds the query URL, queries the API, copies the
    top-level keys of the JSON response onto the instance and stores the
    extracted gene names in ``hgnc_gene_name``.
    """

    search_id: str
    """str: ID on which to search."""
    search_term: str
    """str: Term from, https://www.genenames.org/help/rest/ on which to search."""
    url_base: str = "https://rest.genenames.org/fetch"
    """str: URL base for HGNC API."""
    header: str = "{'Accept': 'application/json'}"
    """str: Header for HGNC API (parsed into a dict with ast.literal_eval)."""
    url_query: str | None = None
    """str: URL query for HGNC API."""
    json: dict | None = None
    """dict: JSON response from HGNC API."""
    text: str | None = None
    """str: Text response from HGNC API (if no json)."""
    hgnc_gene_name: list[str] | None = None
    """list[str]: HGNC gene names extracted from the response."""

    def __post_init__(self):
        self.create_query_url()
        self.query_api()
        self.maybe_set_attr_from_json()
        self.maybe_get_hgnc_gene_name()

    def create_query_url(self):
        """Create URL for HGNC API query."""
        # URLs always use "/"; os.path.join would use the OS path separator.
        self.url_query = "/".join([self.url_base.rstrip("/"), self.search_term, self.search_id])

    def maybe_set_attr_from_json(self):
        """If self.json is a dict, copy its top-level keys onto this object.

        ``maybe_extract_list_from_hgnc_response_docs`` reads ``self.response``,
        so the payload is presumably expected to carry a ``response`` key.
        """
        payload = getattr(self, "json", None)
        if isinstance(payload, dict):
            for key, value in payload.items():
                setattr(self, key, value)

    def maybe_set_json_properties(self):
        """Backward-compatible alias for ``maybe_set_attr_from_json``."""
        self.maybe_set_attr_from_json()

    def maybe_get_hgnc_gene_name(self, str_symbol: str = "symbol") -> list[str] | None:
        """Extract gene names from the response and store them in hgnc_gene_name.

        Parameters
        ----------
        str_symbol : str
            Key of the response documents that holds the gene name

        Returns
        -------
        list[str] | None
            Extracted gene names, or None if the response could not be read
        """
        list_genes = self.maybe_extract_list_from_hgnc_response_docs(str_symbol)
        self.hgnc_gene_name = list_genes
        return list_genes

    def maybe_extract_list_from_hgnc_response_docs(
        self,
        str_to_extract: str,
    ) -> list[str] | None:
        """Extract a list of values from the response documents of an HGNC REST API request.

        Parameters
        ----------
        str_to_extract : str
            Key to extract from the response documents

        Returns
        -------
        list[str] | None
            List of values extracted from the response documents (empty if
            nothing was found), or None if the response is missing or malformed
        """
        try:
            response = self.response
            if response["numFound"] >= 1:
                return [doc[str_to_extract] for doc in response["docs"]]
            return []
        except (KeyError, AttributeError, TypeError) as e:
            # AttributeError: no JSON response was set; KeyError/TypeError:
            # the payload does not have the expected structure.
            logger.error(
                "Could not extract %r from HGNC response for %s",
                str_to_extract,
                self.url_query,
                exc_info=e,
            )
            return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import os | ||
from functools import cache | ||
|
||
from requests.adapters import HTTPAdapter, Retry | ||
from requests_cache import CachedSession | ||
|
||
# Must match nf_rnaseq.config.REQUESTS_CACHE_VAR ("REQUESTS_CACHE"): config.set_request_cache()
# writes the cache path to that environment variable and get_cached_session() reads it
# back through this name, so a different value means the configured path is never used.
REQUEST_CACHE_VAR = "REQUESTS_CACHE"
|
||
|
||
def add_retry_to_session(
    session,
    retries=5,
    backoff_factor=0.3,
    status_forcelist=(429, 500, 501, 502, 503, 504),
):
    """Mount a retrying HTTP adapter on a session.

    Parameters
    ----------
    session: requests.Session
        Session object
    retries: int
        Number of retries
    backoff_factor: float
        Backoff factor
    status_forcelist: tuple[int]
        Tuple of status codes to force a retry

    Returns
    -------
    requests.Session
        The same session object, now with retry logic
    """
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            allowed_methods=False,
        )
    )
    # One adapter instance serves both URL schemes.
    for scheme in ("http://", "https://"):
        session.mount(scheme, retrying_adapter)
    return session
|
||
|
||
@cache
def get_cached_session():
    """Build the cached session, memoised so it is created only once.

    Returns
    -------
    requests.Session
        Cached session object with retry logic; backed by sqlite at the path
        in the REQUEST_CACHE_VAR environment variable when that is set,
        otherwise held in memory
    """
    cache_location = os.environ.get(REQUEST_CACHE_VAR)
    if cache_location is None:
        session = CachedSession(backend="memory")
    else:
        session = CachedSession(cache_location, allowable_codes=(200, 404, 400), backend="sqlite")

    return add_retry_to_session(session)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import logging | ||
import os | ||
from dataclasses import dataclass | ||
|
||
from nf_rnaseq import APIClient | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@dataclass
class UniProt(APIClient):
    """Class to interact with UniProt API.

    Instantiating the class builds the query URL, queries the API, copies the
    top-level keys of the JSON response onto the instance and stores the
    extracted gene names in ``hgnc_gene_name``.
    """

    uniprot_id: str
    """str: UniProt ID."""
    url_base: str = "https://rest.uniprot.org/uniprotkb"
    """str: URL base for UniProtKB API."""
    url_query: str | None = None
    """str: URL query for UniProt API."""
    json: dict | None = None
    """dict: JSON response from UniProt API."""
    text: str | None = None
    """str: Text response from UniProt API (if no json)."""
    hgnc_gene_name: list[str] | None = None
    """list[str]: Gene names extracted from the response."""

    def __post_init__(self):
        self.create_query_url()
        self.query_api()
        self.maybe_set_attr_from_json()
        self.maybe_get_hgnc_gene_name()

    def create_query_url(self):
        """Create URL for UniProt API query."""
        # ".json" is a suffix of the ID, not a path segment of its own:
        # <base>/<id>.json rather than <base>/<id>/.json.
        self.url_query = f"{self.url_base.rstrip('/')}/{self.uniprot_id}.json"

    def maybe_set_attr_from_json(self):
        """If self.json is a dict, copy its top-level keys onto this object.

        ``maybe_get_hgnc_gene_name`` reads ``self.genes``, so the payload is
        presumably expected to carry a ``genes`` key.
        """
        payload = getattr(self, "json", None)
        if isinstance(payload, dict):
            for key, value in payload.items():
                setattr(self, key, value)

    def maybe_set_json_properties(self):
        """Backward-compatible alias for ``maybe_set_attr_from_json``."""
        self.maybe_set_attr_from_json()

    def maybe_get_hgnc_gene_name(self):
        """Get list of gene names from the response and add as hgnc_gene_name attr.

        Returns
        -------
        list[str] | None
            Extracted gene names; None (leaving hgnc_gene_name untouched) if
            the response is missing or does not have the expected structure
        """
        try:
            list_genes = [str(gene["geneName"]["value"]) for gene in self.genes]
        except (KeyError, AttributeError, TypeError) as e:
            # AttributeError: no "genes" attribute was set from the response;
            # KeyError/TypeError: an entry lacks the geneName/value structure.
            logger.error(
                "Could not extract gene names from UniProt response for %s",
                self.url_query,
                exc_info=e,
            )
            return None
        self.hgnc_gene_name = list_genes
        return list_genes