Skip to content

Commit

Permalink
Merge tag 'itrb-deployment-20241017' into production
Browse files (browse the repository at this point in the history)
  • Loading branch information
edeutsch committed Nov 8, 2024
2 parents 1c75259 + 2de2fe4 commit 439104d
Show file tree
Hide file tree
Showing 80 changed files with 11,055 additions and 10,083 deletions.
29 changes: 14 additions & 15 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,24 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v2


- name: Set up config files
run: |
scp [email protected]:config_secrets.json /mnt/config/config_secrets.json
echo "http://localhost:5008/api/rtxkg2/v1.4" > /mnt/config/kg2_url_override.txt
# - name: Append code dir to GITHUB_PATH # Causing RTXConfiguration import error
# run: echo $GITHUB_PATH
# - name: Download databases as needed
# run: python3 $HOME/RTX/code/ARAX/ARAXQuery/ARAX_database_manager.py --mnt --skip-if-exists --remove_unused
- name: Download databases into CICD instance
continue-on-error: true
run: |
cd /home/ubuntu/RTX
git checkout ${GITHUB_REF#refs/heads/}
git pull
/home/ubuntu/venv3.9/bin/pip3 install -r requirements.txt
/home/ubuntu/venv3.9/bin/python3 code/ARAX/ARAXQuery/ARAX_database_manager.py --mnt --skip-if-exists --remove_unused
git checkout master
- name: Stop any running containers
continue-on-error: true
Expand All @@ -63,25 +70,17 @@ jobs:
run: sudo docker build --no-cache=true --rm -t rtx:test DockerBuild/ -f DockerBuild/Merged-Dockerfile

- name: Run docker container
run: sudo docker run -d -it --name rtx_test -v /mnt/data/orangeboard/databases:/mnt/data/orangeboard/databases -v /mnt/config/kg2_url_override.txt:/mnt/data/orangeboard/production/RTX/code/kg2_url_override.txt -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/production/RTX/code/config_secrets.json -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/kg2/RTX/code/config_secrets.json rtx:test

# - name: Add config.domain
# run: |
# sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX/code && echo \'RTXTeam/RTX Github actions test suite\' > config.domain'"
# sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/kg2/RTX/code && echo \'RTXTeam/RTX Github actions test suite\' > config.domain'"
run: sudo docker run -d -it --name rtx_test -v /mnt/data/orangeboard/databases:/mnt/data/orangeboard/databases -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/production/RTX/code/config_secrets.json rtx:test

- name: Create symlinks
run: |
sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"
sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/kg2/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"
run: sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"

- name: Build KP info cache
run: sudo docker exec rtx_test bash -c "cd /mnt/data/orangeboard/production/RTX/code/ARAX/ARAXQuery/Expand && python3 kp_info_cacher.py"

- name: Start services
run: |
sudo docker exec rtx_test service apache2 start
sudo docker exec rtx_test service RTX_OpenAPI_kg2 start
sudo docker exec rtx_test service RTX_OpenAPI_production start
sudo docker exec rtx_test service RTX_Complete start
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ code/ARAX/ARAXQuery/Infer/data/xCRG_data/*.pt
code/ARAX/ARAXQuery/Infer/data/xCRG_data/*.npz

code/UI/OpenAPI/python-flask-server/openapi_server/openapi/openapi.json
code/UI/OpenAPI/specifications/export/KG2/*/openapi.json
111 changes: 45 additions & 66 deletions ISSUE_TEMPLATES/kg2rollout.md

Large diffs are not rendered by default.

100 changes: 74 additions & 26 deletions code/ARAX/ARAXQuery/ARAX_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs)
import copy

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from Path_Finder.converter.paths_to_response_converter_factory import paths_to_response_converter_factory
from Path_Finder.converter.EdgeExtractorFromPloverDB import EdgeExtractorFromPloverDB
from Path_Finder.converter.SuperNodeConverter import SuperNodeConverter
from Path_Finder.converter.Names import Names
from Path_Finder.BidirectionalPathFinder import BidirectionalPathFinder
from Path_Finder.repo.NGDSortedNeighborsRepo import NGDSortedNeighborsRepo
from Path_Finder.repo.PloverDBRepo import PloverDBRepo

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../UI/OpenAPI/python-flask-server/")
from openapi_server.models.q_edge import QEdge
from openapi_server.models.q_node import QNode
from openapi_server.models.knowledge_graph import KnowledgeGraph

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../NodeSynonymizer/")
from node_synonymizer import NodeSynonymizer

Expand Down Expand Up @@ -51,12 +52,11 @@ def __init__(self):
"type": "list",
"description": "List with just two qnode keys to connect. example: [n1, n2]"
}
self.result_as_info = {
self.node_category_constraint_info = {
"is_required": False,
"examples": ['betweenness_centrality', 'all_in_one', 'one_by_one'],
"examples": ['biolink:Disease', 'biolink:Gene', 'biolink:ChemicalEntity'],
"type": "string",
"description": "It determines how to receive the results. For instance, one_by_one means that it will "
"return each path in one subgraph. The default value is betweenness_centrality"
"description": "This constraint will display paths that only pass through the user-specified category."
}

# command descriptions
Expand All @@ -78,7 +78,7 @@ def __init__(self):
"parameters": {
"max_path_length": self.max_path_length_info,
"qnode_keys": self.qnode_keys_info,
"result_as": self.result_as_info
"node_category_constraint": self.node_category_constraint_info
}
}
}
Expand Down Expand Up @@ -148,6 +148,8 @@ def check_params(self, allowable_parameters):
continue
elif any([type(x) == int for x in allowable_parameters[key]]):
continue
elif any([type(x) == str for x in allowable_parameters[key]]):
continue
else: # otherwise, it's really not an allowable parameter
self.response.warning(
f"Supplied value {item} is not permitted. In action {allowable_parameters['action']}, allowable values to {key} are: {list(allowable_parameters[key])}")
Expand Down Expand Up @@ -217,15 +219,15 @@ def __connect_nodes(self, describe=False):
if message and parameters and hasattr(message, 'query_graph') and hasattr(message.query_graph, 'nodes'):
allowable_parameters = {'action': {'connect_nodes'},
'max_path_length': {int()},
'result_as': {'betweenness_centrality', 'all_in_one', 'one_by_one'},
'node_category_constraint': {str()},
'qnode_keys': set(self.message.query_graph.nodes.keys())
}
else:
allowable_parameters = {'action': {'connect_nodes'},
'max_path_length': {
'a maximum path length to use to connect qnodes. Defaults to 2.'},
'result_as': {
'How to show results?'},
'A maximum path length to use to connect qnodes. Defaults to 2.'},
'node_category_constraint': {
'All paths must include at least one node from this category constraint.'},
'qnode_keys': {'A list with just two query keys to connect'}
}

Expand Down Expand Up @@ -253,8 +255,8 @@ def __connect_nodes(self, describe=False):

if 'max_path_length' not in self.parameters:
self.parameters['max_path_length'] = 2
if 'result_as' not in self.parameters:
self.parameters['result_as'] = 'betweenness_centrality'
if 'node_category_constraint' not in self.parameters:
self.parameters['node_category_constraint'] = ''
# convert path length to int if it isn't already
if type(self.parameters['max_path_length']) != int:
self.parameters['max_path_length'] = int(self.parameters['max_path_length'])
Expand All @@ -281,43 +283,89 @@ def __connect_nodes(self, describe=False):
self.response.error(f"Need to have two nodes to find paths between them. Number of nodes: {len(nodes)}")

path_finder = BidirectionalPathFinder(
NGDSortedNeighborsRepo(
PloverDBRepo(plover_url=RTXConfiguration().plover_url)
)
"NGDSortedNeighborsRepo",
self.response
)
qnode_1_id = self.parameters['qnode_keys'][0]
qnode_2_id = self.parameters['qnode_keys'][1]
synonymizer = NodeSynonymizer()
node_1_id = synonymizer.get_canonical_curies(curies=nodes[qnode_1_id].ids[0])[nodes[qnode_1_id].ids[0]]['preferred_curie']
node_2_id = synonymizer.get_canonical_curies(curies=nodes[qnode_2_id].ids[0])[nodes[qnode_2_id].ids[0]]['preferred_curie']
try:
node_1_id = synonymizer.get_canonical_curies(curies=nodes[qnode_1_id].ids[0])[nodes[qnode_1_id].ids[0]][
'preferred_curie']
except Exception as e:
self.response.error(f"PathFinder could not get canonical CURIE for the node: {qnode_1_id}."
f" You need to provide id (CURIE) or name for this node"
f" Error message is: {e}")
return self.response

try:
node_2_id = synonymizer.get_canonical_curies(curies=nodes[qnode_2_id].ids[0])[nodes[qnode_2_id].ids[0]][
'preferred_curie']
except Exception as e:
self.response.error(f"PathFinder could not get canonical CURIE for the node: {qnode_2_id}"
f" You need to provide id (CURIE) or name for this node."
f" Error message is: {e}")
return self.response

paths = path_finder.find_all_paths(node_1_id, node_2_id, hops_numbers=self.parameters['max_path_length'])

self.response.debug(f"PathFinder found {len(paths)} paths")

if len(paths) == 0:
self.response.warning(f"Could not connect the nodes {qnode_1_id} and {qnode_2_id} "
f"with a max path length of {self.parameters['max_path_length']}.")
return self.response

q_edge_name = 'q_edge_path_finder'
self.response.envelope.message.query_graph.edges[q_edge_name] = QEdge(
qnode_mid_id = "qnode_mid_id"
self.response.envelope.message.query_graph.nodes[qnode_mid_id] = QNode(
ids=[],
categories=None,
is_set=False,
set_interpretation='BATCH',
set_id=None,
constraints=[],
option_group_id=None
)

q_edge_src_dest = 'q_edge_src_dest'
self.response.envelope.message.query_graph.edges[q_edge_src_dest] = QEdge(
object=qnode_1_id,
subject=qnode_2_id
)
q_edge_src_mid = 'q_edge_src_mid'
self.response.envelope.message.query_graph.edges[q_edge_src_mid] = QEdge(
object=qnode_1_id,
subject=qnode_mid_id
)
q_edge_mid_dest = 'q_edge_mid_dest'
self.response.envelope.message.query_graph.edges[q_edge_mid_dest] = QEdge(
object=qnode_mid_id,
subject=qnode_2_id
)

names = Names(
q_edge_name=q_edge_name,
q_src_dest_edge_name=q_edge_src_dest,
q_src_mid_edge_name=q_edge_src_mid,
q_mid_dest_edge_name=q_edge_mid_dest,
result_name="result",
auxiliary_graph_name="aux",
kg_edge_name="kg_edge"
kg_src_dest_edge_name="kg_src_dest_edge",
kg_src_mid_edge_name="kg_src_mid_edge",
kg_mid_dest_edge_name="kg_mid_dest_edge",
)
edge_extractor = EdgeExtractorFromPloverDB(
RTXConfiguration().plover_url
)
paths_to_response_converter_factory(
self.parameters['result_as'],
SuperNodeConverter(
paths,
node_1_id,
node_2_id,
qnode_1_id,
qnode_2_id,
names
qnode_mid_id,
names,
edge_extractor,
self.parameters['node_category_constraint']
).convert(self.response)

if mode != "RTXKG2" and not hasattr(self.response, "original_query_graph"):
Expand Down
8 changes: 8 additions & 0 deletions code/ARAX/ARAXQuery/ARAX_database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def __init__(self, allow_downloads=False):
self.local_paths = {
'cohd_database': f"{cohd_filepath}{os.path.sep}{self.RTXConfig.cohd_database_path.split('/')[-1]}",
'curie_to_pmids': f"{ngd_filepath}{os.path.sep}{self.RTXConfig.curie_to_pmids_path.split('/')[-1]}",
'curie_ngd': f"{ngd_filepath}{os.path.sep}{self.RTXConfig.curie_ngd_path.split('/')[-1]}",
'node_synonymizer': f"{synonymizer_filepath}{os.path.sep}{self.RTXConfig.node_synonymizer_path.split('/')[-1]}",
'kg2c_sqlite': f"{kg2c_filepath}{os.path.sep}{self.RTXConfig.kg2c_sqlite_path.split('/')[-1]}",
'kg2c_meta_kg': f"{kg2c_meta_kg_filepath}{os.path.sep}{self.RTXConfig.kg2c_meta_kg_path.split('/')[-1]}",
Expand All @@ -95,6 +96,7 @@ def __init__(self, allow_downloads=False):
self.database_subpaths = {
'cohd_database': self.get_database_subpath(self.RTXConfig.cohd_database_path),
'curie_to_pmids': self.get_database_subpath(self.RTXConfig.curie_to_pmids_path),
'curie_ngd': self.get_database_subpath(self.RTXConfig.curie_ngd_path),
'node_synonymizer': self.get_database_subpath(self.RTXConfig.node_synonymizer_path),
'kg2c_sqlite': self.get_database_subpath(self.RTXConfig.kg2c_sqlite_path),
'kg2c_meta_kg': self.get_database_subpath(self.RTXConfig.kg2c_meta_kg_path),
Expand All @@ -110,6 +112,7 @@ def __init__(self, allow_downloads=False):
self.remote_locations = {
'cohd_database': self.get_remote_location('cohd_database'),
'curie_to_pmids': self.get_remote_location('curie_to_pmids'),
'curie_ngd': self.get_remote_location('curie_ngd'),
'node_synonymizer': self.get_remote_location('node_synonymizer'),
'kg2c_sqlite': self.get_remote_location('kg2c_sqlite'),
'kg2c_meta_kg': self.get_remote_location('kg2c_meta_kg'),
Expand All @@ -125,6 +128,7 @@ def __init__(self, allow_downloads=False):
self.docker_central_paths = {
'cohd_database': self.get_docker_path('cohd_database'),
'curie_to_pmids': self.get_docker_path('curie_to_pmids'),
'curie_ngd': self.get_docker_path('curie_ngd'),
'node_synonymizer': self.get_docker_path('node_synonymizer'),
'kg2c_sqlite': self.get_docker_path('kg2c_sqlite'),
'kg2c_meta_kg': self.get_docker_path('kg2c_meta_kg'),
Expand All @@ -146,6 +150,10 @@ def __init__(self, allow_downloads=False):
'path': self.local_paths['curie_to_pmids'],
'version': self.RTXConfig.curie_to_pmids_version
},
'curie_ngd': {
'path': self.local_paths['curie_ngd'],
'version': self.RTXConfig.curie_ngd_version
},
'node_synonymizer': {
'path': self.local_paths['node_synonymizer'],
'version': self.RTXConfig.node_synonymizer_version
Expand Down
Loading

0 comments on commit 439104d

Please sign in to comment.