Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing new OpenAI functions as a potential SPIRES augmentation. #135

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/ontogpt/engines/spires2_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
Rewrite of SPIRES to use the OpenAI function-calling feature.

See https://github.com/monarch-initiative/ontogpt/issues/132
"""
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import openai
from linkml_runtime.linkml_model import ClassDefinition

from ontogpt.engines.knowledge_engine import (
ANNOTATION_KEY_PROMPT,
ANNOTATION_KEY_PROMPT_SKIP,
EXAMPLE,
FIELD,
OBJECT,
KnowledgeEngine,
chunk_text,
)
from ontogpt.templates.core import ExtractionResult

# Directory that contains this module file.
this_path = Path(__file__).parent


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


@dataclass
class SPIRES2Engine(KnowledgeEngine):
    """Knowledge extractor that uses OpenAI function calling.

    The schema of the template's Python class is offered to the chat model as
    the parameters of a single function; the arguments the model supplies for
    that function are parsed as JSON and used to build the extracted object.
    """

    # Name of the OpenAI chat model to query.
    model: str = "gpt-3.5-turbo-0613"

    def extract_from_text(
        self, text: str, cls: Optional[ClassDefinition] = None, object: OBJECT = None
    ) -> ExtractionResult:
        """
        Extract annotations from the given text.

        :param text: text to extract from
        :param cls: schema class to extract; when omitted, the single class
            flagged as ``tree_root`` in the schema is used
        :param object: optional stub object (not used by this implementation)
        :return: result holding the input text, the object built from the
            model's function-call arguments, and the engine's named entities
        :raises ValueError: if ``cls`` is omitted and the schema does not have
            exactly one ``tree_root`` class, or if the arguments returned by
            the model are not valid JSON
        """
        if cls is None:
            roots = [c for c in self.schemaview.all_classes().values() if c.tree_root]
            if len(roots) != 1:
                # Same exception type as the bare unpacking it replaces, but
                # with a message that says what is wrong with the schema.
                raise ValueError(
                    "Expected exactly one tree_root class in the schema, "
                    f"found {len(roots)}: {[c.name for c in roots]}"
                )
            cls = roots[0]
        py_cls = self.template_module.__dict__[cls.name]
        schema = py_cls.schema()
        function_name = "extract_data"
        functions = [
            {
                "name": function_name,
                # TODO: use cls.description; "paper" is a hard-coded placeholder
                "description": "paper",
                "parameters": schema,
            },
        ]
        # TODO: introspect schema to customize system content
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that extracts summaries "
                    "from text as JSON for a database."
                ),
            },
            {
                "role": "user",
                "content": "Extract a summary from the following text: " + text,
            },
        ]
        # Only serialise the function spec when it will actually be logged.
        if logger.isEnabledFor(logging.INFO):
            logger.info("%s", json.dumps(functions, indent=2))
        # TODO: abstract this so as not hardcoded
        response = openai.ChatCompletion.create(
            model=self.model,
            functions=functions,
            # Ask for this specific function so the reply carries a
            # function_call instead of free-text content.
            function_call={"name": function_name},
            messages=messages,
        )
        logger.info("Response: %s", response)
        arguments = response.choices[0]["message"]["function_call"]["arguments"]
        extracted_object = py_cls(**json.loads(arguments))
        return ExtractionResult(
            input_text=text,
            extracted_object=extracted_object,
            named_entities=self.named_entities,
        )
16 changes: 16 additions & 0 deletions src/ontogpt/models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ models:
- OpenAI
is_default: true

# OpenAI GPT-3.5 Turbo entry; "gpt-3.5-turbo-0613" is the default model
# string of SPIRES2Engine.
# NOTE(review): both aliases carry the "0613" suffix but the entry name does
# not - confirm the name should not mention it as well.
- name: MODEL_GPT_3_5_TURBO
alternative_names:
- "gpt-3.5-turbo-0613"
- "openai-gpt-3.5-turbo-0613"
provider: OpenAI
creators:
- OpenAI

# OpenAI GPT-3.5 Turbo 16k entry.
- name: MODEL_GPT_3_5_TURBO_16K
alternative_names:
- "gpt-3.5-turbo-16k"
- "openai-gpt-3.5-turbo-16k"
provider: OpenAI
creators:
- OpenAI

- name: MODEL_TEXT_DAVINCI_003
alternative_names:
- "text-davinci-003"
Expand Down
151 changes: 151 additions & 0 deletions tests/integration/test_knowledge_engines/test_spires2_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Core tests."""
import unittest

import yaml
from linkml_runtime.linkml_model import ClassDefinitionName
from oaklib import get_implementation_from_shorthand

from ontogpt.clients.pubmed_client import PubmedClient
from ontogpt.engines import create_engine
from ontogpt.engines.knowledge_engine import chunk_text
from ontogpt.engines.spires2_engine import SPIRES2Engine
from ontogpt.engines.spires_engine import SPIRESEngine
from ontogpt.io.yaml_wrapper import dump_minimal_yaml
from ontogpt.templates.biological_process import BiologicalProcess
from ontogpt.templates.gocam import (
ExtractionResult,
Gene,
GeneLocation,
GeneOrganismRelationship,
GoCamAnnotations,
)

# Template identifier handed to SPIRES2Engine in TestCore.setUp.
TEMPLATE = "gocam.GoCamAnnotations"

# Title, abstract text and keywords of a paper; passed as the input text to
# extract_from_text in TestCore.test_extract.
PAPER = """
Title: β-Catenin Is Required for the cGAS/STING Signaling Pathway but
Antagonized by the Herpes Simplex Virus 1 US3 Protein.
Text:
The cGAS/STING-mediated DNA-sensing signaling pathway is crucial
for interferon (IFN) production and host antiviral
responses. Herpes simplex virus I (HSV-1) is a DNA virus that has
evolved multiple strategies to evade host immune responses. Here,
we demonstrate that the highly conserved β-catenin protein in the
Wnt signaling pathway is an important factor to enhance the
transcription of type I interferon (IFN-I) in the cGAS/STING
signaling pathway, and the production of IFN-I mediated by
β-catenin was antagonized by HSV-1 US3 protein via its kinase
activity. Infection by US3-deficienct HSV-1 and its kinase-dead
variants failed to downregulate IFN-I and IFN-stimulated
gene (ISG) production induced by β-catenin. Consistent with this,
absence of β-catenin enhanced the replication of US3-deficienct
HSV-1, but not wild-type HSV-1. The underlying mechanism was the
interaction of US3 with β-catenin and its hyperphosphorylation of
β-catenin at Thr556 to block its nuclear translocation. For the
first time, HSV-1 US3 has been shown to inhibit IFN-I production
through hyperphosphorylation of β-catenin and to subvert host
antiviral innate immunity.IMPORTANCE Although increasing evidence
has demonstrated that HSV-1 subverts host immune responses and
establishes lifelong latent infection, the molecular mechanisms
by which HSV-1 interrupts antiviral innate immunity, especially
the cGAS/STING-mediated cellular DNA-sensing signaling pathway,
have not been fully explored. Here, we show that β-catenin
promotes cGAS/STING-mediated activation of the IFN pathway, which
is important for cellular innate immune responses and intrinsic
resistance to DNA virus infection. The protein kinase US3
antagonizes the production of IFN by targeting β-catenin via its
kinase activity. The findings in this study reveal a novel
mechanism for HSV-1 to evade host antiviral immunity and add new
knowledge to help in understanding the interaction between the
host and HSV-1 infection.

Keywords: HSV-1; US3; type I IFN; β-catenin.
"""

# Sample extraction output in "field: value; value" text form, with paired
# values written as "a:b".
# NOTE(review): not referenced by any test visible in this file.
EXAMPLE_RESULTS = """
genes: β-Catenin; cGAS; STING; US3; IFN; ISG
organisms: Herpes Simplex Virus I (HSV-1);
gene_organisms: β-Catenin:host; cGAS:host; STING:host; US3:HSV-1; IFN:host; ISG:host
activities: production of type I IFN; transcription of type I IFN; replication of HSV-1;
nuclear translocation of β-catenin.
gene_functions: β-catenin:enhance the transcription of type I IFN; US3:antagonize
the production of IFN; β-catenin:block nuclear translocation.
cellular_processes: cGAS/STING-mediated DNA-sensing signaling; activation of IFN pathway
pathways: IFN pathway; Wnt signalling pathway
gene_gene_interactions: US3:β-catenin
gene_localizations: US3:host; β-catenin:host
"""

# Alternative sample output with the same field names, where paired values
# are written as "a - b" instead of "a:b".
# NOTE(review): not referenced by any test visible in this file.
EXAMPLE_RESULTS_ALT = """
genes: β-Catenin; cGAS; STING; US3; IFN; ISG
organisms: Herpes Simplex Virus I (HSV-1);
gene_organisms: β-Catenin - Human; cGAS - Human; STING - Human;
US3 - Human; IFN - Human; ISG - Human.
activities: Transcription; Production; Downregulation; Replication; Nuclear Translocation
gene_functions: β-Catenin - Enhances Transcription; US3 - Antagonizes Production;
US3 - Downregulates IFN-I; US3 - Blocks Nuclear Translocation; β-Catenin - Enhances Production
cellular_processes: DNA-sensing; Interferon Production; Antiviral Innate Immunity;
Host Innate Immune Responses; Interaction with Host; Evade Host Antiviral Immunity
pathways: cGAS/STING-mediated DNA-sensing; Wnt Signaling; IFN pathway
gene_gene_interactions: US3 - β-Catenin; β-Catenin - US3
gene_localizations: β-Catenin - Nuclear; US3 - Hyperphosphorylation
"""

# BiologicalProcess fixture for "autophagosome assembly".
# The trailing backslashes inside the description continue the string literal
# onto the next source line, so no newline is stored at those points.
# NOTE(review): not referenced by any test visible in this file.
TEST_PROCESS = BiologicalProcess(
label="autophagosome assembly",
description="The formation of a double membrane-bounded structure, the autophagosome,\
that occurs when a specialized membrane sac, called the isolation membrane,\
starts to enclose a portion of the cytoplasm",
subclass_of="GO:0022607",
outputs=["GO:0005776"],
)

# Gene names plus (gene, organism) pairs; the values match the "genes" and
# "gene_organisms" lines of EXAMPLE_RESULTS.
# NOTE(review): presumably the expected structured parse of that text, but it
# is not referenced by any test visible in this file - confirm.
DIRECT_PARSE = {
"genes": ["β-Catenin", "cGAS", "STING", "US3", "IFN", "ISG"],
"gene_organisms": [
("β-Catenin", "host"),
("cGAS", "host"),
("STING", "host"),
("US3", "HSV-1"),
("IFN", "host"),
("ISG", "host"),
],
}


class TestCore(unittest.TestCase):
    """Test annotation with the SPIRES2 engine on the GO-CAM template."""

    def setUp(self) -> None:
        """Create the SPIRES2 engine under test for TEMPLATE."""
        # NOTE: an earlier, disabled setup line used
        # create_engine(TEMPLATE, SPIRESEngine); this suite constructs
        # SPIRES2Engine directly instead.
        self.ke = SPIRES2Engine(template=TEMPLATE)

    def test_setup(self) -> None:
        """Tests template and module is loaded."""
        ke = self.ke
        pyc = ke.template_pyclass
        print(pyc)
        # Build the template class from plain values; the nested dict is
        # expected to come back as an object with gene/organism attributes.
        obj = pyc(genes=["a"], gene_organisms=[{"gene": "a", "organism": "b"}])
        print(yaml.dump(obj.dict()))
        self.assertEqual(obj.genes, ["a"])
        self.assertEqual(obj.gene_organisms[0].gene, "a")
        self.assertEqual(obj.gene_organisms[0].organism, "b")
        # The "gene" slot of GeneOrganismRelationship has range Gene and no
        # multivalued flag set.
        slot = ke.schemaview.induced_slot("gene", "GeneOrganismRelationship")
        self.assertEqual(slot.name, "gene")
        self.assertIsNone(slot.multivalued)
        self.assertEqual(slot.range, "Gene")

    def test_extract(self) -> None:
        """Tests end to end knowledge extraction."""
        ke = self.ke
        ann = ke.extract_from_text(PAPER)
        # Output is kept to help inspect the model's response when this
        # integration test is run by hand.
        print(f"RESULTS={ann}")
        print(yaml.dump(ann.dict()))
        results = ann.extracted_object
        print(results)
        # Use a unittest assertion so a wrong type is reported as a test
        # failure rather than as an error raised from the test body.
        self.assertIsInstance(
            results,
            GoCamAnnotations,
            f"Expected GoCamAnnotations, got {type(results)}",
        )
        self.assertIn("HGNC:2514", results.genes)

Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ def test_setup(self):
self.assertEqual(obj.genes, ["a"])
self.assertEqual(obj.gene_organisms[0].gene, "a")
self.assertEqual(obj.gene_organisms[0].organism, "b")
slot = ke.schemaview.induced_slot("genes", "GeneOrganismRelationship")
self.assertEqual(slot.name, "genes")
self.assertEqual(slot.multivalued, True)
slot = ke.schemaview.induced_slot("gene", "GeneOrganismRelationship")
self.assertEqual(slot.name, "gene")
self.assertIsNone(slot.multivalued)
self.assertEqual(slot.range, "Gene")

def test_chunk_text(self):
Expand Down