Skip to content

Commit

Permalink
Renaming and reorganizing lib files into packages with descriptive names
Browse files: browse the repository at this point in the history
  • Loading branch information
KasperFyhn committed Oct 27, 2023
1 parent e97cc33 commit 7f60079
Show file tree
Hide file tree
Showing 43 changed files with 55 additions and 57 deletions.
7 changes: 2 additions & 5 deletions paper/extract_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@

import random
import re
from typing import List, Tuple, Dict
from typing import List, Tuple
from spacy.tokens import Doc
from conspiracies.prompt_relation_extraction.data_classes import (
SpanTriplet,
DocTriplets,
)
from conspiracies.relationextraction.gptprompting.data_classes import DocTriplets


def has_multiple_triplets(spacy_triplets: DocTriplets):
Expand Down
8 changes: 2 additions & 6 deletions paper/extract_triplets_newspapers.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from spacy.tokens import Span
import time
import os
import ndjson
from pathlib import Path
from typing import List, Union, Generator, Tuple
from typing import List, Generator
import spacy
from transformers import AutoTokenizer
import argparse

# Conspiracies
from conspiracies.HeadWordExtractionComponent import contains_ents
from conspiracies.relationextraction import SpacyRelationExtractor
from conspiracies import wordpiece_length_normalization
from conspiracies.coref import CoreferenceComponent
from conspiracies.preproc import wordpiece_length_normalization
from extract_utils import load_ndjson, write_txt


Expand Down
12 changes: 6 additions & 6 deletions paper/extract_triplets_tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,23 @@
import os
import random
from pathlib import Path
from typing import List, Optional, Generator, Union, Dict
import spacy
from typing import List, Optional, Generator, Union

import ndjson
import torch
from spacy.tokens import Doc, Span
import argparse
import sys

from data import load_gold_triplets
import spacy
from extract_examples import extract_examples
from conspiracies.prompt_relation_extraction import (
from conspiracies.relationextraction.gptprompting import (
MarkdownPromptTemplate2,
PromptTemplate,
)
import openai

# Conspiracies
from conspiracies.coref import CoreferenceComponent
from conspiracies.relationextraction import SpacyRelationExtractor

from extract_utils import write_txt, ndjson_gen
from src.concat_split_contexts import (
Expand Down Expand Up @@ -324,6 +323,7 @@ def multi2oie_extraction(
continue
except StopIteration:
print("Stopping iteration because of StopIteration exception")
# TODO: the last iteration happens twice with this logic
run = False
subjects, predicates, objects, triplets = [], [], [], []
for triplet in doc._.relation_triplets:
Expand Down
2 changes: 1 addition & 1 deletion paper/src/ents_heads_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import spacy

from relationextraction import SpacyRelationExtractor # noqa
from conspiracies.HeadWordExtractionComponent import contains_ents
from conspiracies.headwordextraction.headwordextraction_comp import contains_ents


def main():
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ dependencies = [
"numpy>=1.19.5,<1.24.0",
"pandas>=1.1.5,<1.5.0",
"jsonlines>=3.1.0,<3.2.0",
"openai",
"ndjson"
]

[project.license]
Expand Down Expand Up @@ -80,7 +82,7 @@ content-type = "text/markdown"


[project.entry-points.spacy_factories]
"conspiracies/prompt_relation_extraction" = "conspiracies.prompt_relation_extraction.prompt_relation_component:create_prompt_relation_extraction_component"
"conspiracies/relationextraction/gptprompting" = "conspiracies.relationextraction.gptprompting:create_prompt_relation_extraction_component"


[build-system]
Expand Down
16 changes: 1 addition & 15 deletions src/conspiracies/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,2 @@
from .HeadWordExtractionComponent import HeadwordsExtractionComponent # noqa F401
from .HeadWordExtractionComponent import create_headwords_component # noqa F401
from .registry import registry # noqa F401
from .prompt_relation_extraction import ( # noqa F401
PromptTemplate1,
PromptTemplate2,
MarkdownPromptTemplate1,
MarkdownPromptTemplate2,
XMLStylePromptTemplate,
chatGPTPromptTemplate,
SpanTriplet,
StringTriplet,
DocTriplets,
)
from .utils import docs_from_jsonl, docs_to_jsonl # noqa F401
from .wordpiece_length_normalization import wordpiece_length_normalization # noqa F401
from .doc_utils import docs_from_jsonl, docs_to_jsonl # noqa F401
4 changes: 2 additions & 2 deletions src/conspiracies/coref/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .CoreferenceModel import CoreferenceModel # noqa
from .CoreferenceComponent import CoreferenceComponent, create_coref_component # noqa
from .coref_model import CoreferenceModel # noqa
from .coref_comp import CoreferenceComponent, create_coref_component # noqa
File renamed without changes.
File renamed without changes.
5 changes: 4 additions & 1 deletion src/conspiracies/utils.py → src/conspiracies/doc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from spacy.language import Language
from spacy.tokens import Doc

from .prompt_relation_extraction import DocTriplets, SpanTriplet
from conspiracies.relationextraction.gptprompting import (
DocTriplets,
SpanTriplet,
)


def _doc_to_json(doc: Doc):
Expand Down
Empty file.
Empty file.
4 changes: 2 additions & 2 deletions src/conspiracies/relationextraction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .knowledge_triplets import KnowledgeTriplets # noqa F401
from .wrap_model_spacy import SpacyRelationExtractor # noqa F401
from .multioie2.knowledge_triplets import KnowledgeTriplets # noqa F401
from .multioie2.multi2oie_comp import SpacyRelationExtractor # noqa F401
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
chatGPTPromptTemplate,
)

from .prompt_relation_component import ( # noqa F401
from .prompt_relation_comp import ( # noqa F401
create_prompt_relation_extraction_component,
score_open_relations,
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import time
from typing import Any, Dict, List

from ..registry import registry
from conspiracies.registry import registry
from spacy.tokens import Doc

from conspiracies.prompt_relation_extraction import PromptTemplate
from conspiracies.relationextraction.gptprompting.prompt_templates import PromptTemplate


@registry.prompt_apis.register("conspiracies/openai_gpt3_api")
Expand Down Expand Up @@ -77,8 +77,9 @@ def openai_prompt(targets: List[str]) -> List[str]:

openai.api_key = api_key
message_example = prompt_template.create_prompt("test")
assert (
type(message_example) == list and type(message_example[0]) == dict
assert isinstance(message_example, list) and isinstance(
message_example[0],
dict,
), "ChatGPT requires a list of message dicts. Consider using chatGPTPromptTemplate as template." # noqa: E501

responses: List[str] = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from spacy.tokens import Doc
from spacy.training.example import Example

from ..registry import registry
from conspiracies.registry import registry
from .data_classes import DocTriplets, SpanTriplet
from .prompt_apis import create_openai_chatgpt_prompt_api # noqa: F401

Expand Down
2 changes: 2 additions & 0 deletions src/conspiracies/relationextraction/multioie2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .knowledge_triplets import KnowledgeTriplets # noqa F401
from .multi2oie_comp import SpacyRelationExtractor # noqa F401
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from transformers import AutoTokenizer

from .knowledge_triplets import KnowledgeTriplets
from .util import (
from .multioie2_utils import (
install_extension,
match_extraction_spans_to_wp,
wp2tokid,
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .utils import nlp_da # noqa

from conspiracies.coref.CoreferenceModel import CoreferenceModel
from conspiracies.coref.coref_model import CoreferenceModel


def test_CoreferenceModel(nlp_da): # noqa
Expand Down
6 changes: 5 additions & 1 deletion tests/test_data/prompt_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import List

import spacy
from conspiracies import DocTriplets, SpanTriplet, StringTriplet
from conspiracies.relationextraction.gptprompting import (
DocTriplets,
SpanTriplet,
StringTriplet,
)
from spacy.tokens import Doc

test_thread = """@user2: I was hurt. END
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ents_filter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import spacy
from spacy.tokens import Doc, Span

from conspiracies.HeadWordExtractionComponent import contains_ents
from conspiracies.headwordextraction.headwordextraction_comp import contains_ents


def test_ents_filter():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompt_data_classes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
import spacy
from conspiracies.prompt_relation_extraction import (
from conspiracies.relationextraction.gptprompting import (
DocTriplets,
SpanTriplet,
StringTriplet,
Expand Down
7 changes: 6 additions & 1 deletion tests/test_prompt_relation_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import numpy as np
from conspiracies.prompt_relation_extraction import DocTriplets, score_open_relations
from conspiracies.relationextraction.gptprompting import (
DocTriplets,
)
from spacy.training import Example

from conspiracies.relationextraction.gptprompting.prompt_relation_comp import (
score_open_relations,
)
from .utils import docs_with_triplets # noqa F401


Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompt_relationextraction_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
from confection import registry
from conspiracies import SpanTriplet, StringTriplet
from conspiracies.relationextraction.gptprompting import SpanTriplet, StringTriplet
from spacy.language import Language

from .test_prompt_template_parse_prompt import (
Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompt_template_create_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from conspiracies import (
from conspiracies.relationextraction.gptprompting.prompt_templates import (
MarkdownPromptTemplate1,
MarkdownPromptTemplate2,
PromptTemplate1,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompt_template_parse_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from conspiracies import (
from conspiracies.relationextraction.gptprompting.prompt_templates import (
MarkdownPromptTemplate1,
MarkdownPromptTemplate2,
PromptTemplate1,
Expand Down
1 change: 0 additions & 1 deletion tests/test_relationextraction_component.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pytest

from conspiracies.relationextraction import SpacyRelationExtractor # noqa F401

from .utils import nlp_da # noqa F401

Expand Down
3 changes: 2 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import pytest
import spacy
from conspiracies import DocTriplets, SpanTriplet, docs_from_jsonl, docs_to_jsonl
from conspiracies import docs_from_jsonl, docs_to_jsonl
from conspiracies.relationextraction.gptprompting import DocTriplets, SpanTriplet
from spacy.tokens import Doc

from .utils import docs_with_triplets # noqa: F401
Expand Down
4 changes: 3 additions & 1 deletion tests/test_wp_length_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

from .utils import nlp_da # noqa

from conspiracies import wordpiece_length_normalization
from conspiracies.preproc.wordpiece_length_normalization import (
wordpiece_length_normalization,
)
from transformers import AutoTokenizer


Expand Down

0 comments on commit 7f60079

Please sign in to comment.