Commit

Merge branch 'prompt_relation_component' of https://github.com/centre-for-humanities-computing/conspiracies into prompt_relation_component

KennethEnevoldsen committed Oct 10, 2023
2 parents acf6efd + c7eba71 commit 432de14
Showing 13 changed files with 1,135 additions and 1,036 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -136,3 +136,7 @@ dmypy.json
# data
data/gpt_predictions_compare.json
data/prompt_outputs_compare_templates.md
paper/extracted_triplets_papers/*
paper/extracted_triplets_tweets/*
paper/*ndjson
paper/fig/*
50 changes: 1 addition & 49 deletions paper/extract_triplets_newspapers.py
@@ -13,42 +13,7 @@
from conspiracies.relationextraction import SpacyRelationExtractor
from conspiracies import wordpiece_length_normalization
from conspiracies.coref import CoreferenceComponent


def load_ndjson(path: str) -> List[dict]:
    """Loads an ndjson file.

    Args:
        path (str): path to the ndjson file

    Returns:
        data (List[dict]): data loaded from the ndjson file
    """
    with open(path, "r") as f:
        data = ndjson.load(f)
    return data


def write_txt(
    path: str,
    data: Union[List[str], List[Tuple[str, str, str]]],
    method="a",
):
    """Writes data to a txt file. Can take either a list of strings or a list of tuples.

    A list of strings is e.g. a list of subjects, predicates, or objects.
    A list of tuples is e.g. a list of triplets.

    Args:
        path (str): path to the txt file
        data (Union[List[str], List[tuple]]): data to write to the file
        method (str, optional): file mode used when opening the file. Defaults to "a" (i.e., append).

    Returns:
        None
    """
    with open(path, method) as f:
        if isinstance(data[0], tuple):
            f.write("\n".join([", ".join(triplet) for triplet in data]))
        elif isinstance(data[0], str):
            f.write("\n".join(data))  # type: ignore
        f.write("\n")
from extract_utils import load_ndjson, write_txt


def build_coref_pipeline():
@@ -59,19 +24,6 @@ def build_coref_pipeline():
    return nlp_coref


def build_relation_extraction_pipeline():
    # Danish spaCy pipeline with sentence segmentation, head extraction,
    # and relation extraction behind a confidence cut-off.
    nlp = spacy.load("da_core_news_sm")
    nlp.add_pipe("sentencizer")
    nlp.add_pipe(
        "heads_extraction",
        config={"normalize_to_entity": True, "normalize_to_noun_chunk": True},
    )
    config = {"confidence_threshold": 2.7, "model_args": {"batch_size": 10}}
    nlp.add_pipe("relation_extractor", config=config)

    return nlp


def yield_one_article(articles: List[dict]) -> Generator:
    """Yields the raw text of one article at a time."""
    for article in articles:
        yield article["text"]
(Diff for the remaining 11 changed files not rendered.)
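Not part of the commit, but useful when skimming the diff: a quick illustration of the two input shapes accepted by the write_txt helper deleted above (now imported from extract_utils). The file names are made up for the example.

```python
from extract_utils import write_txt

# A list of (subject, predicate, object) tuples is written one triplet per
# line, joined with ", "; write_txt appends by default (method="a").
write_txt("example_triplets.txt", [("Folketinget", "vedtog", "loven")])
# appends the line: Folketinget, vedtog, loven

# A plain list of strings is written one string per line.
write_txt("example_subjects.txt", ["Folketinget", "regeringen"])
```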

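Also not from the commit: a minimal end-to-end sketch of how the helpers in this file presumably compose, under explicit assumptions. The input path is hypothetical, build_relation_extraction_pipeline and yield_one_article are taken from the diff above, and the Doc attribute used to read the extracted triplets is a placeholder name that this diff does not confirm.

```python
from extract_utils import load_ndjson, write_txt

# Hypothetical input path; each ndjson record is assumed to have a "text"
# field, as yield_one_article expects.
articles = load_ndjson("data/newspapers.ndjson")

nlp = build_relation_extraction_pipeline()  # as defined in the removed code above

triplets = []
for doc in nlp.pipe(yield_one_article(articles), batch_size=10):
    # The relation_extractor component attaches its predictions to the Doc;
    # "relation_triplets" is a placeholder attribute name, not confirmed here.
    for trip in getattr(doc._, "relation_triplets", []):
        subj, pred, obj = trip  # assumes each triplet unpacks into three parts
        triplets.append((str(subj), str(pred), str(obj)))

if triplets:
    # write_txt joins each tuple as "subj, pred, obj" and appends to the file.
    write_txt("paper/extracted_triplets_papers/triplets.txt", triplets)
```

In the removed builder, confidence_threshold=2.7 presumably filters out low-confidence relation predictions and model_args={"batch_size": 10} controls how the underlying model batches its input; both values are copied from the diff, not tuned here.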
0 comments on commit 432de14
