Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorganization of code #44

Merged
merged 9 commits into [base branch] from [head branch]
Oct 30, 2023
12 changes: 8 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ venv.bak/
.spyderproject
.spyproject

# PyCharm
.idea

# Rope project settings
.ropeproject

Expand All @@ -128,15 +131,16 @@ dmypy.json
# Pyre type checker
.pyre/

# ruff
.ruff_cache

# .vscode
.vscode/*
# MacOS
.DS_Store

# data
data/gpt_predictions_compare.json
data/prompt_outputs_compare_templates.md
paper/extracted_triplets_papers/*
paper/extracted_triplets_tweets/*
data/
*extracted_triplets*
paper/*ndjson
paper/fig/*
38 changes: 21 additions & 17 deletions docs/tutorials/overview.ipynb

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions paper/extract_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@

import random
import re
from typing import List, Tuple, Dict
from typing import List, Tuple
from spacy.tokens import Doc
from conspiracies.prompt_relation_extraction.data_classes import (
SpanTriplet,
from conspiracies.docprocessing.relationextraction.gptprompting.data_classes import (
DocTriplets,
)

Expand Down
8 changes: 2 additions & 6 deletions paper/extract_triplets_newspapers.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from spacy.tokens import Span
import time
import os
import ndjson
from pathlib import Path
from typing import List, Union, Generator, Tuple
from typing import List, Generator
import spacy
from transformers import AutoTokenizer
import argparse

# Conspiracies
from conspiracies.HeadWordExtractionComponent import contains_ents
from conspiracies.relationextraction import SpacyRelationExtractor
from conspiracies import wordpiece_length_normalization
from conspiracies.coref import CoreferenceComponent
from conspiracies.preproc import wordpiece_length_normalization
KasperFyhn marked this conversation as resolved.
Show resolved Hide resolved
from extract_utils import load_ndjson, write_txt


Expand Down
10 changes: 4 additions & 6 deletions paper/extract_triplets_tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,21 @@
import os
import random
from pathlib import Path
from typing import List, Optional, Generator, Union, Dict
import spacy
from typing import List, Optional, Generator, Union

from spacy.tokens import Doc, Span
import argparse
import sys

from data import load_gold_triplets
import spacy
from extract_examples import extract_examples
from conspiracies.prompt_relation_extraction import (
from conspiracies.docprocessing.relationextraction import (
MarkdownPromptTemplate2,
PromptTemplate,
)
import openai

# Conspiracies
from conspiracies.coref import CoreferenceComponent
from conspiracies.relationextraction import SpacyRelationExtractor

from extract_utils import write_txt, ndjson_gen
from src.concat_split_contexts import (
Expand Down Expand Up @@ -324,6 +321,7 @@ def multi2oie_extraction(
continue
except StopIteration:
print("Stopping iteration because of StopIteration exception")
# TODO: the last iteration happens twice with this logic
run = False
subjects, predicates, objects, triplets = [], [], [], []
for triplet in doc._.relation_triplets:
Expand Down
Loading
Loading