Skip to content

Commit

Permalink
added constants file
Browse files Browse the repository at this point in the history
  • Loading branch information
menamerai committed Feb 21, 2024
1 parent 15646dd commit df48f25
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 8 deletions.
11 changes: 5 additions & 6 deletions scripts/label_all_tokens.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import argparse
import pickle
from importlib.resources import files

from tqdm.auto import tqdm
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval import token_labelling


Expand Down Expand Up @@ -35,14 +35,13 @@ def main():
args = parser.parse_args()

# Access command-line arguments
# Directory to save the results
SAVE_DIR = files("delphi").joinpath("static")

model_name = args.model_name

print("\n", " LABEL ALL TOKENS ".center(50, "="), "\n")
print(f"You chose the model: {model_name}\n")
print(
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{SAVE_DIR}'\n"
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{STATIC_ASSETS_DIR}'\n"
)

# ================ (1) =================
Expand All @@ -60,7 +59,7 @@ def main():

# Save the list of all tokens to a file
filename = "all_tokens_list.txt"
filepath = SAVE_DIR.joinpath(filename)
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "w", encoding="utf-8") as f:
f.write(tokens_str)

Expand Down Expand Up @@ -88,7 +87,7 @@ def main():

# Save the labelled tokens to a file
filename = "labelled_token_ids_dict.pkl"
filepath = SAVE_DIR / filename
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "wb") as f:
pickle.dump(labelled_token_ids_dict, f)

Expand Down
4 changes: 2 additions & 2 deletions scripts/map_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import argparse
import pickle
from importlib.resources import files

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval.token_map import token_map
from delphi.eval.utils import load_validation_dataset

Expand All @@ -19,5 +19,5 @@

mapping = token_map(dataset)

with open(f"{files('delphi').joinpath('static')}/{args.output}", "wb") as f:
with open(f"{STATIC_ASSETS_DIR}/{args.output}", "wb") as f:
pickle.dump(mapping, file=f)
3 changes: 3 additions & 0 deletions src/delphi/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from importlib.resources import files

# Shared handle to the resources of the ``delphi.static`` package, obtained
# from ``importlib.resources.files``. Scripts join file names onto it (or
# interpolate it into a path string) to locate the static asset files they
# write.
# NOTE(review): resolving the "delphi.static" name presumably requires that
# ``delphi.static`` be importable as a package -- confirm it ships with an
# ``__init__.py``.
STATIC_ASSETS_DIR = files("delphi.static")
Empty file added src/delphi/static/__init__.py
Empty file.

0 comments on commit df48f25

Please sign in to comment.