Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added static file folder #41

Merged
merged 2 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions scripts/label_all_tokens.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import argparse
import pickle
from pathlib import Path

from tqdm.auto import tqdm
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval import token_labelling


Expand Down Expand Up @@ -35,14 +35,13 @@ def main():
args = parser.parse_args()

# Access command-line arguments
# Directory to save the results
SAVE_DIR = Path("src/delphi/eval/")

model_name = args.model_name

print("\n", " LABEL ALL TOKENS ".center(50, "="), "\n")
print(f"You chose the model: {model_name}\n")
print(
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{SAVE_DIR}'\n"
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{STATIC_ASSETS_DIR}'\n"
)

# ================ (1) =================
Expand All @@ -60,8 +59,8 @@ def main():

# Save the list of all tokens to a file
filename = "all_tokens_list.txt"
filepath = SAVE_DIR / filename
with open(filepath, "w", encoding="utf-8") as f:
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "w", encoding="utf-8") as f:
f.write(tokens_str)

print(f"Saved the list of all tokens to:\n\t{filepath}\n")
Expand All @@ -88,16 +87,16 @@ def main():

# Save the labelled tokens to a file
filename = "labelled_token_ids_dict.pkl"
filepath = SAVE_DIR / filename
with open(filepath, "wb") as f:
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "wb") as f:
pickle.dump(labelled_token_ids_dict, f)

print(f"Saved the labelled tokens to:\n\t{filepath}\n")

# Sanity check that the pickled and the original dict are the same
print("Sanity check ...", end="")
# load pickle
with open(filepath, "rb") as f:
with open(f"{filepath}", "rb") as f:
pickled = pickle.load(f)
# compare
assert labelled_token_ids_dict == pickled
Expand Down
3 changes: 2 additions & 1 deletion scripts/map_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import pickle

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval.token_map import token_map
from delphi.eval.utils import load_validation_dataset

Expand All @@ -18,5 +19,5 @@

mapping = token_map(dataset)

with open(f"data/{args.output}", "wb") as f:
with open(f"{STATIC_ASSETS_DIR}/{args.output}", "wb") as f:
pickle.dump(mapping, file=f)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
version="0.1",
packages=find_packages(where="src"),
package_dir={"": "src"},
package_data={"delphi.static": ["*"]},
jaidhyani marked this conversation as resolved.
Show resolved Hide resolved
)
3 changes: 3 additions & 0 deletions src/delphi/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from importlib.resources import files

# Location of the `delphi.static` package, as returned by
# importlib.resources.files(). Scripts join file names onto this
# (e.g. via .joinpath(...)) to locate the static asset files they read and write.
STATIC_ASSETS_DIR = files("delphi.static")
Binary file removed src/delphi/eval/labelled_token_ids_dict.pkl
Binary file not shown.
Empty file added src/delphi/static/__init__.py
Empty file.
Binary file added src/delphi/static/labelled_token_ids_dict.pkl
Binary file not shown.
Binary file added src/delphi/static/token_map.pkl
Binary file not shown.
Loading