Skip to content

Commit

Permalink
Added static file folder (#41)
Browse files Browse the repository at this point in the history
* added static file folder

* added constants file
  • Loading branch information
menamerai authored Feb 23, 2024
1 parent 75e68aa commit 4409a9e
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 10 deletions.
17 changes: 8 additions & 9 deletions scripts/label_all_tokens.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import argparse
import pickle
from pathlib import Path

from tqdm.auto import tqdm
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval import token_labelling


Expand Down Expand Up @@ -35,14 +35,13 @@ def main():
args = parser.parse_args()

# Access command-line arguments
# Directory to save the results
SAVE_DIR = Path("src/delphi/eval/")

model_name = args.model_name

print("\n", " LABEL ALL TOKENS ".center(50, "="), "\n")
print(f"You chose the model: {model_name}\n")
print(
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{SAVE_DIR}'\n"
f"The language model will be loaded from Huggingface and its tokenizer used to do two things:\n\t1) Create a list of all tokens in the tokenizer's vocabulary.\n\t2) Label each token with its part of speech, dependency, and named entity recognition tags.\nThe respective results will be saved to files located at: '{STATIC_ASSETS_DIR}'\n"
)

# ================ (1) =================
Expand All @@ -60,8 +59,8 @@ def main():

# Save the list of all tokens to a file
filename = "all_tokens_list.txt"
filepath = SAVE_DIR / filename
with open(filepath, "w", encoding="utf-8") as f:
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "w", encoding="utf-8") as f:
f.write(tokens_str)

print(f"Saved the list of all tokens to:\n\t{filepath}\n")
Expand All @@ -88,16 +87,16 @@ def main():

# Save the labelled tokens to a file
filename = "labelled_token_ids_dict.pkl"
filepath = SAVE_DIR / filename
with open(filepath, "wb") as f:
filepath = STATIC_ASSETS_DIR.joinpath(filename)
with open(f"{filepath}", "wb") as f:
pickle.dump(labelled_token_ids_dict, f)

print(f"Saved the labelled tokens to:\n\t{filepath}\n")

# sanity check that the pickled and the original dict are the same
print("Sanity check ...", end="")
# load pickle
with open(filepath, "rb") as f:
with open(f"{filepath}", "rb") as f:
pickled = pickle.load(f)
# compare
assert labelled_token_ids_dict == pickled
Expand Down
3 changes: 2 additions & 1 deletion scripts/map_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import pickle

from delphi.constants import STATIC_ASSETS_DIR
from delphi.eval.token_map import token_map
from delphi.eval.utils import load_validation_dataset

Expand All @@ -18,5 +19,5 @@

mapping = token_map(dataset)

with open(f"data/{args.output}", "wb") as f:
with open(f"{STATIC_ASSETS_DIR}/{args.output}", "wb") as f:
pickle.dump(mapping, file=f)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
version="0.1",
packages=find_packages(where="src"),
package_dir={"": "src"},
package_data={"delphi.static": ["*"]},
)
3 changes: 3 additions & 0 deletions src/delphi/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from importlib.resources import files

# Location of the `delphi.static` package, resolved through importlib.resources
# (a Traversable, not necessarily a pathlib.Path). Scripts derive the paths of
# static assets such as pickles and token lists from it.
# NOTE(review): the scripts open paths derived from this for writing, which
# presumably assumes the package is installed as a plain directory on disk
# (e.g. an editable install) -- confirm.
STATIC_ASSETS_DIR = files("delphi.static")
Binary file removed src/delphi/eval/labelled_token_ids_dict.pkl
Binary file not shown.
Empty file added src/delphi/static/__init__.py
Empty file.
File renamed without changes.
Binary file added src/delphi/static/labelled_token_ids_dict.pkl
Binary file not shown.
Binary file added src/delphi/static/token_map.pkl
Binary file not shown.

0 comments on commit 4409a9e

Please sign in to comment.