Skip to content

Commit

Permalink
logging
Browse files Browse the repository at this point in the history
  • Loading branch information
jettjaniak committed May 15, 2024
1 parent 411756a commit a135a4f
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
3 changes: 1 addition & 2 deletions scripts/tokenize_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@
print(f"Tokenizing split='{args.split}'...")
split_name = args.split.split("[")[0]
for chunk_idx, ds_chunk in enumerate(ds_chunks_it):
print(f"Processing chunk {chunk_idx}...")
chunk_name = f"{split_name}-{chunk_idx:05}.parquet"
if args.out_dir:
ds_parquet_chunk = Path(args.out_dir) / chunk_name
Expand All @@ -125,4 +124,4 @@
repo_id=args.out_repo_id,
repo_type="dataset",
)
print("Done.")
print(f"Done saving/uploading '{chunk_name}'")
1 change: 0 additions & 1 deletion src/delphi/dataset/tokenization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import io
import itertools
from collections import deque
from collections.abc import Iterator
Expand Down

0 comments on commit a135a4f

Please sign in to comment.