Skip to content

Commit

Permalink
fix language embed
Browse files Browse the repository at this point in the history
  • Loading branch information
kpertsch committed Aug 27, 2023
1 parent cd8d634 commit d38f6d3
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions example_dataset/example_dataset_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from example_dataset.conversion_utils import MultiThreadedDatasetBuilder


_embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")


def _generate_examples(paths) -> Iterator[Tuple[str, Any]]:
"""Yields episodes for list of data paths."""
# the line below needs to be *inside* generate_examples so that each worker creates its own model
# creating one shared model outside this function would cause a deadlock
_embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

def _parse_example(episode_path):
# load raw data --> this should change for your dataset
Expand Down Expand Up @@ -141,6 +141,7 @@ def _info(self) -> tfds.core.DatasetInfo:

def _split_paths(self):
"""Define filepaths for data splits."""
print(self.info)
return {
'train': glob.glob('data/train/episode_*.npy'),
'val': glob.glob('data/val/episode_*.npy')
Expand Down

0 comments on commit d38f6d3

Please sign in to comment.