Skip to content

Commit

Permalink
feat: Make it possible to instantiate MuminDataset without a bearer token
Browse files Browse the repository at this point in the history
  • Loading branch information
saattrupdan committed Apr 14, 2022
1 parent 06d3b78 commit 304b6b8
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).


## [Unreleased]
### Changed
- Now allows instantiation of `MuminDataset` without any Twitter bearer token,
whether as an explicit argument or as an environment variable. This is useful
for pre-compiled datasets. If the dataset still needs to be compiled, a
`RuntimeError` is raised when calling the `compile` method.


## [v1.7.0] - 2022-03-24
### Added
- Now allows setting `twitter_bearer_token=None` in the constructor of
Expand Down
40 changes: 35 additions & 5 deletions mumin/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tqdm.auto import tqdm
from shutil import rmtree
from dotenv import load_dotenv
import warnings

from .twitter import Twitter
from .dgl import build_dgl_dataset
Expand Down Expand Up @@ -167,10 +168,22 @@ def __init__(self,
# Load the bearer token if it is not provided
if twitter_bearer_token is None:
twitter_bearer_token = os.environ.get('TWITTER_API_KEY')
if twitter_bearer_token is None:
raise ValueError('Twitter bearer token not provided.')

self._twitter = Twitter(twitter_bearer_token=twitter_bearer_token)
# If no bearer token is available, issue a warning and set the
# `_twitter` attribute to None. Otherwise, set the `_twitter` attribute
# to a `Twitter` instance.
if twitter_bearer_token is None:
warnings.warn('Twitter bearer token not provided, so '
'rehydration can not be performed. This is fine '
'if you are using a pre-compiled MuMiN, but if '
'this is not the case then you will need to '
'either specify the `twitter_bearer_token` '
'argument or set the environment variable '
'`TWITTER_API_KEY`.')
self._twitter = None
else:
self._twitter = Twitter(twitter_bearer_token=twitter_bearer_token)

self._extractor = DataExtractor(
include_replies=include_replies,
include_articles=include_articles,
Expand Down Expand Up @@ -208,6 +221,11 @@ def compile(self, overwrite: bool = False):
overwrite (bool, optional):
Whether the dataset directory should be overwritten, in case it
already exists. Defaults to False.
Raises:
RuntimeError:
If the dataset needs to be compiled and a Twitter bearer token
has not been provided.
'''
self._download(overwrite=overwrite)
self._load_dataset()
Expand All @@ -218,6 +236,14 @@ def compile(self, overwrite: bool = False):
# Only compile the dataset if it has not already been compiled
if not compiled:

# If the bearer token is not available then raise an error
if self._twitter is None:
raise RuntimeError('Twitter bearer token not provided. You '
'need to either specify the '
'`twitter_bearer_token` argument in the '
'`MuminDataset` constructor or set the '
'environment variable `TWITTER_API_KEY`.')

# Shrink dataset to the correct size
self._shrink_dataset()

Expand Down Expand Up @@ -967,12 +993,16 @@ def __repr__(self) -> str:
Returns:
str: The representation of the dataset.
'''
bearer_token_available = (self._twitter is not None)
if len(self.nodes) == 0 or len(self.rels) == 0:
return f'MuminDataset(size={self.size}, compiled={self.compiled})'
return (f'MuminDataset(size={self.size}, '
f'compiled={self.compiled}, '
f'bearer_token_available={bearer_token_available})')
else:
num_nodes = sum([len(df) for df in self.nodes.values()])
num_rels = sum([len(df) for df in self.rels.values()])
return (f'MuminDataset(num_nodes={num_nodes:,}, '
f'num_relations={num_rels:,}, '
f'size=\'{self.size}\', '
f'compiled={self.compiled})')
f'compiled={self.compiled}, '
f'bearer_token_available={bearer_token_available})')

0 comments on commit 304b6b8

Please sign in to comment.