Commit 7f9e69e

fix: typo

Frankstein73 committed Jul 22, 2024
1 parent 469c061 commit 7f9e69e
Showing 2 changed files with 5 additions and 6 deletions.
6 changes: 3 additions & 3 deletions src/lm_saes/activation/activation_store.py
@@ -24,7 +24,7 @@ def __init__(self, act_source: ActivationSource, cfg: ActivationStoreConfig):
         self.act_source = act_source
         self.buffer_size = cfg.n_tokens_in_buffer
         self.device = cfg.device
-        self.ddp_size = cfg.ddp_size # 1 8
+        self.ddp_size = cfg.ddp_size
         self.tp_size = cfg.tp_size
         self._store: Dict[str, torch.Tensor] = {}
         self._all_gather_buffer: Dict[str, torch.Tensor] = {}
@@ -111,9 +111,9 @@ def next(self, batch_size) -> Dict[str, torch.Tensor] | None:

     def next_tokens(self, batch_size: int) -> torch.Tensor | None:
         if self.tp_size > 1:
-            # TODO
+            # TODO: only get next token from the root process
             next_tokens = self.act_source.next_tokens(batch_size)
-            # funcol.broadcast(next_tokens, src=0, group=self.device_mesh["tp"])
+            # funcol.broadcast does not work and we don't know why
             dist.broadcast(next_tokens, src=0)
             return next_tokens
         else:
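A note for readers of this diff: torch.distributed.broadcast is an in-place collective, so every rank must already hold a tensor of the agreed shape and dtype, and after the call all ranks hold rank 0's data. One plausible reason the commented-out funcol call "does not work" is that functional collectives return a new tensor rather than mutating their argument, so a bare call discards the result. Below is a minimal sketch of the root-only fetch the TODO describes; it assumes an initialized process group, and load_tokens_on_root is a hypothetical stand-in for act_source.next_tokens:

    import torch
    import torch.distributed as dist

    def load_tokens_on_root(batch_size: int, ctx_len: int) -> torch.Tensor:
        # Hypothetical stand-in for self.act_source.next_tokens(batch_size).
        return torch.randint(0, 50257, (batch_size, ctx_len), dtype=torch.long, device="cuda")

    def next_tokens_via_broadcast(batch_size: int, ctx_len: int) -> torch.Tensor:
        # Every rank allocates a buffer of the agreed shape and dtype;
        # dist.broadcast overwrites it in place with rank 0's contents.
        tokens = torch.empty(batch_size, ctx_len, dtype=torch.long, device="cuda")
        if dist.get_rank() == 0:
            tokens = load_tokens_on_root(batch_size, ctx_len)
        dist.broadcast(tokens, src=0)  # collective: every rank must reach this call
        return tokens

As committed, every rank still calls act_source.next_tokens and the broadcast then overwrites the non-root results, which appears to be the redundancy the TODO wants to remove.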
5 changes: 2 additions & 3 deletions src/lm_saes/sae_training.py
@@ -85,10 +85,9 @@ def train_sae(
         sae.parallelize_plan = plan

     elif cfg.sae.ddp_size > 1:
+        # parallelize_module does not work with DDP
+        _ = DDP(sae, device_mesh=sae.device_mesh["ddp"])
-        # sae = parallelize_module(
-        #     sae, device_mesh=sae.device_mesh["ddp"], parallelize_plan={}
-        # )


     optimizer = Adam(sae.parameters(), lr=cfg.lr, betas=cfg.betas)

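A note on the DDP call above: recent PyTorch releases (roughly 2.2 onward) let DistributedDataParallel accept a 1-D DeviceMesh via device_mesh= in place of a process group, which is why only the "ddp" slice of the 2-D mesh is passed. A minimal sketch of that wiring, with the mesh shape and the toy module as placeholder assumptions:

    import torch.nn as nn
    from torch.distributed.device_mesh import init_device_mesh
    from torch.nn.parallel import DistributedDataParallel as DDP

    # Placeholder 2-D mesh over 8 GPUs: 2 data-parallel replicas x 4 tensor-parallel ranks.
    mesh = init_device_mesh("cuda", (2, 4), mesh_dim_names=("ddp", "tp"))

    model = nn.Linear(1024, 4096).cuda()  # toy stand-in for the SAE module

    # Passing only the 1-D "ddp" sub-mesh keeps gradient all-reduce within
    # the replica dimension, not across tensor-parallel shards.
    ddp_model = DDP(model, device_mesh=mesh["ddp"])

The commit discards the wrapper (_ = DDP(...)), presumably relying on construction-time side effects such as the initial broadcast of parameters and buffers from rank 0, while the training loop keeps using the bare sae.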
