Skip to content

Commit

Permalink
Tweaks
Browse files (browse the repository at this point in the history)
  • Loading branch information
pierre.delaunay committed Aug 12, 2024
1 parent aed0290 commit a4edcfd
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 15 deletions.
7 changes: 0 additions & 7 deletions benchmarks/flops/activator

This file was deleted.

1 change: 1 addition & 0 deletions config/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ dinov2-giant-gpus:
train.num_workers=10: true

dinov2-giant-nodes:
enabled: false
tags:
- multinode
max_duration: 3600
Expand Down
2 changes: 1 addition & 1 deletion milabench/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def __init__(self, pack: pack.BasePackage, **kwargs):
super().__init__(pack, **kwargs)

def _argv(self, **_) -> List:
return [f"{self.pack.dirs.code / 'activator'}", f"{self.pack.dirs.venv}", f"{self.pack.dirs.cache}"]
return [activator_script(), f"{self.pack.dirs.venv}", f"{self.pack.dirs.cache}"]



Expand Down
20 changes: 13 additions & 7 deletions milabench/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ def option(name, etype, default=None):
system = system_global.get()
if system:
options = system.get("options", dict())
else:
warn_no_config()

frags = name.split(".")
env_name = as_environment_variable(name)
Expand Down Expand Up @@ -394,14 +392,20 @@ def gethostname(host):


def resolve_hostname(ip):
hostname, _, iplist = socket.gethostbyaddr(ip)
try:
hostname, _, iplist = socket.gethostbyaddr(ip)

for ip in iplist:
if is_loopback(ip):
return hostname, True
for ip in iplist:
if is_loopback(ip):
return hostname, True

return hostname, False
return hostname, False

except:
if offline:
return ip, False

raise

def resolve_node_address(node):
hostname, local = resolve_hostname(node["ip"])
Expand All @@ -410,6 +414,8 @@ def resolve_node_address(node):
node["local"] = local

if local:
# `gethostbyaddr` returns `cn-d003` but we want `cn-d003.server.mila.quebec`
# else torchrun does not recognize the main node
node["hostname"] = socket.gethostname()

return local
Expand Down

0 comments on commit a4edcfd

Please sign in to comment.