Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Identify loopback more reliably
Browse files Browse the repository at this point in the history
pierre.delaunay committed Aug 6, 2024
1 parent 3d8e9f5 commit 917196d
Showing 5 changed files with 46 additions and 40 deletions.
6 changes: 3 additions & 3 deletions milabench/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This file is generated, do not modify"""

__tag__ = "v0.1.0-29-g56152dc0"
__commit__ = "56152dc0f5938bfdf261798ad8a8df4e42ac3045"
__date__ = "2024-08-01 22:22:13 -0400"
__tag__ = "v0.1.0-30-g3d8e9f5b"
__commit__ = "3d8e9f5b25206b42fac1c2030a0f56a4b6dac114"
__date__ = "2024-08-05 15:22:11 -0400"
44 changes: 23 additions & 21 deletions milabench/cli/slurm.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@
import subprocess
from coleo import tooled

from ..system import get_gpu_capacity
from ..system import get_gpu_capacity, is_loopback


def gethostname(host):
@@ -15,41 +15,32 @@ def gethostname(host):
return host


def getip(ip):
# This does get a good IP for everything except the local node

def resolve_hostname(ip):
hostname, _, iplist = socket.gethostbyaddr(ip)
if len(iplist) > 1:
print("Multiple IP found")

for ip in iplist:
if is_loopback(ip):
return hostname, True

from milabench.system import get_remote_ip

resolved = iplist[0]
if resolved.startswith("127.0"):
ips = get_remote_ip()
for ip in ips:
if "." in ip and not ip.startswith("127.0"):
return ip

return resolved

return resolved
return hostname, False


@tooled
def cli_slurm_system():
"""Generate a system file based on slurm environment variables"""

node_list = expand_node_list(os.getenv("SLURM_JOB_NODELIST", ""))


def make_node(i, ip):
hostname, local = resolve_hostname(ip)

node = {
"name": ip,
"ip": getip(ip),
"ip": hostname,
"hostname": gethostname(ip),
"user": getpass.getuser(),
"main": i == 0,
"main": local,
"sshport": 22,
}

@@ -59,9 +50,20 @@ def make_node(i, ip):
return node

# nvidia-smi --query-gpu=memory.total --format=csv

nodes = [make_node(i, ip) for i, ip in enumerate(node_list)]

# ensure there is a main
# either it is the local node or first node
for node in nodes:
if node.get("main", False):
break
else:
nodes[0]["main"] = True

system = {
"arch": "cuda",
"nodes": [make_node(i, ip) for i, ip in enumerate(node_list)],
"nodes": nodes,
}

capacity = get_gpu_capacity()
19 changes: 19 additions & 0 deletions milabench/system.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,8 @@
from dataclasses import dataclass, field
import sys
from contextlib import contextmanager
import ipaddress

import psutil
import yaml
from voir.instruments.gpu import get_gpu_info
@@ -249,6 +251,21 @@ def get_remote_ip():
return set(result)





def is_loopback(address: str) -> bool:
    """Return True when *address* is a literal loopback IP address.

    Both IPv4 (``127.0.0.0/8``) and IPv6 (``::1``) literals are recognised,
    as well as IPv4-mapped IPv6 literals such as ``::ffff:127.0.0.1``.

    Args:
        address: textual IP address to test. Hostnames (e.g. ``localhost``)
            are not resolved here; they are treated as invalid addresses.

    Returns:
        True if the address parses and designates a loopback address,
        False if it is not loopback or cannot be parsed as an IP address.
    """
    try:
        ip = ipaddress.ip_address(address)
    except ValueError:
        # Not an IP literal (hostname, empty string, garbage): not loopback.
        return False

    # IPv6Address.is_loopback does not look through IPv4-mapped addresses on
    # every Python version, so unwrap the embedded IPv4 address explicitly.
    # IPv4Address objects have no ``ipv4_mapped`` attribute, hence getattr.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        return mapped.is_loopback

    return ip.is_loopback



def _resolve_ip(ip):
hostname = ip
aliaslist = []
@@ -327,7 +344,9 @@ def resolve_addresses(nodes):
or (hostname in ("localhost", socket.gethostname(), "127.0.0.1"))
or (socket.gethostname().startswith(hostname))
or len(ip_list.intersection(ipaddrlist)) > 0
or any([is_loopback(ip) for ip in ipaddrlist])
)

# cn-g005 cn-g005.server.mila.quebec
# print(hostname, socket.gethostname())
node["local"] = is_local
2 changes: 1 addition & 1 deletion milabench/utils.py
Original file line number Diff line number Diff line change
@@ -231,7 +231,7 @@ def select_nodes(nodes, n):
ranked = []

for node in nodes:
if node["main"]:
if node.get("main", False):
ranked.insert(0, node)
else:
ranked.append(node)
15 changes: 0 additions & 15 deletions scripts/article/run_cuda_dev.sh
Original file line number Diff line number Diff line change
@@ -95,21 +95,6 @@ else
fi


(
. $MILABENCH_WORDIR/env/bin/activate
pip show setuptools
pip show pip
pip install git+https://github.com/Delaunay/voir.git@patch-8
)

(
. $BENCHMARK_VENV/bin/activate
pip show setuptools
pip show pip
pip install git+https://github.com/Delaunay/voir.git@patch-8
)


if [ "$MILABENCH_PREPARE" -eq 0 ]; then
cd $MILABENCH_WORDIR

0 comments on commit 917196d

Please sign in to comment.