Skip to content

Commit

Permalink
Upstream changes
Browse files Browse the repository at this point in the history
Changes include:

better handling of glibc version allowing environments to be distinguished based on glibc version.
system packages can also be specified explicitly, for example using --sys-pkg in requirements.txt; new @sys_packages and @sys_packages_base decorators also exist.
better handling of building pyproject.toml projects
optimistically build wheels and then check whether they are compatible, as opposed to checking compatibility first (which effectively disabled all cross-platform builds).
Note that full support for glibc requires conda-lock to be at least as recent as the main branch on December 11th 2023
  • Loading branch information
romain-intel authored Dec 13, 2023
1 parent 00c6589 commit b0a90c9
Show file tree
Hide file tree
Showing 13 changed files with 411 additions and 155 deletions.
112 changes: 95 additions & 17 deletions metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from metaflow.exception import CommandException
from metaflow.plugins import DATASTORES
from metaflow.metaflow_config import (
CONDA_SYS_DEPENDENCIES,
DEFAULT_DATASTORE,
DEFAULT_METADATA,
get_pinned_conda_libs,
Expand Down Expand Up @@ -288,7 +289,6 @@ def create(

if pathspec:
env_name = "step:%s" % env_name

alias_type, resolved_alias = resolve_env_alias(env_name)
if alias_type == AliasType.PATHSPEC:
if not pathspec:
Expand Down Expand Up @@ -539,6 +539,7 @@ def resolve(
resolver = EnvsResolver(obj.conda)
new_conda_deps = {} # type: Dict[str, str]
new_pypi_deps = {} # type: Dict[str, str]
new_sys_deps = {} # type: Dict[str, str]
new_np_conda_deps = {} # type: Dict[str, str]
new_sources = {} # type: Dict[str, List[str]]
new_extras = {} # type: Dict[str, List[str]]
Expand Down Expand Up @@ -588,11 +589,21 @@ def resolve(
parsed_python_version = None
if yml_file:
parsed_python_version = _parse_yml_file(
yml_file, new_extras, new_sources, new_conda_deps, new_pypi_deps
yml_file,
new_extras,
new_sources,
new_conda_deps,
new_pypi_deps,
new_sys_deps,
)
if req_file:
parsed_python_version = _parse_req_file(
req_file, new_extras, new_sources, new_pypi_deps, new_np_conda_deps
req_file,
new_extras,
new_sources,
new_pypi_deps,
new_np_conda_deps,
new_sys_deps,
)

if base_env_python:
Expand Down Expand Up @@ -669,12 +680,18 @@ def resolve(
"Environment for '%s' is not available on architecture '%s'"
% (using_str, cur_arch)
)

sys_pkgs = get_sys_packages(
cast(Conda, obj.conda).virtual_packages, cur_arch, False
)

deps["sys"] = [
"%s==%s" % (name, ver)
for name, ver in get_sys_packages(
cast(Conda, obj.conda).virtual_packages, cur_arch, False, False
).items()
"%s==%s" % (name, ver) if ver else name
for name, ver in new_sys_deps.items()
]
for p, v in sys_pkgs.items():
    # Inject the system packages the user did not explicitly request; anything
    # present in new_sys_deps was already added to deps["sys"] above.
    if p not in new_sys_deps:
        # Emit "name==version" only when a version is known. The test must be
        # on the version (v): the name (p) is always non-empty, so testing it
        # would produce a dangling "name==" for an empty version.
        deps["sys"].append("%s==%s" % (p, v) if v else p)

# We add the default sources as well -- those sources go last and we convert
# to simple channels if we can
Expand Down Expand Up @@ -965,6 +982,7 @@ def _parse_req_file(
sources: Dict[str, List[str]],
deps: Dict[str, str],
np_deps: Dict[str, str],
sys_deps: Dict[str, str],
) -> Optional[str]:
python_version = None
with open(file_name, mode="r", encoding="utf-8") as f:
Expand Down Expand Up @@ -1001,6 +1019,26 @@ def _parse_req_file(
np_deps[s[0].replace(" ", "")] = ""
else:
np_deps[s[0].replace(" ", "")] = s[1].replace(" ", "").lstrip("=")
elif first_word == "--sys-pkg":
# Special extension to allow the specification of system dependencies
# (currently __cuda and __glibc)
split_res = REQ_SPLIT_LINE.match(splits[1])
if split_res is None:
raise InvalidEnvironmentException(
"Could not parse system package '%s'" % splits[1]
)
s = split_res.groups()
pkg_name = s[0].replace(" ", "")
if pkg_name not in CONDA_SYS_DEPENDENCIES:
raise InvalidEnvironmentException(
"System package '%s' not allowed. Values allowed are: %s"
% (pkg_name, str(CONDA_SYS_DEPENDENCIES))
)
if s[1] is None:
raise InvalidEnvironmentException(
"System package '%s' requires a version" % pkg_name
)
sys_deps[pkg_name] = s[1].replace(" ", "").lstrip("=")
elif first_word.startswith("#"):
continue
elif first_word.startswith("-"):
Expand Down Expand Up @@ -1038,6 +1076,7 @@ def _parse_yml_file(
sources: Dict[str, List[str]],
conda_deps: Dict[str, str],
pypi_deps: Dict[str, str],
sys_deps: Dict[str, str],
) -> Optional[str]:
python_version = None # type: Optional[str]
with open(file_name, mode="r", encoding="utf-8") as f:
Expand All @@ -1056,24 +1095,41 @@ def _parse_yml_file(
mode = "pypi_sources"
else:
mode = "ignore"
elif mode == "sources" or mode == "pypi_sources":
elif mode and mode.endswith("sources"):
line = line.lstrip(" -").rstrip()
sources.setdefault("conda" if mode == "sources" else "pypi", []).append(
line
)
elif mode == "deps" or mode == "pypi_deps":
elif mode and mode.endswith("deps"):
line = line.lstrip(" -").rstrip()
if line == "pip:":
mode = "pypi_deps"
elif line == "sys:":
mode = "sys_deps"
else:
to_update = conda_deps if mode == "deps" else pypi_deps
to_update = (
conda_deps
if mode == "deps"
else pypi_deps
if mode == "pypi_deps"
else sys_deps
)
splits = YML_SPLIT_LINE.split(line.replace(" ", ""), maxsplit=1)
if len(splits) == 1:
if line != "python":
to_update[line] = ""
if splits[0] != "python":
if mode == "sys_deps":
raise InvalidEnvironmentException(
"System package '%s' requires a version" % splits[0]
)
to_update[splits[0]] = ""
else:
dep_name, dep_operator, dep_version = splits
if dep_operator not in ("=", "=="):
if mode == "sys_deps":
raise InvalidEnvironmentException(
"System package '%s' requires a specific version not '%s'"
% (splits[0], dep_operator + dep_version)
)
dep_version = dep_operator + dep_version
if dep_name == "python":
if dep_version:
Expand All @@ -1084,6 +1140,28 @@ def _parse_yml_file(
)
python_version = dep_version
else:
if (
dep_name.startswith("/")
or dep_name.startswith("git+")
or dep_name.startswith("https://")
or dep_name.startswith("ssh://")
):
# Handle the case where only the URL is specified
# without a package name
depname_and_maybe_tag = dep_name.split("/")[-1]
depname = depname_and_maybe_tag.split("@")[0]
if depname.endswith(".git"):
depname = depname[:-4]
dep_name = "%s@%s" % (depname, dep_name)

if (
mode == "sys_deps"
and dep_name not in CONDA_SYS_DEPENDENCIES
):
raise InvalidEnvironmentException(
"System package '%s' not allowed. Values allowed are: %s"
% (dep_name, str(CONDA_SYS_DEPENDENCIES))
)
to_update[dep_name] = dep_version

return python_version
Expand All @@ -1107,21 +1185,21 @@ def _parse_yml_file(
# def list(obj, local_only, archs, python, deps, channels):
# req_id = req_id_from_spec(python, deps, channels)
# my_arch = arch_id()

#
# obj.echo(
# "Listing environments for python: %s, dependencies: %s and channels: %s "
# "(requirement hash: %s)"
# % (python, str(parse_deps(deps)), str(parse_channels(channels)), req_id)
# )

#
# # Get all the environments that we know about
# envs = []
# for arch in archs.split(","):
# envs.extend(obj.conda.environments(req_id, arch, local_only))

#
# # Get the local environments so we can say if an environment is present
# local_instances = local_instances_for_req_id(obj.conda, req_id)

#
# if obj.quiet:
# obj.echo_always(
# "# req_id full_id arch resolved_on resolved_by resolved_on packages local_instances"
Expand All @@ -1136,7 +1214,7 @@ def _parse_yml_file(
# for env_id, env in envs:
# if env_id.full_id == "_default":
# default_env_id[env_id.arch] = env.env_id

#
# # Print out the environments
# for env_id, env in envs:
# if env_id.full_id == "_default":
Expand Down
21 changes: 17 additions & 4 deletions metaflow_extensions/netflix_ext/config/mfextinit_netflixext.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,23 @@

CONDA_REMOTE_COMMANDS = ("batch", "kubernetes")

# Allows you to specify the virtual packages to install when running on a remote node
# Currently only used to specify the __cuda dependency for GPU remote nodes
# As an example {"__cuda": "11.8=0"}
CONDA_REMOTE_PACKAGES = {}
# List of system dependencies that are allowed to indicate the system to build on
CONDA_SYS_DEPENDENCIES = ("__cuda", "__glibc")

# Default system dependencies when not specified. Note that the `linux-64` defaults are
# used as default when building on the remote platform.
# As an example, you can set it to:
# CONDA_SYS_DEFAULT_PACKAGES = {
# "linux-64": {"__glibc": os.environ.get("CONDA_OVERRIDE_GLIBC", "2.27")},
# }
CONDA_SYS_DEFAULT_PACKAGES = {}

# Packages to add when building for GPU machines (ie: if there is a GPU resource
# requirement). As an example you can set this to:
# CONDA_SYS_DEFAULT_GPU_PACKAGES = {
# "__cuda": os.environ.get("CONDA_OVERRIDE_CUDA", "11.8")
# }
CONDA_SYS_DEFAULT_GPU_PACKAGES = {}


def _validate_remote_latest(name, value):
Expand Down
4 changes: 0 additions & 4 deletions metaflow_extensions/netflix_ext/plugins/conda/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,10 @@ def default_pypi_sources(self) -> List[str]:

@property
def virtual_packages(self) -> Dict[str, str]:
# __glibc seems to be causing issues so don't set for now. We will see if we can
# re-add it later
if "virtual_pkgs" in self._info:
return {
name: "%s=%s" % (version, build)
for name, version, build in self._info["virtual_pkgs"]
if name != "__glibc"
}
elif "virtual packages" in self._info:
# Micromamba outputs them differently for some reason
Expand All @@ -238,7 +235,6 @@ def virtual_packages(self) -> Dict[str, str]:
lambda x: x.split("=", 1),
cast(List[str], self._info["virtual packages"]),
)
if name != "__glibc"
}
else:
raise CondaException("Cannot extract virtual package information")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,3 +382,19 @@ def from_pathspec(self) -> Optional[str]:
@property
def is_fetch_at_exec(self) -> Optional[bool]:
return self.attributes["fetch_at_exec"]


class SysPackagesRequirementDecoratorMixin(StepRequirementMixin):
    """Requirement mixin that exposes the ``packages`` attribute as system packages.

    The ``packages`` attribute defaults to ``None``; when set it is treated as
    a ``Dict[str, str]`` and reported under the ``"sys"`` category.
    """

    defaults = {
        "packages": None,
        **StepRequirementMixin.defaults,
    }

    @property
    def packages(self) -> Dict[str, Dict[str, str]]:
        """Return a copy of the ``packages`` attribute under the ``"sys"`` key.

        An unset attribute (the default ``None``) yields an empty ``"sys"``
        mapping instead of raising ``AttributeError`` on ``None.items()``.
        """
        sys_packages = cast(Optional[Dict[str, str]], self.attributes["packages"])
        if not sys_packages:
            return {"sys": {}}
        # Copy so callers cannot mutate the decorator's own attribute dict.
        return {"sys": dict(sys_packages.items())}
20 changes: 10 additions & 10 deletions metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,17 +573,17 @@ def extract_merged_reqs_for_step(
# Add the system requirements and default channels.
# The default channels go into the computation of the req ID so it is important
# to have them at this time.
sys_reqs = final_req.copy()
sys_reqs.packages = {
"sys": get_sys_packages(
conda.virtual_packages,
step_arch,
step_is_remote,
step_gpu_requested,
)
}

final_req.merge_update(sys_reqs)
sys_pkgs = get_sys_packages(
conda.virtual_packages, step_arch, step_gpu_requested
)

# The user can specify whatever they want but we inject things they don't
# specify
final_req_sys = final_req.packages.setdefault("sys", {})
for p, v in sys_pkgs.items():
if p not in final_req_sys:
final_req_sys[p] = v

# Update sources -- here the order is important so we explicitly set it
# This code will put:
Expand Down
Loading

0 comments on commit b0a90c9

Please sign in to comment.