Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clp-package: Deduplicate and clean up code in CLI job-launcher scripts. #473

Merged
merged 14 commits into from
Jul 6, 2024
76 changes: 74 additions & 2 deletions components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import socket
import subprocess
import typing
import uuid
from enum import auto
from typing import List, Tuple

import yaml
from clp_py_utils.clp_config import (
Expand All @@ -24,6 +27,7 @@
read_yaml_config_file,
validate_path_could_be_dir,
)
from strenum import KebabCaseStrEnum

# CONSTANTS
# Paths
Expand All @@ -38,6 +42,12 @@ class DockerMountType(enum.IntEnum):
BIND = 0


class JobType(KebabCaseStrEnum):
    """
    The kinds of jobs that a container can be launched for.

    Members are interpolated directly into container names (see `generate_container_name`), so
    their string form is part of the container-naming scheme.
    """

    # NOTE(review): with `KebabCaseStrEnum`, `auto()` presumably yields the kebab-cased member
    # name (e.g., "compression") -- confirm against the strenum documentation.
    COMPRESSION = auto()
    DECOMPRESSION = auto()
    SEARCH = auto()


class DockerMount:
def __init__(
self,
Expand Down Expand Up @@ -91,6 +101,14 @@ def get_clp_home():
return clp_home.resolve()


def generate_container_name(job_type: JobType) -> str:
    """
    Generates a name of the form `clp-<job_type>-<suffix>` for a job's container.

    :param job_type:
    :return: A container name for the given job type. The suffix is the last four hexadecimal
    characters of a random UUID, so names are very likely, but not guaranteed, to be unique.
    """
    return f"clp-{job_type}-{str(uuid.uuid4())[-4:]}"


def check_dependencies():
try:
subprocess.run(
Expand Down Expand Up @@ -177,12 +195,15 @@ def is_path_already_mounted(
return host_path_relative_to_mounted_root == container_path_relative_to_mounted_root


def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
def generate_container_config(
clp_config: CLPConfig, clp_home: pathlib.Path
) -> Tuple[CLPConfig, CLPDockerMounts]:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
"""
Copies the given config and sets up mounts mapping the relevant host paths into the container

:param clp_config:
:param clp_home:
:return: The container config and the mounts.
"""
container_clp_config = clp_config.copy(deep=True)

Expand Down Expand Up @@ -241,6 +262,57 @@ def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
return container_clp_config, docker_mounts


def dump_container_config(
    container_clp_config: CLPConfig, clp_config: CLPConfig, container_name: str
) -> Tuple[pathlib.Path, pathlib.Path]:
    """
    Writes the given config to the logs directory so that it's accessible in the container.

    :param container_clp_config: The config to write.
    :param clp_config: The corresponding config on the host (used to determine the logs directory).
    :param container_name: Used to derive the config file's name.
    :return: A tuple of the config file's path in the container and its path on the host, in that
    order.
    """
    # The file is dot-prefixed and named after the container.
    container_config_filename = f".{container_name}-config.yml"

    # Same filename under each config's logs directory: one path is valid on the host, the other
    # inside the container.
    config_file_path_on_host = clp_config.logs_directory / container_config_filename
    config_file_path_on_container = container_clp_config.logs_directory / container_config_filename

    # The file is written through the host path only.
    with open(config_file_path_on_host, "w") as f:
        yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

    return config_file_path_on_container, config_file_path_on_host


def generate_container_start_cmd(
    container_name: str,
    container_mounts: List[typing.Optional[DockerMount]],
    container_image: str,
) -> List[str]:
    """
    Generates the command to start a container with the given mounts and name.

    :param container_name:
    :param container_mounts: The mounts to add to the container. Falsy entries (e.g., `None` for a
    mount that isn't needed) are skipped.
    :param container_image:
    :return: The command, as an argument list. The image is the last element, so callers can
    append the command to run inside the container.
    """
    clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"

    # Keep each flag on the same line as its value; the formatter is re-enabled right after the
    # list so the rest of the file is still auto-formatted.
    # fmt: off
    container_start_cmd = [
        "docker", "run",
        "-i",
        "--rm",
        "--network", "host",
        "-w", str(CONTAINER_CLP_HOME),
        "-e", f"PYTHONPATH={clp_site_packages_dir}",
        "-u", f"{os.getuid()}:{os.getgid()}",
        "--name", container_name,
        "--log-driver", "local",
    ]
    # fmt: on

    for mount in container_mounts:
        if mount:
            container_start_cmd.extend(("--mount", str(mount)))
    container_start_cmd.append(container_image)

    return container_start_cmd


def validate_config_key_existence(config, key):
try:
value = get_config_value(config, key)
Expand All @@ -249,7 +321,7 @@ def validate_config_key_existence(config, key):
return value


def validate_and_load_config_file(
def load_config_file(
config_file_path: pathlib.Path, default_config_file_path: pathlib.Path, clp_home: pathlib.Path
):
if config_file_path.exists():
Expand Down
53 changes: 17 additions & 36 deletions components/clp-package-utils/clp_package_utils/scripts/compress.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
CONTAINER_INPUT_LOGS_ROOT_DIR,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
validate_and_load_config_file,
JobType,
load_config_file,
validate_and_load_db_credentials_file,
)

Expand Down Expand Up @@ -57,51 +57,32 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_logs_dir()

# Validate and load necessary credentials
validate_and_load_db_credentials_file(clp_config, clp_home, False)
except:
logger.exception("Failed to load config.")
return -1

container_name = f"clp-compressor-{str(uuid.uuid4())[-4:]}"
container_name = generate_container_name(JobType.COMPRESSION)

container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
container_clp_config, clp_config, container_name
)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on
necessary_mounts = [mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))
container_start_cmd.append(clp_config.execution_container)
necessary_mounts = [mounts.clp_home, mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
compress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.compress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(generated_config_path_on_container),
"--remove-path-prefix", str(CONTAINER_INPUT_LOGS_ROOT_DIR),
]
# fmt: on
Expand Down Expand Up @@ -140,7 +121,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
generated_config_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml
from clp_py_utils.clp_config import CLPConfig

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
DockerMount,
DockerMountType,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
validate_and_load_config_file,
JobType,
load_config_file,
validate_and_load_db_credentials_file,
validate_path_could_be_dir,
)
Expand Down Expand Up @@ -53,11 +54,10 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_logs_dir()

# Validate and load necessary credentials
validate_and_load_db_credentials_file(clp_config, clp_home, False)
except:
logger.exception("Failed to load config.")
Expand All @@ -76,33 +76,16 @@ def main(argv):
return -1
extraction_dir.mkdir(exist_ok=True)

container_name = f"clp-decompressor-{str(uuid.uuid4())[-4:]}"

container_name = generate_container_name(JobType.DECOMPRESSION)
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
container_clp_config, clp_config, container_name
)
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved

# Set up mounts
container_extraction_dir = pathlib.Path("/") / "mnt" / "extraction-dir"
necessary_mounts = [
mounts.clp_home,
mounts.data_dir,
mounts.logs_dir,
mounts.archives_output_dir,
Expand All @@ -120,18 +103,15 @@ def main(argv):
container_paths_to_decompress_file_path,
)
)
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))

container_start_cmd.append(clp_config.execution_container)
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
decompress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.decompress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(generated_config_path_on_container),
"-d", str(container_extraction_dir),
]
# fmt: on
Expand All @@ -145,7 +125,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
generated_config_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
get_clp_home,
validate_and_load_config_file,
load_config_file,
)

# Setup logging
Expand Down Expand Up @@ -170,9 +170,7 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_input_logs_dir()
clp_config.validate_logs_dir()
except:
Expand Down
Loading