Skip to content

Commit

Permalink
clp-package: Deduplicate and clean up code in CLI job-launcher script…
Browse files Browse the repository at this point in the history
…s. (#473)
  • Loading branch information
haiqi96 authored Jul 6, 2024
1 parent 4bb223e commit 986a957
Show file tree
Hide file tree
Showing 10 changed files with 310 additions and 243 deletions.
76 changes: 74 additions & 2 deletions components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import socket
import subprocess
import typing
import uuid
from enum import auto
from typing import List, Tuple

import yaml
from clp_py_utils.clp_config import (
Expand All @@ -24,6 +27,7 @@
read_yaml_config_file,
validate_path_could_be_dir,
)
from strenum import KebabCaseStrEnum

# CONSTANTS
# Paths
Expand All @@ -38,6 +42,12 @@ class DockerMountType(enum.IntEnum):
BIND = 0


class JobType(KebabCaseStrEnum):
    """
    The kinds of jobs that the CLI job-launcher scripts can start.

    Members are interpolated directly into strings (e.g., container names), so their string form
    is what matters to callers.
    """

    # Values are generated by `auto()`; presumably the kebab-case form of each member's name, per
    # `KebabCaseStrEnum` -- confirm against the `strenum` package.
    COMPRESSION = auto()
    DECOMPRESSION = auto()
    SEARCH = auto()


class DockerMount:
def __init__(
self,
Expand Down Expand Up @@ -91,6 +101,14 @@ def get_clp_home():
return clp_home.resolve()


def generate_container_name(job_type: JobType) -> str:
    """
    Builds a container name of the form `clp-<job_type>-<suffix>`.

    NOTE: The suffix is only the last four characters of a random UUID's string form, so
    collisions between names are unlikely but not impossible.

    :param job_type:
    :return: A container name for the given job type.
    """
    random_id = str(uuid.uuid4())
    suffix = random_id[-4:]
    return f"clp-{job_type}-{suffix}"


def check_dependencies():
try:
subprocess.run(
Expand Down Expand Up @@ -177,12 +195,15 @@ def is_path_already_mounted(
return host_path_relative_to_mounted_root == container_path_relative_to_mounted_root


def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
def generate_container_config(
clp_config: CLPConfig, clp_home: pathlib.Path
) -> Tuple[CLPConfig, CLPDockerMounts]:
"""
Copies the given config and sets up mounts mapping the relevant host paths into the container
:param clp_config:
:param clp_home:
:return: The container config and the mounts.
"""
container_clp_config = clp_config.copy(deep=True)

Expand Down Expand Up @@ -241,6 +262,57 @@ def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
return container_clp_config, docker_mounts


def dump_container_config(
    container_clp_config: CLPConfig, clp_config: CLPConfig, container_name: str
) -> Tuple[pathlib.Path, pathlib.Path]:
    """
    Serializes the container's config as YAML into the host's logs directory.

    The file is named `.<container_name>-config.yml`. The same filename is joined onto the
    container config's logs directory to give the path the container should read it from.

    :param container_clp_config: The config to serialize.
    :param clp_config: The host-side config; only its logs directory is used.
    :param container_name: Used to derive the config file's name.
    :return: A tuple of (path of the config file in the container, path of the config file on the
    host), in that order.
    """
    filename = f".{container_name}-config.yml"
    host_path = clp_config.logs_directory / filename
    container_path = container_clp_config.logs_directory / filename

    primitive_config = container_clp_config.dump_to_primitive_dict()
    with open(host_path, "w") as config_file:
        yaml.safe_dump(primitive_config, config_file)

    return container_path, host_path


def generate_container_start_cmd(
    container_name: str,
    container_mounts: List[typing.Optional[DockerMount]],
    container_image: str,
) -> List[str]:
    """
    Generates the `docker run` command to start a container with the given mounts and name.

    The command runs the container interactively, removes it on exit, uses the host's network,
    runs as the calling user's UID/GID, and sets the working directory and `PYTHONPATH` relative
    to `CONTAINER_CLP_HOME`. The caller is expected to append the command to run in the container.

    :param container_name:
    :param container_mounts: The mounts to add to the container. Falsy entries (e.g., `None` for
    a mount that isn't necessary) are skipped.
    :param container_image:
    :return: The command, as a list of arguments ending with the image name.
    """
    clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
    # fmt: off
    container_start_cmd = [
        "docker", "run",
        "-i",
        "--rm",
        "--network", "host",
        "-w", str(CONTAINER_CLP_HOME),
        "-e", f"PYTHONPATH={clp_site_packages_dir}",
        "-u", f"{os.getuid()}:{os.getgid()}",
        "--name", container_name,
        "--log-driver", "local",
    ]
    # fmt: on
    for mount in container_mounts:
        # Callers pass every potentially-necessary mount; unset ones are falsy and are skipped
        if mount:
            container_start_cmd.extend(("--mount", str(mount)))
    # The image must come after all `docker run` options
    container_start_cmd.append(container_image)

    return container_start_cmd


def validate_config_key_existence(config, key):
try:
value = get_config_value(config, key)
Expand All @@ -249,7 +321,7 @@ def validate_config_key_existence(config, key):
return value


def validate_and_load_config_file(
def load_config_file(
config_file_path: pathlib.Path, default_config_file_path: pathlib.Path, clp_home: pathlib.Path
):
if config_file_path.exists():
Expand Down
53 changes: 17 additions & 36 deletions components/clp-package-utils/clp_package_utils/scripts/compress.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
CONTAINER_INPUT_LOGS_ROOT_DIR,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
validate_and_load_config_file,
JobType,
load_config_file,
validate_and_load_db_credentials_file,
)

Expand Down Expand Up @@ -57,51 +57,32 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_logs_dir()

# Validate and load necessary credentials
validate_and_load_db_credentials_file(clp_config, clp_home, False)
except:
logger.exception("Failed to load config.")
return -1

container_name = f"clp-compressor-{str(uuid.uuid4())[-4:]}"
container_name = generate_container_name(JobType.COMPRESSION)

container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
container_clp_config, clp_config, container_name
)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on
necessary_mounts = [mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))
container_start_cmd.append(clp_config.execution_container)
necessary_mounts = [mounts.clp_home, mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
compress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.compress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(generated_config_path_on_container),
"--remove-path-prefix", str(CONTAINER_INPUT_LOGS_ROOT_DIR),
]
# fmt: on
Expand Down Expand Up @@ -140,7 +121,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
generated_config_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml
from clp_py_utils.clp_config import CLPConfig

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
DockerMount,
DockerMountType,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
validate_and_load_config_file,
JobType,
load_config_file,
validate_and_load_db_credentials_file,
validate_path_could_be_dir,
)
Expand Down Expand Up @@ -53,11 +54,10 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_logs_dir()

# Validate and load necessary credentials
validate_and_load_db_credentials_file(clp_config, clp_home, False)
except:
logger.exception("Failed to load config.")
Expand All @@ -76,33 +76,16 @@ def main(argv):
return -1
extraction_dir.mkdir(exist_ok=True)

container_name = f"clp-decompressor-{str(uuid.uuid4())[-4:]}"

container_name = generate_container_name(JobType.DECOMPRESSION)
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
container_clp_config, clp_config, container_name
)

# Set up mounts
container_extraction_dir = pathlib.Path("/") / "mnt" / "extraction-dir"
necessary_mounts = [
mounts.clp_home,
mounts.data_dir,
mounts.logs_dir,
mounts.archives_output_dir,
Expand All @@ -120,18 +103,15 @@ def main(argv):
container_paths_to_decompress_file_path,
)
)
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))

container_start_cmd.append(clp_config.execution_container)
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
decompress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.decompress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(generated_config_path_on_container),
"-d", str(container_extraction_dir),
]
# fmt: on
Expand All @@ -145,7 +125,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
generated_config_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
get_clp_home,
validate_and_load_config_file,
load_config_file,
)

# Setup logging
Expand Down Expand Up @@ -170,9 +170,7 @@ def main(argv):
# Validate and load config file
try:
config_file_path = pathlib.Path(parsed_args.config)
clp_config = validate_and_load_config_file(
config_file_path, default_config_file_path, clp_home
)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_input_logs_dir()
clp_config.validate_logs_dir()
except:
Expand Down
Loading

0 comments on commit 986a957

Please sign in to comment.