Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clp-package: Deduplicate and clean up code in CLI job-launcher scripts. #473

Merged
merged 14 commits into from
Jul 6, 2024
55 changes: 54 additions & 1 deletion components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import socket
import subprocess
import typing
import uuid
from enum import auto
from typing import List, Tuple

import yaml
from clp_py_utils.clp_config import (
Expand All @@ -24,6 +27,7 @@
read_yaml_config_file,
validate_path_could_be_dir,
)
from strenum import KebabCaseStrEnum

# CONSTANTS
# Paths
Expand All @@ -38,6 +42,12 @@ class DockerMountType(enum.IntEnum):
BIND = 0


class JobType(KebabCaseStrEnum):
COMPRESSION = auto()
DECOMPRESSION = auto()
SEARCH = auto()


class DockerMount:
def __init__(
self,
Expand Down Expand Up @@ -91,6 +101,10 @@ def get_clp_home():
return clp_home.resolve()


def generate_container_name(job_type: JobType) -> str:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
return f"clp-{job_type}-{str(uuid.uuid4())[-4:]}"


def check_dependencies():
try:
subprocess.run(
Expand Down Expand Up @@ -177,7 +191,9 @@ def is_path_already_mounted(
return host_path_relative_to_mounted_root == container_path_relative_to_mounted_root


def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
def generate_container_config(
clp_config: CLPConfig, clp_home: pathlib.Path
) -> Tuple[CLPConfig, CLPDockerMounts]:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
"""
Copies the given config and sets up mounts mapping the relevant host paths into the container

Expand Down Expand Up @@ -241,6 +257,43 @@ def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path):
return container_clp_config, docker_mounts


def dump_container_config(
clp_config: CLPConfig, container_clp_config: CLPConfig, container_name: str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we swap container_clp_config and clp_config?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

technically, we don't need the clp_config. the only thing needed by this function is clp_config.logs_directory which can be an argument called host_log_directory.
I didn't write in this way because then clp_config.logs_directory need to be duplicated in all callers.
me know what you think

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since clp_config.logs_directory and container_clp_config.logs_directory are linked, I figure it's safer to pass in container_clp_config and clp_config to lessen the risk that the caller passes in two random paths.

) -> Tuple[pathlib.Path, pathlib.Path]:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
container_config_filename = f".{container_name}-config.yml"
config_file_path_on_host = clp_config.logs_directory / container_config_filename
config_file_path_on_container = container_clp_config.logs_directory / container_config_filename
with open(config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

return config_file_path_on_container, config_file_path_on_host


def generate_container_start_cmd(
container_name: str, container_mounts: List[CLPDockerMounts], execution_container: str
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
) -> List[str]:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local"
]
for mount in container_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))
container_start_cmd.append(execution_container)

return container_start_cmd


def validate_config_key_existence(config, key):
try:
value = get_config_value(config, key)
Expand Down
46 changes: 14 additions & 32 deletions components/clp-package-utils/clp_package_utils/scripts/compress.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
CONTAINER_INPUT_LOGS_ROOT_DIR,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
JobType,
validate_and_load_config_file,
validate_and_load_db_credentials_file,
)
Expand Down Expand Up @@ -67,41 +67,23 @@ def main(argv):
logger.exception("Failed to load config.")
return -1

container_name = f"clp-compressor-{str(uuid.uuid4())[-4:]}"
container_name = generate_container_name(JobType.COMPRESSION)

container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
config_file_path_on_container, config_file_path_on_host = dump_container_config(
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
clp_config, container_clp_config, container_name
)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on
necessary_mounts = [mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))
container_start_cmd.append(clp_config.execution_container)
necessary_mounts = [mounts.clp_home, mounts.input_logs_dir, mounts.data_dir, mounts.logs_dir]
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
compress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.compress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(config_file_path_on_container),
"--remove-path-prefix", str(CONTAINER_INPUT_LOGS_ROOT_DIR),
]
# fmt: on
Expand Down Expand Up @@ -140,7 +122,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
config_file_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
import argparse
import logging
import os
import pathlib
import subprocess
import sys
import uuid

import yaml
from clp_py_utils.clp_config import CLPConfig

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CONTAINER_CLP_HOME,
DockerMount,
DockerMountType,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
JobType,
validate_and_load_config_file,
validate_and_load_db_credentials_file,
validate_path_could_be_dir,
Expand Down Expand Up @@ -76,33 +77,15 @@ def main(argv):
return -1
extraction_dir.mkdir(exist_ok=True)

container_name = f"clp-decompressor-{str(uuid.uuid4())[-4:]}"

container_name = generate_container_name(JobType.DECOMPRESSION)
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
container_config_filename = f".{container_name}-config.yml"
container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
with open(container_config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)

clp_site_packages_dir = CONTAINER_CLP_HOME / "lib" / "python3" / "site-packages"
# fmt: off
container_start_cmd = [
"docker", "run",
"-i",
"--rm",
"--network", "host",
"-w", str(CONTAINER_CLP_HOME),
"-e", f"PYTHONPATH={clp_site_packages_dir}",
"-u", f"{os.getuid()}:{os.getgid()}",
"--name", container_name,
"--log-driver", "local",
"--mount", str(mounts.clp_home),
]
# fmt: on

config_file_path_on_container, config_file_path_on_host = dump_container_config(
clp_config, container_clp_config, container_name
)
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
# Set up mounts
container_extraction_dir = pathlib.Path("/") / "mnt" / "extraction-dir"
necessary_mounts = [
mounts.clp_home,
mounts.data_dir,
mounts.logs_dir,
mounts.archives_output_dir,
Expand All @@ -120,18 +103,15 @@ def main(argv):
container_paths_to_decompress_file_path,
)
)
for mount in necessary_mounts:
if mount:
container_start_cmd.append("--mount")
container_start_cmd.append(str(mount))

container_start_cmd.append(clp_config.execution_container)
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
decompress_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.decompress",
"--config", str(container_clp_config.logs_directory / container_config_filename),
"--config", str(config_file_path_on_container),
"-d", str(container_extraction_dir),
]
# fmt: on
Expand All @@ -145,7 +125,7 @@ def main(argv):
subprocess.run(cmd, check=True)

# Remove generated files
container_config_file_path_on_host.unlink()
config_file_path_on_host.unlink()

return 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,24 @@
logger.addHandler(logging_console_handler)


def decompress_paths(
def handle_decompression(
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
parsed_args,
clp_home: pathlib.Path,
paths,
list_path: pathlib.Path,
clp_config: CLPConfig,
archives_dir: pathlib.Path,
logs_dir: pathlib.Path,
extraction_dir: pathlib.Path,
):
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
# Validate paths were specified using only one method
if len(parsed_args.paths) > 0 and parsed_args.files_from is not None:
logger.error("Paths cannot be specified both on the command line and through a file.")
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved

# Validate extraction directory
extraction_dir = pathlib.Path(parsed_args.extraction_dir)
if not extraction_dir.is_dir():
logger.error(f"extraction-dir ({extraction_dir}) is not a valid directory.")
return -1

logs_dir = clp_config.logs_directory
archives_dir = clp_config.archive_output.directory

# Generate database config file for clp
db_config_file_path = logs_dir / f".decompress-db-config-{uuid.uuid4()}.yml"
with open(db_config_file_path, "w") as f:
Expand All @@ -46,6 +55,8 @@ def decompress_paths(
"--db-config-file", str(db_config_file_path),
]
# fmt: on
paths = parsed_args.paths
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
list_path = parsed_args.files_from
files_to_decompress_list_path = None
if list_path is not None:
decompression_cmd.append("-f")
Expand Down Expand Up @@ -93,16 +104,6 @@ def main(argv):
)
parsed_args = args_parser.parse_args(argv[1:])

# Validate paths were specified using only one method
if len(parsed_args.paths) > 0 and parsed_args.files_from is not None:
args_parser.error("Paths cannot be specified both on the command line and through a file.")

# Validate extraction directory
extraction_dir = pathlib.Path(parsed_args.extraction_dir)
if not extraction_dir.is_dir():
logger.error(f"extraction-dir ({extraction_dir}) is not a valid directory.")
return -1

# Validate and load config file
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
try:
config_file_path = pathlib.Path(parsed_args.config)
Expand All @@ -115,15 +116,7 @@ def main(argv):
logger.exception("Failed to load config.")
return -1

return decompress_paths(
clp_home,
parsed_args.paths,
parsed_args.files_from,
clp_config,
clp_config.archive_output.directory,
clp_config.logs_directory,
extraction_dir,
)
return handle_decompression(parsed_args, clp_home, clp_config)


if "__main__" == __name__:
Expand Down
Loading