From 1733328e928e8a8e7215330a4ebe24af2634fd66 Mon Sep 17 00:00:00 2001 From: gezhengqiang Date: Wed, 6 Mar 2024 14:52:33 +0800 Subject: [PATCH] feat(sgx): get taskset parameters from env --- deploy/scripts/sgx/run_trainer_master_sgx.sh | 9 ++++++++- deploy/scripts/sgx/run_trainer_ps_sgx.sh | 9 ++++++++- deploy/scripts/sgx/run_trainer_worker_sgx.sh | 9 ++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/deploy/scripts/sgx/run_trainer_master_sgx.sh b/deploy/scripts/sgx/run_trainer_master_sgx.sh index a4b9c40f9..ad16210ba 100755 --- a/deploy/scripts/sgx/run_trainer_master_sgx.sh +++ b/deploy/scripts/sgx/run_trainer_master_sgx.sh @@ -103,7 +103,14 @@ fi server_port=$(normalize_env_to_args "--server-port" "$PORT1") -taskset -c 0-3 stdbuf -o0 gramine-sgx python main.py --master \ +if [[ -z "${START_CPU_SN}" ]]; then + START_CPU_SN=0 +fi +if [[ -z "${END_CPU_SN}" ]]; then + END_CPU_SN=3 +fi + +taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python main.py --master \ --application-id=$APPLICATION_ID \ --data-source=$DATA_SOURCE \ --data-path=$DATA_PATH \ diff --git a/deploy/scripts/sgx/run_trainer_ps_sgx.sh b/deploy/scripts/sgx/run_trainer_ps_sgx.sh index 1b678215d..965a09c45 100755 --- a/deploy/scripts/sgx/run_trainer_ps_sgx.sh +++ b/deploy/scripts/sgx/run_trainer_ps_sgx.sh @@ -34,4 +34,11 @@ unset HTTPS_PROXY https_proxy http_proxy ftp_proxy make_custom_env 4 source /root/start_aesm_service.sh -taskset -c 0-3 stdbuf -o0 gramine-sgx python -m fedlearner.trainer.parameter_server $POD_IP:${LISTEN_PORT} +if [[ -z "${START_CPU_SN}" ]]; then + START_CPU_SN=0 +fi +if [[ -z "${END_CPU_SN}" ]]; then + END_CPU_SN=3 +fi + +taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python -m fedlearner.trainer.parameter_server $POD_IP:${LISTEN_PORT} diff --git a/deploy/scripts/sgx/run_trainer_worker_sgx.sh b/deploy/scripts/sgx/run_trainer_worker_sgx.sh index 0baa4e451..250100068 100755 --- a/deploy/scripts/sgx/run_trainer_worker_sgx.sh +++ b/deploy/scripts/sgx/run_trainer_worker_sgx.sh @@ -91,7 +91,14 @@ source /root/start_aesm_service.sh server_port=$(normalize_env_to_args "--server-port" "$PORT1") -taskset -c 0-3 stdbuf -o0 gramine-sgx python main.py --worker \ +if [[ -z "${START_CPU_SN}" ]]; then + START_CPU_SN=0 +fi +if [[ -z "${END_CPU_SN}" ]]; then + END_CPU_SN=3 +fi + +taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python main.py --worker \ --application-id="$APPLICATION_ID" \ --master-addr="$MASTER_HOST:50051" \ --cluster-spec="$CLUSTER_SPEC" \