init provision test validation #1

name: Build and Validate Platform
on:
workflow_call:
inputs:
product_name:
required: true
type: string
ros_distro:
required: true
type: string
debug_fleet_keep_alive:
required: false
type: boolean
fleet_ips:
required: true
type: string
fleet_number_members:
required: true
type: number
secrets:
auto_commit_user:
required: true
auto_commit_mail:
required: true
auto_commit_pwd:
required: true
registry_user:
required: true
registry_password:
required: true
nexus_publisher_user:
required: true
nexus_publisher_password:
required: true
gh_token:
required: true
aws_key_id:
required: true
aws_secret_key_id:
required: true
slack_token_id:
required: true
ssh_pem_fleet_aws_vm:
required: true
proxmox_ve_username:
required: true
proxmox_ve_password:
required: true
jira_username:
required: true
jira_password:
required: true
xray_clientid:
required: true
xray_secret:
required: true
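# Illustrative example of how a caller workflow might invoke this reusable
# workflow (product, ROS distro and IP values below are hypothetical):
#
#   jobs:
#     validate-platform:
#       uses: ./.github/workflows/provision-vm-validation.yml
#       with:
#         product_name: "example-product"
#         ros_distro: "noetic"
#         debug_fleet_keep_alive: false
#         fleet_ips: '["10.0.0.10/24","10.0.0.11/24","10.0.0.12/24"]'
#         fleet_number_members: 2
#       secrets: inherit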
env:
CI_INTEGRATION_SCRIPTS_VERSION: "2.1.0.23"
MOBTEST_VERSION: "0.0.4.3"
PACKAGE_DEPLOYER_VERSION: "1.0.0.25"
GITHUB_API_USR: "OttoMation-Movai"
AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_key_id }}
AWS_DEFAULT_REGION: "us-east-1"
REGISTRY: registry.cloud.mov.ai
ENV: qa
USERSPACE_FOLDER_PATH: userspace
SIMULATION_ID: ci_simulation
XRAY_CLIENTID: ${{ secrets.xray_clientid}}
XRAY_SECRET: ${{ secrets.xray_secret}}
JIRA_USERNAME: ${{ secrets.jira_username}}
JIRA_PASSWORD: ${{ secrets.jira_password}}
SLACK_CHANNEL: "C02U028NMB7" # rnd-platform
# development slack channel
#SLACK_CHANNEL: "C05K2KF1UP8"
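# CI_INTEGRATION_SCRIPTS_VERSION pins the integration-pipeline tooling installed
# by the jobs below; the other *_VERSION values are presumably consumed by jobs
# defined further down in this workflow.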
jobs:
Validate-bootstrap-configs:
runs-on: integration-pipeline
container:
image: registry.aws.cloud.mov.ai/qa/py-buildserver:v3.0.3
credentials:
username: ${{secrets.registry_user}}
password: ${{secrets.registry_password}}
outputs:
slack_thread_id: ${{ fromJson(steps.send-message.outputs.slack-result).response.message.ts }}
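# The Slack message timestamp (ts) is exposed so later jobs can thread their
# status messages under the kickoff message sent by this job.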
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Validate Manifest
shell: bash
run: |
yamllint product-manifest.yaml
- name: Install CI Scripts in container
shell: bash
run: |
python3 -m pip install integration-pipeline==$CI_INTEGRATION_SCRIPTS_VERSION --ignore-installed
- name: Bootstrapping simulator metadata
run: |
git config --global --add safe.directory $(pwd)
git fetch
git checkout origin/${GITHUB_REF#refs/heads/} -- product.version
cat product.version
rm -rf simulator_artifacts ci_artifacts
integration-pipeline generate_meta_simulator_artifacts \
--manifest_platform_base_key product_components \
--product_name ${{ inputs.product_name }} \
--branch ${GITHUB_REF#refs/heads/}
mkdir simulator_artifacts
cp ci_artifacts/* ./simulator_artifacts
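# The generated *.json metadata and simulator_artifacts/ are stashed as workflow
# artifacts below so downstream jobs can reuse them without re-running the bootstrap.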
- name: Bootstrapping platform metadata
run: |
integration-pipeline generate_meta_artifacts \
--manifest_platform_base_key product_components
- name: Stash robot_configs
uses: actions/upload-artifact@v4
with:
name: robot_configs
path: "*.json*"
retention-days: 5
- name: Stash sim_configs
uses: actions/upload-artifact@v4
with:
name: sim_configs
path: simulator_artifacts/*
retention-days: 5
- name: raise
run: |
rm -rf simulator_artifacts ci_artifacts platform_configs
mkdir platform_configs
integration-pipeline raise
cp product.version ./platform_configs/product.version
cp product-manifest.yaml ./platform_configs/product-manifest.yaml
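# product.version and product-manifest.yaml are copied into platform_configs/
# and uploaded below as the raised_meta artifact.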
- name: Prepare slack variables
if: always()
id: pre_slack
run: |
MESSAGE="CI: ${GITHUB_REPOSITORY} (${GITHUB_REF#refs/heads/}), build: $(cat product.version) (Attempt: #${{ github.run_attempt }}) is starting to be validated :construction: Details: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
echo "msg=${MESSAGE}" >> $GITHUB_OUTPUT
- name: Send Slack Message
uses: archive/github-actions-slack@master
id: send-message
with:
slack-function: send-message
slack-bot-user-oauth-access-token: ${{ secrets.slack_token_id }}
slack-channel: ${{ env.SLACK_CHANNEL }}
slack-text: ${{ steps.pre_slack.outputs.msg }}
- name: Stash raised_meta
uses: actions/upload-artifact@v4
with:
name: raised_meta
path: platform_configs/*
retention-days: 5
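# Fan-in job: waits for all validation jobs to finish and re-exposes the Slack
# thread id for any follow-up jobs.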
Validations-Finish:
needs: [Validation-UI-Tests, Validation-Install-Tests, Validation-API-Tests, Validation-Flow-Tests]
runs-on: ubuntu-20.04
outputs:
slack_thread_id: ${{ needs.Validation-UI-Tests.outputs.slack_thread_id }}
steps:
- name: Pass through
run: echo "Pass"
Fleet-Validations:
needs: [Validate-bootstrap-configs]
runs-on: ip-hel-queuer
steps:
- name: Cleanup Workspace
uses: rtCamp/action-cleanup@master
- name: Checkout
uses: actions/checkout@v4
- name: Agent info
id: agent_info
run: |
echo "ip=$(hostname -I | awk '{print $1}')" | tee -a $GITHUB_OUTPUT
- name: Setup CI Scripts in .ci-venv
shell: bash
run: |
python3 -m venv .ci-venv --clear
. .ci-venv/bin/activate
[ -f ci-requirements.txt ] && pip install -r ci-requirements.txt
python3 -m pip install integration-pipeline==$CI_INTEGRATION_SCRIPTS_VERSION --ignore-installed
- name: unstash robot_configs
uses: actions/download-artifact@v4
with:
name: robot_configs
path: .
- name: Setup infra environment configs
id: infra_env_configs_setup
shell: bash
run: |
env_configs_dir=infra_env_configs
env_configs_version=0.0.1-2
env_configs_repo_name=devops-tf-env-conf
rm -rf $env_configs_dir
. .ci-venv/bin/activate
integration-pipeline fetch_by_tag --repo $env_configs_repo_name --version $env_configs_version --gh_api_user $GITHUB_API_USR --gh_api_pwd ${{ secrets.auto_commit_pwd }} --target_dir $env_configs_dir
ls -la $env_configs_dir
echo "target_dir=${env_configs_dir}" >> $GITHUB_OUTPUT
deactivate
- name: Setup terraform proxmox provisioner
id: provision_infra_setup
shell: bash
run: |
provision_infra_dir=provision_scripts
provision_infra_version=1.0.0-3
provision_infra_repo_name=devops-tf-proxmox-bpg
rm -rf $provision_infra_dir
. .ci-venv/bin/activate
integration-pipeline fetch_by_tag --repo $provision_infra_repo_name --version $provision_infra_version --gh_api_user $GITHUB_API_USR --gh_api_pwd ${{ secrets.auto_commit_pwd }} --target_dir $provision_infra_dir
deactivate
ls -la $provision_infra_dir
echo "target_dir=${provision_infra_dir}" >> $GITHUB_OUTPUT
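# Both setup steps above follow the same pattern: integration-pipeline
# fetch_by_tag downloads a pinned tag of a GitHub repo into a local directory,
# whose path is exported as a target_dir output for later steps.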
- name: Define Instance names
id: infra_names
shell: bash
run: |
branch=$(echo ${GITHUB_REF#refs/heads/} | sed "s;\.;-;g" )
local_manager_prefix="ip-$branch-manager"
local_worker_prefix="ip-$branch-worker"
echo "$local_manager_prefix"
echo "$local_worker_prefix"
total_resources=${{ inputs.fleet_number_members }}
((total_resources+=1))
echo "manager_prefix=${local_manager_prefix}" >> $GITHUB_OUTPUT
echo "worker_prefix=${local_worker_prefix}" >> $GITHUB_OUTPUT
echo "total_resources=${total_resources}" >> $GITHUB_OUTPUT
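# Illustrative example: for a branch named "main" with fleet_number_members=2,
# this produces manager_prefix=ip-main-manager, worker_prefix=ip-main-worker and
# total_resources=3 (one manager plus two workers); dots in branch names are
# replaced with dashes.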
- name: Provision remote vms (Proxmox)
working-directory: ${{ steps.provision_infra_setup.outputs.target_dir }}
shell: bash
run: |
multiply_node=$(printf '"hel",%.0s' {1..${{ steps.infra_names.outputs.total_resources }}})
node_list_str=${multiply_node::-1}
var_file_arg='-var-file=../${{ steps.infra_env_configs_setup.outputs.target_dir }}/hel/hel_fleet_test.tfvars'
echo "proxmox_host_list=[$node_list_str]">>input.tfvars
echo "fleet_peer_nr=${{ inputs.fleet_number_members }}">>input.tfvars
echo 'fleet_password="n/a"'>>input.tfvars
echo 'fleet_manager_name="${{ steps.infra_names.outputs.manager_prefix }}"'>>input.tfvars
echo 'fleet_peer_name_prefix="${{ steps.infra_names.outputs.worker_prefix }}"'>>input.tfvars
echo 'ip_list=${{ inputs.fleet_ips }}'>>input.tfvars
echo 'proxmox_ve_username="${{ secrets.proxmox_ve_username }}"'>>input.tfvars
echo 'proxmox_ve_password="${{ secrets.proxmox_ve_password }}"'>>input.tfvars
echo "" >> input.tfvars
echo "File args: $var_file_arg"
echo "Input File args: $(cat input.tfvars)"
terraform init -backend-config="key=hel-fleet-${{ steps.infra_names.outputs.manager_prefix }}.tfstate"
terraform apply -auto-approve $var_file_arg -var-file=input.tfvars
terraform refresh $var_file_arg -var-file=input.tfvars
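# For reference, the generated input.tfvars ends up looking roughly like this
# (illustrative values for a 2-member fleet):
#   proxmox_host_list=["hel","hel","hel"]
#   fleet_peer_nr=2
#   fleet_password="n/a"
#   fleet_manager_name="ip-main-manager"
#   fleet_peer_name_prefix="ip-main-worker"
#   ip_list=["10.0.0.10/24","10.0.0.11/24","10.0.0.12/24"]
#   proxmox_ve_username="..."
#   proxmox_ve_password="..."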
- name: Prepare Devops provisioning slack message
if: always()
id: pre_slack_infra
run: |
MESSAGE_ERR=":x: CI: ${GITHUB_REPOSITORY}, (${GITHUB_REF#refs/heads/}), build: $(cat product.version) is being impacted by an infrastructural issue. \
Provisioning of fleet infrastructure failed. Please take a look! \
Details: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
echo "msg_error=${MESSAGE_ERR}" >> $GITHUB_OUTPUT
- name: Slack message failure
if: failure()
uses: slackapi/slack-github-action@main
with:
channel-id: "G0102LEV1CL"
slack-message: ${{ steps.pre_slack_infra.outputs.msg_error }}
env:
SLACK_BOT_TOKEN: ${{ secrets.slack_token_id }}
- name: Apply ansible inventory
shell: bash
run: |
. .ci-venv/bin/activate
cp ${{ steps.provision_infra_setup.outputs.target_dir }}/provisioned_inventory.yml staging/provisioned_inventory.yml
cat staging/provisioned_inventory.yml
integration-pipeline get_yml_value --file staging/provisioned_inventory.yml --key fleet.children.managers.hosts.manager.ansible_host --output_file ./staging/manager_private_ip.txt
deactivate
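# The manager's private IP extracted here is passed to the install playbook
# below as proxycerts__remote_redis_servers_fqn, presumably so the generated
# proxy certificates cover the manager's redis endpoint.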
- name: Setup ansible installation
id: ansible_install_setup
shell: bash
env:
install_key: ansible_deploy
run: |
rm -f /tmp/target_dir.txt /tmp/version.txt /tmp/repo_name.txt
. .ci-venv/bin/activate
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.installion.$install_key.target_dir --output_file /tmp/target_dir.txt
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.installion.$install_key.version --output_file /tmp/version.txt
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.installion.$install_key.name --output_file /tmp/repo_name.txt
install_infra_dir=$(cat /tmp/target_dir.txt)
install_infra_version=$(cat /tmp/version.txt)
install_infra_repo_name=$(cat /tmp/repo_name.txt)
rm -rf $install_infra_repo_name
integration-pipeline fetch_by_tag --repo $install_infra_repo_name --version $install_infra_version --gh_api_user $GITHUB_API_USR --gh_api_pwd ${{ secrets.auto_commit_pwd }} --target_dir $install_infra_dir
ls -la $install_infra_dir
echo "target_dir=${install_infra_dir}" >> $GITHUB_OUTPUT
deactivate
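# Same fetch_by_tag pattern as the infra setup steps, except the repo name,
# version and target directory are read from product-manifest.yaml
# (product_components.installion.ansible_deploy).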
- name: Ansible install platform
id: ansible_install_platform
working-directory: ${{ steps.ansible_install_setup.outputs.target_dir }}
shell: bash
run: |
echo "${{ secrets.ssh_pem_fleet_aws_vm }}" > ~/.ssh/aws_slave.pem
sudo chmod 600 ~/.ssh/aws_slave.pem
while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 ; do echo Waiting for other software managers to finish... ; sleep 5;done
python3.9 -m venv ansible-venv
source ansible-venv/bin/activate
python3 -m pip install -r requirements.txt
ansible-galaxy install -r requirements.yml --timeout 120
stripped_ips=$(echo ${{ inputs.fleet_ips }} | sed "s;\[;;g" | sed "s;];;g" | sed "s; ;;g")
touch ~/.ssh/known_hosts
sudo chmod 600 ~/.ssh/known_hosts
IFS=',' read -r -a stripped_ips_arr <<< $stripped_ips
manager_ip=${stripped_ips_arr[0]}
echo "manager_ip=${manager_ip}" | tee -a $GITHUB_OUTPUT
for ip in "${stripped_ips_arr[@]}"
do
if [[ $ip == *"/"* ]]; then
ip=${ip%/*}
fi
ssh-keygen -f ~/.ssh/known_hosts -R $ip
ssh-keyscan -H $ip >> ~/.ssh/known_hosts
done
# Ensure cloud init is done on all the hosts
members=("manager")
for i in $(seq 0 $(( ${{ inputs.fleet_number_members}} - 1 ))); do
members+=("member$i")
done
for fleet_host in ${members[@]}; do
ansible $fleet_host -i ../staging/provisioned_inventory.yml --key-file ~/.ssh/aws_slave.pem -m shell -a 'cloud-init status --wait'
done
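# Host aliases in provisioned_inventory.yml follow the pattern
# manager, member0..memberN-1; the loop above waits for cloud-init to finish on
# each of them before running the install playbook.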
# Capture the playbook status explicitly (the step runs under bash -e) so the
# venv is always deactivated before the status is propagated.
execution_status=0
ansible-playbook install.yml \
-i ../staging/provisioned_inventory.yml \
--key-file ~/.ssh/aws_slave.pem \
--extra-vars=@"$(pwd)/.."/product-manifest.yaml \
-e fleet_domain_dns="" \
-e "{\"proxycerts__remote_redis_servers_fqn\": [$(cat ../staging/manager_private_ip.txt)]}" \
-e '{"fleet_extra_hosts": ["172.22.0.106 registry.hel.mov.ai traefik"]}' \
--skip-tags "validate,ufw,hardening" || execution_status=$?
deactivate
exit $execution_status
- name: Setup QA API tests
id: api_tests_setup
shell: bash
env:
qa_key: api_tests
run: |
rm -f /tmp/target_dir.txt /tmp/version.txt /tmp/repo_name.txt
. .ci-venv/bin/activate
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.qa.$qa_key.target_dir --output_file /tmp/target_dir.txt
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.qa.$qa_key.version --output_file /tmp/version.txt
integration-pipeline get_yml_value --file product-manifest.yaml --key product_components.qa.$qa_key.name --output_file /tmp/repo_name.txt
tests_dir=$(cat /tmp/target_dir.txt)
tests_version=$(cat /tmp/version.txt)
tests_repo_name=$(cat /tmp/repo_name.txt)
rm -rf $tests_repo_name
integration-pipeline fetch_by_tag --repo $tests_repo_name --version $tests_version --gh_api_user $GITHUB_API_USR --gh_api_pwd ${{ secrets.auto_commit_pwd }} --target_dir $tests_dir
ls -la $tests_dir
echo "target_dir=${tests_dir}" >> $GITHUB_OUTPUT
deactivate
# setup tests venv in a step that is always executed
python3 -m venv "${tests_dir}"/test-venv --clear --system-site-packages
. "${tests_dir}"/test-venv/bin/activate
pip install -r "${tests_dir}"/requirements.txt
deactivate
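# The test virtualenv is created with --system-site-packages so the suite can
# also import packages already installed on the runner.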
- name: API tests
timeout-minutes: 30
working-directory: ${{ steps.api_tests_setup.outputs.target_dir }}
shell: bash
run: |
. test-venv/bin/activate
echo "Skipping API tests: this suite was never intended to run in this context"
deactivate
- name: Save docker container logs
if: always()
working-directory: ${{ steps.api_tests_setup.outputs.target_dir }}
shell: bash
run: |
# for sanity
docker ps -a
for container in backend spawner messager-server; do
CONTAINER_ID=$(docker ps -a --format '{{.Names}}' --filter "name=^${container}-.*")
docker logs "${CONTAINER_ID}" &> "${container}.log" || true
done || true
# movai-service
journalctl -u movai-service --since '1hour ago' &> "movai-service.log"
- name: Get current job id
if: always()
shell: bash
id: job_info
run: |
job_id=$(gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id}}/attempts/${{ github.run_attempt }}/jobs | jq -r '.jobs | .[0].id')
job_html_url=$(gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id}}/attempts/${{ github.run_attempt }}/jobs | jq -r '.jobs | map(select(.name | contains("${{ github.job }}"))) | .[0].html_url')
echo "$job_id"
echo "$job_html_url"
echo "job_url=$job_html_url" >> $GITHUB_OUTPUT
env:
GITHUB_TOKEN: ${{ secrets.gh_token }}
- name: Prepare slack variables
if: always()
id: pre_slack_result
run: |
MESSAGE=":white_check_mark: ${{ github.job }} (Attempt: #${{ github.run_attempt }}) job passed"
MESSAGE_ERR=":x: ${{ github.job }} (Attempt: #${{ github.run_attempt }}) job failed"
echo "msg=${MESSAGE}" >> $GITHUB_OUTPUT
echo "msg_error=${MESSAGE_ERR}\n Details: ${{ steps.job_info.outputs.job_url }}" >> $GITHUB_OUTPUT
- name: Slack message success
uses: archive/github-actions-slack@master
with:
slack-function: send-message
slack-bot-user-oauth-access-token: ${{ secrets.slack_token_id }}
slack-channel: ${{ env.SLACK_CHANNEL }}
slack-text: ${{ steps.pre_slack_result.outputs.msg }}
slack-optional-thread_ts: ${{ needs.Validate-bootstrap-configs.outputs.slack_thread_id }}
- name: Slack message failure
uses: archive/github-actions-slack@master
if: failure()
with:
slack-function: send-message
slack-bot-user-oauth-access-token: ${{ secrets.slack_token_id }}
slack-channel: ${{ env.SLACK_CHANNEL }}
slack-text: ${{ steps.pre_slack_result.outputs.msg_error }}
slack-optional-thread_ts: ${{ needs.Validate-bootstrap-configs.outputs.slack_thread_id }}
- name: Collect Fleet QA artifacts
working-directory: ${{ steps.ansible_install_setup.outputs.target_dir }}
if: always()
shell: bash
env:
API_DIR: ${{ steps.api_tests_setup.outputs.target_dir }}
run: |
rm -rf fleet_qa_artifacts
mkdir -p fleet_qa_artifacts/install
source ansible-venv/bin/activate
# install fleet_tests artifacts
for fleet_host in "manager" "member0" "member1"; do
ansible $fleet_host -i ../staging/provisioned_inventory.yml --key-file ~/.ssh/aws_slave.pem -m shell -a 'journalctl -u movai-service --since "1hour ago"' > fleet_qa_artifacts/install/$fleet_host.log || true
echo "From $fleet_host:"
ansible $fleet_host -i ../staging/provisioned_inventory.yml --key-file ~/.ssh/aws_slave.pem -m shell -a 'docker ps -a' > fleet_qa_artifacts/install/$fleet_host-docker_ps.log || true
echo "$(tail -n +2 fleet_qa_artifacts/install/$fleet_host-docker_ps.log )"
ansible $fleet_host -i ../staging/provisioned_inventory.yml --key-file ~/.ssh/aws_slave.pem -m shell -a 'journalctl -u docker --boot --lines=all' > fleet_qa_artifacts/install/$fleet_host-all-docker.log || true
done
deactivate
# qa api tests artifacts
# *.log and *.zip might not exist if the test fails early
mkdir -p fleet_qa_artifacts/api
cp -r "${API_DIR}"/*.log fleet_qa_artifacts/api || true
cp -r "${API_DIR}"/*.tar fleet_qa_artifacts/api || true
cp -r "${API_DIR}"/results/*.zip fleet_qa_artifacts/api || true
- name: Stash Fleet QA artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: fleet_qa_artifacts
path: ${{ steps.ansible_install_setup.outputs.target_dir }}/fleet_qa_artifacts/*
retention-days: 5
- name: Teardown remote vms (Proxmox)
working-directory: ${{ steps.provision_infra_setup.outputs.target_dir }}
if: ${{ ( !inputs.debug_fleet_keep_alive && success() ) || cancelled() || ( !inputs.debug_fleet_keep_alive && failure() ) }}
shell: bash
run: |
attempts=3
count=0
while [ $count -lt $attempts ]; do
var_file_arg='-var-file=../${{ steps.infra_env_configs_setup.outputs.target_dir }}/hel/hel_fleet_test.tfvars'
# Capture the destroy status explicitly (the step runs under bash -e) so the
# retry logic below can actually run on failure.
exit_status=0
terraform destroy -auto-approve $var_file_arg -var-file=input.tfvars || exit_status=$?
if [ $exit_status -eq 0 ]; then
break
elif [ $exit_status -eq 1 ]; then
count=$((count + 1))
echo "Retrying Terraform destroy (attempt $count)..."
else
echo "Terraform destroy failed with exit status ${exit_status}. Exiting..."
exit ${exit_status}
fi
done
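# Destroy is retried up to $attempts times only when terraform exits with status 1;
# any other non-zero exit aborts the step immediately.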