Skip to content

Commit

Permalink
fix: Fixes for automated testing (#276)
Browse files Browse the repository at this point in the history
  • Loading branch information
claytonparnell authored Mar 19, 2024
1 parent 68d52e9 commit 41790a1
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 11 deletions.
3 changes: 1 addition & 2 deletions test/test_artifacts/v1/keras.test.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUN sudo apt-get update && sudo apt-get install -y git graphviz && \
:

# Some of the keras guides require pydot and graphviz to be installed
RUN micromamba install -y --freeze-installed conda-forge::pydot nvidia::cuda-nvcc
RUN micromamba install -y --freeze-installed conda-forge::pydot "nvidia::cuda-nvcc>=11.8,<11.9"
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/opt/conda

WORKDIR "keras-io/guides"
Expand All @@ -22,4 +22,3 @@ COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_keras_tests.sh .
RUN chmod +x run_keras_tests.sh
# Run tests in run_keras_tests.sh
CMD ["./run_keras_tests.sh"]

9 changes: 5 additions & 4 deletions test/test_artifacts/v1/pytorch.examples.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ ARG SAGEMAKER_DISTRIBUTION_IMAGE
FROM $SAGEMAKER_DISTRIBUTION_IMAGE

ARG MAMBA_DOCKERFILE_ACTIVATE=1
RUN sudo apt-get update && \
sudo apt-get install -y git && \
git clone --recursive https://github.com/pytorch/examples && \
:
RUN git clone --recursive https://github.com/pytorch/examples

# During automation some tests fail with `libcuda.so: cannot open shared object file: No such file or directory`
# even though libcuda.so.1 exists, so provide the unversioned name as a symlink.
# Only create the link when libcuda.so is missing: redirecting stderr with `2>/dev/null` merely hides the
# message, while `ln -s` still exits non-zero if the target name already exists and would fail the build.
# `-f` replaces a stale dangling link (which `[ -e ]` reports as missing); genuine errors remain visible.
RUN if [ ! -e /usr/lib/x86_64-linux-gnu/libcuda.so ]; then \
        sudo ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so.1 /usr/lib/x86_64-linux-gnu/libcuda.so; \
    fi

WORKDIR "examples"

Expand Down
2 changes: 1 addition & 1 deletion test/test_artifacts/v1/scripts/run_autogluon_tests.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

AUTOGLUON_VERSION=$(micromamba list | grep autogluon | tr -s ' ' | cut -d ' ' -f 3)
AUTOGLUON_VERSION=$(micromamba list | grep autogluon | tr -s ' ' | head -n 1 | cut -d ' ' -f 3)
git checkout tags/v$AUTOGLUON_VERSION

# Run autogluon quick start as end-to-end check
Expand Down
5 changes: 2 additions & 3 deletions test/test_artifacts/v1/sm-python-sdk.test.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ ARG SAGEMAKER_DISTRIBUTION_IMAGE
FROM $SAGEMAKER_DISTRIBUTION_IMAGE

ARG MAMBA_DOCKERFILE_ACTIVATE=1
RUN sudo apt-get update && sudo apt-get install -y git && \
git clone --recursive https://github.com/aws/sagemaker-python-sdk.git && \
:
RUN git clone --recursive https://github.com/aws/sagemaker-python-sdk.git

# Sagemaker Python SDK's unit tests require AWS_DEFAULT_REGION to be set. So, using an arbitrary value of us-east-1
ENV AWS_DEFAULT_REGION=us-east-1
WORKDIR "sagemaker-python-sdk"
Expand Down
2 changes: 1 addition & 1 deletion test/test_dockerfile_based_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _validate_docker_images(dockerfile_path: str, required_packages: List[str],

try:
image, _ = _docker_client.images.build(path=test_artifacts_path,
dockerfile=dockerfile_path,
dockerfile=dockerfile_path, shmsize='256000000',
tag=dockerfile_path.lower().replace('.', '-'),
rm=True, buildargs={'SAGEMAKER_DISTRIBUTION_IMAGE': docker_image_identifier})
except BuildError as e:
Expand Down

0 comments on commit 41790a1

Please sign in to comment.