# update UOp.SPECIAL arg spec [run_process_replay] (#5661) #3873
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow-level settings: display name, environment shared by every job,
# and the events that trigger a run.
name: Unit Tests
env:
  # increment this when downloads substantially change to avoid the internet
  # (the value is interpolated into the "Cache downloads" keys of the jobs)
  DOWNLOAD_CACHE_VERSION: '5'
  # Quoted like the value above so both env entries are explicit strings.
  RUN_PROCESS_REPLAY: '1'
on:
  # Run on pushes to master, on every pull request, and on manual dispatch.
  push:
    branches:
      - master
  pull_request:
  workflow_dispatch:
jobs:
# Job "uops tests": runs tensor-core, dtype, ops, uops, symbolic and
# linearizer checks with PYTHON=1 (the "Python emulator" named in the step
# titles below) on a stock Ubuntu runner.
uops:
  name: uops tests
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string and is never read as a float.
        python-version: '3.12'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages
        # The key changes whenever any setup.py in the repo changes.
        key: uops-packages-${{ hashFiles('**/setup.py') }}-3.12
    - name: Install dependencies
      run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Test IMAGE=2 support
      run: |
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - name: Test emulated METAL tensor cores
      run: DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
    - name: Test emulated AMD tensor cores
      # Same script four times: N in {16, 64} crossed with ACC_HALF in {0, 1}.
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
    - name: Test emulated CUDA tensor cores
      run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
    - name: Full test tensor cores
      # One linearizer tensor-core test per emulated target (METAL, AMD, CUDA).
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
    - name: Test dtype with Python emulator
      run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
    - name: Test ops with Python emulator
      # The -k expression deselects the listed tests; everything else in
      # test/test_ops.py runs.
      run: DEBUG=2 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_maxpool2d or test_maxpool2d_simple or test_maxpool2d_bigger_stride or test_avgpool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int)" --durations=20
    - name: Test uops with Python emulator
      run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
    - name: Test symbolic with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py
    - name: test_linearizer_failures with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
# Job "Linters": pylint, ruff and mypy, followed by docs/README smoke runs,
# fuzzers, an external-package install check and a repo line-count limit.
linter:
  name: Linters
  runs-on: ubuntu-latest
  timeout-minutes: 10
  # TODO: run the pre-commit hook to replace a lot of this
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.8
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string and is never read as a float.
        python-version: '3.8'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.8/site-packages
        key: linting-packages-${{ hashFiles('**/setup.py') }}-3.8
    - name: Install dependencies
      run: pip install -e '.[linting,testing,docs]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Lint with pylint
      # Only W0311 (bad-indentation) and C0303 (trailing-whitespace) are enabled.
      # NOTE(review): --indent-string is restored to two spaces on the
      # assumption that the repo is 2-space indented and the second space was
      # lost when this file was captured -- confirm against the repository.
      # NOTE(review): without bash globstar, **/*.py matches like */*.py --
      # confirm that this is the intended file set.
      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' **/*.py
    - name: Lint with ruff
      run: |
        pip3 install --upgrade --force-reinstall ruff
        python3 -m ruff check .
    - name: Lint tinygrad with pylint
      run: python -m pylint tinygrad/
    - name: Run mypy
      run: python -m mypy --strict-equality
    - name: Test Docs
      run: |
        python docs/abstractions2.py
        python docs/abstractions3.py
    - name: Test Docs Build
      run: mkdocs build --strict
    - name: Test Quickstart
      # awk keeps only the lines inside ```python fences; the result is then
      # executed as a script.
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
    - name: Test README
      # Same fence extraction as the quickstart step, applied to README.md.
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
    - name: Fuzz Test symbolic
      run: python test/external/fuzz_symbolic.py
    - name: Fuzz Test shapetracker
      run: |
        PYTHONPATH="." python test/external/fuzz_shapetracker.py
        PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
    - name: Test to_movement_ops
      run: PYTHONPATH="." python extra/to_movement_ops.py
    - name: Use as an external package
      # Installs the checkout into a fresh venv outside the workspace and
      # imports it from there.
      run: |
        mkdir $HOME/test_external_dir
        cd $HOME/test_external_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
    - name: Test DEBUG
      run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
    - name: Repo line count <8500 lines
      run: MAX_LINE_COUNT=8500 python sz.py
# Matrix job with two tasks: "optimage" (GPU IMAGE + compile tests) and
# "onnx" (ONNX + optimization tests). Steps are selected per task through
# their `if:` conditions; the final process-replay step runs for both.
testopencl:
  strategy:
    fail-fast: false
    matrix:
      task: [optimage, onnx]
  name: ${{ matrix.task=='optimage'&&'GPU IMAGE+compile Tests' || matrix.task=='onnx'&&'ONNX+Optimization Tests' }}
  # NOTE(review): GitHub has been retiring the ubuntu-20.04 hosted image --
  # confirm this label is still available before relying on it.
  runs-on: ubuntu-20.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        # Matches the other jobs that run process replay: the last step calls
        # `git show` on the PR head SHA, which needs that commit to be present.
        fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR
    - name: Install OpenCL
      # The oneAPI apt source is added with allow-insecure and the packages
      # are installed with --allow-unauthenticated; versions are pinned.
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update || true
        sudo apt install --allow-unauthenticated -y --no-install-recommends \
          intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
          intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
          intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string and is never read as a float.
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
        key: testing-packages-${{ hashFiles('**/setup.py') }}
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/.cache/tinygrad/downloads/
        key: downloads-cache-${{ matrix.task }}-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Install Dependencies
      run: pip install -e '.[testing,testing_tf]' --extra-index-url https://download.pytorch.org/whl/cpu
    # ---- steps for matrix.task == 'optimage' ----
    - name: Run Kernel Count Test
      if: ${{ matrix.task == 'optimage' }}
      run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
    - name: Test WINO=1
      if: ${{ matrix.task == 'optimage' }}
      run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - name: Test GPU IMAGE=2 ops + training
      if: ${{ matrix.task == 'optimage' }}
      run: |
        GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py
        GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
    - name: Test openpilot model compile and size
      if: ${{ matrix.task == 'optimage' }}
      # The second command fails the step if /tmp/output.thneed is 100 MB or more.
      run: |
        PYTHONPATH="." DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py
        python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000'
    - name: Test openpilot model correctness (float32)
      if: ${{ matrix.task == 'optimage' }}
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py
    - name: Test openpilot alt model correctness (float32)
      if: ${{ matrix.task == 'optimage' }}
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
    - name: Test openpilot fastvits model correctness (float32)
      if: ${{ matrix.task == 'optimage' }}
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
    - name: Compile EfficientNet to C and test it
      if: ${{ matrix.task == 'optimage' }}
      # The final grep makes the step fail unless the output contains "cock".
      run: |
        CLANG=1 PYTHONPATH="." python examples/compile_efficientnet.py > recognize.c
        clang -O2 recognize.c -lm -o recognize
        cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
    # ---- steps for matrix.task == 'onnx' ----
    - name: Test ONNX (GPU)
      if: ${{ matrix.task == 'onnx' }}
      run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test ONNX (CLANG)
      if: ${{ matrix.task == 'onnx' }}
      run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test Action Space
      if: ${{ matrix.task == 'onnx' }}
      run: PYTHONPATH="." GPU=1 python3 extra/optimization/get_action_space.py
    - name: Test Beam Search
      if: ${{ matrix.task == 'onnx' }}
      run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - name: Test MLPerf optimizers
      if: ${{ matrix.task == 'onnx' }}
      run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py --durations=20
    - name: Test MLPerf losses
      if: ${{ matrix.task == 'onnx' }}
      run: GPU=1 python -m pytest -n=auto test/external/external_test_losses.py --durations=20
    - name: Test MLPerf metrics
      if: ${{ matrix.task == 'onnx' }}
      run: GPU=1 python -m pytest -n=auto test/external/external_test_metrics.py --durations=20
    - name: Test MLPerf datasets
      if: ${{ matrix.task == 'onnx' }}
      run: GPU=1 python -m pytest -n=auto test/external/external_test_datasets.py --durations=20
    - name: Test THREEFRY
      if: ${{ matrix.task == 'onnx' }}
      run: PYTHONPATH=. THREEFRY=1 GPU=1 python3 -m pytest test/test_randomness.py test/test_jit.py --durations=20
    - name: Run handcode_opt
      if: ${{ matrix.task == 'onnx' }}
      run: PYTHONPATH=. MODEL=resnet GPU=1 DEBUG=1 BS=4 HALF=0 python3 examples/handcode_opt.py
    # ---- runs for both tasks ----
    - name: Run process replay tests
      # Exports the PR title (from the event payload) and the commit message,
      # copies the replay script out of the current tree, then checks out
      # origin/master and runs the copied script there.
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
#testwebgpu: | |
# name: WebGPU Tests | |
# runs-on: macos-13 | |
# timeout-minutes: 10 | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v4 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v5 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: /Users/runner/Library/Python/3.11/lib/python/site-packages | |
# key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install Dependencies | |
# run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Cache downloads | |
# uses: actions/cache@v4 | |
# with: | |
# path: ~/Library/Caches/tinygrad/downloads/ | |
# key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }} | |
# - name: Check Device.DEFAULT (WEBGPU) and print some source | |
# run: | | |
# WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT" | |
# WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add | |
#- name: Run webgpu pytest | |
# run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto | |
# - name: Run selected webgpu tests | |
# run: | | |
# WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto test/test_ops.py test/test_dtype.py \ | |
# test/test_jit.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_linearizer.py \ | |
# test/test_linearizer_failures.py test/test_nn.py | |
# - name: Build WEBGPU Efficientnet | |
# run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet | |
# - name: Install Puppeteer | |
# run: npm install puppeteer | |
# - name: Run WEBGPU Efficientnet | |
# run: node test/web/test_webgpu.js | |
# Job "Metal Tests": runs the test suite with METAL=1 on a macOS 14 runner,
# then fuzzers, a TRANSCENDENTAL=2 subset and process replay.
testmetal:
  name: Metal Tests
  runs-on: macos-14
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string and is never read as a float.
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        # Per-user site-packages: dependencies are installed with `pip --user`.
        path: /Users/runner/Library/Python/3.11/lib/python/site-packages
        key: metal-m1-testing-user3-packages-${{ hashFiles('**/setup.py') }}
    - name: Install Dependencies
      run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/Library/Caches/tinygrad/downloads/
        key: downloads-cache-metal-only-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Check Device.DEFAULT (METAL) and print some source
      # The first command asserts that the default device is METAL.
      run: |
        METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
        METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    - name: Run metal test
      # test/external and test/models are excluded here; selected files from
      # them run in the dedicated steps below.
      run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --durations=20
    - name: Run real world test
      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
    - name: Run ONNX
      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test tensor core ops (fake)
      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
    - name: Test tensor core ops (real)
      run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
    - name: Test LLaMA compile speed
      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
    - name: Test Beam Search
      run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - name: Fuzz Test linearizer
      run: PYTHONPATH="." METAL=1 CACHELEVEL=0 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=48 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py
    - name: Fuzz Test models schedule
      run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    - name: Run process replay tests
      # Exports the PR title (from the event payload) and the commit message,
      # copies the replay script out of the current tree, then checks out
      # origin/master and runs the copied script there.
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
# testwebgl: | |
# name: WebGL Tests | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 10 | |
# | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v3 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v4 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: webgl-testing-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install Dependencies | |
# run: pip install -e '.[webgl,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Cache downloads | |
# uses: actions/cache@v4 | |
# with: | |
# path: ~/Library/Caches/tinygrad/downloads/ | |
# key: downloads-cache-webgl-${{ env.DOWNLOAD_CACHE_VERSION }} | |
# - name: Prepare | |
# run: | | |
# sudo apt-get -y install xvfb | |
# sudo /usr/bin/Xvfb :0 -screen 0 4096x4096x24+32 & | |
# - name: Run selected webgl tests | |
# run: WEBGL=1 python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_jit.py | |
# - name: Build WebGL Efficientnet | |
# run: WEBGL=1 python -m examples.compile_efficientnet | |
# Matrix job "Tests on (<backend>)": one run per backend. The "Set env" step
# writes backend-specific variables to $GITHUB_ENV, so later steps pick their
# device from the environment; backend-specific installs and test selections
# are gated with `if:`.
tests:
  strategy:
    fail-fast: false
    matrix:
      backend: [llvm, clang, gpu, ptx, amd, nv]  #, triton]
  name: Tests on (${{ matrix.backend }})
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string and is never read as a float.
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
        key: ${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/.cache/tinygrad/downloads/
        key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Set env
      # The expression picks one string per backend; printf expands its "\n"
      # sequences so each VAR=value lands on its own line of $GITHUB_ENV.
      # 'ptx' is spelled in lowercase to match the matrix value (Actions
      # string comparison ignores case, so the selection is unchanged).
      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'ptx' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nMOCKGPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
    - name: Install OpenCL
      if: matrix.backend == 'gpu'
      # The oneAPI apt source is added with allow-insecure and the packages
      # are installed with --allow-unauthenticated; versions are pinned.
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update || true
        sudo apt install --allow-unauthenticated -y --no-install-recommends opencl-headers \
          intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
          intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
          intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
    - name: Install packages (cuda)
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        sudo apt update -y || true
        sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev \
          flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc libzstd-dev
    - name: Cache gpuocelot
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      # The id is read by the next step to skip the build on a cache hit.
      id: cache-build
      uses: actions/cache@v4
      env:
        cache-name: cache-gpuocelot-build
      with:
        path: ${{ github.workspace }}/gpuocelot/ocelot
        # The key embeds the gpuocelot commit that the build step checks out.
        key: ubuntu22.04-gpuocelot-4524e34adb7eaccc6f71262f2e21d7052bb17c2f-rebuild-7
    - name: Clone/compile gpuocelot
      if: (matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv') && steps.cache-build.outputs.cache-hit != 'true'
      run: |
        git clone --recurse-submodules https://github.com/gpuocelot/gpuocelot.git ${{ github.workspace }}/gpuocelot
        cd ${{ github.workspace }}/gpuocelot/ocelot
        git checkout 4524e34adb7eaccc6f71262f2e21d7052bb17c2f
        mkdir build
        cd build
        cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF -DCMAKE_BUILD_ALWAYS=0 -DBUILD_TESTS_CUDA=OFF
        ninja
    - name: Install gpuocelot
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      run: |
        cd ${{ github.workspace }}/gpuocelot/ocelot/build
        sudo ninja install -d explain
    - name: Install packages (amd)
      if: matrix.backend == 'amd'
      # Adds the ROCm 6.1.2 apt repository (key, source list, pin), installs
      # the runtime packages, downloads libremu.so from the latest Qazalin/remu
      # release, and registers the ROCm library directories with ldconfig.
      # The heredoc bodies and EOF markers must stay at this exact indentation.
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
        sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF'
        deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main
        EOF
        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
        sudo apt update || true
        sudo apt install --no-install-recommends --allow-unauthenticated -y hsa-rocr comgr hsa-rocr-dev liburing-dev libc6-dev
        curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \
          jq -r '.assets[] | select(.name == "libremu.so").browser_download_url' | \
          sudo xargs curl -L -o /usr/local/lib/libremu.so
        sudo tee --append /etc/ld.so.conf.d/rocm.conf <<'EOF'
        /opt/rocm/lib
        /opt/rocm/lib64
        EOF
        sudo ldconfig
    - name: Install dependencies
      # The expression appends an extra (llvm / cuda / triton) to "testing"
      # for the backends that need one, and nothing otherwise.
      run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
    - name: Check Device.DEFAULT and print some source
      run: |
        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD','NV'], Device.DEFAULT"
        DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    # The "Verify ... autogen" steps regenerate the checked-in bindings and
    # diff them against a backup; any difference fails the step.
    - name: Verify OpenCL autogen
      if: matrix.backend == 'gpu'
      run: |
        cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
        ./autogen_stubs.sh opencl
        diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
    - name: Verify CUDA autogen
      if: matrix.backend == 'nv'
      run: |
        cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
        cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
        ./autogen_stubs.sh cuda
        ./autogen_stubs.sh nv
        diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
        diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
    - name: Verify AMD autogen
      if: matrix.backend == 'amd'
      run: |
        cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
        cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
        cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
        ./autogen_stubs.sh hsa
        ./autogen_stubs.sh comgr
        ./autogen_stubs.sh amd
        diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
        diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
        diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
    - name: Verify Linux autogen
      if: matrix.backend == 'amd'
      run: |
        cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
        cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
        ./autogen_stubs.sh libc
        ./autogen_stubs.sh io_uring
        diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
        diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
    - name: Run pytest (not cuda or amd)
      if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
      run: python -m pytest -n=auto test/ --durations=20
    # - name: Run test_ops with FUZZ_UOPS=1
    #   if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
    #   run: FUZZ_UOPS=1 python -m pytest -n=auto test/test_ops.py --durations=20
    - name: Run ONNX (only LLVM)
      if: matrix.backend == 'llvm'
      run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Run pytest (cuda)
      if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --ignore test/test_gc.py --durations=20
    - name: Run pytest (amd)
      if: matrix.backend=='amd'
      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py --durations=20
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    - name: Run process replay tests
      # Exports the PR title (from the event payload) and the commit message,
      # copies the replay script out of the current tree, then checks out
      # origin/master and runs the copied script there.
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
#testunicorn: | |
# name: ARM64 unicorn Test | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 10 | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v4 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v5 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: testing-arm-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install cross-assembler | |
# run: | | |
# sudo apt update -y | |
# sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu | |
# - name: Install dependencies | |
# run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Test arm | |
# run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py --ignore=test/unit/test_disk_tensor.py |