# remove dtype from llama precompute_freqs_cis #5355
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, workflow-wide environment, and triggers.
name: Unit Tests

# Environment shared by every job in this workflow.
env:
  # increment this when downloads substantially change to avoid the internet
  DOWNLOAD_CACHE_VERSION: '7'
  # quoted: environment values are strings
  RUN_PROCESS_REPLAY: '1'
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PYTHONPATH: .

# Runs on pushes to master, on pull requests, and on manual dispatch.
on:
  push:
    branches:
      - master
  pull_request:
  workflow_dispatch:

jobs:
# Job `autogen` (an entry of the workflow's `jobs:` mapping).
# Installs the docs extra, smoke-tests tinygrad as an installed package, builds and runs
# the docs, then regenerates each runtime autogen stub and diffs it against the
# checked-in file so that stale generated code fails the job.
autogen:
  name: Autogen+Docs
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.12'
    - name: Install docs dependencies (no cache)
      run: pip install -e '.[docs]'
    - name: Use as an external package
      # installs the checkout into a fresh venv outside the workspace, then imports and type-checks it
      run: |
        mkdir $HOME/test_external_dir
        cd $HOME/test_external_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
        pip install mypy
        mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
    - name: Run beautiful_mnist with tinygrad only
      run: |
        mkdir $GITHUB_WORKSPACE/test_dir
        cd $GITHUB_WORKSPACE/test_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
        PYTHONPATH=$GITHUB_WORKSPACE BS=2 STEPS=10 python beautiful_mnist.py
    - name: Test Docs Build
      run: python -m mkdocs build --strict
    - name: Test Docs
      run: |
        python docs/abstractions2.py
        python docs/abstractions3.py
    - name: Test Quickstart
      # awk extracts the ```python fenced blocks from the markdown into a script, which is then run
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
    - name: Test DEBUG
      run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
    - name: Install OpenCL
      # NOTE(review): the oneAPI repo is added with allow-insecure and packages are installed
      # with --allow-unauthenticated; confirm this is still intentional
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update || true
        sudo apt install --allow-unauthenticated -y --no-install-recommends opencl-headers \
          intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
          intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
          intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
    - name: Install packages (cuda)
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        sudo apt update -y || true
        sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev \
          flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc libzstd-dev
    - name: Install packages (amd)
      # adds the ROCm apt repo, installs the HSA/comgr packages, downloads libremu.so from the
      # latest Qazalin/remu release, and registers the ROCm library directories with ldconfig
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
        sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF'
        deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main
        EOF
        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
        sudo apt update || true
        sudo apt install --no-install-recommends --allow-unauthenticated -y hsa-rocr comgr hsa-rocr-dev liburing-dev libc6-dev
        curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \
          jq -r '.assets[] | select(.name == "libremu.so").browser_download_url' | \
          sudo xargs curl -L -o /usr/local/lib/libremu.so
        sudo tee --append /etc/ld.so.conf.d/rocm.conf <<'EOF'
        /opt/rocm/lib
        /opt/rocm/lib64
        EOF
        sudo ldconfig
    - name: Compile EfficientNet to C and test it
      run: |
        CLANG=1 PYTHONPATH="." python examples/compile_efficientnet.py > recognize.c
        clang -O2 recognize.c -lm -o recognize
        cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
    # Each "Verify ... autogen" step backs up the checked-in stub, regenerates it, and
    # diffs the two; a non-empty diff fails the step.
    - name: Verify OpenCL autogen
      run: |
        cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
        ./autogen_stubs.sh opencl
        diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
    - name: Verify CUDA autogen
      run: |
        cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
        cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
        ./autogen_stubs.sh cuda
        ./autogen_stubs.sh nv
        diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
        diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
    - name: Verify AMD autogen
      run: |
        cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
        cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
        cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
        ./autogen_stubs.sh hsa
        ./autogen_stubs.sh comgr
        ./autogen_stubs.sh amd
        diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
        diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
        diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
    - name: Verify Linux autogen
      run: |
        cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
        cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
        ./autogen_stubs.sh libc
        ./autogen_stubs.sh io_uring
        diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
        diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
# Job `uops` (an entry of the workflow's `jobs:` mapping).
# Runs test suites with PYTHON=1 (the "Python emulator" named in the steps below),
# combined with the EMULATE_* switches for the emulated tensor-core steps.
uops:
  name: uops tests
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.12'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages
        # key changes whenever any setup.py changes
        key: uops-packages-${{ hashFiles('**/setup.py') }}-3.12
    - name: Install dependencies
      run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Test IMAGE=2 support
      run: |
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - name: Test emulated METAL tensor cores
      run: |
        DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated AMX tensor cores
      run: PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
    - name: Test emulated AMD tensor cores
      # covers N=16 and N=64 with ACC_HALF both off and on, then the linearizer tensor-core tests
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated CUDA tensor cores
      run: |
        DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
        PYTHONPATH="." DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated INTEL OpenCL tensor cores
      run: DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
    - name: Full test tensor cores
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
    - name: Test tensor cores (TC=3)
      run: |
        TC=3 DEBUG=3 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        TC=3 PYTHONPATH=. DEBUG=3 EMULATE_AMD=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        TC=3 DEBUG=3 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
        TC=3 PYTHONPATH=. DEBUG=3 EMULATE_INTEL=1 PYTHON=1 N=16 HALF=1 python3 ./extra/gemm/simple_matmul.py
        TC=3 PYTHONPATH=. DEBUG=3 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
    - name: Test device flop counts
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
    - name: Test dtype with Python emulator
      run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
    - name: Test ops with Python emulator
      # the -k expression deselects the listed tests from this run
      run: DEBUG=2 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_max_pool2d or test_max_pool2d_simple or test_max_pool2d_bigger_stride or test_avg_pool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int or test_interpolate_bilinear or test_interpolate_bilinear_corners_aligned)" --durations=20
    - name: Test uops with Python emulator
      run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
    - name: Test symbolic with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py
    - name: test_linearizer_failures with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
# Job `linter` (an entry of the workflow's `jobs:` mapping).
# Runs pylint, ruff and mypy, executes the README code blocks, runs the unit tests and
# the symbolic/shapetracker fuzzers, and enforces the repository line-count limit.
linter:
  name: Linters+fuzz+unit Tests
  runs-on: ubuntu-latest
  timeout-minutes: 20
  # TODO: run the pre-commit hook to replace a lot of this
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        # must stay quoted: unquoted 3.10 is the YAML float 3.1
        python-version: "3.10"
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.10/site-packages
        key: linting-packages-${{ hashFiles('**/setup.py') }}-3.10
    - name: Install dependencies
      run: pip install -e '.[linting,testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Lint bad-indentation and trailing-whitespace with pylint
      # NOTE(review): the whitespace inside --indent-string may have been collapsed by the same
      # extraction that stripped this file's indentation; confirm the intended indent string
      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
    - name: Lint with ruff
      run: |
        pip3 install --upgrade --force-reinstall ruff
        python3 -m ruff check .
    - name: Lint tinygrad with pylint
      run: python -m pylint tinygrad/
    - name: Run mypy
      run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
    - name: Test README
      # awk extracts the ```python fenced blocks from README.md into a script, which is then run
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
    - name: Run unit tests
      run: PYTHONPATH="." python -m pytest -n=auto test/unit/
    - name: Fuzz Test symbolic
      run: python test/external/fuzz_symbolic.py
    - name: Fuzz Test shapetracker
      run: |
        PYTHONPATH="." python test/external/fuzz_shapetracker.py
        PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
    - name: Repo line count <= 9999 lines
      run: MAX_LINE_COUNT=9999 python sz.py
# Job `testopencl` (an entry of the workflow's `jobs:` mapping).
# Matrix over two tasks sharing one setup: `optimage` (kernel-count, IMAGE=2 and
# openpilot compile steps) and `onnx` (ONNX backend, CLOUD, optimization helper and
# MLPerf helper tests). Each task-specific step is gated by an `if:` on matrix.task.
testopencl:
  strategy:
    # let the other matrix task finish even if one fails
    fail-fast: false
    matrix:
      task: [optimage, onnx]
  name: ${{ matrix.task=='optimage'&&'GPU IMAGE+compile Tests' || matrix.task=='onnx'&&'ONNX+Optimization Tests' }}
  # NOTE(review): pinned to ubuntu-20.04 while the other Linux jobs in this file use
  # ubuntu-latest / ubuntu-22.04; confirm GitHub-hosted runners still offer this image
  runs-on: ubuntu-20.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Install OpenCL
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update || true
        sudo apt install --allow-unauthenticated -y --no-install-recommends \
          intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
          intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
          intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
        key: testing-packages-${{ hashFiles('**/setup.py') }}
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/.cache/tinygrad/downloads/
        # bumping DOWNLOAD_CACHE_VERSION in the workflow env changes this key
        key: downloads-cache-${{ matrix.task }}-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Install Dependencies
      run: pip install -e '.[testing,testing_tf]' --extra-index-url https://download.pytorch.org/whl/cpu

    # ---- steps for matrix.task == 'optimage' ----
    - if: ${{ matrix.task == 'optimage' }}
      name: Run Kernel Count Test
      run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
    - if: ${{ matrix.task == 'optimage'}}
      name: Test WINO=1
      run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - if: ${{ matrix.task == 'optimage'}}
      name: Test GPU IMAGE=2 ops + training
      run: |
        PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
        PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
    - if: ${{ matrix.task == 'optimage' }}
      name: Test openpilot model compile and size
      # second command fails the step if /tmp/output.thneed is 100_000_000 bytes or larger
      run: |
        PYTHONPATH="." DEBUG=2 ALLOWED_KERNEL_COUNT=208 ALLOWED_GATED_READ_IMAGE=13 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py
        python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000'
    - if: ${{ matrix.task == 'optimage' }}
      name: Test openpilot model correctness (float32)
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py
    - if: ${{ matrix.task == 'optimage' }}
      name: Test openpilot compile3
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py
    - if: ${{ matrix.task == 'optimage' }}
      name: Test openpilot alt model correctness (float32)
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
    - if: ${{ matrix.task == 'optimage' }}
      name: Test openpilot fastvits model correctness (float32)
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx

    # ---- steps for matrix.task == 'onnx' ----
    - if: ${{ matrix.task == 'onnx' }}
      name: Test ONNX (GPU)
      run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Test ONNX (CLANG)
      run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Run CLOUD=1 Test
      run: |
        CLOUDDEV=CLANG CLOUD=1 python3 test/test_tiny.py
        CLOUDDEV=GPU CLOUD=1 python3 test/test_tiny.py
        CLOUDDEV=GPU IMAGE=2 CLOUD=1 python3 test/test_tiny.py
    - if: ${{ matrix.task == 'onnx' }}
      name: Test Optimization Helpers
      run: PYTHONPATH="." DEBUG=1 python3 extra/optimization/test_helpers.py
    - if: ${{ matrix.task == 'onnx' }}
      name: Test Action Space
      run: PYTHONPATH="." DEBUG=1 GPU=1 python3 extra/optimization/get_action_space.py
    - if: ${{ matrix.task == 'onnx' }}
      name: Test Beam Search
      run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - if: ${{ matrix.task == 'onnx' }}
      name: Test MLPerf optimizers
      run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Test MLPerf losses
      run: GPU=1 python -m pytest -n=auto test/external/external_test_losses.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Test MLPerf metrics
      run: GPU=1 python -m pytest -n=auto test/external/external_test_metrics.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Test MLPerf datasets
      run: GPU=1 python -m pytest -n=auto test/external/external_test_datasets.py --durations=20
    - if: ${{ matrix.task == 'onnx' }}
      name: Run handcode_opt
      run: PYTHONPATH=. MODEL=resnet GPU=1 DEBUG=1 BS=4 HALF=0 python3 examples/handcode_opt.py

    # ---- runs for both tasks ----
    - name: Run process replay tests
      # exports the PR title and head-commit message, copies process_replay.py out of the
      # tree, then checks out origin/master and runs the copied script there
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
# Job `testwebgpu` (an entry of the workflow's `jobs:` mapping).
# On a macOS runner: checks that WEBGPU=1 selects the WEBGPU device, builds the
# EfficientNet example and runs it through the Puppeteer-based node test, then runs a
# selected list of pytest files with WEBGPU=1.
testwebgpu:
  name: WebGPU Tests
  runs-on: macos-14
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        # user-site directory, matching the `pip install --user` in the next step
        path: /Users/runner/Library/Python/3.11/lib/python/site-packages
        key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }}
    - name: Install Dependencies
      run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/Library/Caches/tinygrad/downloads/
        key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Check Device.DEFAULT (WEBGPU) and print some source
      run: |
        WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
        WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    - name: Build WEBGPU Efficientnet
      run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python3 -m examples.compile_efficientnet
    - name: Install Puppeteer
      run: npm install puppeteer
    - name: Run WEBGPU Efficientnet
      run: node test/web/test_webgpu.js
    - name: Run selected webgpu tests
      run: |
        WEBGPU=1 WGPU_BACKEND_TYPE=Metal python3 -m pytest -n=auto test/test_assign.py test/test_arange.py test/test_const_folding.py test/test_dtype.py \
          test/test_dtype_alu.py test/test_conv.py test/test_conv_shapetracker.py test/test_nn.py test/test_ops.py test/test_optim.py \
          test/test_jit.py test/test_randomness.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_uops_stats.py test/test_uops.py \
          --durations=20
# Job `testmetal` (an entry of the workflow's `jobs:` mapping).
# On a macOS runner: checks that METAL=1 selects the METAL device, runs the main test
# tree, real-world and ONNX tests, tensor-core, LLaMA speed, beam-search and linearizer
# fuzz steps, then the process replay comparison against origin/master.
testmetal:
  name: Metal Tests
  runs-on: macos-14
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        fetch-depth: 2  # NOTE: this fetches the HEAD commit of the PR
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        # user-site directory, matching the `pip install --user` in the next step
        path: /Users/runner/Library/Python/3.11/lib/python/site-packages
        key: metal-m1-testing-user3-packages-${{ hashFiles('**/setup.py') }}
    - name: Install Dependencies
      run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/Library/Caches/tinygrad/downloads/
        key: downloads-cache-metal-only-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Check Device.DEFAULT (METAL) and print some source
      run: |
        METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
        METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    - name: Run metal test
      run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20
    - name: Run real world test
      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
    - name: Run ONNX
      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test tensor core ops (fake)
      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
    - name: Test tensor core ops (real)
      run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
    - name: Test LLaMA compile speed
      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
    - name: Test Beam Search
      run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - name: Fuzz Test linearizer
      run: PYTHONPATH="." METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=24 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
    # - name: Fuzz Test models schedule
    #   run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    - name: Run process replay tests
      # exports the PR title and head-commit message, copies process_replay.py out of the
      # tree, then checks out origin/master and runs the copied script there
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
# testwebgl: | |
# name: WebGL Tests | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 20 | |
# | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v3 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v4 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: webgl-testing-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install Dependencies | |
# run: pip install -e '.[webgl,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Cache downloads | |
# uses: actions/cache@v4 | |
# with: | |
# path: ~/Library/Caches/tinygrad/downloads/ | |
# key: downloads-cache-webgl-${{ env.DOWNLOAD_CACHE_VERSION }} | |
# - name: Prepare | |
# run: | | |
# sudo apt-get -y install xvfb | |
# sudo /usr/bin/Xvfb :0 -screen 0 4096x4096x24+32 & | |
# - name: Run selected webgl tests | |
# run: WEBGL=1 python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_jit.py | |
# - name: Build WebGL Efficientnet | |
# run: WEBGL=1 python -m examples.compile_efficientnet | |
# Job `tests` (an entry of the workflow's `jobs:` mapping).
# Matrix over backends. The "Set env" step writes the backend-selecting variables to
# $GITHUB_ENV, backend-specific install steps are gated by `if:`, and the matching
# pytest selection runs afterwards, followed by the process replay comparison.
tests:
  strategy:
    # let the other backends finish even if one fails
    fail-fast: false
    matrix:
      backend: [llvm, clang, gpu, ptx, amd, nv]  #, triton]
  name: Tests on (${{ matrix.backend }})
  runs-on: ubuntu-22.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        fetch-depth: 2  # NOTE: this fetches the HEAD commit of the PR
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        # quoted so the version is always passed as a string, never a YAML float
        python-version: '3.11'
    - name: Cache python packages
      uses: actions/cache@v4
      with:
        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
        key: ${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
    - name: Cache downloads
      uses: actions/cache@v4
      with:
        path: ~/.cache/tinygrad/downloads/
        key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
    - name: Set env
      # The expression picks one newline-separated VAR=value list per backend; printf expands
      # the \n escapes and appends the result to $GITHUB_ENV for all later steps.
      # 'ptx' is written in lowercase to match the matrix value and the other comparisons.
      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'ptx' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nMOCKGPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
    - name: Install OpenCL
      if: matrix.backend == 'gpu'
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update || true
        sudo apt install --allow-unauthenticated -y --no-install-recommends opencl-headers \
          intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \
          intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \
          intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16
    - name: Install packages (cuda)
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        sudo apt update -y || true
        sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev \
          flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc libzstd-dev
    - name: Cache gpuocelot
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      # the id lets the clone/compile step below read this step's cache-hit output
      id: cache-build
      uses: actions/cache@v4
      env:
        cache-name: cache-gpuocelot-build
      with:
        path: ${{ github.workspace }}/gpuocelot/ocelot
        key: ubuntu22.04-gpuocelot-4524e34adb7eaccc6f71262f2e21d7052bb17c2f-rebuild-9
    - name: Clone/compile gpuocelot
      # only builds from source when the cache step above did not restore a build
      if: (matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv') && steps.cache-build.outputs.cache-hit != 'true'
      run: |
        git clone --recurse-submodules https://github.com/gpuocelot/gpuocelot.git ${{ github.workspace }}/gpuocelot
        cd ${{ github.workspace }}/gpuocelot/ocelot
        git checkout 4524e34adb7eaccc6f71262f2e21d7052bb17c2f
        mkdir build
        cd build
        cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF -DCMAKE_BUILD_ALWAYS=0 -DBUILD_TESTS_CUDA=OFF
        ninja
    - name: Install gpuocelot
      if: matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv'
      run: |
        cd ${{ github.workspace }}/gpuocelot/ocelot/build
        sudo ninja install -d explain
    - name: Install packages (amd)
      if: matrix.backend == 'amd'
      # adds the ROCm apt repo, installs the HSA/comgr packages, downloads libremu.so from the
      # latest Qazalin/remu release, and registers the ROCm library directories with ldconfig
      run: |
        echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
        sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF'
        deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main
        EOF
        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
        sudo apt update || true
        sudo apt install --no-install-recommends --allow-unauthenticated -y hsa-rocr comgr hsa-rocr-dev liburing-dev libc6-dev
        curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \
          jq -r '.assets[] | select(.name == "libremu.so").browser_download_url' | \
          sudo xargs curl -L -o /usr/local/lib/libremu.so
        sudo tee --append /etc/ld.so.conf.d/rocm.conf <<'EOF'
        /opt/rocm/lib
        /opt/rocm/lib64
        EOF
        sudo ldconfig
    - name: Install dependencies
      # the expression appends a backend-specific extra (llvm / cuda / triton) to `testing`
      run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
    - name: Check Device.DEFAULT and print some source
      run: |
        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD','NV'], Device.DEFAULT"
        DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    - name: Run pytest (not cuda or amd)
      if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
      run: python -m pytest -n=auto test/ --ignore=test/unit --durations=20
    # - name: Run test_ops with FUZZ_UOPS=1
    #   if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
    #   run: FUZZ_UOPS=1 python -m pytest -n=auto test/test_ops.py --durations=20
    - name: Run ONNX (only LLVM)
      if: matrix.backend == 'llvm'
      run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Run pytest (cuda)
      if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
    - name: Run pytest (amd)
      if: matrix.backend=='amd'
      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py --durations=20
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    - name: Run process replay tests
      # exports the PR title and head-commit message, copies process_replay.py out of the
      # tree, then checks out origin/master and runs the copied script there
      run: |
        export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
        export COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }})
        cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
#testunicorn: | |
# name: ARM64 unicorn Test | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 20 | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v4 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v5 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: testing-arm-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install cross-assembler | |
# run: | | |
# sudo apt update -y | |
# sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu | |
# - name: Install dependencies | |
# run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Test arm | |
# run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py --ignore=test/unit/test_disk_tensor.py |