build: NIF binary targeting CUDA #16

Merged 23 commits on Nov 6, 2023
59 changes: 53 additions & 6 deletions .github/workflows/binaries.yml
@@ -14,9 +14,15 @@ on:
- ".github/workflows/binaries.yml"
workflow_dispatch:

env:
PROJECT_NAME: "candlex"
PROJECT_DIR: "native/candlex"
PROJECT_VERSION: "0.1.2"
NIF_VERSION: "2.16"

jobs:
build_binary:
name: ${{ matrix.target }} / ${{ matrix.os }}
build_cpu:
name: cpu / ${{ matrix.target }} / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
permissions:
contents: write
@@ -40,12 +46,53 @@ jobs:
- uses: philss/rustler-precompiled-action@main
id: precompile
with:
project-dir: "native/candlex"
project-name: candlex
project-version: "0.1.2"
project-dir: ${{ env.PROJECT_DIR }}
project-name: ${{ env.PROJECT_NAME }}
project-version: ${{ env.PROJECT_VERSION }}
target: ${{ matrix.target }}
use-cross: ${{ matrix.use-cross }}
nif-version: "2.16"
nif-version: ${{ env.NIF_VERSION }}

- uses: softprops/action-gh-release@v1
with:
draft: true
files: ${{ steps.precompile.outputs.file-path }}
if: startsWith(github.ref, 'refs/tags/')

build_cuda:
name: cuda / ${{ matrix.target }} / ${{ matrix.os }}
runs-on: ubuntu-22.04
permissions:
contents: write
strategy:
fail-fast: false
matrix:
include:
- target: x86_64-unknown-linux-gnu
os: ubuntu-22.04

container:
image: nvidia/cuda:12.2.2-devel-ubuntu22.04

steps:
- run: apt update && apt install -y curl git
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: rustup target add ${{ matrix.target }}

- uses: philss/rustler-precompiled-action@main
id: precompile
env:
CUDA_COMPUTE_CAP: "70"
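# "70" = compute capability 7.0 (e.g. Tesla V100). Pinned explicitly because
# GitHub-hosted runners have no GPU for build.rs to probe via nvidia-smi.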
with:
project-dir: ${{ env.PROJECT_DIR }}
project-name: ${{ env.PROJECT_NAME }}
project-version: ${{ env.PROJECT_VERSION }}
target: ${{ matrix.target }}
use-cross: null
nif-version: ${{ env.NIF_VERSION }}
variant: cuda
cargo-args: "--features cuda"

- uses: softprops/action-gh-release@v1
with:
20 changes: 19 additions & 1 deletion README.md
@@ -48,6 +48,19 @@ if no precompiled binary is available for your target environment. Once set, you
must run `mix deps.clean candlex --build` explicitly to force a recompile.
Building has a number of dependencies, see *Building from source* below.

#### `CANDLEX_NIF_TARGET`

The default value is `cpu`, which means the final binary targets
only the host CPU.

| Value | Target environment |
| --- | --- |
| `cpu` | Host CPU only (default) |
| `cuda` | CUDA 12.x |

To use Candlex with an NVIDIA GPU you need a [CUDA](https://developer.nvidia.com/cuda-downloads) installation compatible with your
GPU drivers.
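
As a quick smoke test after installing a CUDA-enabled binary, you can point Nx at the GPU. A minimal sketch, assuming `candlex` was compiled or fetched with `CANDLEX_NIF_TARGET=cuda` and a CUDA device is visible to the driver:

```elixir
# Assumes the CUDA variant of the NIF was fetched/compiled
# (CANDLEX_NIF_TARGET=cuda) and a working CUDA driver is present.
Nx.default_backend({Candlex.Backend, device: :cuda})

t = Nx.tensor([1.0, 2.0, 3.0])
Nx.add(t, t)
```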

## Building from source

To build the native binary locally you need to set `CANDLEX_NIF_BUILD=true`.
@@ -58,11 +71,16 @@ You will need the following installed in your system for the compilation:
* [Git](https://git-scm.com) for fetching candle-core source
* [Rust](https://www.rust-lang.org) with cargo to compile rustler NIFs

### GPU support

To build the native binary with GPU support, you need to run in an environment that has CUDA installed.
Then build with `CANDLEX_NIF_TARGET=cuda` set, for example `CANDLEX_NIF_BUILD=true CANDLEX_NIF_TARGET=cuda mix compile`.
See the `CANDLEX_NIF_TARGET` section above for more details.

## Releasing

To publish a new version of this package:

1. Update `@version` in `mix.exs` and `project-version` in `.github/workflows/binaries.yml`.
1. Update `@version` in `mix.exs` and `PROJECT_VERSION` in `.github/workflows/binaries.yml`.
1. `git tag -s <tag-version>` to create new signed tag.
1. `git push origin <tag-version>` to push the tag.
1. Wait for the `binaries.yml` GitHub workflow to build all the NIF binaries.
16 changes: 1 addition & 15 deletions config/config.exs
@@ -1,17 +1,3 @@
import Config

enable_cuda =
case System.get_env("CUDA") do
nil -> System.find_executable("nvcc") && System.find_executable("nvidia-smi")
"false" -> false
_ -> true
end

crate_features =
if enable_cuda do
[:cuda]
else
[]
end

config :candlex, crate_features: crate_features
config :candlex, use_cuda: System.get_env("CANDLEX_NIF_TARGET") == "cuda"
7 changes: 5 additions & 2 deletions lib/candlex/native.ex
@@ -8,7 +8,7 @@ defmodule Candlex.Native do

use RustlerPrecompiled,
otp_app: :candlex,
features: Application.compile_env(:candlex, :crate_features, []),
features: if(Application.compile_env(:candlex, :use_cuda), do: [:cuda], else: []),
base_url: "#{source_url}/releases/download/v#{version}",
force_build: System.get_env("CANDLEX_NIF_BUILD") in ["1", "true"],
mode: mode,
@@ -19,7 +19,10 @@ defmodule Candlex.Native do
"aarch64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-unknown-linux-gnu"
]
],
variants: %{
"x86_64-unknown-linux-gnu" => [cuda: fn -> Application.compile_env(:candlex, :use_cuda) end]
}

# Rustler will override all the below stub functions with real NIFs
def from_binary(_binary, _dtype, _shape, _device), do: error()
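
The `variants` map added above is what lets a single target ship both a CPU and a CUDA artifact: at compile time, RustlerPrecompiled evaluates each variant's callback for the current target and downloads the matching artifact, falling back to the default (CPU) one otherwise. A minimal sketch of that selection logic, under the assumption that the first callback returning `true` wins (illustrative only, not the library's actual internals):

```elixir
defmodule VariantSketch do
  # Returns the first variant whose callback is truthy, else :default.
  def pick(variants, target) do
    variants
    |> Map.get(target, [])
    |> Enum.find_value(:default, fn {name, applies?} ->
      if applies?.(), do: name
    end)
  end
end

variants = %{
  "x86_64-unknown-linux-gnu" => [cuda: fn -> System.get_env("CANDLEX_NIF_TARGET") == "cuda" end]
}

VariantSketch.pick(variants, "x86_64-unknown-linux-gnu")
#=> :cuda when CANDLEX_NIF_TARGET=cuda, :default otherwise
```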
67 changes: 34 additions & 33 deletions native/candlex/build.rs
@@ -180,13 +180,21 @@ fn set_cuda_include_dir() -> Result<()>

#[allow(unused)]
fn compute_cap() -> Result<usize> {
// Grab compute code from nvidia-smi
let mut compute_cap = {
println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");

// Try to parse compute caps from env
let mut compute_cap = if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
println!("cargo:rustc-env=CUDA_COMPUTE_CAP={compute_cap_str}");
compute_cap_str
.parse::<usize>()
.context("Could not parse code")?
} else {
// Use nvidia-smi to get the current compute cap
let out = std::process::Command::new("nvidia-smi")
.arg("--query-gpu=compute_cap")
.arg("--format=csv")
.output()
.context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
.arg("--query-gpu=compute_cap")
.arg("--format=csv")
.output()
.context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
let out = std::str::from_utf8(&out.stdout).context("stdout is not a utf8 string")?;
let mut lines = out.lines();
assert_eq!(
@@ -197,16 +205,19 @@ fn compute_cap() -> Result<usize> {
.next()
.context("missing line in stdout")?
.replace('.', "");
cap.parse::<usize>()
.with_context(|| format!("cannot parse as int {cap}"))?
let cap = cap
.parse::<usize>()
.with_context(|| format!("cannot parse as int {cap}"))?;
println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}");
cap
};
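// At this point compute_cap is either the explicit CUDA_COMPUTE_CAP override
// (as set by the build_cuda CI job above) or the capability nvidia-smi
// reported for the local GPU.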

// Grab available GPU codes from nvcc and select the highest one
let max_nvcc_code = {
let (supported_nvcc_codes, max_nvcc_code) = {
let out = std::process::Command::new("nvcc")
.arg("--list-gpu-code")
.output()
.expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
.arg("--list-gpu-code")
.output()
.expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
let out = std::str::from_utf8(&out.stdout).unwrap();

let out = out.lines().collect::<Vec<&str>>();
@@ -220,31 +231,21 @@
}
}
codes.sort();
if !codes.contains(&compute_cap) {
anyhow::bail!(
"nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}."
);
}
*codes.last().unwrap()
let max_nvcc_code = *codes.last().context("no gpu codes parsed from nvcc")?;
(codes, max_nvcc_code)
};
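// codes was sorted ascending above, so max_nvcc_code is the newest
// architecture this nvcc can emit code for.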

// If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
// then choose the highest gpu code in nvcc
// Check that nvcc supports the asked compute caps
if !supported_nvcc_codes.contains(&compute_cap) {
anyhow::bail!(
"nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {supported_nvcc_codes:?}."
);
}
if compute_cap > max_nvcc_code {
println!(
"cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
);
compute_cap = max_nvcc_code;
anyhow::bail!(
"CUDA compute cap {compute_cap} is higher than the highest gpu code from nvcc {max_nvcc_code}"
);
}

println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");

if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
compute_cap = compute_cap_str
.parse::<usize>()
.with_context(|| format!("cannot parse as usize '{compute_cap_str}'"))?;
println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP");
}
println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
Ok(compute_cap)
}