diff --git a/Cargo.toml b/Cargo.toml index 119966f..befdbdb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rusty-ggml" -version = "0.0.7" +version = "0.0.8" description = "Idiomatic bindings for the GGML library (pre-alpha)" repository = "https://github.com/KerfuffleV2/rusty-ggml" keywords = ["deep-learning", "machine-learning", "tensor", "ggml", "ml"] @@ -13,14 +13,15 @@ default = ["ggml-sys-bleedingedge/use_cmake"] no_k_quants = ["ggml-sys-bleedingedge/no_k_quants"] no_accelerate = ["ggml-sys-bleedingedge/no_accelerate"] cublas = ["ggml-sys-bleedingedge/cublas"] +hipblas = ["ggml-sys-bleedingedge/hipblas"] clblast = ["ggml-sys-bleedingedge/clblast"] openblas = ["ggml-sys-bleedingedge/openblas"] metal = ["ggml-sys-bleedingedge/metal"] [dependencies] -ggml-sys-bleedingedge = "=2306220059.0.0" +ggml-sys-bleedingedge = "=2309021811.0.0" anyhow = "1" thiserror = "1" num-traits = "0.2" -num-derive="0.3" +num-derive="0.4" bytemuck = { version = "1", features = ["extern_crate_alloc"] } diff --git a/README.md b/README.md index b916526..2b3fd5e 100644 --- a/README.md +++ b/README.md @@ -13,4 +13,12 @@ See: Not suitable for general use. Consider this to be pre-alpha code. -Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/600718ebee029aa684c4a6abbe035d21283a446c/smolrwkv/src/ggml/graph.rs +**`v0.0.8` Warning**: Keeping this in sync with recent GGML changes has lagged. It compiles and seems to work but there might be weird stuff I haven't caught. + +**Note**: There are special considerations when using GPU features like `cublas`, `hipblas`. 
See the `ggml-sys-bleedingedge` repo or crate documentation for more information. + +Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/189915ec68b28d057b440f75803d3d056e150733/smolrwkv/src/ggml/graph.rs + +## Related + +For your token sampling needs see https://github.com/KerfuffleV2/llm-samplers ( https://crates.io/crates/llm-samplers ) diff --git a/src/context.rs b/src/context.rs index fbf8461..0125dce 100644 --- a/src/context.rs +++ b/src/context.rs @@ -64,6 +64,9 @@ pub(crate) struct IContext { pub(crate) failed: Option>, } +// FIXME: YOLO? It's an internal struct and only lives in an Arc. +unsafe impl Send for IContext {} + impl Drop for IContext { // Since `IContext` lives inside an `Arc` this will only happen // when the very last instance of the `Arc` is dropped. @@ -405,8 +408,9 @@ impl GContext { /// Runs the supplied graph using this context. pub fn compute(&self, graph: &mut GGraph) -> Result<()> { ensure!(!self.no_alloc, GContextError::NoAlloc); + let n_threads = graph.n_threads; self.with_icontext_infallible(|ictx| unsafe { - gg::ggml_graph_compute(ictx.gptr(), &mut graph.0) + gg::ggml_graph_compute_with_ctx(ictx.gptr(), &mut graph.graph, n_threads as i32) }) } @@ -416,15 +420,16 @@ impl GContext { } } -#[repr(transparent)] -pub struct GGraph(gg::ggml_cgraph); +pub struct GGraph { + n_threads: usize, + graph: gg::ggml_cgraph, +} impl GGraph { /// Create a new computation graph with the specified number of threads. pub fn new(n_threads: usize) -> Self { - let mut graph = unsafe { std::mem::zeroed::() }; - graph.n_threads = n_threads as i32; - Self(graph) + let graph = unsafe { std::mem::zeroed::() }; + Self { n_threads, graph } } /// Register a tensor to be processed when the graph is computed. 
@@ -439,7 +444,7 @@ impl GGraph { tensor .as_ref() .with_tensor_infallible(|_ctx, _ictx, tptr| unsafe { - gg::ggml_build_forward_expand(&mut self.0, tptr) + gg::ggml_build_forward_expand(&mut self.graph, tptr) }) } } diff --git a/src/gtensor/binary_ops.rs b/src/gtensor/binary_ops.rs index 750dee4..82aa6a0 100644 --- a/src/gtensor/binary_ops.rs +++ b/src/gtensor/binary_ops.rs @@ -204,14 +204,12 @@ where /// # !!!! FIXME !!!! /// # !!!! FIXME !!!! /// # !!!! FIXME !!!! - pub fn conv_1d< - const STRIDE: usize, - const RDIMS: usize, - const ODIMS: usize, - T: AsRef>, - >( + pub fn conv_1d>>( &self, rhs: T, + s0: usize, + p0: usize, + d0: usize, ) -> Self where Dim: DimValid, @@ -220,8 +218,6 @@ where DimPair: DimLt, DimPair: DimGtE, DimPair: DimEq, - DimPair: DimGtE, - DimPair: DimLt, { let rmd = rhs.as_ref().md.clone(); self.new_binary(rhs, |ctx, ictx, ltptr, rtptr| { @@ -234,11 +230,7 @@ where let mr = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::F32, shp) .fit_or_die()?; Ok((mr, unsafe { - if STRIDE == 1 { - gg::ggml_conv_1d_s1_ph(ictx.gptr(), ltptr, rtptr) - } else { - gg::ggml_conv_1d_s2_ph(ictx.gptr(), ltptr, rtptr) - } + gg::ggml_conv_1d(ictx.gptr(), ltptr, rtptr, s0 as i32, p0 as i32, d0 as i32) })) }) } diff --git a/src/gtensor/matmul.rs b/src/gtensor/matmul.rs index bb71e24..d2fcf45 100644 --- a/src/gtensor/matmul.rs +++ b/src/gtensor/matmul.rs @@ -109,7 +109,14 @@ macro_rules! mk_gmulmatinstances { GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, shp) .fit_or_die()?; unsafe { - Ok((mr, gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr))) + let t = gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr); + // FIXME: Horrible hack to pretend mul_mat has the old non-broadcasting behavior. 
+ let real_dims = (*t).ne.iter().take_while(|i| **i != 1).collect::>().len(); + if real_dims != $o { + Err(GTensorError::InvalidOperation)?; + } + (*t).n_dims = $o; + Ok((mr, t)) } }) } diff --git a/src/gtensor/tensor.rs b/src/gtensor/tensor.rs index 7da448e..0a0bfe9 100644 --- a/src/gtensor/tensor.rs +++ b/src/gtensor/tensor.rs @@ -66,7 +66,11 @@ where pub(crate) fn from_ptr(tp: NonNull) -> Self { let (tr, tp) = (unsafe { tp.as_ref() }, tp.as_ptr()); let (op, typ, shape) = { - assert_eq!(DIMS, tr.n_dims as usize, "Unexpected number of dimensions!"); + assert_eq!( + DIMS, tr.n_dims as usize, + "Unexpected number of dimensions {:?}!", + tr.ne + ); let mut shp = [0; DIMS]; shp.iter_mut() .zip(tr.ne[0..DIMS].iter()) diff --git a/src/gtensor/unary_ops.rs b/src/gtensor/unary_ops.rs index 186cf06..33bf0f9 100644 --- a/src/gtensor/unary_ops.rs +++ b/src/gtensor/unary_ops.rs @@ -112,24 +112,6 @@ where /// ``` [neg, ggml_neg], - /// Perform LayerNorm operation on tensor `A`. - /// Returns a new tensor. - /// - /// `a.norm()` - /// - /// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md) - /// for more information and comparison with the [GTensor::rms_norm] function. - [rms_norm, ggml_rms_norm], - - /// Perform RMSNorm operation on tensor `A`. - /// Returns a new tensor. - /// - /// `a.rms_norm()` - /// - /// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md) - /// for more information and comparison with the [GTensor::norm] function. - [norm, ggml_norm], - /// Elementwise step operation on tensor `A`. /// Returns a new tensor. /// @@ -229,6 +211,38 @@ where [soft_max, ggml_soft_max], } + /// Perform LayerNorm operation on tensor `A`. + /// Returns a new tensor. 
+ /// + /// `a.norm(eps)` + /// + /// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md) + /// for more information and comparison with the [GTensor::rms_norm] function. + pub fn norm(&self, eps: f32) -> Self { + self.new_unary(|ctx, ictx, tptr| { + let mr = + GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, self.md.shape) + .fit_or_die()?; + unsafe { Ok((mr, gg::ggml_norm(ictx.gptr(), tptr, eps))) } + }) + } + + /// Perform RMSNorm operation on tensor `A`. + /// Returns a new tensor. + /// + /// `a.rms_norm(eps)` + /// + /// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md) + /// for more information and comparison with the [GTensor::norm] function. + pub fn rms_norm(&self, eps: f32) -> Self { + self.new_unary(|ctx, ictx, tptr| { + let mr = + GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, self.md.shape) + .fit_or_die()?; + unsafe { Ok((mr, gg::ggml_rms_norm(ictx.gptr(), tptr, eps))) } + }) + } + /// Elementwise `mean` of tensor `A`. /// Returns a new tensor. /// @@ -384,7 +398,40 @@ where /// # !!!! FIXME !!!! /// # !!!! FIXME !!!! /// # !!!! FIXME !!!! - pub fn rope(self, n_past: usize, n_dims: usize, mode: usize) -> Self { + pub fn rope(self, n_past: usize, n_dims: usize, mode: usize, n_ctx: usize) -> Self { + self.new_unary(|ctx, ictx, tptr| { + // Creates a view plus a i32 tensor with three items. + let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []); + let mr2 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::I32, [3]); + let mr = (mr1 + mr2).fit_or_die()?; + unsafe { + Ok(( + mr, + gg::ggml_rope( + ictx.gptr(), + tptr, + n_past as i32, + n_dims as i32, + mode as i32, + n_ctx as i32, + ), + )) + } + }) + } + + /// # !!!! FIXME !!!! + /// # !!!! FIXME !!!! + /// # !!!! FIXME !!!! 
+ pub fn rope_custom( + self, + n_past: usize, + n_dims: usize, + mode: usize, + n_ctx: usize, + freq_base: f32, + freq_scale: f32, + ) -> Self { self.new_unary(|ctx, ictx, tptr| { // Creates a view plus a i32 tensor with three items. let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []); @@ -393,7 +440,16 @@ where unsafe { Ok(( mr, - gg::ggml_rope(ictx.gptr(), tptr, n_past as i32, n_dims as i32, mode as i32), + gg::ggml_rope_custom( + ictx.gptr(), + tptr, + n_past as i32, + n_dims as i32, + mode as i32, + n_ctx as i32, + freq_base, + freq_scale, + ), )) } })