diff --git a/Sources/LLM/LLM.swift b/Sources/LLM/LLM.swift
index 41c8b2e..11a8120 100644
--- a/Sources/LLM/LLM.swift
+++ b/Sources/LLM/LLM.swift
@@ -34,6 +34,7 @@ open class LLM: ObservableObject {
         }
     }
     
+    public var seed: UInt32
     public var topK: Int32
     public var topP: Float
     public var temp: Float
@@ -74,13 +75,13 @@ open class LLM: ObservableObject {
 #endif
         let model = llama_load_model_from_file(self.path, modelParams)!
         params = llama_context_default_params()
-        let processorCount = UInt32(ProcessInfo().processorCount)
+        let processorCount = Int32(ProcessInfo().processorCount)
         self.maxTokenCount = Int(min(maxTokenCount, llama_n_ctx_train(model)))
-        params.seed = seed
         params.n_ctx = UInt32(self.maxTokenCount)
         params.n_batch = params.n_ctx
         params.n_threads = processorCount
         params.n_threads_batch = processorCount
+        self.seed = seed
         self.topK = topK
         self.topP = topP
         self.temp = temp
@@ -186,22 +187,17 @@ open class LLM: ObservableObject {
     @InferenceActor
     private func predictNextToken() async -> Token {
         guard shouldContinuePredicting else { return model.endToken }
-        let logits = llama_get_logits_ith(context.pointer, batch.n_tokens - 1)!
-        var candidates = (0..
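
The last hunk is truncated, so the patch does not show what replaces the manual logits/candidates sampling in predictNextToken. Since dropping params.seed from llama_context_params corresponds to llama.cpp's llama_sampler refactor, one plausible way the newly stored seed, together with topK, topP, and temp, could be wired into that API is sketched below. This is an assumption for illustration, not code taken from this patch; the sampler and chainParams names are hypothetical.

    // Sketch only, assuming the update targets llama.cpp's llama_sampler chain API.
    // Build the chain once from the stored sampling parameters, then reuse it.
    let chainParams = llama_sampler_chain_default_params()
    let sampler = llama_sampler_chain_init(chainParams)
    llama_sampler_chain_add(sampler, llama_sampler_init_top_k(topK))
    llama_sampler_chain_add(sampler, llama_sampler_init_top_p(topP, 1)) // min_keep = 1
    llama_sampler_chain_add(sampler, llama_sampler_init_temp(temp))
    llama_sampler_chain_add(sampler, llama_sampler_init_dist(seed))     // seed now lives in the sampler, not the context
    // Inside predictNextToken, sample from the last evaluated position,
    // replacing the removed llama_get_logits_ith / candidates code:
    let token = llama_sampler_sample(sampler, context.pointer, batch.n_tokens - 1)

Building the chain once and sampling with a single llama_sampler_sample call per token would also be consistent with the hunk shrinking from 22 lines to 17.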