rwkv Inference

guinmoon · Aug 7, 2023 · 1974dae · 1974dae
1 parent d535d57
commit 1974dae
Show file tree

Hide file tree

Showing 4 changed files with 10 additions and 6 deletions.
diff --git a/LLMFarm/AIChatModel.swift b/LLMFarm/AIChatModel.swift
@@ -96,6 +96,8 @@ final class AIChatModel: ObservableObject {
                     self.chat?.loadModel(ModelInference.LLamaInference,contextParams: model_context_param)
                 }else if chat_config!["model_inference"] as! String == "gptneox" {
                     self.chat?.loadModel(ModelInference.GPTNeoxInference,contextParams: model_context_param)
+                }else if chat_config!["model_inference"] as! String == "rwkv" {
+                    self.chat?.loadModel(ModelInference.RWKV,contextParams: model_context_param)
                 }else if chat_config!["model_inference"] as! String == "gpt2" {
                     self.chat?.loadModel(ModelInference.GPT2,contextParams: model_context_param)
                     self.chat?.model.reverse_prompt.append("<|endoftext|>")
@@ -211,7 +213,7 @@ final class AIChatModel: ObservableObject {
     {
         var check = true
         for stop_word in self.chat?.model.reverse_prompt ?? [] {
-            if str.contains(stop_word){
+            if str == stop_word || message.text.contains(stop_word) {
                 self.stop_predict()
                 check = false
                 break

diff --git a/LLMFarm/Settings/AddChatView.swift b/LLMFarm/Settings/AddChatView.swift
@@ -83,7 +83,7 @@ struct AddChatView: View {
     let model_setting_templates = get_model_setting_templates()
 
     @State private var model_inference = "llama"
-    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder"]
+    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder", "rwkv"]
 
     @State private var model_icon: String = "ava0"
     let model_icons = ["ava0","ava1","ava2","ava3","ava4","ava5","ava6","ava7"]

diff --git a/LLMFarm_core/AI.swift b/LLMFarm_core/AI.swift
@@ -18,7 +18,7 @@ enum ModelInference {
 
 class AI {
 
-    var aiQueue = DispatchQueue(label: "Mia-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
+    var aiQueue = DispatchQueue(label: "LLMFarm-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
 
     //var model: Model!
     var model: Model!

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 
 LLMFarm is an iOS and MacOS app to work with large language models (LLM). It allows you to load different LLMs with certain parameters.<br>
 Based on [ggml](https://github.com/ggerganov/ggml) and [llama.cpp](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov).
-Also, when creating the application, the source codes from the repository [byroneverson](https://github.com/byroneverson/Mia) were used.
+It also uses source code from [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) by [saharNooby](https://github.com/saharNooby).
 
 ## Features
 
@@ -20,8 +20,10 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] Various inferences
 - [x] Metal for llama inference (MacOS and iOS)
 - [x] Model setting templates
-- [ ] Metal for other inference
+- [ ] Sampling from llama.cpp for other inferences
+- [ ] Support for other tokenizers
 - [ ] Restore context state (now only chat history) 
+- [ ] Metal for other inferences
 
 ## Inferences
 
@@ -30,7 +32,7 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] [Replit](https://huggingface.co/replit/replit-code-v1-3b)
 - [x] [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2) + [Cerebras](https://arxiv.org/abs/2304.03208)
 - [x] [Starcoder(Santacoder)](https://huggingface.co/bigcode/santacoder)
-- [ ] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv)
+- [x] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv) (20B tokenizer)
 
 ## Getting Started