From 1974dae3fd1b39f428209da1d11de45d9aeed1be Mon Sep 17 00:00:00 2001
From: guinmoon
Date: Mon, 7 Aug 2023 11:47:12 +0300
Subject: [PATCH] rwkv Inference

---
 LLMFarm/AIChatModel.swift          | 4 +++-
 LLMFarm/Settings/AddChatView.swift | 2 +-
 LLMFarm_core/AI.swift              | 2 +-
 README.md                          | 8 +++++---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/LLMFarm/AIChatModel.swift b/LLMFarm/AIChatModel.swift
index ca00d73..0ff07b2 100644
--- a/LLMFarm/AIChatModel.swift
+++ b/LLMFarm/AIChatModel.swift
@@ -96,6 +96,8 @@ final class AIChatModel: ObservableObject {
             self.chat?.loadModel(ModelInference.LLamaInference,contextParams: model_context_param)
         }else if chat_config!["model_inference"] as! String == "gptneox" {
             self.chat?.loadModel(ModelInference.GPTNeoxInference,contextParams: model_context_param)
+        }else if chat_config!["model_inference"] as! String == "rwkv" {
+            self.chat?.loadModel(ModelInference.RWKV,contextParams: model_context_param)
         }else if chat_config!["model_inference"] as! String == "gpt2" {
             self.chat?.loadModel(ModelInference.GPT2,contextParams: model_context_param)
             self.chat?.model.reverse_prompt.append("<|endoftext|>")
@@ -211,7 +213,7 @@ final class AIChatModel: ObservableObject {
         {
             var check = true
             for stop_word in self.chat?.model.reverse_prompt ?? [] {
-                if str.contains(stop_word){
+                if str == stop_word || message.text.contains(stop_word) {
                     self.stop_predict()
                     check = false
                     break
diff --git a/LLMFarm/Settings/AddChatView.swift b/LLMFarm/Settings/AddChatView.swift
index b2c6f8a..5657495 100644
--- a/LLMFarm/Settings/AddChatView.swift
+++ b/LLMFarm/Settings/AddChatView.swift
@@ -83,7 +83,7 @@ struct AddChatView: View {
     let model_setting_templates = get_model_setting_templates()
 
     @State private var model_inference = "llama"
-    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder"]
+    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder", "rwkv"]
 
     @State private var model_icon: String = "ava0"
     let model_icons = ["ava0","ava1","ava2","ava3","ava4","ava5","ava6","ava7"]
diff --git a/LLMFarm_core/AI.swift b/LLMFarm_core/AI.swift
index b37f038..9ea25d7 100644
--- a/LLMFarm_core/AI.swift
+++ b/LLMFarm_core/AI.swift
@@ -18,7 +18,7 @@ enum ModelInference {
 
 class AI {
 
-    var aiQueue = DispatchQueue(label: "Mia-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
+    var aiQueue = DispatchQueue(label: "LLMFarm-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
 
     //var model: Model!
     var model: Model!
diff --git a/README.md b/README.md
index 0bf3d9d..8fd7cc9 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 LLMFarm is an iOS and MacOS app to work with large language models (LLM). It allows you to load different LLMs with certain parameters.<br>
 Based on [ggml](https://github.com/ggerganov/ggml) and [llama.cpp](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov).
-Also, when creating the application, the source codes from the repository [byroneverson](https://github.com/byroneverson/Mia) were used.
+Also used sources from [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) by [saharNooby](https://github.com/saharNooby).
 
 ## Features
 
@@ -20,8 +20,10 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] Various inferences
 - [x] Metal for llama inference (MacOS and iOS)
 - [x] Model setting templates
-- [ ] Metal for other inference
+- [ ] Sampling from llama.cpp for other inference
+- [ ] Other tokenizers support
 - [ ] Restore context state (now only chat history)
+- [ ] Metal for other inference
 
 ## Inferences
 
@@ -30,7 +32,7 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] [Replit](https://huggingface.co/replit/replit-code-v1-3b)
 - [x] [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2) + [Cerebras](https://arxiv.org/abs/2304.03208)
 - [x] [Starcoder(Santacoder)](https://huggingface.co/bigcode/santacoder)
-- [ ] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv)
+- [x] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv) (20B tokenizer)
 
 ## Getting Started
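
For readers skimming the patch, here is a minimal Swift sketch of the config-to-backend dispatch that the AIChatModel.swift hunk extends. The `ModelInference` cases and the `"model_inference"` config key come from the patch itself; the helper function, the config literal, and everything else in the snippet are illustrative assumptions, not code from LLMFarm.

```swift
import Foundation

// Inference backends referenced in this patch (declared in enum ModelInference
// in LLMFarm_core/AI.swift); only the cases used in the hunk are reproduced here.
enum ModelInference {
    case LLamaInference
    case GPTNeoxInference
    case GPT2
    case RWKV
}

// Illustrative helper (not part of the patch): maps the "model_inference" string
// stored in a chat config to a backend, mirroring the if/else chain in AIChatModel.swift.
func inferenceBackend(for chatConfig: [String: Any]) -> ModelInference? {
    guard let kind = chatConfig["model_inference"] as? String else { return nil }
    switch kind {
    case "llama":   return .LLamaInference
    case "gptneox": return .GPTNeoxInference
    case "gpt2":    return .GPT2
    case "rwkv":    return .RWKV   // the option this patch adds
    default:        return nil     // replit, starcoder, ... omitted in this sketch
    }
}

// A chat configured with "rwkv" now resolves to the RWKV backend.
let config: [String: Any] = ["model_inference": "rwkv"]
if let backend = inferenceBackend(for: config) {
    print("selected backend: \(backend)")   // prints: selected backend: RWKV
} else {
    print("unsupported model_inference value")
}
```

In the app itself, the actual loading then goes through `self.chat?.loadModel(ModelInference.RWKV, contextParams: model_context_param)` as shown in the AIChatModel.swift hunk, and `"rwkv"` also has to appear in the `model_inferences` picker list in AddChatView.swift so the option can be selected when creating a chat.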