From 1974dae3fd1b39f428209da1d11de45d9aeed1be Mon Sep 17 00:00:00 2001
From: guinmoon
Date: Mon, 7 Aug 2023 11:47:12 +0300
Subject: [PATCH] rwkv Inference

---
 LLMFarm/AIChatModel.swift          | 4 +++-
 LLMFarm/Settings/AddChatView.swift | 2 +-
 LLMFarm_core/AI.swift              | 2 +-
 README.md                          | 8 +++++---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/LLMFarm/AIChatModel.swift b/LLMFarm/AIChatModel.swift
index ca00d73..0ff07b2 100644
--- a/LLMFarm/AIChatModel.swift
+++ b/LLMFarm/AIChatModel.swift
@@ -96,6 +96,8 @@ final class AIChatModel: ObservableObject {
             self.chat?.loadModel(ModelInference.LLamaInference,contextParams: model_context_param)
         }else if chat_config!["model_inference"] as! String == "gptneox" {
             self.chat?.loadModel(ModelInference.GPTNeoxInference,contextParams: model_context_param)
+        }else if chat_config!["model_inference"] as! String == "rwkv" {
+            self.chat?.loadModel(ModelInference.RWKV,contextParams: model_context_param)
         }else if chat_config!["model_inference"] as! String == "gpt2" {
             self.chat?.loadModel(ModelInference.GPT2,contextParams: model_context_param)
             self.chat?.model.reverse_prompt.append("<|endoftext|>")
@@ -211,7 +213,7 @@ final class AIChatModel: ObservableObject {
         {
             var check = true
             for stop_word in self.chat?.model.reverse_prompt ?? [] {
-                if str.contains(stop_word){
+                if str == stop_word || message.text.contains(stop_word) {
                     self.stop_predict()
                     check = false
                     break
diff --git a/LLMFarm/Settings/AddChatView.swift b/LLMFarm/Settings/AddChatView.swift
index b2c6f8a..5657495 100644
--- a/LLMFarm/Settings/AddChatView.swift
+++ b/LLMFarm/Settings/AddChatView.swift
@@ -83,7 +83,7 @@ struct AddChatView: View {
     let model_setting_templates = get_model_setting_templates()
 
     @State private var model_inference = "llama"
-    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder"]
+    let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder", "rwkv"]
 
     @State private var model_icon: String = "ava0"
     let model_icons = ["ava0","ava1","ava2","ava3","ava4","ava5","ava6","ava7"]
diff --git a/LLMFarm_core/AI.swift b/LLMFarm_core/AI.swift
index b37f038..9ea25d7 100644
--- a/LLMFarm_core/AI.swift
+++ b/LLMFarm_core/AI.swift
@@ -18,7 +18,7 @@ enum ModelInference {
 
 class AI {
 
-    var aiQueue = DispatchQueue(label: "Mia-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
+    var aiQueue = DispatchQueue(label: "LLMFarm-Main", qos: .userInitiated, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil)
 
     //var model: Model!
     var model: Model!
diff --git a/README.md b/README.md
index 0bf3d9d..8fd7cc9 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 LLMFarm is an iOS and MacOS app to work with large language models (LLM). It allows you to load different LLMs with certain parameters.<br>
 Based on [ggml](https://github.com/ggerganov/ggml) and [llama.cpp](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov).
-Also, when creating the application, the source codes from the repository [byroneverson](https://github.com/byroneverson/Mia) were used.
+Also used sources from [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) by [saharNooby](https://github.com/saharNooby).
 
 ## Features
 
@@ -20,8 +20,10 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] Various inferences
 - [x] Metal for llama inference (MacOS and iOS)
 - [x] Model setting templates
-- [ ] Metal for other inference
+- [ ] Sampling from llama.cpp for other inference
+- [ ] Other tokenizers support
 - [ ] Restore context state (now only chat history)
+- [ ] Metal for other inference
 
 ## Inferences
 
@@ -30,7 +32,7 @@ Also, when creating the application, the source codes from the repository [byron
 - [x] [Replit](https://huggingface.co/replit/replit-code-v1-3b)
 - [x] [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2) + [Cerebras](https://arxiv.org/abs/2304.03208)
 - [x] [Starcoder(Santacoder)](https://huggingface.co/bigcode/santacoder)
-- [ ] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv)
+- [x] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv) (20B tokenizer)
 
 ## Getting Started
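
For readers skimming the patch, here is a minimal Swift sketch of the config-to-backend dispatch that the AIChatModel.swift hunk extends. The `ModelInference` cases and the `"model_inference"` config key come from the patch itself; the helper function, the config literal, and everything else in the snippet are illustrative assumptions, not code from LLMFarm.

```swift
import Foundation

// Inference backends referenced in this patch (declared in enum ModelInference
// in LLMFarm_core/AI.swift); only the cases used in the hunk are reproduced here.
enum ModelInference {
    case LLamaInference
    case GPTNeoxInference
    case GPT2
    case RWKV
}

// Illustrative helper (not part of the patch): maps the "model_inference" string
// stored in a chat config to a backend, mirroring the if/else chain in AIChatModel.swift.
func inferenceBackend(for chatConfig: [String: Any]) -> ModelInference? {
    guard let kind = chatConfig["model_inference"] as? String else { return nil }
    switch kind {
    case "llama":   return .LLamaInference
    case "gptneox": return .GPTNeoxInference
    case "gpt2":    return .GPT2
    case "rwkv":    return .RWKV   // the option this patch adds
    default:        return nil     // replit, starcoder, ... omitted in this sketch
    }
}

// A chat configured with "rwkv" now resolves to the RWKV backend.
let config: [String: Any] = ["model_inference": "rwkv"]
if let backend = inferenceBackend(for: config) {
    print("selected backend: \(backend)")   // prints: selected backend: RWKV
} else {
    print("unsupported model_inference value")
}
```

In the app itself, the actual loading then goes through `self.chat?.loadModel(ModelInference.RWKV, contextParams: model_context_param)` as shown in the AIChatModel.swift hunk, and `"rwkv"` also has to appear in the `model_inferences` picker list in AddChatView.swift so the option can be selected when creating a chat.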