From 05a533d0153d710c26e5c81f58a638aee33085cb Mon Sep 17 00:00:00 2001 From: guinmoon Date: Tue, 18 Jul 2023 18:11:10 +0300 Subject: [PATCH] add model settings templates fix model_free --- LLMFarm.xcodeproj/project.pbxproj | 51 ++++++++++- LLMFarm/AIChatModel.swift | 17 ++-- LLMFarm/FileHelper.swift | 60 ++++++++++++- LLMFarm/LLMFarmApp.swift | 1 + LLMFarm/ModelSettingsTemplate.swift | 23 ++++- LLMFarm/Settings/AddChatView.swift | 86 +++++++++++++------ .../AI Dungeon 2 Classic.json | 11 +++ .../model_setting_templates/Replit 3B.json | 12 +++ .../cerebras 2.7b.json | 13 +++ .../model_setting_templates/dolly v2 3b.json | 10 +++ .../model_setting_templates/openllama 3b.json | 11 +++ .../model_setting_templates/orca mini 3b.json | 12 +++ .../rp-incite-base-v1-3b.json | 12 +++ LLMFarm/model_setting_templates/saiga 7B.json | 14 +++ .../model_setting_templates/santacoder.json | 13 +++ .../stablelm-tuned-alpha-3b.json | 11 +++ LLMFarm_core/AI.swift | 27 +++--- ModelTest/main.swift | 24 +++--- README.md | 3 +- .../Sources/llmfarm_core/gptneox/gptneox.cpp | 32 ++----- .../Sources/llmfarm_core/replit/replit.cpp | 25 +----- .../llmfarm_core/starcoder/starcoder.cpp | 19 +--- 22 files changed, 362 insertions(+), 125 deletions(-) create mode 100644 LLMFarm/model_setting_templates/AI Dungeon 2 Classic.json create mode 100644 LLMFarm/model_setting_templates/Replit 3B.json create mode 100644 LLMFarm/model_setting_templates/cerebras 2.7b.json create mode 100644 LLMFarm/model_setting_templates/dolly v2 3b.json create mode 100644 LLMFarm/model_setting_templates/openllama 3b.json create mode 100644 LLMFarm/model_setting_templates/orca mini 3b.json create mode 100644 LLMFarm/model_setting_templates/rp-incite-base-v1-3b.json create mode 100644 LLMFarm/model_setting_templates/saiga 7B.json create mode 100644 LLMFarm/model_setting_templates/santacoder.json create mode 100644 LLMFarm/model_setting_templates/stablelm-tuned-alpha-3b.json diff --git a/LLMFarm.xcodeproj/project.pbxproj 
b/LLMFarm.xcodeproj/project.pbxproj index 90f4a1a..601c119 100644 --- a/LLMFarm.xcodeproj/project.pbxproj +++ b/LLMFarm.xcodeproj/project.pbxproj @@ -11,6 +11,8 @@ 150CAD022A3CE7B30015CD66 /* GPTNeox.swift in Sources */ = {isa = PBXBuildFile; fileRef = 150CAD002A3CE7B30015CD66 /* GPTNeox.swift */; }; 15141E162A6438AE0060E767 /* Starcoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15141E152A6438AE0060E767 /* Starcoder.swift */; }; 15141E172A6438AE0060E767 /* Starcoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15141E152A6438AE0060E767 /* Starcoder.swift */; }; + 15141E5C2A651F8C0060E767 /* model_setting_templates in Resources */ = {isa = PBXBuildFile; fileRef = 15141E5B2A651F8C0060E767 /* model_setting_templates */; }; + 15141E712A65799D0060E767 /* ModelSettingsTemplate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15141E702A65799D0060E767 /* ModelSettingsTemplate.swift */; }; 155FBB9F2A460566004DD5AE /* GPTBase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBB9E2A460566004DD5AE /* GPTBase.swift */; }; 155FBBA02A460566004DD5AE /* GPTBase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBB9E2A460566004DD5AE /* GPTBase.swift */; }; 155FBBA22A48B5C1004DD5AE /* Replit.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBBA12A48B5C0004DD5AE /* Replit.swift */; }; @@ -73,6 +75,16 @@ /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ + 15141E6B2A6577160060E767 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; 15E415CF2A18A2970057A15E /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 12; @@ -87,6 +99,18 @@ /* Begin PBXFileReference section */ 150CAD002A3CE7B30015CD66 /* GPTNeox.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GPTNeox.swift; sourceTree = ""; }; 
15141E152A6438AE0060E767 /* Starcoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Starcoder.swift; sourceTree = ""; }; + 15141E362A6488A60060E767 /* Replit 3B.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "Replit 3B.json"; sourceTree = ""; }; + 15141E372A6488A60060E767 /* dolly v2 3b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "dolly v2 3b.json"; sourceTree = ""; }; + 15141E382A6488A60060E767 /* orca mini 3b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "orca mini 3b.json"; sourceTree = ""; }; + 15141E392A6488A60060E767 /* openllama 3b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "openllama 3b.json"; sourceTree = ""; }; + 15141E3A2A6488A60060E767 /* rp-incite-base-v1-3b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "rp-incite-base-v1-3b.json"; sourceTree = ""; }; + 15141E3B2A6488A60060E767 /* santacoder.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = santacoder.json; sourceTree = ""; }; + 15141E3C2A6488A60060E767 /* saiga 7B.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "saiga 7B.json"; sourceTree = ""; }; + 15141E3D2A6488A60060E767 /* cerebras 2.7b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "cerebras 2.7b.json"; sourceTree = ""; }; + 15141E3F2A6488A70060E767 /* stablelm-tuned-alpha-3b.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "stablelm-tuned-alpha-3b.json"; sourceTree = ""; }; + 15141E402A6488A70060E767 /* AI Dungeon 2 Classic.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "AI Dungeon 2 Classic.json"; sourceTree = ""; }; + 15141E5B2A651F8C0060E767 /* model_setting_templates */ = 
{isa = PBXFileReference; lastKnownFileType = folder; name = model_setting_templates; path = LLMFarm/model_setting_templates; sourceTree = ""; }; + 15141E702A65799D0060E767 /* ModelSettingsTemplate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelSettingsTemplate.swift; sourceTree = ""; }; 155FBB9E2A460566004DD5AE /* GPTBase.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GPTBase.swift; sourceTree = ""; }; 155FBBA12A48B5C0004DD5AE /* Replit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Replit.swift; sourceTree = ""; }; 1560D6162A2D1A3D00918330 /* AddChatView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AddChatView.swift; sourceTree = ""; }; @@ -171,6 +195,23 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 15141E332A64875F0060E767 /* model_setting_templates */ = { + isa = PBXGroup; + children = ( + 15141E402A6488A70060E767 /* AI Dungeon 2 Classic.json */, + 15141E3D2A6488A60060E767 /* cerebras 2.7b.json */, + 15141E372A6488A60060E767 /* dolly v2 3b.json */, + 15141E392A6488A60060E767 /* openllama 3b.json */, + 15141E382A6488A60060E767 /* orca mini 3b.json */, + 15141E362A6488A60060E767 /* Replit 3B.json */, + 15141E3A2A6488A60060E767 /* rp-incite-base-v1-3b.json */, + 15141E3C2A6488A60060E767 /* saiga 7B.json */, + 15141E3B2A6488A60060E767 /* santacoder.json */, + 15141E3F2A6488A70060E767 /* stablelm-tuned-alpha-3b.json */, + ); + path = model_setting_templates; + sourceTree = ""; + }; 1567138A2A5B13C4002BE054 /* metal */ = { isa = PBXGroup; children = ( @@ -252,6 +293,7 @@ 15F2107F2A191B190021F414 /* LLMFarm */ = { isa = PBXGroup; children = ( + 15141E332A64875F0060E767 /* model_setting_templates */, 15A3EDC42A2121D90030AF5A /* Chats */, 15A3EDC12A2121D90030AF5A /* Contacts */, 15A3EDBD2A2121D90030AF5A /* Settings */, @@ -269,6 +311,7 @@ 15A3EDBB2A2121C70030AF5A /* ContentView.swift */, 
15A3EDD02A21266D0030AF5A /* SearchView.swift */, 156A48D12A52B65C007DF519 /* Orientation_helper.swift */, + 15141E702A65799D0060E767 /* ModelSettingsTemplate.swift */, ); path = LLMFarm; sourceTree = ""; @@ -293,6 +336,7 @@ E624E1F52958ECC800996CD1 = { isa = PBXGroup; children = ( + 15141E5B2A651F8C0060E767 /* model_setting_templates */, 1567138A2A5B13C4002BE054 /* metal */, E6024EC229EA7BEB001FE9B0 /* Packages */, E624E2102958F21E00996CD1 /* Info.plist */, @@ -359,6 +403,7 @@ 15F2107A2A191B190021F414 /* Sources */, 15F2107B2A191B190021F414 /* Frameworks */, 15F2107C2A191B190021F414 /* Resources */, + 15141E6B2A6577160060E767 /* Embed Frameworks */, ); buildRules = ( ); @@ -418,6 +463,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + 15141E5C2A651F8C0060E767 /* model_setting_templates in Resources */, 1567138D2A5B155D002BE054 /* ggml-metal.metal in Resources */, 15F210892A191B1B0021F414 /* Preview Assets.xcassets in Resources */, 15F210852A191B1B0021F414 /* Assets.xcassets in Resources */, @@ -470,6 +516,7 @@ 15F210A32A1921E40021F414 /* Math.swift in Sources */, 155FBBA02A460566004DD5AE /* GPTBase.swift in Sources */, 15A3EDCB2A2121D90030AF5A /* ModelsView.swift in Sources */, + 15141E712A65799D0060E767 /* ModelSettingsTemplate.swift in Sources */, 15A3EDC92A2121D90030AF5A /* SettingsItem.swift in Sources */, 15F210A42A1921E40021F414 /* Utils.swift in Sources */, 15A480002A40D2A300B72DC3 /* GPT2.swift in Sources */, @@ -560,7 +607,7 @@ LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; MACOSX_DEPLOYMENT_TARGET = 13.0; - MARKETING_VERSION = 0.2.2; + MARKETING_VERSION = 0.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.guinmoon.LLMFarm; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; @@ -607,7 +654,7 @@ LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; 
MACOSX_DEPLOYMENT_TARGET = 13.0; - MARKETING_VERSION = 0.2.2; + MARKETING_VERSION = 0.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.guinmoon.LLMFarm; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; diff --git a/LLMFarm/AIChatModel.swift b/LLMFarm/AIChatModel.swift index d556418..2acc14f 100644 --- a/LLMFarm/AIChatModel.swift +++ b/LLMFarm/AIChatModel.swift @@ -88,17 +88,17 @@ final class AIChatModel: ObservableObject { if (chat_config!["use_metal"] != nil){ model_context_param.use_metal = chat_config!["use_metal"] as! Bool } - try? self.chat?.loadModel(ModelInference.LLamaInference,contextParams: model_context_param) + self.chat?.loadModel(ModelInference.LLamaInference,contextParams: model_context_param) }else if chat_config!["model_inference"] as! String == "gptneox" { - try? self.chat?.loadModel(ModelInference.GPTNeoxInference,contextParams: model_context_param) + self.chat?.loadModel(ModelInference.GPTNeoxInference,contextParams: model_context_param) }else if chat_config!["model_inference"] as! String == "gpt2" { - try? self.chat?.loadModel(ModelInference.GPT2,contextParams: model_context_param) + self.chat?.loadModel(ModelInference.GPT2,contextParams: model_context_param) self.chat?.model.stop_words.append("<|endoftext|>") }else if chat_config!["model_inference"] as! String == "replit" { - try? self.chat?.loadModel(ModelInference.Replit,contextParams: model_context_param) + self.chat?.loadModel(ModelInference.Replit,contextParams: model_context_param) self.chat?.model.stop_words.append("<|endoftext|>") }else if chat_config!["model_inference"] as! String == "starcoder" { - try? 
self.chat?.loadModel(ModelInference.Starcoder,contextParams: model_context_param) + self.chat?.loadModel(ModelInference.Starcoder,contextParams: model_context_param) self.chat?.model.stop_words.append("<|endoftext|>") } } @@ -107,9 +107,9 @@ final class AIChatModel: ObservableObject { model_lowercase.contains("alpaca") || model_lowercase.contains("vic") || model_lowercase.contains("orca")){ - try? self.chat?.loadModel(ModelInference.LLamaInference) + self.chat?.loadModel(ModelInference.LLamaInference) }else{ - try? self.chat?.loadModel(ModelInference.GPTNeoxInference) + self.chat?.loadModel(ModelInference.GPTNeoxInference) } } if self.chat?.model.context == nil{ @@ -120,7 +120,8 @@ final class AIChatModel: ObservableObject { print(model_sample_param) print(model_context_param) //Set prompt model if in config or try to set promt format by filename - if (chat_config!["prompt_format"] != nil && chat_config!["prompt_format"]! as! String != "auto"){ + if (chat_config!["prompt_format"] != nil && chat_config!["prompt_format"]! as! String != "auto" + && chat_config!["prompt_format"]! as! String != "{{prompt}}"){ self.chat?.model.custom_prompt_format = chat_config!["prompt_format"]! as! String self.chat?.model.promptFormat = .Custom } diff --git a/LLMFarm/FileHelper.swift b/LLMFarm/FileHelper.swift index 195d684..990b851 100644 --- a/LLMFarm/FileHelper.swift +++ b/LLMFarm/FileHelper.swift @@ -23,10 +23,68 @@ func get_avalible_models() -> [String]?{ return res } catch { // failed to read directory – bad permissions, perhaps? + print(error) } return res } +func parse_model_setting_template(template_path:String) -> ModelSettingsTemplate{ + var tmp_template:ModelSettingsTemplate = ModelSettingsTemplate() + do{ + let data = try Data(contentsOf: URL(fileURLWithPath: template_path), options: .mappedIfSafe) + let jsonResult = try JSONSerialization.jsonObject(with: data, options: .mutableLeaves) + let jsonResult_dict = jsonResult as? 
Dictionary + if (jsonResult_dict!["template_name"] != nil){ + tmp_template.template_name = jsonResult_dict!["template_name"] as! String + }else{ + tmp_template.template_name = (template_path as NSString).lastPathComponent + } + if (jsonResult_dict!["model_inference"] != nil){ + tmp_template.inference = jsonResult_dict!["model_inference"] as! String + } + if (jsonResult_dict!["prompt_format"] != nil){ + tmp_template.prompt_format = jsonResult_dict!["prompt_format"] as! String + } + if (jsonResult_dict!["n_batch"] != nil){ + tmp_template.n_batch = jsonResult_dict!["n_batch"] as! Int32 + } + if (jsonResult_dict!["temp"] != nil){ + tmp_template.temp = Float(jsonResult_dict!["temp"] as! Double) + } + if (jsonResult_dict!["top_k"] != nil){ + tmp_template.top_k = jsonResult_dict!["top_k"] as! Int32 + } + if (jsonResult_dict!["top_p"] != nil){ + tmp_template.top_p = Float(jsonResult_dict!["top_p"] as! Double) + } + if (jsonResult_dict!["repeat_penalty"] != nil){ + tmp_template.repeat_penalty = Float(jsonResult_dict!["repeat_penalty"] as! Double) + } + if (jsonResult_dict!["repeat_last_n"] != nil){ + tmp_template.repeat_last_n = jsonResult_dict!["repeat_last_n"] as! Int32 + } + } + catch { + print(error) + } + return tmp_template +} + +func get_model_setting_templates() -> [ModelSettingsTemplate]{ + var model_setting_templates: [ModelSettingsTemplate] = [] + model_setting_templates.append(ModelSettingsTemplate()) + do{ + var templates_path=Bundle.main.resourcePath!.appending("/model_setting_templates") + let tenplate_files = try FileManager.default.contentsOfDirectory(atPath: templates_path) + for tenplate_file in tenplate_files { + model_setting_templates.append(parse_model_setting_template(template_path: templates_path+"/"+tenplate_file)) + } + } + catch { + print(error) + } + return model_setting_templates +} public func get_chat_info(_ chat_fname:String) -> Dictionary? { do { @@ -39,7 +97,7 @@ public func get_chat_info(_ chat_fname:String) -> Dictionary? 
let jsonResult_dict = jsonResult as? Dictionary return jsonResult_dict } catch { - + print(error) } return nil } diff --git a/LLMFarm/LLMFarmApp.swift b/LLMFarm/LLMFarmApp.swift index c6e241a..e5d5af9 100644 --- a/LLMFarm/LLMFarmApp.swift +++ b/LLMFarm/LLMFarmApp.swift @@ -26,6 +26,7 @@ struct LLMFarmApp: App { aiChatModel.stop_predict() } + var body: some Scene { WindowGroup { diff --git a/LLMFarm/ModelSettingsTemplate.swift b/LLMFarm/ModelSettingsTemplate.swift index 74b4a30..0030b6d 100644 --- a/LLMFarm/ModelSettingsTemplate.swift +++ b/LLMFarm/ModelSettingsTemplate.swift @@ -1,8 +1,29 @@ // -// model_settings_stemplate.swift +// ModelSettingsTemplate.swift // LLMFarm // // Created by guinmoon on 17.07.2023. // import Foundation + +struct ModelSettingsTemplate: Hashable { + var template_name: String = "Custom" + var inference = "llama" + var context: Int32 = 2048 + var n_batch: Int32 = 512 + var temp: Float = 0.9 + var top_k: Int32 = 40 + var top_p: Float = 0.95 + var repeat_last_n: Int32 = 64 + var repeat_penalty: Float = 1.174 + var prompt_format: String = "{{prompt}}" + + func hash(into hasher: inout Hasher) { + hasher.combine(template_name) + } + + static func == (lhs: ModelSettingsTemplate, rhs: ModelSettingsTemplate) -> Bool { + return lhs.template_name == rhs.template_name + } +} diff --git a/LLMFarm/Settings/AddChatView.swift b/LLMFarm/Settings/AddChatView.swift index c83be96..3fe60d9 100644 --- a/LLMFarm/Settings/AddChatView.swift +++ b/LLMFarm/Settings/AddChatView.swift @@ -68,7 +68,7 @@ struct AddChatView: View { @State private var model_top_p: Float = 0.95 @State private var model_repeat_last_n: Int32 = 64 @State private var model_repeat_penalty: Float = 1.1 - @State private var prompt_format: String = "auto" + @State private var prompt_format: String = "{{prompt}}" @State private var numberOfThreads: Int32 = 0 @State private var use_metal: Bool = false @State private var isImporting: Bool = false @@ -77,8 +77,11 @@ struct AddChatView: View { private 
var chat_name: String = "" let bin_type = UTType(tag: "bin", tagClass: .filenameExtension, conformingTo: nil) - @State private var model_inference = "auto" - let model_inferences = ["auto","gptneox", "llama", "gpt2", "replit", "starcoder"] + @State private var model_settings_template:ModelSettingsTemplate = ModelSettingsTemplate() + let model_setting_templates = get_model_setting_templates() + + @State private var model_inference = "llama" + let model_inferences = ["gptneox", "llama", "gpt2", "replit", "starcoder"] @State private var model_icon: String = "ava0" let model_icons = ["ava0","ava1","ava2","ava3","ava4","ava5","ava6","ava7"] @@ -141,9 +144,21 @@ struct AddChatView: View { } } + func apply_setting_template(template:ModelSettingsTemplate){ + model_inference = template.inference + prompt_format = template.prompt_format + model_context = template.context + model_n_batch = template.n_batch + model_temp = template.temp + model_top_k = template.top_k + model_top_p = template.top_p + model_repeat_penalty = template.repeat_penalty + model_repeat_last_n = template.repeat_last_n + } + var body: some View { ZStack{ -// Color("color_bg").edgesIgnoringSafeArea(.all) + // Color("color_bg").edgesIgnoringSafeArea(.all) VStack{ HStack{ @@ -258,17 +273,34 @@ struct AddChatView: View { .padding(.top, 8) HStack{ - VStack { - Picker("inference", selection: $model_inference) { - ForEach(model_inferences, id: \.self) { - Text($0) - } + Text("Settings template:") + .frame(maxWidth: .infinity, alignment: .leading) + Picker("", selection: $model_settings_template) { + ForEach(model_setting_templates, id: \.self) { template in + Text(template.template_name).tag(template) } - .pickerStyle(.menu) } - .frame(maxWidth: .infinity, alignment: .trailing) + .onChange(of: model_settings_template) { tmpl in + apply_setting_template(template:model_settings_template) + } + .pickerStyle(.menu) } - .padding() + .padding(.horizontal) + .padding(.top, 8) + + HStack{ + Text("Inference:") + 
.frame(maxWidth: .infinity, alignment: .leading) + Picker("", selection: $model_inference) { + ForEach(model_inferences, id: \.self) { + Text($0) + } + } + .pickerStyle(.menu) + // + } + .padding(.horizontal) + .padding(.top, 8) HStack { Toggle("Use Metal", isOn: $use_metal) @@ -279,12 +311,16 @@ struct AddChatView: View { VStack { Text("Prompt format:") .frame(maxWidth: .infinity, alignment: .leading) - TextField("size..", text: $prompt_format) + TextField("size..", text: $prompt_format, axis: .vertical) + .lineLimit(2) + + .textFieldStyle(.roundedBorder) .frame( alignment: .leading) - .multilineTextAlignment(.trailing) - .textFieldStyle(.plain) + // .multilineTextAlignment(.trailing) + // .textFieldStyle(.plain) } - .padding() + .padding(.horizontal) + .padding(.top, 8) Divider() .padding(.top, 8) @@ -369,7 +405,7 @@ struct AddChatView: View { .padding(.horizontal) HStack { - Text("Repean last N:") + Text("Repeat last N:") .frame(maxWidth: 75, alignment: .leading) TextField("count..", value: $model_repeat_last_n, format:.number) .frame( alignment: .leading) @@ -395,8 +431,10 @@ struct AddChatView: View { .padding(.horizontal) HStack{ + Text("Icon:") + .frame(maxWidth: .infinity, alignment: .leading) VStack { - Picker("icon", selection: $model_icon) { + Picker("", selection: $model_icon) { ForEach(model_icons, id: \.self) { Text($0) } @@ -417,9 +455,9 @@ struct AddChatView: View { } // -//struct AddChatView_Previews: PreviewProvider { -// static var previews: some View { -// AddChatView(add_chat_dialog: .constant(true),edit_chat_dialog:.constant(false)) -// .preferredColorScheme(.dark) -// } -//} +struct AddChatView_Previews: PreviewProvider { + static var previews: some View { + AddChatView(add_chat_dialog: .constant(true),edit_chat_dialog:.constant(false),renew_chat_list: .constant({})) + .preferredColorScheme(.dark) + } +} diff --git a/LLMFarm/model_setting_templates/AI Dungeon 2 Classic.json b/LLMFarm/model_setting_templates/AI Dungeon 2 Classic.json new file 
mode 100644 index 0000000..4178012 --- /dev/null +++ b/LLMFarm/model_setting_templates/AI Dungeon 2 Classic.json @@ -0,0 +1,11 @@ +{ + "model_inference" : "gpt2", + "temp" : 0.80000001192092896, + "prompt_format" : "{{prompt}}", + "top_k" : 100, + "icon" : "ava6", + "n_batch" : 1024, + "context" : 1024, + "top_p" : 0.94999998807907104, + "template_name" : "AI Dungeon 2 Classic" +} diff --git a/LLMFarm/model_setting_templates/Replit 3B.json b/LLMFarm/model_setting_templates/Replit 3B.json new file mode 100644 index 0000000..a2b84f2 --- /dev/null +++ b/LLMFarm/model_setting_templates/Replit 3B.json @@ -0,0 +1,12 @@ +{ + "context" : 2048, + "top_k" : 1, + "n_batch" : 512, + "temp" : 0.8, + "template_name" : "Replit 3B", + "top_p" : 0.80000001192092896, + "prompt_format" : "{{prompt}}", + "repeat_last_n" : 128, + "repeat_penalty" : 1.174, + "use_metal" : false +} diff --git a/LLMFarm/model_setting_templates/cerebras 2.7b.json b/LLMFarm/model_setting_templates/cerebras 2.7b.json new file mode 100644 index 0000000..a850e95 --- /dev/null +++ b/LLMFarm/model_setting_templates/cerebras 2.7b.json @@ -0,0 +1,13 @@ +{ + "top_k" : 40, + "n_batch" : 512, + "context" : 2048, + "top_p" : 0.94999998807907104, + "prompt_format" : "{{prompt}}", + "repeat_last_n" : 64, + "repeat_penalty" : 1.1000000238418579, + "temp" : 0.89999997615814209, + "model_inference" : "gpt2", + "use_metal" : false, + "template_name" : "cerebras 2.7B" +} diff --git a/LLMFarm/model_setting_templates/dolly v2 3b.json b/LLMFarm/model_setting_templates/dolly v2 3b.json new file mode 100644 index 0000000..6cf874a --- /dev/null +++ b/LLMFarm/model_setting_templates/dolly v2 3b.json @@ -0,0 +1,10 @@ +{ + "context" : 1024, + "temp" : 0.89999997615814209, + "top_p" : 0.94999998807907104, + "prompt_format" : "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.### Instruction:{{prompt}}### Response:", + "top_k" : 40, + "model_inference" : "gptneox", + "n_batch" : 512, + "template_name" : "Dolly v2 3B" +} diff --git a/LLMFarm/model_setting_templates/openllama 3b.json b/LLMFarm/model_setting_templates/openllama 3b.json new file mode 100644 index 0000000..6f04646 --- /dev/null +++ b/LLMFarm/model_setting_templates/openllama 3b.json @@ -0,0 +1,11 @@ +{ + "temp" : 0.8, + "context" : 1024, + "top_p" : 0.89999997615814209, + "top_k" : 80, + "model_inference" : "llama", + "n_batch" : 512, + "template_name" : "OpenLLaMA 3B 1T", + "prompt_format" : "Q: {{prompt}}\\nA:", + "icon" : "ava5" +} diff --git a/LLMFarm/model_setting_templates/orca mini 3b.json b/LLMFarm/model_setting_templates/orca mini 3b.json new file mode 100644 index 0000000..cfef726 --- /dev/null +++ b/LLMFarm/model_setting_templates/orca mini 3b.json @@ -0,0 +1,12 @@ +{ + "context" : 1024, + "prompt_format" : "### User:\\n{{prompt}}\\n\\n### Response:\\n", + "template_name" : "ORCA mini 3B", + "temp" : 0.89999997615814209, + "model_inference" : "llama", + "top_p" : 0.94999998807907104, + "icon" : "ava0", + "n_batch" : 512, + "top_k" : 40, + "use_metal" : true +} diff --git a/LLMFarm/model_setting_templates/rp-incite-base-v1-3b.json b/LLMFarm/model_setting_templates/rp-incite-base-v1-3b.json new file mode 100644 index 0000000..ee5060c --- /dev/null +++ b/LLMFarm/model_setting_templates/rp-incite-base-v1-3b.json @@ -0,0 +1,12 @@ +{ + "use_metal" : false, + "model_inference" : "gptneox", + "template_name" : "RedPajama incite base v1 3B", + "prompt_format" : "{{prompt}}", + "icon" : "ava3", + "temp" : 0.80000001192092896, + "n_batch" : 512, + "top_p" : 0.94999998807907104, + "context" : 1024, + "top_k" : 40 +} diff --git a/LLMFarm/model_setting_templates/saiga 7B.json b/LLMFarm/model_setting_templates/saiga 7B.json new file mode 100644 index 0000000..a9eba35 --- /dev/null +++ 
b/LLMFarm/model_setting_templates/saiga 7B.json @@ -0,0 +1,14 @@ +{ + "repeat_penalty" : 1.1000000238418579, + "use_metal" : false, + "icon" : "ava2", + "n_batch" : 512, + "prompt_format" : "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им.\\nuser:{{prompt}}<\/s>", + "top_k" : 40, + "context" : 2048, + "temp" : 0.40000000596046448, + "model_inference" : "llama", + "repeat_last_n" : 64, + "template_name" : "Saiga 7B", + "top_p" : 0.94999998807907104 +} diff --git a/LLMFarm/model_setting_templates/santacoder.json b/LLMFarm/model_setting_templates/santacoder.json new file mode 100644 index 0000000..0d36c34 --- /dev/null +++ b/LLMFarm/model_setting_templates/santacoder.json @@ -0,0 +1,13 @@ +{ + "model_inference" : "starcoder", + "template_name" : "Santacoder", + "context" : 2048, + "n_batch" : 512, + "top_k" : 1, + "prompt_format" : "{{prompt}}", + "temp" : 0.89999997615814209, + "top_p" : 0.94999998807907104, + "use_metal" : false, + "repeat_last_n" : 64, + "repeat_penalty" : 1.1759999990463257 +} diff --git a/LLMFarm/model_setting_templates/stablelm-tuned-alpha-3b.json b/LLMFarm/model_setting_templates/stablelm-tuned-alpha-3b.json new file mode 100644 index 0000000..3df93fd --- /dev/null +++ b/LLMFarm/model_setting_templates/stablelm-tuned-alpha-3b.json @@ -0,0 +1,11 @@ +{ + "icon" : "ava0", + "top_k" : 40, + "temp" : 0.89999997615814209, + "n_batch" : 512, + "model_inference" : "gptneox", + "template_name" : "StableLM tuned 3B", + "context" : 1024, + "prompt_format" : "{{prompt}}", + "top_p" : 0.94999998807907104 +} diff --git a/LLMFarm_core/AI.swift b/LLMFarm_core/AI.swift index baba19c..14d745d 100644 --- a/LLMFarm_core/AI.swift +++ b/LLMFarm_core/AI.swift @@ -35,17 +35,22 @@ class AI { func loadModel(_ aiModel: ModelInference, contextParams: ModelContextParams = .default) { print("AI init") - switch aiModel { - case .LLamaInference: - model = try? 
LLaMa(path: self.modelPath, contextParams: contextParams) - case .GPTNeoxInference: - model = try? GPTNeoX(path: self.modelPath, contextParams: contextParams) - case .GPT2: - model = try? GPT2(path: self.modelPath, contextParams: contextParams) - case .Replit: - model = try? Replit(path: self.modelPath, contextParams: contextParams) - case .Starcoder: - model = try? Starcoder(path: self.modelPath, contextParams: contextParams) + do{ + switch aiModel { + case .LLamaInference: + model = try LLaMa(path: self.modelPath, contextParams: contextParams) + case .GPTNeoxInference: + model = try GPTNeoX(path: self.modelPath, contextParams: contextParams) + case .GPT2: + model = try GPT2(path: self.modelPath, contextParams: contextParams) + case .Replit: + model = try Replit(path: self.modelPath, contextParams: contextParams) + case .Starcoder: + model = try Starcoder(path: self.modelPath, contextParams: contextParams) + } + } + catch { + print(error) } } diff --git a/ModelTest/main.swift b/ModelTest/main.swift index 9b1e233..d0155c0 100644 --- a/ModelTest/main.swift +++ b/ModelTest/main.swift @@ -37,11 +37,13 @@ func prompt_for_generation(_ instruction:String) -> String{ func main(){ print("Hello.") - var input_text = "State the meaning of life." + var input_text = "State the meaning of life. And tell about Stavropol." + // let ai = AI(_modelPath: "/Users/guinmoon/Library/Containers/com.guinmoon.LLMFarm/Data/Documents/models/dolly-v2-3b-q5_1.bin",_chatName: "chat") // try? ai.loadModel(ModelInference.GPTNeoxInference) +// ai.model.custom_prompt_format = "Below is an instruction that describes a task. Write a response that appropriately completes the request.### Instruction:{{prompt}}### Response:" +// ai.model.promptFormat = .Custom // ai.model.promptFormat = .Dolly_b3 - // let ai = AI(_modelPath: "/Users/guinmoon/Library/Containers/com.guinmoon.LLMFarm/Data/Documents/models/AI-Dungeon-2-Classic.bin",_chatName: "chat") // try? 
ai.loadModel(ModelInference.GPT2) // ai.model.promptFormat = .None @@ -50,17 +52,17 @@ func main(){ // try? ai.loadModel(ModelInference.Replit) // ai.model.promptFormat = .None -// let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-3b.ggmlv3.q4_0.bin",_chatName: "chat") + let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-7b.ggmlv3.q2_K.bin",_chatName: "chat") // let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-7b.ggmlv3.q3_K_M.bin",_chatName: "chat") -// var params:ModelContextParams = .default -// params.use_metal = true -// try? ai.loadModel(ModelInference.LLamaInference,contextParams: params) -// ai.model.promptFormat = .LLaMa + var params:ModelContextParams = .default + params.use_metal = true + try? ai.loadModel(ModelInference.LLamaInference,contextParams: params) + ai.model.promptFormat = .LLaMa - let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/santacoder-q8_0.bin",_chatName: "chat") - try? ai.loadModel(ModelInference.Starcoder) - ai.model.promptFormat = .None - input_text = "def quicksort" +// let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/santacoder-q8_0.bin",_chatName: "chat") +// try? ai.loadModel(ModelInference.Starcoder) +// ai.model.promptFormat = .None +// input_text = "def quicksort" ai.model.contextParams.seed = 0; // ai.model.promptStyle = .StableLM_Tuned diff --git a/README.md b/README.md index 6abe450..3f4f4dc 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,6 @@ Also, when creating the application, the source codes from the repository [byron - [x] Metal for llama inference (MacOS and iOS) - [ ] Metal for other inference - [ ] Restore context state (now only chat history) -- [ ] Direct embedding input (LLaVA, etc.) 
- [ ] Model setting templates ## Inferences @@ -30,7 +29,7 @@ Also, when creating the application, the source codes from the repository [byron - [x] [GPTNeoX](https://huggingface.co/docs/transformers/model_doc/gpt_neox) - [x] [Replit](https://huggingface.co/replit/replit-code-v1-3b) - [x] [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2) + [Cerebras](https://arxiv.org/abs/2304.03208) -- [ ] [Starcoder(Santacoder)](https://huggingface.co/bigcode/santacoder) +- [x] [Starcoder(Santacoder)](https://huggingface.co/bigcode/santacoder) - [ ] [RWKV](https://huggingface.co/docs/transformers/model_doc/rwkv) ## Getting Started diff --git a/llmfarm_core.swift/Sources/llmfarm_core/gptneox/gptneox.cpp b/llmfarm_core.swift/Sources/llmfarm_core/gptneox/gptneox.cpp index 2dc9187..6dd90d5 100644 --- a/llmfarm_core.swift/Sources/llmfarm_core/gptneox/gptneox.cpp +++ b/llmfarm_core.swift/Sources/llmfarm_core/gptneox/gptneox.cpp @@ -63,7 +63,7 @@ struct gpt_neox_layer { struct gpt_neox_hparams:gpt_base_hparams { int32_t n_vocab = 50257; - int32_t n_ctx = 1024; + int32_t n_ctx = 4096; int32_t n_embd = 1024; int32_t n_head = 32; int32_t n_layer = 16; @@ -72,27 +72,11 @@ struct gpt_neox_hparams:gpt_base_hparams { int32_t ftype = 1; }; -struct gpt_neox_model { +struct gpt_neox_model:gpt_base_model { gpt_neox_hparams hparams; - - // normalization - struct ggml_tensor * ln_f_g; - struct ggml_tensor * ln_f_b; - - struct ggml_tensor * wte; // position embedding - struct ggml_tensor * lmh_g; // language model head //struct ggml_tensor * lmh_b; // language model bias - std::vector layers; - - // key + value memory - struct ggml_tensor * memory_k; - struct ggml_tensor * memory_v; - - // - struct ggml_context * ctx; - std::map tensors; }; @@ -204,10 +188,10 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_ { const auto & hparams = model.hparams; - const int n_embd = hparams.n_embd; - const int n_layer = hparams.n_layer; - const int n_ctx = 
hparams.n_ctx; - const int n_vocab = hparams.n_vocab; + const size_t n_embd = hparams.n_embd; + const size_t n_layer = hparams.n_layer; + const size_t n_ctx = hparams.n_ctx; + const size_t n_vocab = hparams.n_vocab; ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_g ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_b @@ -238,7 +222,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_ ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v - ctx_size += (6 + 16*n_layer)*512; // object overhead + size_t overhead =ggml_tensor_overhead(); + ctx_size += (6 + 16*n_layer)*1024; // object overhead printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); } @@ -489,6 +474,7 @@ bool gpt_neox_eval( const int n_rot = hparams.n_rot; static size_t buf_size = 256u*1024*1024; +// static size_t buf_size = 256u*1024*ggml_tensor_overhead(); static void * buf = malloc(buf_size); // use 2 scratch buffers diff --git a/llmfarm_core.swift/Sources/llmfarm_core/replit/replit.cpp b/llmfarm_core.swift/Sources/llmfarm_core/replit/replit.cpp index 4ba5b0c..c4fdfc8 100644 --- a/llmfarm_core.swift/Sources/llmfarm_core/replit/replit.cpp +++ b/llmfarm_core.swift/Sources/llmfarm_core/replit/replit.cpp @@ -166,37 +166,14 @@ struct replit_layer { struct ggml_tensor * ffn_down_proj; }; -struct replit_model { +struct replit_model:gpt_base_model { replit_hparams hparams; - - struct gpt_kv_cache kv_self; - struct ggml_tensor * wte_weight; // position embedding struct ggml_tensor * norm_f_weight; // language model head - std::vector layers; - - // key + value memory - struct ggml_tensor * memory_k; - struct ggml_tensor * memory_v; - - struct ggml_context * ctx; - std::map tensors; - ~replit_model() { - if (ctx) { - ggml_free(ctx); - } - } }; - - -//struct replit_model:gpt_base_model { -// mpt_hparams hparams; -// std::vector layers; -//}; 
- struct replit_context:gpt_base_context { replit_model model; replit_tokenizer vocab; diff --git a/llmfarm_core.swift/Sources/llmfarm_core/starcoder/starcoder.cpp b/llmfarm_core.swift/Sources/llmfarm_core/starcoder/starcoder.cpp index 7ac708d..36bcbe7 100644 --- a/llmfarm_core.swift/Sources/llmfarm_core/starcoder/starcoder.cpp +++ b/llmfarm_core.swift/Sources/llmfarm_core/starcoder/starcoder.cpp @@ -54,26 +54,9 @@ struct starcoder_layer { struct ggml_tensor * c_mlp_proj_b; }; -struct starcoder_model { +struct starcoder_model:gpt_base_model { starcoder_hparams hparams; - - // normalization - struct ggml_tensor * ln_f_g; - struct ggml_tensor * ln_f_b; - - struct ggml_tensor * wte; // position embedding - struct ggml_tensor * wpe; // token embedding - struct ggml_tensor * lm_head; // language model head - std::vector layers; - - // key + value memory - struct ggml_tensor * memory_k; - struct ggml_tensor * memory_v; - - // - struct ggml_context * ctx; - std::map tensors; };