Skip to content

Commit

Permalink
+ add starcoder inference
Browse files Browse the repository at this point in the history
test with this model https://huggingface.co/mike-ravkine/gpt_bigcode-santacoder-GGML
+ update ggml to d2b178ee747cfb0a0a1892da362d068080659170
+ update llama to 6e7cca404748dd4b1a3affd0d1296e37f4ac0a6f
  • Loading branch information
guinmoon committed Jul 16, 2023
1 parent 0bd59b6 commit 075a6cf
Show file tree
Hide file tree
Showing 28 changed files with 5,854 additions and 2,740 deletions.
6 changes: 6 additions & 0 deletions LLMFarm.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
/* Begin PBXBuildFile section */
150CAD012A3CE7B30015CD66 /* GPTNeox.swift in Sources */ = {isa = PBXBuildFile; fileRef = 150CAD002A3CE7B30015CD66 /* GPTNeox.swift */; };
150CAD022A3CE7B30015CD66 /* GPTNeox.swift in Sources */ = {isa = PBXBuildFile; fileRef = 150CAD002A3CE7B30015CD66 /* GPTNeox.swift */; };
15141E162A6438AE0060E767 /* Starcoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15141E152A6438AE0060E767 /* Starcoder.swift */; };
15141E172A6438AE0060E767 /* Starcoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15141E152A6438AE0060E767 /* Starcoder.swift */; };
155FBB9F2A460566004DD5AE /* GPTBase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBB9E2A460566004DD5AE /* GPTBase.swift */; };
155FBBA02A460566004DD5AE /* GPTBase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBB9E2A460566004DD5AE /* GPTBase.swift */; };
155FBBA22A48B5C1004DD5AE /* Replit.swift in Sources */ = {isa = PBXBuildFile; fileRef = 155FBBA12A48B5C0004DD5AE /* Replit.swift */; };
Expand Down Expand Up @@ -84,6 +86,7 @@

/* Begin PBXFileReference section */
150CAD002A3CE7B30015CD66 /* GPTNeox.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GPTNeox.swift; sourceTree = "<group>"; };
15141E152A6438AE0060E767 /* Starcoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Starcoder.swift; sourceTree = "<group>"; };
155FBB9E2A460566004DD5AE /* GPTBase.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GPTBase.swift; sourceTree = "<group>"; };
155FBBA12A48B5C0004DD5AE /* Replit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Replit.swift; sourceTree = "<group>"; };
1560D6162A2D1A3D00918330 /* AddChatView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AddChatView.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -221,6 +224,7 @@
1560D61B2A2E11E200918330 /* ExceptionCatcher.h */,
15F210552A1919390021F414 /* AI.swift */,
15F210512A1919390021F414 /* ArrayExt.swift */,
15141E152A6438AE0060E767 /* Starcoder.swift */,
15F210542A1919390021F414 /* GPTNeoX_old.swift */,
150CAD002A3CE7B30015CD66 /* GPTNeox.swift */,
155FBB9E2A460566004DD5AE /* GPTBase.swift */,
Expand Down Expand Up @@ -427,6 +431,7 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
15141E162A6438AE0060E767 /* Starcoder.swift in Sources */,
15F2102C2A190D5B0021F414 /* Math.swift in Sources */,
155FBBA22A48B5C1004DD5AE /* Replit.swift in Sources */,
15F210642A19193A0021F414 /* AI.swift in Sources */,
Expand Down Expand Up @@ -469,6 +474,7 @@
15F210A42A1921E40021F414 /* Utils.swift in Sources */,
15A480002A40D2A300B72DC3 /* GPT2.swift in Sources */,
15F210A52A1921E40021F414 /* ComputeGraph.swift in Sources */,
15141E172A6438AE0060E767 /* Starcoder.swift in Sources */,
1560D6172A2D1A3D00918330 /* AddChatView.swift in Sources */,
15F210962A1920AA0021F414 /* AIChatModel.swift in Sources */,
15F210AA2A1A659B0021F414 /* FileHelper.swift in Sources */,
Expand Down
3 changes: 3 additions & 0 deletions LLMFarm/AIChatModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ final class AIChatModel: ObservableObject {
}else if chat_config!["model_inference"] as! String == "replit" {
try? self.chat?.loadModel(ModelInference.Replit,contextParams: model_context_param)
self.chat?.model.stop_words.append("<|endoftext|>")
}else if chat_config!["model_inference"] as! String == "starcoder" {
try? self.chat?.loadModel(ModelInference.Starcoder,contextParams: model_context_param)
self.chat?.model.stop_words.append("<|endoftext|>")
}
}
else{
Expand Down
9 changes: 8 additions & 1 deletion LLMFarm/Chats/ChatListView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@ struct ChatListView: View {
var close_chat: () -> Void
@Binding var edit_chat_dialog: Bool
@Binding var chat_selection: String?

@Binding var renew_chat_list: () -> Void
@State var chats_previews = get_chat_list()!

func refresh_chat_list(){
self.chats_previews = get_chat_list()!
}

func delete(at offsets: IndexSet) {
let chatsToDelete = offsets.map { self.chats_previews[$0] }
let res = delete_chats(chatsToDelete)
Expand Down Expand Up @@ -105,6 +109,9 @@ struct ChatListView: View {
#endif
}
.background(.opacity(0))
}.task {
renew_chat_list = refresh_chat_list
refresh_chat_list()
}
}

Expand Down
12 changes: 8 additions & 4 deletions LLMFarm/LLMFarmApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct LLMFarmApp: App {
@StateObject var orientationInfo = OrientationInfo()
@State var isLandscape:Bool = false
@State private var chat_selection: String?
@State var renew_chat_list: () -> Void = {}

func close_chat() -> Void{
aiChatModel.stop_predict()
Expand All @@ -36,12 +37,14 @@ struct LLMFarmApp: App {
add_chat_dialog:$add_chat_dialog,
close_chat:close_chat,
edit_chat_dialog:$edit_chat_dialog,
chat_selection:$chat_selection)
chat_selection:$chat_selection,
renew_chat_list: $renew_chat_list)
.disabled(edit_chat_dialog)
.frame(minWidth: 250, maxHeight: .infinity)
}else{
AddChatView(add_chat_dialog: $add_chat_dialog,
edit_chat_dialog:.constant(false))
edit_chat_dialog: $edit_chat_dialog,
renew_chat_list: $renew_chat_list)
.frame(minWidth: 200,maxHeight: .infinity)
}
}
Expand All @@ -58,8 +61,9 @@ struct LLMFarmApp: App {
}
else{
AddChatView(add_chat_dialog: $add_chat_dialog,
edit_chat_dialog:$edit_chat_dialog,
chat_name: aiChatModel.chat_name)
edit_chat_dialog: $edit_chat_dialog,
chat_name: aiChatModel.chat_name,
renew_chat_list: $renew_chat_list)
.frame(minWidth: 200,maxHeight: .infinity)
#if !os(macOS)
.toolbar(.hidden, for: .automatic)
Expand Down
22 changes: 16 additions & 6 deletions LLMFarm/Settings/AddChatView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,25 +72,29 @@ struct AddChatView: View {
@State private var numberOfThreads: Int32 = 0
@State private var use_metal: Bool = false
@State private var isImporting: Bool = false
@Binding var renew_chat_list: () -> Void

private var chat_name: String = ""
let bin_type = UTType(tag: "bin", tagClass: .filenameExtension, conformingTo: nil)

@State private var model_inference = "auto"
let model_inferences = ["auto","gptneox", "llama", "gpt2", "replit"]
let model_inferences = ["auto","gptneox", "llama", "gpt2", "replit", "starcoder"]

@State private var model_icon: String = "ava0"
let model_icons = ["ava0","ava1","ava2","ava3","ava4","ava5","ava6","ava7"]

init(add_chat_dialog: Binding<Bool>,edit_chat_dialog:Binding<Bool>) {
init(add_chat_dialog: Binding<Bool>,edit_chat_dialog:Binding<Bool>,
renew_chat_list: Binding<() -> Void>) {
self._add_chat_dialog = add_chat_dialog
self._edit_chat_dialog = edit_chat_dialog
self._renew_chat_list = renew_chat_list
}

init(add_chat_dialog: Binding<Bool>,edit_chat_dialog:Binding<Bool>,chat_name:String
) {
init(add_chat_dialog: Binding<Bool>,edit_chat_dialog:Binding<Bool>,
chat_name:String,renew_chat_list: Binding<() -> Void>) {
self._add_chat_dialog = add_chat_dialog
self._edit_chat_dialog = edit_chat_dialog
self._renew_chat_list = renew_chat_list
self.chat_name = chat_name
let chat_config = get_chat_info(chat_name)
if (chat_config!["title"] != nil){
Expand Down Expand Up @@ -180,8 +184,14 @@ struct AddChatView: View {
"numberOfThreads":Int32(numberOfThreads),
"icon":model_icon]
let res = create_chat(options,edit_chat_dialog:self.edit_chat_dialog,chat_name:self.chat_name)
add_chat_dialog = false
edit_chat_dialog = false
if add_chat_dialog {
add_chat_dialog = false

}
if edit_chat_dialog {
edit_chat_dialog = false
}
renew_chat_list()
}
} label: {
Text(edit_chat_dialog ? "Save" :"Add" )
Expand Down
3 changes: 3 additions & 0 deletions LLMFarm_core/AI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ enum ModelInference {
case GPTNeoxInference
case GPT2
case Replit
case Starcoder
}

class AI {
Expand Down Expand Up @@ -43,6 +44,8 @@ class AI {
model = try? GPT2(path: self.modelPath, contextParams: contextParams)
case .Replit:
model = try? Replit(path: self.modelPath, contextParams: contextParams)
case .Starcoder:
model = try? Starcoder(path: self.modelPath, contextParams: contextParams)
}
}

Expand Down
2 changes: 1 addition & 1 deletion LLMFarm_core/GPTBase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class GPTBase: Model {
var params = gpt_context_default_params()
params.n_ctx = contextParams.context
params.n_parts = contextParams.parts
params.seed = contextParams.seed
params.seed = 0
params.f16_kv = contextParams.f16Kv
params.logits_all = contextParams.logitsAll
params.vocab_only = contextParams.vocabOnly
Expand Down
2 changes: 1 addition & 1 deletion LLMFarm_core/LLaMa.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class LLaMa: GPTBase {
var params = llama_context_default_params()
params.n_ctx = contextParams.context
// params.n_parts = contextParams.parts
params.seed = contextParams.seed
params.seed = UInt32(contextParams.seed)
params.f16_kv = contextParams.f16Kv
params.logits_all = contextParams.logitsAll
params.vocab_only = contextParams.vocabOnly
Expand Down
4 changes: 2 additions & 2 deletions LLMFarm_core/Model.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public func get_model_context_param_by_config(_ model_config:Dictionary<String,
public struct ModelContextParams {
public var context: Int32 = 512 // text context
public var parts: Int32 = -1 // -1 for default
public var seed: Int32 = -1 // RNG seed, 0 for random
public var seed: UInt32 = 0xFFFFFFFF // RNG seed, 0 for random
public var numberOfThreads: Int32 = 1

public var f16Kv = true // use fp16 for KV cache
Expand All @@ -85,7 +85,7 @@ public struct ModelContextParams {

public static let `default` = ModelContextParams()

public init(context: Int32 = 2048 /*512*/, parts: Int32 = -1, seed: Int32 = -1, numberOfThreads: Int32 = 0, f16Kv: Bool = true, logitsAll: Bool = false, vocabOnly: Bool = false, useMlock: Bool = false, embedding: Bool = false) {
public init(context: Int32 = 2048 /*512*/, parts: Int32 = -1, seed: UInt32 = 0xFFFFFFFF, numberOfThreads: Int32 = 0, f16Kv: Bool = true, logitsAll: Bool = false, vocabOnly: Bool = false, useMlock: Bool = false, embedding: Bool = false) {
self.context = context
self.parts = parts
self.seed = seed
Expand Down
36 changes: 33 additions & 3 deletions LLMFarm_core/Starcoder.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,38 @@
//
// Starcoder.swift
// LLMFarm
// GPTNeoX.swift
// Mia
//
// Created by guinmoon on 16.07.2023.
// Created by Byron Everson on 4/19/23.
//

import Foundation
import llmfarm_core

/// GGML-backed inference wrapper for Starcoder/SantaCoder-family models
/// (e.g. gpt_bigcode-santacoder). Inherits the shared generation loop from
/// `GPTBase` and overrides only the model-specific C entry points.
public class Starcoder: GPTBase {

/// Loads a Starcoder GGML model from `path` and stores the resulting C
/// context in `self.context`.
/// - Parameters:
///   - path: Filesystem path to the GGML model file.
///   - contextParams: Swift-side context parameters (unused here beyond the
///     base-class state; the C layer receives `params`).
///   - params: C `gpt_context_params` passed straight to the loader.
/// - Returns: Always `true`; load failure is not surfaced from
///   `starcoder_init_from_file` here — NOTE(review): consider checking the
///   returned context for nil/validity.
public override func load_model(path: String = "", contextParams: ModelContextParams = .default, params:gpt_context_params ) throws -> Bool{
self.context = starcoder_init_from_file(path, params)
// Raw completion model: no chat/instruction prompt template is applied.
self.promptFormat = .None
return true
}

deinit {
// NOTE(review): frees the Starcoder context via `gpt2_free` — presumably
// the gpt2/starcoder contexts share a layout in the C layer, but confirm
// a dedicated `starcoder_free` doesn't exist to avoid a mismatched free.
gpt2_free(context)
}

/// Evaluates one batch of tokens, advancing the model state.
/// - Parameter inputBatch: Tokens to feed at position `nPast`.
/// - Throws: `ModelError.failedToEval` when the C eval returns nonzero.
/// - Returns: `true` on success.
public override func gpt_eval(inputBatch:[ModelToken]) throws -> Bool{
if starcoder_eval(context, inputBatch, Int32(inputBatch.count), nPast, contextParams.numberOfThreads) != 0 {
throw ModelError.failedToEval
}
return true
}

/// Initializes the logits buffer before generation begins.
/// - Throws: `ModelError.failedToEval` when the C call returns nonzero.
/// - Returns: `true` on success.
public override func gpt_init_logits() throws -> Bool {
if starcoder_init_logits(context, contextParams.numberOfThreads) != 0 {
throw ModelError.failedToEval
}
return true
}
}


21 changes: 12 additions & 9 deletions ModelTest/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,32 +37,35 @@ func prompt_for_generation(_ instruction:String) -> String{

func main(){
print("Hello.")
var input_text = "State the meaning of life."
// let ai = AI(_modelPath: "/Users/guinmoon/Library/Containers/com.guinmoon.LLMFarm/Data/Documents/models/dolly-v2-3b-q5_1.bin",_chatName: "chat")
// try? ai.loadModel(ModelInference.GPTNeoxInference)
// ai.model.promptFormat = .Dolly_b3

// let ai = AI(_modelPath: "/Users/guinmoon/Library/Containers/com.guinmoon.LLMFarm/Data/Documents/models/AI-Dungeon-2-Classic.bin",_chatName: "chat")
// try? ai.loadModel(ModelInference.GPT2)
// ai.model.promptFormat = .None
//
// let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/replit-code-v1-3b-ggml-q5_1.bin",_chatName: "chat")
// try? ai.loadModel(ModelInference.Replit)
// ai.model.promptFormat = .None

// #define MacMetal
// let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-3b.ggmlv3.q4_0.bin",_chatName: "chat")
let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-7b.ggmlv3.q3_K_M.bin",_chatName: "chat")
var params:ModelContextParams = .default
params.use_metal = true
try? ai.loadModel(ModelInference.LLamaInference,contextParams: params)
ai.model.promptFormat = .LLaMa

// let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/orca-mini-7b.ggmlv3.q3_K_M.bin",_chatName: "chat")
// var params:ModelContextParams = .default
// params.use_metal = true
// try? ai.loadModel(ModelInference.LLamaInference,contextParams: params)
// ai.model.promptFormat = .LLaMa

let ai = AI(_modelPath: "/Users/guinmoon/dev/alpaca_llama_etc/santacoder-q8_0.bin",_chatName: "chat")
try? ai.loadModel(ModelInference.Starcoder)
ai.model.promptFormat = .None
input_text = "def quicksort"

ai.model.contextParams.seed = 0;
// ai.model.promptStyle = .StableLM_Tuned

let input_text = "State the meaning of life."

// let input_text = "Tell about Stavropol."
// let prompt = prompt_for_generation(input_text)
let prompt = input_text
Expand Down
2 changes: 1 addition & 1 deletion llmfarm_core.swift/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ let package = Package(
targets: [
.target(
name: "llmfarm_core",
sources: ["ggml.c","llama/ggml-metal.m","k_quants.c", "gptneox/gptneox.cpp","gpt2/gpt2.cpp","replit/replit.cpp","common.cpp","gpt_helpers.cpp","gpt_spm.cpp", "llama/llama.cpp"],
sources: ["ggml.c","ggml-metal.m","k_quants.c", "gptneox/gptneox.cpp","gpt2/gpt2.cpp","replit/replit.cpp","starcoder/starcoder.cpp","common.cpp","gpt_helpers.cpp","gpt_spm.cpp", "llama/llama.cpp"],
publicHeadersPath: "spm-headers",
// I'm not sure about some of the flags; please correct them if they're wrong.
cSettings: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ extern "C" {

struct ggml_metal_context;

struct ggml_metal_context * ggml_metal_init(void);
// number of command buffers to use
struct ggml_metal_context * ggml_metal_init(int n_cb);
void ggml_metal_free(struct ggml_metal_context * ctx);

// set the number of command buffers to use
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);

// creates a mapping between a host memory buffer and a device memory buffer
// - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
// - the mapping is used during computation to determine the arguments of the compute kernels
Expand Down
Loading

0 comments on commit 075a6cf

Please sign in to comment.