From 2a335176e6f5e3f7472f2f8933f90bd3f7d7ee91 Mon Sep 17 00:00:00 2001
From: Chandler Swift
Date: Wed, 17 Dec 2025 22:39:18 -0600
Subject: [PATCH] Add go-llama.cpp backend

---
 README.md                 |  3 +++
 completion/llama/llama.go | 51 +++++++++++++++++++++++++++++++++++++++
 go.mod                    |  5 +++-
 go.sum                    | 22 +++++++++++++++++
 shell.nix                 | 19 +++++++++++++++
 5 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 completion/llama/llama.go
 create mode 100644 shell.nix

diff --git a/README.md b/README.md
index f6274c2..28924ee 100644
--- a/README.md
+++ b/README.md
@@ -36,3 +36,6 @@ To start, generate main.go.
 # GGUF
 
 https://huggingface.co/QuantFactory/SmolLM-135M-GGUF/resolve/main/SmolLM-135M.Q8_0.gguf?download=true
+
+# TODO:
+Consider using llama-server instead?
diff --git a/completion/llama/llama.go b/completion/llama/llama.go
new file mode 100644
index 0000000..4ee51ed
--- /dev/null
+++ b/completion/llama/llama.go
@@ -0,0 +1,51 @@
+package llama
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+
+	"github.com/go-skynet/go-llama.cpp"
+)
+
+func main() {
+	var model string
+
+	flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+	flags.StringVar(&model, "m", "./SmolLM-135M.Q8_0.gguf", "path to gguf model file to load")
+	gpulayers := flags.Int("ngl", 0, "Number of GPU layers to use")
+	threads := flags.Int("t", runtime.NumCPU(), "number of threads to use during computation")
+	tokens := flags.Int("n", 512, "number of tokens to predict")
+	seed := flags.Int("s", -1, "predict RNG seed, -1 for random seed")
+
+	err := flags.Parse(os.Args[1:])
+	if err != nil {
+		fmt.Printf("Parsing program arguments failed: %s", err)
+		os.Exit(1)
+	}
+	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(*gpulayers))
+	if err != nil {
+		fmt.Println("Loading the model failed:", err.Error())
+		os.Exit(1)
+	}
+	fmt.Printf("Model loaded successfully.\n")
+
+	for {
+		text := "Generate a poem about the sea in the style of Shakespeare:\n"
+
+		_, err := l.Predict(text, llama.Debug, llama.SetTokenCallback(func(token string) bool {
+			fmt.Print(token)
+			return true
+		}), llama.SetTokens(*tokens), llama.SetThreads(*threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"), llama.SetSeed(*seed))
+		if err != nil {
+			panic(err)
+		}
+		embeds, err := l.Embeddings(text)
+		if err != nil {
+			fmt.Printf("Embeddings: error %s \n", err.Error())
+		}
+		fmt.Printf("Embeddings: %v", embeds)
+		fmt.Printf("\n\n")
+	}
+}
diff --git a/go.mod b/go.mod
index 26f4c11..38c9877 100644
--- a/go.mod
+++ b/go.mod
@@ -2,7 +2,10 @@ module git.chandlerswift.com/chandlerswift/svs-services-server
 
 go 1.25.4
 
-require github.com/hybridgroup/yzma v1.3.0
+require (
+	github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
+	github.com/hybridgroup/yzma v1.3.0
+)
 
 require (
 	github.com/ebitengine/purego v0.9.1 // indirect
diff --git a/go.sum b/go.sum
index 97feb43..e967abd 100644
--- a/go.sum
+++ b/go.sum
@@ -1,8 +1,30 @@
 github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
 github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
+github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
+github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDkqtp12udlDLLg+ybXVMmL7Ox9tybqVLWxjPE=
+github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
+github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
+github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
+github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/hybridgroup/yzma v1.3.0 h1:5dw9qEcFEGEJq+tA12Ooa6D/e0PROqv7Ix6VfSR9MQI=
 github.com/hybridgroup/yzma v1.3.0/go.mod h1:UUYw+DLlrgtBYm+B+9XD3boB1ZcDpfbAnYHKW3VKKZ4=
 github.com/jupiterrider/ffi v0.5.1 h1:l7ANXU+Ex33LilVa283HNaf/sTzCrrht7D05k6T6nlc=
 github.com/jupiterrider/ffi v0.5.1/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo=
+github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4=
+github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o=
+github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c=
+github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8=
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
 golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
 golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
+golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..98c9649
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,19 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+let
+  llama = pkgs.llama-cpp;
+in
+pkgs.mkShell {
+  nativeBuildInputs = [
+    pkgs.go
+    pkgs.pkg-config
+  ];
+
+  buildInputs = [ llama ];
+
+  shellHook = ''
+    export CGO_ENABLED=1
+    export CGO_CFLAGS="-I${llama}/include"
+    export CGO_LDFLAGS="-L${llama}/lib -lllama -lm -lstdc++"
+  '';
+}