Add go-llama.cpp backend
This commit is contained in:
parent
9b80f6733d
commit
2a335176e6
5 changed files with 99 additions and 1 deletions
|
|
@ -36,3 +36,6 @@ To start, generate main.go.
|
|||
# GGUF
|
||||
|
||||
https://huggingface.co/QuantFactory/SmolLM-135M-GGUF/resolve/main/SmolLM-135M.Q8_0.gguf?download=true
|
||||
|
||||
# TODO:
|
||||
Consider using llama-server instead?
|
||||
|
|
|
|||
51
completion/llama/llama.go
Normal file
51
completion/llama/llama.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package llama
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var model string
|
||||
|
||||
flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
flags.StringVar(&model, "m", "./SmolLM-135M.Q8_0.gguf", "path to gguf model file to load")
|
||||
flags.IntVar(&gpulayers, "ngl", 0, "Number of GPU layers to use")
|
||||
flags.IntVar(&threads, "t", runtime.NumCPU(), "number of threads to use during computation")
|
||||
flags.IntVar(&tokens, "n", 512, "number of tokens to predict")
|
||||
flags.IntVar(&seed, "s", -1, "predict RNG seed, -1 for random seed")
|
||||
|
||||
err := flags.Parse(os.Args[1:])
|
||||
if err != nil {
|
||||
fmt.Printf("Parsing program arguments failed: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
|
||||
if err != nil {
|
||||
fmt.Println("Loading the model failed:", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("Model loaded successfully.\n")
|
||||
|
||||
for {
|
||||
text := "Generate a poem about the sea in the style of Shakespeare:\n"
|
||||
|
||||
_, err := l.Predict(text, llama.Debug, llama.SetTokenCallback(func(token string) bool {
|
||||
fmt.Print(token)
|
||||
return true
|
||||
}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"), llama.SetSeed(seed))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
embeds, err := l.Embeddings(text)
|
||||
if err != nil {
|
||||
fmt.Printf("Embeddings: error %s \n", err.Error())
|
||||
}
|
||||
fmt.Printf("Embeddings: %v", embeds)
|
||||
fmt.Printf("\n\n")
|
||||
}
|
||||
}
|
||||
5
go.mod
5
go.mod
|
|
@ -2,7 +2,10 @@ module git.chandlerswift.com/chandlerswift/svs-services-server
|
|||
|
||||
go 1.25.4
|
||||
|
||||
require github.com/hybridgroup/yzma v1.3.0
|
||||
require (
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
|
||||
github.com/hybridgroup/yzma v1.3.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/ebitengine/purego v0.9.1 // indirect
|
||||
|
|
|
|||
22
go.sum
22
go.sum
|
|
@ -1,8 +1,30 @@
|
|||
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
|
||||
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
|
||||
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDkqtp12udlDLLg+ybXVMmL7Ox9tybqVLWxjPE=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/hybridgroup/yzma v1.3.0 h1:5dw9qEcFEGEJq+tA12Ooa6D/e0PROqv7Ix6VfSR9MQI=
|
||||
github.com/hybridgroup/yzma v1.3.0/go.mod h1:UUYw+DLlrgtBYm+B+9XD3boB1ZcDpfbAnYHKW3VKKZ4=
|
||||
github.com/jupiterrider/ffi v0.5.1 h1:l7ANXU+Ex33LilVa283HNaf/sTzCrrht7D05k6T6nlc=
|
||||
github.com/jupiterrider/ffi v0.5.1/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo=
|
||||
github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4=
|
||||
github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o=
|
||||
github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c=
|
||||
github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8=
|
||||
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
|
||||
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
|
||||
golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
|
|||
19
shell.nix
Normal file
19
shell.nix
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Development shell for building the Go code against llama.cpp via cgo.
#
# Provides the Go toolchain and pkg-config as build tools and the nixpkgs
# llama-cpp package as a library, then points cgo at that package's include
# and lib directories.
{ pkgs ? import <nixpkgs> {} }:

pkgs.mkShell {
  # Tools that run on the build machine.
  nativeBuildInputs = with pkgs; [
    go
    pkg-config
  ];

  # Library the cgo bindings compile and link against.
  buildInputs = [ pkgs.llama-cpp ];

  # Enable cgo and pass the llama-cpp header and library locations to it.
  shellHook = ''
    export CGO_ENABLED=1
    export CGO_CFLAGS="-I${pkgs.llama-cpp}/include"
    export CGO_LDFLAGS="-L${pkgs.llama-cpp}/lib -lllama -lm -lstdc++"
  '';
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue