Add go-llama.cpp backend
This commit is contained in:
parent
9b80f6733d
commit
2a335176e6
5 changed files with 99 additions and 1 deletion
51
completion/llama/llama.go
Normal file
51
completion/llama/llama.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package llama
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var model string
|
||||
|
||||
flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
flags.StringVar(&model, "m", "./SmolLM-135M.Q8_0.gguf", "path to gguf model file to load")
|
||||
flags.IntVar(&gpulayers, "ngl", 0, "Number of GPU layers to use")
|
||||
flags.IntVar(&threads, "t", runtime.NumCPU(), "number of threads to use during computation")
|
||||
flags.IntVar(&tokens, "n", 512, "number of tokens to predict")
|
||||
flags.IntVar(&seed, "s", -1, "predict RNG seed, -1 for random seed")
|
||||
|
||||
err := flags.Parse(os.Args[1:])
|
||||
if err != nil {
|
||||
fmt.Printf("Parsing program arguments failed: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
|
||||
if err != nil {
|
||||
fmt.Println("Loading the model failed:", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("Model loaded successfully.\n")
|
||||
|
||||
for {
|
||||
text := "Generate a poem about the sea in the style of Shakespeare:\n"
|
||||
|
||||
_, err := l.Predict(text, llama.Debug, llama.SetTokenCallback(func(token string) bool {
|
||||
fmt.Print(token)
|
||||
return true
|
||||
}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"), llama.SetSeed(seed))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
embeds, err := l.Embeddings(text)
|
||||
if err != nil {
|
||||
fmt.Printf("Embeddings: error %s \n", err.Error())
|
||||
}
|
||||
fmt.Printf("Embeddings: %v", embeds)
|
||||
fmt.Printf("\n\n")
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue