Initial working copy

2025-12-19 23:14:28 -06:00 · 2025-12-19 23:14:28 -06:00 · 2c876cef42
commit 2c876cef42
parent 2a335176e6
19 changed files with 783 additions and 126 deletions
--- a/completion/gollamacpp/llama.go
+++ b/completion/gollamacpp/llama.go
@ -0,0 +1,58 @@
+package gollamacpp
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+
+	"github.com/go-skynet/go-llama.cpp"
+)
+
+var (
+	threads   = 4   // overwritten with the -t flag default (runtime.NumCPU()) once Run registers its flags
+	tokens    = 128 // overwritten with the -n flag default (512) once Run registers its flags
+	gpulayers = 0   // bound to the -ngl flag in Run; passed to llama.SetGPULayers
+	seed      = -1  // bound to the -s flag in Run; -1 requests a random seed
+)
+
+// Run loads a gguf model through go-llama.cpp and then loops forever: each
+// iteration generates a completion for a fixed prompt, streaming the tokens
+// to stdout, and then prints the embeddings computed for that same prompt.
+//
+// Settings are read from the command line (os.Args):
+//
+//	-m    path to the gguf model file to load
+//	-ngl  number of GPU layers to use
+//	-t    number of threads to use (defaults to runtime.NumCPU())
+//	-n    number of tokens to predict
+//	-s    RNG seed, -1 for a random seed
+//
+// Run does not return: the generation loop has no exit condition, and a
+// failure to load the model or to run a prediction ends the process with
+// exit status 1. An embeddings failure is reported and the loop continues.
+func Run() {
+	var model string
+
+	// Registering the flags overwrites the package-level settings with the
+	// defaults given here, before any command-line value is applied.
+	flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+	flags.StringVar(&model, "m", "./SmolLM-135M.Q8_0.gguf", "path to gguf model file to load")
+	flags.IntVar(&gpulayers, "ngl", 0, "Number of GPU layers to use")
+	flags.IntVar(&threads, "t", runtime.NumCPU(), "number of threads to use during computation")
+	flags.IntVar(&tokens, "n", 512, "number of tokens to predict")
+	flags.IntVar(&seed, "s", -1, "predict RNG seed, -1 for random seed")
+
+	// With flag.ExitOnError the flag set terminates the process itself on a
+	// parse failure, so this branch is only a safeguard.
+	if err := flags.Parse(os.Args[1:]); err != nil {
+		fmt.Printf("Parsing program arguments failed: %s\n", err)
+		os.Exit(1)
+	}
+
+	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
+	if err != nil {
+		fmt.Println("Loading the model failed:", err.Error())
+		os.Exit(1)
+	}
+	fmt.Printf("Model loaded successfully.\n")
+
+	// The prompt never changes between iterations, so declare it once.
+	const prompt = "Generate a poem about the sea in the style of Shakespeare:\n"
+
+	for {
+		_, err := l.Predict(prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
+			// Echo every token as soon as the callback receives it.
+			fmt.Print(token)
+			return true
+		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"), llama.SetSeed(seed))
+		if err != nil {
+			// Report and exit like the other fatal paths instead of panicking.
+			fmt.Println("Prediction failed:", err.Error())
+			os.Exit(1)
+		}
+
+		embeds, err := l.Embeddings(prompt)
+		if err != nil {
+			fmt.Printf("Embeddings: error %s \n", err.Error())
+		} else {
+			// Only print the vector when it was actually computed.
+			fmt.Printf("Embeddings: %v", embeds)
+		}
+		fmt.Printf("\n\n")
+	}
+}