diff options
Diffstat (limited to 'pkgs/tools/misc/ollama/disable-gqa.patch')
-rw-r--r-- | pkgs/tools/misc/ollama/disable-gqa.patch | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/pkgs/tools/misc/ollama/disable-gqa.patch b/pkgs/tools/misc/ollama/disable-gqa.patch
new file mode 100644
index 000000000000..b54440cd3d53
--- /dev/null
+++ b/pkgs/tools/misc/ollama/disable-gqa.patch
@@ -0,0 +1,15 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index 0b460e9..b79e04a 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
+ 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
+ 	}
+ 
+-	if opts.NumGQA > 0 {
+-		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
+-	}
+-
+ 	if len(adapters) > 0 {
+ 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
+ 		params = append(params, "--lora", adapters[0])