diff options
Diffstat (limited to 'nixpkgs/pkgs/tools/misc/ollama')
-rw-r--r-- | nixpkgs/pkgs/tools/misc/ollama/default.nix | 50 | ||||
-rw-r--r-- | nixpkgs/pkgs/tools/misc/ollama/disable-gqa.patch | 15 | ||||
-rw-r--r-- | nixpkgs/pkgs/tools/misc/ollama/set-llamacpp-path.patch | 23 |
3 files changed, 88 insertions, 0 deletions
# Adds three new files under nixpkgs/pkgs/tools/misc/ollama:
#   - default.nix: a buildGoModule derivation for ollama 0.1.17, fetched from
#     GitHub (jmorganca/ollama, rev "v${version}"), which applies the two
#     patches below.
#   - disable-gqa.patch: removes the code in llm/llama.go that appended the
#     "--gqa" parameter when opts.NumGQA > 0.
#   - set-llamacpp-path.patch: removes the go:embed directive in llm/llama.go
#     and makes chooseRunners return a single ModelRunner whose Path is the
#     "@llamaCppServer@" placeholder; postPatch in default.nix substitutes that
#     placeholder with "${llama-cpp}/bin/llama-cpp-server".
diff --git a/nixpkgs/pkgs/tools/misc/ollama/default.nix b/nixpkgs/pkgs/tools/misc/ollama/default.nix
new file mode 100644
index 000000000000..2176582e1fe9
--- /dev/null
+++ b/nixpkgs/pkgs/tools/misc/ollama/default.nix
@@ -0,0 +1,50 @@
+{ lib
+, buildGoModule
+, fetchFromGitHub
+, llama-cpp
+}:
+
+buildGoModule rec {
+  pname = "ollama";
+  version = "0.1.17";
+
+  src = fetchFromGitHub {
+    owner = "jmorganca";
+    repo = "ollama";
+    rev = "v${version}";
+    hash = "sha256-eXukNn9Lu1hF19GEi7S7a96qktsjnmXCUp38gw+3MzY=";
+  };
+
+  patches = [
+    # disable passing the deprecated gqa flag to llama-cpp-server
+    # see https://github.com/ggerganov/llama.cpp/issues/2975
+    ./disable-gqa.patch
+
+    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
+    ./set-llamacpp-path.patch
+  ];
+
+  postPatch = ''
+    substituteInPlace llm/llama.go \
+      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
+    substituteInPlace server/routes_test.go --replace "0.0.0" "${version}"
+  '';
+
+  vendorHash = "sha256-yGdCsTJtvdwHw21v0Ot6I8gxtccAvNzZyRu1T0vaius=";
+
+  ldflags = [
+    "-s"
+    "-w"
+    "-X=github.com/jmorganca/ollama/version.Version=${version}"
+    "-X=github.com/jmorganca/ollama/server.mode=release"
+  ];
+
+  meta = with lib; {
+    description = "Get up and running with large language models locally";
+    homepage = "https://github.com/jmorganca/ollama";
+    license = licenses.mit;
+    mainProgram = "ollama";
+    maintainers = with maintainers; [ dit7ya elohmeier ];
+    platforms = platforms.unix;
+  };
+}
diff --git a/nixpkgs/pkgs/tools/misc/ollama/disable-gqa.patch b/nixpkgs/pkgs/tools/misc/ollama/disable-gqa.patch
new file mode 100644
index 000000000000..b54440cd3d53
--- /dev/null
+++ b/nixpkgs/pkgs/tools/misc/ollama/disable-gqa.patch
@@ -0,0 +1,15 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index 0b460e9..b79e04a 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
+ 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
+ 	}
+ 
+-	if opts.NumGQA > 0 {
+-		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
+-	}
+-
+ 	if len(adapters) > 0 {
+ 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
+ 		params = append(params, "--lora", adapters[0])
diff --git a/nixpkgs/pkgs/tools/misc/ollama/set-llamacpp-path.patch b/nixpkgs/pkgs/tools/misc/ollama/set-llamacpp-path.patch
new file mode 100644
index 000000000000..e90e552bab45
--- /dev/null
+++ b/nixpkgs/pkgs/tools/misc/ollama/set-llamacpp-path.patch
@@ -0,0 +1,23 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index f23d5d8..6563550 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -25,7 +25,6 @@ import (
+ 	"github.com/jmorganca/ollama/api"
+ )
+ 
+-//go:embed llama.cpp/*/build/*/bin/*
+ var llamaCppEmbed embed.FS
+ 
+ type ModelRunner struct {
+@@ -33,6 +32,10 @@ type ModelRunner struct {
+ }
+ 
+ func chooseRunners(workDir, runnerType string) []ModelRunner {
++	return []ModelRunner{
++		{Path: "@llamaCppServer@"},
++	}
++
+ 	buildPath := path.Join("llama.cpp", runnerType, "build")
+ 	var runners []string
+ 
|