about summary refs log tree commit diff
path: root/nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py')
-rw-r--r--nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py378
1 files changed, 378 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py b/nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py
new file mode 100644
index 000000000000..4769179167b3
--- /dev/null
+++ b/nixpkgs/pkgs/build-support/setup-hooks/auto-patchelf.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import pprint
+import subprocess
+import sys
+from fnmatch import fnmatch
+from collections import defaultdict
+from contextlib import contextmanager
+from dataclasses import dataclass
+from itertools import chain
+from pathlib import Path, PurePath
+from typing import DefaultDict, Iterator, List, Optional, Set, Tuple
+
+from elftools.common.exceptions import ELFError  # type: ignore
+from elftools.elf.dynamic import DynamicSection  # type: ignore
+from elftools.elf.elffile import ELFFile  # type: ignore
+from elftools.elf.enums import ENUM_E_TYPE, ENUM_EI_OSABI  # type: ignore
+
+
+@contextmanager
+def open_elf(path: Path) -> Iterator[ELFFile]:
+    with path.open('rb') as stream:
+        yield ELFFile(stream)
+
+
+def is_static_executable(elf: ELFFile) -> bool:
+    # Statically linked executables have an ELF type of EXEC but no INTERP.
+    return (elf.header["e_type"] == 'ET_EXEC'
+            and not elf.get_section_by_name(".interp"))
+
+
+def is_dynamic_executable(elf: ELFFile) -> bool:
+    # We do not require an ELF type of EXEC. This also catches
+    # position-independent executables, as they typically have an INTERP
+    # section but their ELF type is DYN.
+    return bool(elf.get_section_by_name(".interp"))
+
+
+def get_dependencies(elf: ELFFile) -> List[str]:
+    dependencies = []
+    # This convoluted code is here on purpose. For some reason, using
+    # elf.get_section_by_name(".dynamic") does not always return an
+    # instance of DynamicSection, but that is required to call iter_tags
+    for section in elf.iter_sections():
+        if isinstance(section, DynamicSection):
+            for tag in section.iter_tags('DT_NEEDED'):
+                dependencies.append(tag.needed)
+            break # There is only one dynamic section
+
+    return dependencies
+
+
+def get_rpath(elf: ELFFile) -> List[str]:
+    # This convoluted code is here on purpose. For some reason, using
+    # elf.get_section_by_name(".dynamic") does not always return an
+    # instance of DynamicSection, but that is required to call iter_tags
+    for section in elf.iter_sections():
+        if isinstance(section, DynamicSection):
+            for tag in section.iter_tags('DT_RUNPATH'):
+                return tag.runpath.split(':')
+
+            for tag in section.iter_tags('DT_RPATH'):
+                return tag.rpath.split(':')
+
+            break # There is only one dynamic section
+
+    return []
+
+
+def get_arch(elf: ELFFile) -> str:
+    return elf.get_machine_arch()
+
+
+def get_osabi(elf: ELFFile) -> str:
+    return elf.header["e_ident"]["EI_OSABI"]
+
+
+def osabi_are_compatible(wanted: str, got: str) -> bool:
+    """
+    Tests whether two OS ABIs are compatible, taking into account the
+    generally accepted compatibility of SVR4 ABI with other ABIs.
+    """
+    if not wanted or not got:
+        # One of the types couldn't be detected, so as a fallback we'll
+        # assume they're compatible.
+        return True
+
+    # Generally speaking, the base ABI (0x00), which is represented by
+    # readelf(1) as "UNIX - System V", indicates broad compatibility
+    # with other ABIs.
+    #
+    # TODO: This isn't always true. For example, some OSes embed ABI
+    # compatibility into SHT_NOTE sections like .note.tag and
+    # .note.ABI-tag.  It would be prudent to add these to the detection
+    # logic to produce better ABI information.
+    if wanted == 'ELFOSABI_SYSV':
+        return True
+
+    # Similarly here, we should be able to link against a superset of
+    # features, so even if the target has another ABI, this should be
+    # fine.
+    if got == 'ELFOSABI_SYSV':
+        return True
+
+    # Otherwise, we simply return whether the ABIs are identical.
+    return wanted == got
+
+
+def glob(path: Path, pattern: str, recursive: bool) -> Iterator[Path]:
+    if path.is_dir():
+        return path.rglob(pattern) if recursive else path.glob(pattern)
+    else:
+        # path.glob won't return anything if the path is not a directory.
+        # We extend that behavior by matching the file name against the pattern.
+        # This allows to pass single files instead of dirs to auto_patchelf,
+        # for greater control on the files to consider.
+        return [path] if path.match(pattern) else []
+
+
+cached_paths: Set[Path] = set()
+soname_cache: DefaultDict[Tuple[str, str], List[Tuple[Path, str]]] = defaultdict(list)
+
+
+def populate_cache(initial: List[Path], recursive: bool =False) -> None:
+    lib_dirs = list(initial)
+
+    while lib_dirs:
+        lib_dir = lib_dirs.pop(0)
+
+        if lib_dir in cached_paths:
+            continue
+
+        cached_paths.add(lib_dir)
+
+        for path in glob(lib_dir, "*.so*", recursive):
+            if not path.is_file():
+                continue
+
+            # As an optimisation, resolve the symlinks here, as the target is unique
+            # XXX: (layus, 2022-07-25) is this really an optimisation in all cases ?
+            # It could make the rpath bigger or break the fragile precedence of $out.
+            resolved = path.resolve()
+            # Do not use resolved paths when names do not match
+            if resolved.name != path.name:
+                resolved = path
+
+            try:
+                with open_elf(path) as elf:
+                    osabi = get_osabi(elf)
+                    arch = get_arch(elf)
+                    rpath = [Path(p) for p in get_rpath(elf)
+                                     if p and '$ORIGIN' not in p]
+                    lib_dirs += rpath
+                    soname_cache[(path.name, arch)].append((resolved.parent, osabi))
+
+            except ELFError:
+                # Not an ELF file in the right format
+                pass
+
+
+def find_dependency(soname: str, soarch: str, soabi: str) -> Optional[Path]:
+    for lib, libabi in soname_cache[(soname, soarch)]:
+        if osabi_are_compatible(soabi, libabi):
+            return lib
+    return None
+
+
+@dataclass
+class Dependency:
+    file: Path              # The file that contains the dependency
+    name: Path              # The name of the dependency
+    found: bool = False     # Whether it was found somewhere
+
+
+def auto_patchelf_file(path: Path, runtime_deps: list[Path], append_rpaths: List[Path] = [], extra_args: List[str] = []) -> list[Dependency]:
+    try:
+        with open_elf(path) as elf:
+
+            if is_static_executable(elf):
+                # No point patching these
+                print(f"skipping {path} because it is statically linked")
+                return []
+
+            if elf.num_segments() == 0:
+                # no segment (e.g. object file)
+                print(f"skipping {path} because it contains no segment")
+                return []
+
+            file_arch = get_arch(elf)
+            if interpreter_arch != file_arch:
+                # Our target architecture is different than this file's
+                # architecture, so skip it.
+                print(f"skipping {path} because its architecture ({file_arch})"
+                      f" differs from target ({interpreter_arch})")
+                return []
+
+            file_osabi = get_osabi(elf)
+            if not osabi_are_compatible(interpreter_osabi, file_osabi):
+                print(f"skipping {path} because its OS ABI ({file_osabi}) is"
+                      f" not compatible with target ({interpreter_osabi})")
+                return []
+
+            file_is_dynamic_executable = is_dynamic_executable(elf)
+
+            file_dependencies = map(Path, get_dependencies(elf))
+
+    except ELFError:
+        return []
+
+    rpath = []
+    if file_is_dynamic_executable:
+        print("setting interpreter of", path)
+        subprocess.run(
+                ["patchelf", "--set-interpreter", interpreter_path.as_posix(), path.as_posix()] + extra_args,
+                check=True)
+        rpath += runtime_deps
+
+    print("searching for dependencies of", path)
+    dependencies = []
+    # Be sure to get the output of all missing dependencies instead of
+    # failing at the first one, because it's more useful when working
+    # on a new package where you don't yet know the dependencies.
+    for dep in file_dependencies:
+        if dep.is_absolute() and dep.is_file():
+            # This is an absolute path. If it exists, just use it.
+            # Otherwise, we probably want this to produce an error when
+            # checked (because just updating the rpath won't satisfy
+            # it).
+            continue
+        elif (libc_lib / dep).is_file():
+            # This library exists in libc, and will be correctly
+            # resolved by the linker.
+            continue
+
+        if found_dependency := find_dependency(dep.name, file_arch, file_osabi):
+            rpath.append(found_dependency)
+            dependencies.append(Dependency(path, dep, True))
+            print(f"    {dep} -> found: {found_dependency}")
+        else:
+            dependencies.append(Dependency(path, dep, False))
+            print(f"    {dep} -> not found!")
+
+    rpath.extend(append_rpaths)
+
+    # Dedup the rpath
+    rpath_str = ":".join(dict.fromkeys(map(Path.as_posix, rpath)))
+
+    if rpath:
+        print("setting RPATH to:", rpath_str)
+        subprocess.run(
+                ["patchelf", "--set-rpath", rpath_str, path.as_posix()] + extra_args,
+                check=True)
+
+    return dependencies
+
+
+def auto_patchelf(
+        paths_to_patch: List[Path],
+        lib_dirs: List[Path],
+        runtime_deps: List[Path],
+        recursive: bool = True,
+        ignore_missing: List[str] = [],
+        append_rpaths: List[Path] = [],
+        extra_args: List[str] = []) -> None:
+
+    if not paths_to_patch:
+        sys.exit("No paths to patch, stopping.")
+
+    # Add all shared objects of the current output path to the cache,
+    # before lib_dirs, so that they are chosen first in find_dependency.
+    populate_cache(paths_to_patch, recursive)
+    populate_cache(lib_dirs)
+
+    dependencies = []
+    for path in chain.from_iterable(glob(p, '*', recursive) for p in paths_to_patch):
+        if not path.is_symlink() and path.is_file():
+            dependencies += auto_patchelf_file(path, runtime_deps, append_rpaths, extra_args)
+
+    missing = [dep for dep in dependencies if not dep.found]
+
+    # Print a summary of the missing dependencies at the end
+    print(f"auto-patchelf: {len(missing)} dependencies could not be satisfied")
+    failure = False
+    for dep in missing:
+        for pattern in ignore_missing:
+            if fnmatch(dep.name.name, pattern):
+                print(f"warn: auto-patchelf ignoring missing {dep.name} wanted by {dep.file}")
+                break
+        else:
+            print(f"error: auto-patchelf could not satisfy dependency {dep.name} wanted by {dep.file}")
+            failure = True
+
+    if failure:
+        sys.exit('auto-patchelf failed to find all the required dependencies.\n'
+                 'Add the missing dependencies to --libs or use '
+                 '`--ignore-missing="foo.so.1 bar.so etc.so"`.')
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        prog="auto-patchelf",
+        description='auto-patchelf tries as hard as possible to patch the'
+                    ' provided binary files by looking for compatible'
+                    'libraries in the provided paths.')
+    parser.add_argument(
+        "--ignore-missing",
+        nargs="*",
+        type=str,
+        help="Do not fail when some dependencies are not found.")
+    parser.add_argument(
+        "--no-recurse",
+        dest="recursive",
+        action="store_false",
+        help="Disable the recursive traversal of paths to patch.")
+    parser.add_argument(
+        "--paths", nargs="*", type=Path,
+        help="Paths whose content needs to be patched."
+             " Single files and directories are accepted."
+             " Directories are traversed recursively by default.")
+    parser.add_argument(
+        "--libs", nargs="*", type=Path,
+        help="Paths where libraries are searched for."
+             " Single files and directories are accepted."
+             " Directories are not searched recursively.")
+    parser.add_argument(
+        "--runtime-dependencies", nargs="*", type=Path,
+        help="Paths to prepend to the runtime path of executable binaries."
+             " Subject to deduplication, which may imply some reordering.")
+    parser.add_argument(
+        "--append-rpaths",
+        nargs="*",
+        type=Path,
+        help="Paths to append to all runtime paths unconditionally",
+    )
+    parser.add_argument(
+        "--extra-args",
+        # Undocumented Python argparse feature: consume all remaining arguments
+        # as values for this one. This means this argument should always be passed
+        # last.
+        nargs="...",
+        type=str,
+        help="Extra arguments to pass to patchelf. This argument should always come last."
+    )
+
+    print("automatically fixing dependencies for ELF files")
+    args = parser.parse_args()
+    pprint.pprint(vars(args))
+
+    auto_patchelf(
+        args.paths,
+        args.libs,
+        args.runtime_dependencies,
+        args.recursive,
+        args.ignore_missing,
+        append_rpaths=args.append_rpaths,
+        extra_args=args.extra_args)
+
+
+interpreter_path: Path  = None # type: ignore
+interpreter_osabi: str  = None # type: ignore
+interpreter_arch: str   = None # type: ignore
+libc_lib: Path          = None # type: ignore
+
+if __name__ == "__main__":
+    nix_support = Path(os.environ['NIX_BINTOOLS']) / 'nix-support'
+    interpreter_path = Path((nix_support / 'dynamic-linker').read_text().strip())
+    libc_lib = Path((nix_support / 'orig-libc').read_text().strip()) / 'lib'
+
+    with open_elf(interpreter_path) as interpreter:
+        interpreter_osabi = get_osabi(interpreter)
+        interpreter_arch = get_arch(interpreter)
+
+    if interpreter_arch and interpreter_osabi and interpreter_path and libc_lib:
+        main()
+    else:
+        sys.exit("Failed to parse dynamic linker (ld) properties.")