about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mario Rodas <marsam@users.noreply.github.com> 2024-03-14 20:53:36 -0500
committer GitHub <noreply@github.com> 2024-03-14 20:53:36 -0500
commit e6aefe20a154a8ba187d6c0bad26eddbcba0d259 (patch)
tree 9ee22177276d7db405475c3a88ac0457cc9aed69
parent 6709f487337a06423f663cbc33d2da2d6b09f674 (diff)
parent b4f8ebd2955e2c3ca9f37110b05646f9bd2670ac (diff)
download nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar.gz
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar.bz2
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar.lz
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar.xz
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.tar.zst
nixlib-e6aefe20a154a8ba187d6c0bad26eddbcba0d259.zip
Merge pull request #292533 from flyingcircusio/init-postgresql-anonymizer
postgresqlPackages.anonymizer: init at 1.3.1; add me & osnyx to flyingcircus team
-rw-r--r-- maintainers/maintainer-list.nix 6
-rw-r--r-- maintainers/team-list.nix 2
-rw-r--r-- nixos/tests/all-tests.nix 1
-rw-r--r-- nixos/tests/pg_anonymizer.nix 94
-rw-r--r-- pkgs/by-name/pg/pg-dump-anon/package.nix 32
-rw-r--r-- pkgs/servers/sql/postgresql/ext/anonymizer.nix 32
-rw-r--r-- pkgs/servers/sql/postgresql/packages.nix 2
7 files changed, 169 insertions, 0 deletions
diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix
index ef3ce19da9c8..4994c79f73a4 100644
--- a/maintainers/maintainer-list.nix
+++ b/maintainers/maintainer-list.nix
@@ -14620,6 +14620,12 @@
     githubId = 111265;
     name = "Ozan Sener";
   };
+  osnyx = {
+    email = "os@flyingcircus.io";
+    github = "osnyx";
+    githubId = 104593071;
+    name = "Oliver Schmidt";
+  };
   ostrolucky = {
     email = "gabriel.ostrolucky@gmail.com";
     github = "ostrolucky";
diff --git a/maintainers/team-list.nix b/maintainers/team-list.nix
index 8e9194fd6371..0138338379c2 100644
--- a/maintainers/team-list.nix
+++ b/maintainers/team-list.nix
@@ -311,6 +311,8 @@ with lib.maintainers; {
       dpausp
       frlan
       leona
+      osnyx
+      ma27
     ];
     scope = "Team for Flying Circus employees who collectively maintain packages.";
     shortName = "Flying Circus employees";
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index ac64b85dd486..b2e824642092 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -683,6 +683,7 @@ in {
   peering-manager = handleTest ./web-apps/peering-manager.nix {};
   peertube = handleTestOn ["x86_64-linux"] ./web-apps/peertube.nix {};
   peroxide = handleTest ./peroxide.nix {};
+  pg_anonymizer = handleTest ./pg_anonymizer.nix {};
   pgadmin4 = handleTest ./pgadmin4.nix {};
   pgbouncer = handleTest ./pgbouncer.nix {};
   pgjwt = handleTest ./pgjwt.nix {};
diff --git a/nixos/tests/pg_anonymizer.nix b/nixos/tests/pg_anonymizer.nix
new file mode 100644
index 000000000000..2960108e37c3
--- /dev/null
+++ b/nixos/tests/pg_anonymizer.nix
@@ -0,0 +1,94 @@
+import ./make-test-python.nix ({ pkgs, lib, ... }: {
+  name = "pg_anonymizer";
+  meta.maintainers = lib.teams.flyingcircus.members;
+
+  nodes.machine = { pkgs, ... }: {
+    environment.systemPackages = [ pkgs.pg-dump-anon ];
+    services.postgresql = {
+      enable = true;
+      extraPlugins = ps: [ ps.anonymizer ];
+      settings.shared_preload_libraries = "anon";
+    };
+  };
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+    machine.wait_for_unit("postgresql.service")
+
+    with subtest("Setup"):
+        machine.succeed("sudo -u postgres psql --command 'create database demo'")
+        machine.succeed(
+            "sudo -u postgres psql -d demo -f ${pkgs.writeText "init.sql" ''
+              create extension anon cascade;
+              select anon.init();
+              create table player(id serial, name text, points int);
+              insert into player(id,name,points) values (1,'Foo', 23);
+              insert into player(id,name,points) values (2,'Bar',42);
+              security label for anon on column player.name is 'MASKED WITH FUNCTION anon.fake_last_name();';
+              security label for anon on column player.points is 'MASKED WITH VALUE NULL';
+            ''}"
+        )
+
+    def get_player_table_contents():
+        return [
+            x.split(',') for x in machine.succeed("sudo -u postgres psql -d demo --csv --command 'select * from player'").splitlines()[1:]
+        ]
+
+    def check_anonymized_row(row, id, original_name):  # row: [id, name, points] as strings
+        assert row[0] == id, f"Expected row to have ID {id}, but got {row[0]}"
+        assert row[1] != original_name, f"Expected row {id} to have a name other than {original_name}"
+        assert not bool(row[2]), f"Expected points to be NULL in row {id}"  # NULL is dumped as an empty field
+
+    def find_xsv_in_dump(dump, sep=','):
+        r"""
+        Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like
+
+            COPY public.player ...
+            1,Shields,
+            2,Salazar,
+            \.
+
+        in the given dump (the commas are tabs in case of pg_dump).
+        Extract the CSV lines and split by `sep`.
+        """
+
+        try:
+            from itertools import dropwhile, takewhile
+            return [x.split(sep) for x in list(takewhile(
+                lambda x: x != "\\.",  # stop at the end-of-data marker
+                dropwhile(
+                    lambda x: not x.startswith("COPY public.player"),
+                    dump.splitlines()
+                )
+            ))[1:]]  # [1:] drops the COPY header line itself
+        except Exception:
+            print(f"Dump to process: {dump}")  # show the offending dump before re-raising
+            raise
+
+    def check_original_data(output):  # output: rows of the player table as lists of strings
+        assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}"
+        assert output[1] == ['2','Bar','42'], f"Expected second row from player table to be 2,Bar,42; got {output[1]}"
+
+    def check_anonymized_rows(output):
+        check_anonymized_row(output[0], '1', 'Foo')
+        check_anonymized_row(output[1], '2', 'Bar')
+
+    with subtest("Check initial state"):
+        check_original_data(get_player_table_contents())
+
+    with subtest("Anonymous dumps"):
+        check_original_data(find_xsv_in_dump(
+            machine.succeed("sudo -u postgres pg_dump demo"),
+            sep='\t'
+        ))
+        check_anonymized_rows(find_xsv_in_dump(
+            machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"),
+            sep=','
+        ))
+
+    with subtest("Anonymize"):
+        machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
+        check_anonymized_rows(get_player_table_contents())
+  '';
+})
diff --git a/pkgs/by-name/pg/pg-dump-anon/package.nix b/pkgs/by-name/pg/pg-dump-anon/package.nix
new file mode 100644
index 000000000000..fedcf9f40b6a
--- /dev/null
+++ b/pkgs/by-name/pg/pg-dump-anon/package.nix
@@ -0,0 +1,32 @@
+{ lib, fetchFromGitLab, buildGoModule, nixosTests, postgresql, makeWrapper }:
+
+buildGoModule rec {
+  pname = "pg-dump-anon";
+  version = "1.3.1";
+  src = fetchFromGitLab {
+    owner = "dalibo";
+    repo = "postgresql_anonymizer";
+    rev = version;
+    hash = "sha256-Z5Oz/cIYDxFUZwQijRk4xAOUdOK0LWR+px8WOcs+Rs0=";
+  };
+
+  sourceRoot = "${src.name}/pg_dump_anon";
+
+  vendorHash = "sha256-CwU1zoIayxvfnGL9kPdummPJiV+ECfSz4+q6gZGb8pw=";
+
+  passthru.tests = { inherit (nixosTests) pg_anonymizer; };
+
+  nativeBuildInputs = [ makeWrapper ];
+  postInstall = ''
+    wrapProgram $out/bin/pg_dump_anon \
+      --prefix PATH : ${lib.makeBinPath [ postgresql ]}
+  '';
+
+  meta = with lib; {
+    description = "Export databases with data being anonymized with the anonymizer extension";
+    homepage = "https://postgresql-anonymizer.readthedocs.io/en/stable/";
+    maintainers = teams.flyingcircus.members;
+    license = licenses.postgresql;
+    mainProgram = "pg_dump_anon";
+  };
+}
diff --git a/pkgs/servers/sql/postgresql/ext/anonymizer.nix b/pkgs/servers/sql/postgresql/ext/anonymizer.nix
new file mode 100644
index 000000000000..430911d40108
--- /dev/null
+++ b/pkgs/servers/sql/postgresql/ext/anonymizer.nix
@@ -0,0 +1,32 @@
+{ lib, stdenv, pg-dump-anon, postgresql, runtimeShell }:
+
+stdenv.mkDerivation (finalAttrs: {
+  pname = "postgresql_anonymizer";
+
+  inherit (pg-dump-anon) version src passthru; # reuse the CLI package's version, source and tests
+
+  buildInputs = [ postgresql ];
+  nativeBuildInputs = [ postgresql ] ++ lib.optional postgresql.jitSupport postgresql.llvm;
+
+  strictDeps = true;
+
+  makeFlags = [
+    "BINDIR=${placeholder "out"}/bin"
+    "datadir=${placeholder "out"}/share/postgresql"
+    "pkglibdir=${placeholder "out"}/lib"
+    "DESTDIR="
+  ];
+
+  postInstall = ''
+    cat >$out/bin/pg_dump_anon.sh <<'EOF'
+    #!${runtimeShell}
+    echo "This script is deprecated by upstream. To use the new script,"
+    echo "please install pkgs.pg-dump-anon."
+    exit 1
+    EOF
+  '';
+  # Do not inherit mainProgram: the binary written above is pg_dump_anon.sh, not pg_dump_anon.
+  meta = lib.getAttrs [ "homepage" "maintainers" "license" ] pg-dump-anon.meta // {
+    description = "Extension to mask or replace personally identifiable information (PII) or commercially sensitive data from a PostgreSQL database";
+  };
+})
diff --git a/pkgs/servers/sql/postgresql/packages.nix b/pkgs/servers/sql/postgresql/packages.nix
index 9cc83118c526..eabcb0613c0d 100644
--- a/pkgs/servers/sql/postgresql/packages.nix
+++ b/pkgs/servers/sql/postgresql/packages.nix
@@ -2,6 +2,8 @@ self: super: {
 
     age = super.callPackage ./ext/age.nix { };
 
+    anonymizer = super.callPackage ./ext/anonymizer.nix { };
+
     apache_datasketches = super.callPackage ./ext/apache_datasketches.nix { };
 
     citus = super.callPackage ./ext/citus.nix { };