about summary refs log tree commit diff
path: root/nixos
diff options
context:
space:
mode:
author Maximilian Bosch <mb@flyingcircus.io> 2024-03-13 14:36:20 +0100
committer Maximilian Bosch <mb@flyingcircus.io> 2024-03-13 16:15:54 +0100
commit 0cdaede1444c143b411ebcb203b1a7d2f97a3949 (patch)
tree 5d2c6c506ae79cd761a088e43fe6bfc532fc0b8f /nixos
parent 8d0e5a3402aabe9a749913d9028b54bad4da32ab (diff)
downloadnixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.gz
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.bz2
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.lz
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.xz
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.zst
nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.zip
pg-dump-anon: init at 1.3.1
This is a Go program inside the sources of `postgresql_anonymizer` that
allows performing database dumps, but with anonymized data. I figured
that it's a little awkward to have a client program be part of the
extension package.

So I decided to create a second package called `pg-dump-anon`. Since
it's one repository, both share `version` & `src`.

Also extended the VM test to make sure we're getting properly anonymized
data when dumping with `pg_dump_anon`.
Diffstat (limited to 'nixos')
-rw-r--r-- nixos/tests/pg_anonymizer.nix 54
1 files changed, 47 insertions, 7 deletions
diff --git a/nixos/tests/pg_anonymizer.nix b/nixos/tests/pg_anonymizer.nix
index 601526272d71..2960108e37c3 100644
--- a/nixos/tests/pg_anonymizer.nix
+++ b/nixos/tests/pg_anonymizer.nix
@@ -2,7 +2,8 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: {
   name = "pg_anonymizer";
   meta.maintainers = lib.teams.flyingcircus.members;
 
-  nodes.machine = {
+  nodes.machine = { pkgs, ... }: {
+    environment.systemPackages = [ pkgs.pg-dump-anon ];
     services.postgresql = {
       enable = true;
       extraPlugins = ps: [ ps.anonymizer ];
@@ -39,16 +40,55 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: {
         assert row[1] != original_name, f"Expected first row to have a name other than {original_name}"
         assert not bool(row[2]), "Expected points to be NULL in first row"
 
-    with subtest("Check initial state"):
-        output = get_player_table_contents()
+    def find_xsv_in_dump(dump, sep=','):
+        """
+        Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like
+
+            COPY public.player ...
+            1,Shields,
+            2,Salazar,
+            \.
+
+        in the given dump (the commas are tabs in case of pg_dump).
+              Extract the CSV lines and split by `sep`.
+        """
+
+        try:
+            from itertools import dropwhile, takewhile
+            return [x.split(sep) for x in list(takewhile(
+                lambda x: x != "\\.",
+                dropwhile(
+                    lambda x: not x.startswith("COPY public.player"),
+                    dump.splitlines()
+                )
+            ))[1:]]
+        except:
+            print(f"Dump to process: {dump}")
+            raise
+
+    def check_original_data(output):
         assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}"
         assert output[1] == ['2','Bar','42'], f"Expected first row from player table to be 2,Bar,42; got {output[1]}"
 
-    with subtest("Anonymize"):
-        machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
-        output = get_player_table_contents()
-
+    def check_anonymized_rows(output):
         check_anonymized_row(output[0], '1', 'Foo')
         check_anonymized_row(output[1], '2', 'Bar')
+
+    with subtest("Check initial state"):
+        check_original_data(get_player_table_contents())
+
+    with subtest("Anonymous dumps"):
+        check_original_data(find_xsv_in_dump(
+            machine.succeed("sudo -u postgres pg_dump demo"),
+            sep='\t'
+        ))
+        check_anonymized_rows(find_xsv_in_dump(
+            machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"),
+            sep=','
+        ))
+
+    with subtest("Anonymize"):
+        machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
+        check_anonymized_rows(get_player_table_contents())
   '';
 })