diff options
author | Maximilian Bosch <mb@flyingcircus.io> | 2024-03-13 14:36:20 +0100 |
---|---|---|
committer | Maximilian Bosch <mb@flyingcircus.io> | 2024-03-13 16:15:54 +0100 |
commit | 0cdaede1444c143b411ebcb203b1a7d2f97a3949 (patch) | |
tree | 5d2c6c506ae79cd761a088e43fe6bfc532fc0b8f /nixos | |
parent | 8d0e5a3402aabe9a749913d9028b54bad4da32ab (diff) | |
download | nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.gz nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.bz2 nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.lz nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.xz nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.tar.zst nixlib-0cdaede1444c143b411ebcb203b1a7d2f97a3949.zip |
pg-dump-anon: init at 1.3.1
This is a Go program inside the sources of `postgresql_anonymizer` that makes it possible to perform database dumps, but with anonymized data. I figured that it's a little awkward to have a client program be part of the extension package, so I decided to create a second package called `pg-dump-anon`. Since both live in one repository, they share `version` & `src`. Also extended the VM test to make sure we're getting properly anonymized data when dumping with `pg_dump_anon`.
Diffstat (limited to 'nixos')
-rw-r--r-- | nixos/tests/pg_anonymizer.nix | 54 |
1 files changed, 47 insertions, 7 deletions
diff --git a/nixos/tests/pg_anonymizer.nix b/nixos/tests/pg_anonymizer.nix index 601526272d71..2960108e37c3 100644 --- a/nixos/tests/pg_anonymizer.nix +++ b/nixos/tests/pg_anonymizer.nix @@ -2,7 +2,8 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: { name = "pg_anonymizer"; meta.maintainers = lib.teams.flyingcircus.members; - nodes.machine = { + nodes.machine = { pkgs, ... }: { + environment.systemPackages = [ pkgs.pg-dump-anon ]; services.postgresql = { enable = true; extraPlugins = ps: [ ps.anonymizer ]; @@ -39,16 +40,55 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: { assert row[1] != original_name, f"Expected first row to have a name other than {original_name}" assert not bool(row[2]), "Expected points to be NULL in first row" - with subtest("Check initial state"): - output = get_player_table_contents() + def find_xsv_in_dump(dump, sep=','): + """ + Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like + + COPY public.player ... + 1,Shields, + 2,Salazar, + \. + + in the given dump (the commas are tabs in case of pg_dump). + Extract the CSV lines and split by `sep`. 
+ """ + + try: + from itertools import dropwhile, takewhile + return [x.split(sep) for x in list(takewhile( + lambda x: x != "\\.", + dropwhile( + lambda x: not x.startswith("COPY public.player"), + dump.splitlines() + ) + ))[1:]] + except: + print(f"Dump to process: {dump}") + raise + + def check_original_data(output): assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}" assert output[1] == ['2','Bar','42'], f"Expected first row from player table to be 2,Bar,42; got {output[1]}" - with subtest("Anonymize"): - machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'") - output = get_player_table_contents() - + def check_anonymized_rows(output): check_anonymized_row(output[0], '1', 'Foo') check_anonymized_row(output[1], '2', 'Bar') + + with subtest("Check initial state"): + check_original_data(get_player_table_contents()) + + with subtest("Anonymous dumps"): + check_original_data(find_xsv_in_dump( + machine.succeed("sudo -u postgres pg_dump demo"), + sep='\t' + )) + check_anonymized_rows(find_xsv_in_dump( + machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"), + sep=',' + )) + + with subtest("Anonymize"): + machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'") + check_anonymized_rows(get_player_table_contents()) ''; }) |