-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path04_pseudonymization.sql
More file actions
128 lines (110 loc) · 4.89 KB
/
04_pseudonymization.sql
File metadata and controls
128 lines (110 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
-- PostgreSQL Data Anonymization in AWS RDS
-- Pseudonymization functions for consistent anonymization
-- Create a function for consistent pseudonymization of names
CREATE OR REPLACE FUNCTION anon.pseudo_name(input text, salt text DEFAULT 'default_salt')
RETURNS text AS
$$
DECLARE
hash_val text;
first_names text[] := ARRAY['John', 'Jane', 'Robert', 'Mary', 'William', 'Patricia', 'James', 'Jennifer', 'Michael', 'Linda',
'David', 'Elizabeth', 'Richard', 'Barbara', 'Joseph', 'Susan', 'Thomas', 'Jessica', 'Charles', 'Sarah'];
last_names text[] := ARRAY['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor',
'Anderson', 'Thomas', 'Jackson', 'White', 'Harris', 'Martin', 'Thompson', 'Garcia', 'Martinez', 'Robinson'];
first_name text;
last_name text;
BEGIN
IF input IS NULL THEN
RETURN NULL;
END IF;
-- Generate a deterministic hash
hash_val := encode(hmac(input, salt, 'sha256'), 'hex');
-- Convert the hash to indices
first_name := first_names[1 + (('x' || substring(hash_val, 1, 8))::bit(32)::integer % array_length(first_names, 1))];
last_name := last_names[1 + (('x' || substring(hash_val, 9, 8))::bit(32)::integer % array_length(last_names, 1))];
RETURN first_name || ' ' || last_name;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Create a function for consistent pseudonymization of emails
CREATE OR REPLACE FUNCTION anon.pseudo_email(input text, salt text DEFAULT 'default_salt')
RETURNS text AS
$$
DECLARE
hash_val text;
domains text[] := ARRAY['example.com', 'sample.org', 'test.net', 'demo.io', 'fake.co'];
username text;
domain text;
BEGIN
IF input IS NULL THEN
RETURN NULL;
END IF;
-- Generate a deterministic hash
hash_val := encode(hmac(input, salt, 'sha256'), 'hex');
-- Create a consistent username from the hash
username := substring(hash_val, 1, 10);
-- Select a domain based on the hash
domain := domains[1 + (('x' || substring(hash_val, 11, 8))::bit(32)::integer % array_length(domains, 1))];
RETURN username || '@' || domain;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Create a function for consistent pseudonymization of addresses
CREATE OR REPLACE FUNCTION anon.pseudo_address(input text, salt text DEFAULT 'default_salt')
RETURNS text AS
$$
DECLARE
hash_val text;
street_numbers int[] := ARRAY[123, 456, 789, 101, 202, 303, 404, 505, 606, 707, 808, 909, 1010, 1111, 1212];
street_names text[] := ARRAY['Main', 'Oak', 'Pine', 'Maple', 'Cedar', 'Elm', 'Washington', 'Lake', 'Hill', 'Park'];
street_types text[] := ARRAY['St', 'Ave', 'Blvd', 'Rd', 'Ln', 'Dr', 'Way', 'Pl', 'Ct', 'Terrace'];
cities text[] := ARRAY['Springfield', 'Franklin', 'Greenville', 'Bristol', 'Clinton', 'Kingston', 'Marion', 'Salem'];
states text[] := ARRAY['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA'];
zips text[] := ARRAY['12345', '23456', '34567', '45678', '56789', '67890', '78901', '89012', '90123', '01234'];
street_number int;
street_name text;
street_type text;
city text;
state text;
zip text;
BEGIN
IF input IS NULL THEN
RETURN NULL;
END IF;
-- Generate a deterministic hash
hash_val := encode(hmac(input, salt, 'sha256'), 'hex');
-- Select components based on the hash
street_number := street_numbers[1 + (('x' || substring(hash_val, 1, 4))::bit(16)::integer % array_length(street_numbers, 1))];
street_name := street_names[1 + (('x' || substring(hash_val, 5, 4))::bit(16)::integer % array_length(street_names, 1))];
street_type := street_types[1 + (('x' || substring(hash_val, 9, 4))::bit(16)::integer % array_length(street_types, 1))];
city := cities[1 + (('x' || substring(hash_val, 13, 4))::bit(16)::integer % array_length(cities, 1))];
state := states[1 + (('x' || substring(hash_val, 17, 4))::bit(16)::integer % array_length(states, 1))];
zip := zips[1 + (('x' || substring(hash_val, 21, 4))::bit(16)::integer % array_length(zips, 1))];
RETURN street_number || ' ' || street_name || ' ' || street_type || ', ' || city || ', ' || state || ' ' || zip;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Create a function for consistent pseudonymization of any string
CREATE OR REPLACE FUNCTION anon.pseudonymize(input text, salt text DEFAULT 'default_salt', length int DEFAULT 10)
RETURNS text AS
$$
DECLARE
hash_val text;
chars text := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
result text := '';
i int := 0;
char_pos int;
BEGIN
IF input IS NULL THEN
RETURN NULL;
END IF;
-- Generate a deterministic hash
hash_val := encode(hmac(input, salt, 'sha256'), 'hex');
-- Convert the hash to a pseudorandom string of specified length
FOR i IN 1..length LOOP
char_pos := (('x' || substring(hash_val, (i % 32) + 1, 2))::bit(8)::integer % length(chars)) + 1;
result := result || substring(chars, char_pos, 1);
END LOOP;
RETURN result;
END;
$$
LANGUAGE plpgsql IMMUTABLE;