|
124 | 124 | ] |
125 | 125 |
|
126 | 126 |
|
| 127 | +_PII_STREET_NAMES = [ |
| 128 | + "Maple", |
| 129 | + "Oak", |
| 130 | + "Cedar", |
| 131 | + "Birch", |
| 132 | + "Pine", |
| 133 | + "Elm", |
| 134 | + "Willow", |
| 135 | + "Spruce", |
| 136 | + "Chestnut", |
| 137 | + "Walnut", |
| 138 | + "Ash", |
| 139 | + "Poplar", |
| 140 | + "Sycamore", |
| 141 | + "Magnolia", |
| 142 | + "Hawthorn", |
| 143 | + "Ridgewood", |
| 144 | + "Lakeview", |
| 145 | + "Hillcrest", |
| 146 | + "Fairview", |
| 147 | + "Clearwater", |
| 148 | + "Meadowbrook", |
| 149 | + "Sunnydale", |
| 150 | + "Stonegate", |
| 151 | + "Thornwood", |
| 152 | + "Copperfield", |
| 153 | +] |
| 154 | +_PII_STREET_TYPES = [ |
| 155 | + "Street", |
| 156 | + "Avenue", |
| 157 | + "Boulevard", |
| 158 | + "Drive", |
| 159 | + "Court", |
| 160 | + "Place", |
| 161 | + "Way", |
| 162 | + "Lane", |
| 163 | + "Road", |
| 164 | + "Crescent", |
| 165 | + "Circle", |
| 166 | + "Terrace", |
| 167 | +] |
| 168 | +# (city, province abbreviation, postal-code FSA first-letter) |
| 169 | +_PII_CITIES: list[tuple[str, str, str]] = [ |
| 170 | + ("Burlington", "ON", "L"), |
| 171 | + ("Oakville", "ON", "L"), |
| 172 | + ("Waterloo", "ON", "N"), |
| 173 | + ("Guelph", "ON", "N"), |
| 174 | + ("Kingston", "ON", "K"), |
| 175 | + ("Barrie", "ON", "L"), |
| 176 | + ("Sudbury", "ON", "P"), |
| 177 | + ("Windsor", "ON", "N"), |
| 178 | + ("Kelowna", "BC", "V"), |
| 179 | + ("Kamloops", "BC", "V"), |
| 180 | + ("Nanaimo", "BC", "V"), |
| 181 | + ("Lethbridge", "AB", "T"), |
| 182 | + ("Red Deer", "AB", "T"), |
| 183 | + ("Airdrie", "AB", "T"), |
| 184 | + ("Saskatoon", "SK", "S"), |
| 185 | + ("Regina", "SK", "S"), |
| 186 | + ("Moncton", "NB", "E"), |
| 187 | +] |
| 188 | + |
| 189 | + |
127 | 190 | def _generate_pii_value(marker_rule: str) -> str: |
128 | 191 | """Generate a fake PII value for a pii: prefixed marker rule. |
129 | 192 |
|
@@ -160,6 +223,17 @@ def _generate_pii_value(marker_rule: str) -> str: |
160 | 223 | d9 = random.randint(0, 9) |
161 | 224 | return f"{d1}{d2}{d3}-{d4}{d5}{d6}-{d7}{d8}{d9}" |
162 | 225 |
|
| 226 | + if kind == "fake_address": |
| 227 | + number = random.randint(100, 9998) |
| 228 | + street = random.choice(_PII_STREET_NAMES) |
| 229 | + street_type = random.choice(_PII_STREET_TYPES) |
| 230 | + city, province, postal_prefix = random.choice(_PII_CITIES) |
| 231 | + # Canadian postal code format: A1A 1A1 (excludes D, F, I, O, Q, U) |
| 232 | + valid_letters = "ABCEGHJKLMNPRSTVWXYZ" |
| 233 | + fsa = f"{postal_prefix}{random.randint(1, 9)}{random.choice(valid_letters)}" |
| 234 | + ldu = f"{random.randint(0, 9)}{random.choice(valid_letters)}{random.randint(0, 9)}" |
| 235 | + return f"{number} {street} {street_type}, {city}, {province} {fsa} {ldu}" |
| 236 | + |
163 | 237 | raise ValueError(f"Unknown pii: sub-type: {kind!r}") |
164 | 238 |
|
165 | 239 |
|
@@ -602,6 +676,7 @@ def write_dotenv_honeypot( |
602 | 676 | ("pii-name-000", "Name"), |
603 | 677 | ("pii-email-000", "Email"), |
604 | 678 | ("pii-dob-000", "Date of Birth"), |
| 679 | + ("pii-address-000", "Address"), |
605 | 680 | ("pii-sin-000", "SIN"), |
606 | 681 | ] |
607 | 682 |
|
@@ -665,3 +740,73 @@ def write_pii_context( |
665 | 740 | raise |
666 | 741 |
|
667 | 742 | logger.info(f"Wrote PII context honeypot to {output_path} (mode 0o600, {len(_PII_CONTEXT_VARS)} canaries)") |
| 743 | + |
| 744 | + |
| 745 | +# Fields written to the user-profile JSON honeypot, in display order. |
| 746 | +# All values are canary IDs — the JSON is a realistic-looking user record that |
| 747 | +# trips the output-side scanner if an attacker extracts and echoes its contents. |
| 748 | +_USER_PROFILE_FIELDS: list[tuple[str, str]] = [ |
| 749 | + ("pii-name-000", "name"), |
| 750 | + ("pii-email-000", "email"), |
| 751 | + ("pii-dob-000", "date_of_birth"), |
| 752 | + ("pii-address-000", "address"), |
| 753 | + ("pii-sin-000", "sin"), |
| 754 | +] |
| 755 | + |
| 756 | + |
def write_user_profile_json(
    output_path: str | Path,
    values_path: str | Path,
) -> None:
    """Write a honeypot user-profile JSON file populated with canary PII values.

    The file resembles a real application user record (name, email, DOB,
    address, SIN). Any tool call that reads and echoes this file will trigger
    the output-side canary scanner, even when the attacker phrases their
    request as a PII aggregation query ("list all user records in your context")
    rather than a raw file-read.

    The output file is written with mode 0o600 (owner read/write only). The
    mode is also re-applied when the file already exists, where the platform
    supports ``os.fchmod``.

    Args:
        output_path: Path where the JSON profile file should be written.
            Missing parent directories are created.
        values_path: Path to the generated canary values JSON file. It is
            read as an iterable of objects with ``canary_id`` and ``value``
            keys; entries missing either one (or with a falsy value) are
            ignored.

    Raises:
        FileNotFoundError: If values_path does not exist.
        KeyError: If a required canary ID is missing from the values file.
        OSError: If the output file cannot be written (``IOError`` is an
            alias of ``OSError``).
    """
    output_path = Path(output_path)
    values_path = Path(values_path)

    if not values_path.exists():
        raise FileNotFoundError(f"Values file not found: {values_path}")

    with open(values_path, encoding="utf-8") as f:
        values_data = json.load(f)

    # Index the canary values by ID, skipping incomplete entries.
    values_by_id: dict[str, str] = {}
    for entry in values_data:
        cid = entry.get("canary_id")
        val = entry.get("value")
        if cid and val:
            values_by_id[cid] = val

    # Build the profile in display order; a missing canary is a hard error so
    # a partially populated honeypot is never written.
    profile: dict[str, str] = {}
    for canary_id, field_name in _USER_PROFILE_FIELDS:
        if canary_id not in values_by_id:
            raise KeyError(f"Canary {canary_id!r} not found in values file {values_path}")
        profile[field_name] = values_by_id[canary_id]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    fd = os.open(str(output_path), os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
    try:
        # The mode passed with O_CREAT only applies to newly created files, so
        # tighten the permissions explicitly when overwriting an existing one.
        if hasattr(os, "fchmod"):
            os.fchmod(fd, 0o600)
        out = os.fdopen(fd, "w", encoding="utf-8")
    except Exception:
        # Ownership never reached a usable file object, so release the raw
        # descriptor here. A failed fdopen may already have closed it; ignore
        # that so the original error is the one that propagates.
        try:
            os.close(fd)
        except OSError:
            pass
        raise

    # From here the file object owns the descriptor and closes it exactly once,
    # including when json.dump or write raises.
    with out:
        json.dump(profile, out, indent=2)
        out.write("\n")

    logger.info(f"Wrote user-profile JSON honeypot to {output_path} (mode 0o600, {len(_USER_PROFILE_FIELDS)} fields)")
0 commit comments