1+ import csv
2+ import os
3+ import re
4+ from django .conf import settings
15from django .test import TestCase
26from pokemon_v2 .models import *
37
@@ -9,3 +13,146 @@ def setUp(self):
def fields_are_valid(self):
    """Check that the ability named "Smell" reports generation id 3."""
    # NOTE(review): the name has no ``test_`` prefix; if the enclosing class
    # is a unittest-style TestCase, default discovery will not collect this
    # method -- confirm whether that is intentional.
    ability = Ability.objects.get(name="Smell")
    expected_generation_id = 3
    self.assertEqual(ability.generation_id, expected_generation_id)
16+
17+
class CSVResourceNameValidationTestCase(TestCase):
    """
    Test that all resource identifiers in CSV files follow ASCII slug format.

    Resource identifiers are used in API URLs and should be URL-safe ASCII slugs
    (lowercase letters, numbers, and hyphens only).

    This test validates the data source (CSV files) before it's loaded into the database.
    """

    # Pattern for valid resource identifiers: lowercase letters, numbers, and hyphens only
    VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-z0-9-]+$")

    @classmethod
    def _is_valid_identifier(cls, identifier):
        """Return True when *identifier* consists solely of ``[a-z0-9-]`` characters.

        ``fullmatch`` is used instead of ``match`` because ``$`` also matches
        just before a trailing newline, so ``match`` would accept ``"abc\\n"``.
        """
        return cls.VALID_IDENTIFIER_PATTERN.fullmatch(identifier) is not None

    @classmethod
    def _scan_csv_file(cls, csv_path, filename):
        """Return a list of violation dicts for one CSV file.

        Each dict has the keys ``file``, ``row``, ``id`` and ``identifier``.
        Files without an ``identifier`` column (including empty files, which
        have no header at all) yield an empty list. I/O, decoding and CSV
        parsing errors are reported as a single violation entry so the caller
        can keep scanning the remaining files.
        """
        violations = []
        try:
            # newline="" lets the csv module handle line endings itself.
            with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
                reader = csv.DictReader(csvfile)

                # fieldnames is None for an empty file; treat that like a
                # file that simply has no identifier column.
                fieldnames = reader.fieldnames
                if not fieldnames or "identifier" not in fieldnames:
                    return violations

                for row in reader:
                    # A short row leaves the missing column as None, so
                    # guard before calling strip().
                    identifier = (row.get("identifier") or "").strip()

                    # Skip empty identifiers and those that are already valid
                    if not identifier or cls._is_valid_identifier(identifier):
                        continue

                    violations.append(
                        {
                            "file": filename,
                            # line_num tracks the physical line in the file,
                            # which stays correct when blank rows are skipped.
                            "row": reader.line_num,
                            "id": row.get("id", "N/A"),
                            "identifier": identifier,
                        }
                    )
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            violations.append(
                {
                    "file": filename,
                    "row": "N/A",
                    "id": "N/A",
                    "identifier": f"Error reading file: {exc} ",
                }
            )
        return violations

    def test_all_csv_identifiers_are_ascii_slugs(self):
        """
        Validate that all resource identifiers in CSV files follow the ASCII slug format.

        Identifiers should only contain:
        - Lowercase letters (a-z)
        - Numbers (0-9)
        - Hyphens (-)

        This test will fail if any CSV contains identifiers with:
        - Unicode characters (ñ, \u2019, é, etc.)
        - Uppercase letters
        - Spaces
        - Special characters (&, (), ', etc.)
        """
        csv_dir = os.path.join(settings.BASE_DIR, "data", "v2", "csv")

        violations = []
        for filename in sorted(os.listdir(csv_dir)):
            if not filename.endswith(".csv"):
                continue
            csv_path = os.path.join(csv_dir, filename)
            violations.extend(self._scan_csv_file(csv_path, filename))

        if not violations:
            return

        # Report violations
        error_lines = [
            "\n \n Found {} resource(s) with invalid identifiers (not ASCII slugs):".format(
                len(violations)
            ),
            "\n Identifiers must match pattern: ^[a-z0-9-]+$",
            "\n Invalid identifiers found in CSV files:",
        ]
        error_lines.extend(
            " - {file} (row {row}, id={id}): {identifier}".format(**v)
            for v in violations
        )
        error_lines.extend(
            [
                "\n These identifiers contain invalid characters and must be normalized.",
                "Update the CSV files in data/v2/csv/ to fix these identifiers.",
                "\n Suggested fixes:",
                " - Remove Unicode apostrophes (') and replace with regular hyphens or remove",
                " - Remove Unicode letters (ñ → n)",
                " - Remove parentheses and other special characters",
                " - Convert to lowercase",
            ]
        )

        self.fail("\n ".join(error_lines))

    def test_identifier_pattern_examples(self):
        """Test that the validation pattern works correctly with example identifiers."""
        # Valid identifiers
        valid_identifiers = [
            "pikachu",
            "charizard-mega-x",
            "mr-mime",
            "ho-oh",
            "type-null",
            "item-123",
            "mega-stone",
        ]

        for identifier in valid_identifiers:
            self.assertTrue(
                self._is_valid_identifier(identifier),
                f"{identifier} should be valid but was rejected",
            )

        # Invalid identifiers
        invalid_identifiers = [
            "Pikachu",  # Uppercase
            "Mr. Mime",  # Space and period
            "kofu\u2019s-wallet",  # Unicode apostrophe (U+2019)
            "jalapeño",  # Unicode ñ
            "steel-bottle-(r)",  # Parentheses
            "b&w-grass-tablecloth",  # Ampersand
            "farfetch'd",  # Apostrophe
            "kofu's-wallet",  # Regular apostrophe
            "pikachu\n",  # Trailing newline must not slip past the `$` anchor
        ]

        for identifier in invalid_identifiers:
            self.assertFalse(
                self._is_valid_identifier(identifier),
                f"{identifier} should be invalid but was accepted",
            )
0 commit comments