1+ import csv
2+ import os
3+ import re
14from django .test import TestCase
25from pokemon_v2 .models import *
36
@@ -9,3 +12,242 @@ def setUp(self):
def fields_are_valid(self):
    """Assert that the Ability named "Smell" has a generation_id of 3."""
    # NOTE(review): the name has no `test_` prefix, so the default unittest
    # loader would not collect this method on its own -- confirm whether it
    # is invoked from elsewhere or is meant to be discovered.
    self.assertEqual(Ability.objects.get(name="Smell").generation_id, 3)
15+
16+
class CSVResourceNameValidationTestCase(TestCase):
    """
    Check that every resource identifier in the source CSV files is an ASCII slug.

    Resource identifiers are used in API URLs and should be URL-safe ASCII slugs
    (lowercase letters, numbers, and hyphens only).

    The CSV files under data/v2/csv/ are read directly, so the data source is
    validated before it is loaded into the database.
    """

    # Pattern for valid resource identifiers: lowercase letters, numbers, and
    # hyphens only. Always applied with ``fullmatch`` so that a trailing
    # newline cannot slip past the ``$`` anchor.
    VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-z0-9-]+$")

    # CSV files that contain an identifier column to validate.
    # Format: (filename, identifier_column_name)
    CSV_FILES_TO_VALIDATE = [
        ("abilities.csv", "identifier"),
        ("berry_firmness.csv", "identifier"),
        ("conquest_episodes.csv", "identifier"),
        ("conquest_kingdoms.csv", "identifier"),
        ("conquest_move_displacements.csv", "identifier"),
        ("conquest_move_ranges.csv", "identifier"),
        ("conquest_stats.csv", "identifier"),
        ("conquest_warrior_archetypes.csv", "identifier"),
        ("conquest_warrior_skills.csv", "identifier"),
        ("conquest_warrior_stats.csv", "identifier"),
        ("conquest_warriors.csv", "identifier"),
        ("contest_types.csv", "identifier"),
        ("egg_groups.csv", "identifier"),
        ("encounter_conditions.csv", "identifier"),
        ("encounter_condition_values.csv", "identifier"),
        ("encounter_methods.csv", "identifier"),
        ("evolution_triggers.csv", "identifier"),
        ("genders.csv", "identifier"),
        ("generations.csv", "identifier"),
        ("growth_rates.csv", "identifier"),
        ("items.csv", "identifier"),
        ("item_categories.csv", "identifier"),
        ("item_flags.csv", "identifier"),
        ("item_fling_effects.csv", "identifier"),
        ("item_pockets.csv", "identifier"),
        ("languages.csv", "identifier"),
        ("locations.csv", "identifier"),
        ("location_areas.csv", "identifier"),
        ("moves.csv", "identifier"),
        ("move_battle_styles.csv", "identifier"),
        ("move_damage_classes.csv", "identifier"),
        ("move_flags.csv", "identifier"),
        ("move_meta_ailments.csv", "identifier"),
        ("move_meta_categories.csv", "identifier"),
        ("move_targets.csv", "identifier"),
        ("natures.csv", "identifier"),
        ("pal_park_areas.csv", "identifier"),
        ("pokeathlon_stats.csv", "identifier"),
        ("pokedexes.csv", "identifier"),
        ("pokemon.csv", "identifier"),
        ("pokemon_colors.csv", "identifier"),
        ("pokemon_forms.csv", "identifier"),
        ("pokemon_habitats.csv", "identifier"),
        ("pokemon_move_methods.csv", "identifier"),
        ("pokemon_shapes.csv", "identifier"),
        ("pokemon_species.csv", "identifier"),
        ("regions.csv", "identifier"),
        ("stats.csv", "identifier"),
        ("types.csv", "identifier"),
        ("versions.csv", "identifier"),
        ("version_groups.csv", "identifier"),
    ]

    def get_csv_path(self, filename):
        """Return the path of ``filename`` inside ``<BASE_DIR>/data/v2/csv/``."""
        # Imported lazily so the settings object is only touched when a test runs.
        from django.conf import settings

        return os.path.join(settings.BASE_DIR, "data", "v2", "csv", filename)

    @staticmethod
    def _file_level_violation(filename, message):
        """Build a violation record for a problem that concerns a whole file."""
        return {
            "file": filename,
            "row": "N/A",
            "id": "N/A",
            "identifier": message,
            "identifier_repr": "N/A",
        }

    def _collect_violations(self, filename, csv_path, identifier_column):
        """
        Return the violation records found in one CSV file.

        Each record is a dict with the keys ``file``, ``row``, ``id``,
        ``identifier`` and ``identifier_repr``. A missing identifier column or
        a read/parse error is reported as a single file-level record instead
        of raising, so one bad file does not hide problems in the others.
        """
        violations = []
        try:
            # newline="" lets the csv module do its own newline handling.
            with open(csv_path, newline="", encoding="utf-8") as csvfile:
                reader = csv.DictReader(csvfile)

                # fieldnames is None for an empty file; treat that the same
                # as a header that lacks the identifier column.
                if identifier_column not in (reader.fieldnames or []):
                    return [
                        self._file_level_violation(
                            filename, f"Column '{identifier_column}' not found"
                        )
                    ]

                for row in reader:
                    # A short row yields None for the missing column.
                    # NOTE(review): surrounding whitespace is stripped before
                    # validation, so a padded identifier passes -- confirm the
                    # data loader strips it as well.
                    identifier = (row.get(identifier_column) or "").strip()

                    # Skip empty identifiers
                    if not identifier:
                        continue

                    if self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier) is None:
                        violations.append(
                            {
                                "file": filename,
                                # line_num stays correct when the reader skips
                                # blank lines or a record spans several lines.
                                "row": reader.line_num,
                                "id": row.get("id", "N/A"),
                                "identifier": identifier,
                                # repr() shows unicode chars clearly
                                "identifier_repr": repr(identifier),
                            }
                        )
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            # Keep whatever was found before the failure and add the error.
            violations.append(
                self._file_level_violation(filename, f"Error reading file: {exc}")
            )
        return violations

    def _build_failure_message(self, missing_files, violations):
        """Format missing files and violations into one multi-line message."""
        error_lines = []

        # Report missing files first
        if missing_files:
            error_lines.append("\n\nMissing CSV files:")
            error_lines.extend(f"  - {filename}" for filename in missing_files)
            error_lines.append(
                "\nAll CSV files listed in CSV_FILES_TO_VALIDATE must exist."
            )

        # Report violations
        if violations:
            error_lines.append(
                "\n\nFound {} resource(s) with invalid identifiers (not ASCII slugs):".format(
                    len(violations)
                )
            )
            error_lines.append(
                "\nIdentifiers must match pattern: {}".format(
                    self.VALID_IDENTIFIER_PATTERN.pattern
                )
            )
            error_lines.append("\nInvalid identifiers found in CSV files:")
            error_lines.extend(
                "  - {file} (row {row}, id={id}): {identifier} {identifier_repr}".format(
                    **violation
                )
                for violation in violations
            )
            error_lines.append(
                "\nThese identifiers contain invalid characters and must be normalized."
            )
            error_lines.append(
                "Update the CSV files in data/v2/csv/ to fix these identifiers."
            )
            error_lines.append("\nSuggested fixes:")
            error_lines.append(
                "  - Remove Unicode apostrophes (\u2019) and replace with regular hyphens or remove"
            )
            error_lines.append("  - Remove Unicode letters (ñ → n)")
            error_lines.append("  - Remove parentheses and other special characters")
            error_lines.append("  - Convert to lowercase")

        return "\n".join(error_lines)

    def test_all_csv_identifiers_are_ascii_slugs(self):
        """
        Validate that all resource identifiers in CSV files follow the ASCII slug format.

        Identifiers should only contain:
        - Lowercase letters (a-z)
        - Numbers (0-9)
        - Hyphens (-)

        This test will fail if any CSV contains identifiers with:
        - Unicode characters (ñ, \u2019, é, etc.)
        - Uppercase letters
        - Spaces
        - Special characters (&, (), ', etc.)

        It also fails if a listed CSV file is missing, lacks its identifier
        column, or cannot be read.
        """
        violations = []
        missing_files = []

        for filename, identifier_column in self.CSV_FILES_TO_VALIDATE:
            csv_path = self.get_csv_path(filename)

            # Track missing files to report at the end
            if not os.path.exists(csv_path):
                missing_files.append(filename)
                continue

            violations.extend(
                self._collect_violations(filename, csv_path, identifier_column)
            )

        # Everything is collected first so a single run reports all problems.
        if violations or missing_files:
            self.fail(self._build_failure_message(missing_files, violations))

    def test_identifier_pattern_examples(self):
        """Test that the validation pattern works correctly with example identifiers."""
        valid_identifiers = [
            "pikachu",
            "charizard-mega-x",
            "mr-mime",
            "ho-oh",
            "type-null",
            "item-123",
            "mega-stone",
        ]

        # subTest reports every failing example instead of stopping at the first.
        for identifier in valid_identifiers:
            with self.subTest(identifier=identifier):
                self.assertIsNotNone(
                    self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier),
                    f"{identifier} should be valid but was rejected",
                )

        invalid_identifiers = [
            "Pikachu",  # Uppercase
            "Mr. Mime",  # Space and period
            # Written as an escape so the right single quotation mark
            # (U+2019) cannot be silently normalized to an ASCII apostrophe.
            "kofu\u2019s-wallet",  # Unicode apostrophe
            "jalapeño",  # Unicode ñ
            "steel-bottle-(r)",  # Parentheses
            "b&w-grass-tablecloth",  # Ampersand
            "farfetch'd",  # Apostrophe
            "kofu's-wallet",  # Regular apostrophe
            "pikachu\n",  # Trailing newline
        ]

        for identifier in invalid_identifiers:
            with self.subTest(identifier=identifier):
                self.assertIsNone(
                    self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier),
                    f"{identifier!r} should be invalid but was accepted",
                )
0 commit comments