@@ -24,49 +24,21 @@ The `csv` module provides functionality for reading and writing CSV (Comma-Separ
2424
2525## Reading CSV Files
2626
27- ### Basic CSV Reading
27+ ### Lazy vs Eager Reading
2828
2929``` python
3030import csv
3131
32- # Create reader - O(1)
33- with open (' data.csv' , ' r' ) as file :
34- reader = csv.reader(file ) # O(1)
35-
36- # Iterate rows - O(k) per row
37- for row in reader: # O(k) per iteration
38- print (row) # List of fields
39- # row = ['name', 'age', 'city']
40- ```
41-
42- ### Row-by-Row Iteration (Memory Efficient)
43-
44- ``` python
45- import csv
46-
47- # Lazy iteration for large files - O(1) memory
48- with open (' data.csv' , ' r' ) as file :
49- reader = csv.reader(file )
50-
51- for row in reader: # O(k) per row, O(1) memory
52- first_name = row[0 ] # O(1)
53- age = row[1 ] # O(1)
54- process(row) # O(k)
55- ```
56-
57- ### Reading All Rows
58-
59- ``` python
60- import csv
32+ # LAZY: O(1) memory - process one row at a time (preferred for large files)
33+ with open (' data.csv' , ' r' , newline = ' ' ) as file :
34+ reader = csv.reader(file ) # O(1) - creates iterator
35+ for row in reader: # O(k) per row, k = row length
36+ process(row) # Row discarded after processing
6137
62- # Load all rows into memory - O(n) memory
63- with open (' data.csv' , ' r' ) as file :
38+ # EAGER: O(n) memory - loads entire file (only for small files or random access)
39+ with open (' data.csv' , ' r' , newline = ' ' ) as file :
6440 reader = csv.reader(file )
65- all_rows = list (reader) # O(n) memory, n = file size
66-
67- # Now iterate from memory
68- for row in all_rows: # O(1) per iteration (already in memory)
69- process(row)
41+ all_rows = list (reader) # O(n*k) time and memory (n = number of rows, k = row length)
7042```
7143
7244## Writing CSV Files
@@ -93,24 +65,6 @@ with open('output.csv', 'w') as file:
9365# File auto-closed
9466```
9567
96- ### Escaping and Quoting
97-
98- ``` python
99- import csv
100-
101- # CSV handles escaping automatically - O(k)
102- with open (' output.csv' , ' w' ) as file :
103- writer = csv.writer(file )
104-
105- # Values with commas (auto-quoted) - O(k)
106- writer.writerow([' Alice' , ' NYC, NY' , ' Engineer' ]) # O(k)
107- # Output: Alice,"NYC, NY",Engineer
108-
109- # Values with quotes (auto-escaped) - O(k)
110- writer.writerow([' Bob' , ' Says "hi"' ]) # O(k)
111- # Output: Bob,"Says ""hi"""
112- ```
113-
11468## Dictionary-based CSV Operations
11569
11670### Reading as Dictionaries
@@ -176,26 +130,6 @@ with open('data.csv', 'r', encoding='latin-1') as file:
176130 process(row)
177131```
178132
179- ## Quote Handling
180-
181- ### Quote Styles
182-
183- ``` python
184- import csv
185-
186- # QUOTE_MINIMAL (default) - quotes only when needed
187- writer = csv.writer(file , quoting = csv.QUOTE_MINIMAL )
188-
189- # QUOTE_ALL - quotes all fields
190- writer = csv.writer(file , quoting = csv.QUOTE_ALL )
191-
192- # QUOTE_NONNUMERIC - quotes non-numeric fields
193- writer = csv.writer(file , quoting = csv.QUOTE_NONNUMERIC )
194-
195- # QUOTE_NONE - no quoting (must escape manually)
196- writer = csv.writer(file , quoting = csv.QUOTE_NONE , escapechar = ' \\ ' )
197- ```
198-
199133## Common Patterns
200134
201135### Reading and Processing
@@ -296,23 +230,23 @@ with open('merged.csv', 'w') as outfile:
296230``` python
297231import csv
298232
299- # Write in batches (more efficient) - O(n*k)
300- with open (' output.csv' , ' w' ) as file :
233+ # Write in batches - reduces per-row overhead
234+ with open (' output.csv' , ' w' , newline = ' ' ) as file :
301235 writer = csv.writer(file )
302- writer.writeheader()
236+ writer.writerow([ ' Name ' , ' Age ' , ' City ' ]) # Header
303237
304238 # Collect rows in memory, then write batch
305239 batch = []
306240 for row in generate_rows(): # O(n)
307241 batch.append(row)
308242
309243 if len (batch) >= 1000 : # Write every 1000 rows
310- writer.writerows(batch) # O(1000*k)
244+ writer.writerows(batch) # O(1000*k) - one call vs 1000
311245 batch = []
312246
313247 # Write remaining
314248 if batch:
315- writer.writerows(batch) # O(final_batch*k)
249+ writer.writerows(batch)
316250```
317251
318252### Reading Large Files Efficiently
@@ -337,122 +271,27 @@ with open('large_file.csv', 'r') as file:
337271 process_chunk(chunk)
338272```
339273
340- ## Encoding Handling
341-
342- ``` python
343- import csv
344-
345- # UTF-8 (default) - O(1) setup
346- with open (' data.csv' , ' r' , encoding = ' utf-8' ) as file :
347- reader = csv.reader(file )
348- for row in reader: # O(k) per row
349- process(row)
350-
351- # Latin-1 (for European data) - O(1) setup
352- with open (' data.csv' , ' r' , encoding = ' latin-1' ) as file :
353- reader = csv.reader(file )
354- for row in reader:
355- process(row)
356-
357- # Write with specific encoding
358- with open (' output.csv' , ' w' , encoding = ' utf-8' , newline = ' ' ) as file :
359- writer = csv.writer(file )
360- writer.writerow([' Name' , ' Value' ])
361- ```
362-
363- ## Edge Cases
364-
365- ### Handling Empty Fields
366-
367- ``` python
368- import csv
369-
370- # Empty fields are preserved
371- data = [
372- [' Name' , ' ' , ' City' ], # Empty age field
373- [' Alice' , ' ' , ' NYC' ],
374- [' Bob' , ' 25' , ' LA' ]
375- ]
376-
377- with open (' output.csv' , ' w' ) as file :
378- writer = csv.writer(file )
379- writer.writerows(data)
380-
381- # Reading back
382- with open (' output.csv' , ' r' ) as file :
383- reader = csv.DictReader(file )
384- for row in reader:
385- age = row[' ' ] if ' ' in row else None # Handle empty header
386- ```
387-
388- ### Handling Newlines
389-
390- ``` python
391- import csv
392-
393- # newline='' is required for proper handling (Python 3)
394- with open (' output.csv' , ' w' , newline = ' ' ) as file :
395- writer = csv.writer(file )
396- writer.writerow([' Name' , ' Description' ])
397- writer.writerow([' Alice' , ' Multi\n line\n text' ]) # Properly quoted
398-
399- # Reading
400- with open (' output.csv' , ' r' , newline = ' ' ) as file :
401- reader = csv.reader(file )
402- for row in reader:
403- print (row)
404- ```
405-
406- ## Comparison: CSV vs JSON vs Pickle
407-
408- ``` python
409- import csv
410- import json
411- import pickle
412-
413- # CSV - good for tabular data
414- # Pros: Simple, human-readable, Excel-compatible
415- # Cons: No schema, string values
416-
417- # JSON - good for hierarchical data
418- # Pros: Structured, preserves types
419- # Cons: More verbose
420-
421- # Pickle - good for Python object serialization
422- # Pros: Preserves exact Python types
423- # Cons: Python-specific, security risk
424-
425- # CSV is preferred for tabular data export
426- ```
427-
428274## Version Notes
429275
430- - ** Python 2.x** : csv module available, unicode handling complex
431- - ** Python 3.x** : csv module standard, better unicode support
432- - ** All versions** : Use ` newline='' ` parameter in Python 3
276+ - ** Python 3.12+** : Added ` QUOTE_STRINGS ` and ` QUOTE_NOTNULL ` constants
277+ - ** All Python 3** : Use ` newline='' ` parameter when opening CSV files
433278
434279## Related Modules
435280
436- - ** pandas** - High -level CSV operations and data manipulation
437- - ** [ json] ( json.md ) ** - Alternative structured data format
438- - ** [ io] ( io.md ) ** - Low-level I/O operations
281+ - ** pandas** - Higher-level CSV handling; loads the whole file by default (O(n) memory) but offers faster vectorized operations
282+ - ** [ json] ( json.md ) ** - O(n) parsing; use for hierarchical data
283+ - ** [ io] ( io.md ) ** - StringIO for in-memory CSV processing
439284
440- ## Best Practices
285+ ## Performance Best Practices
441286
442287✅ ** Do** :
443288
444- - Use ` csv.DictReader ` for named columns (clearer)
445- - Use ` csv.reader ` for simple positional access
446- - Use ` newline='' ` when opening CSV files (Python 3)
447- - Specify ` encoding='utf-8' ` explicitly
448- - Process large files line-by-line (lazy iteration)
449- - Close files with context manager (` with ` statement)
289+ - Process large files lazily (O(1) memory) instead of ` list(reader) ` (O(n) memory)
290+ - Use ` writerows() ` for batches - fewer function calls than repeated ` writerow() `
291+ - Use ` csv.reader ` for positional access (list indexing and dict lookup are both O(1) per field, but ` csv.reader ` skips the per-row dict construction that ` csv.DictReader ` performs)
450292
451293❌ ** Avoid** :
452294
453- - Manual string splitting (let csv handle parsing)
454- - Assuming comma is delimiter (specify if different)
455- - Loading entire large CSV into memory at once
456- - Forgetting newline='' parameter
457- - Mixing csv.reader with manual field indexing
458- - Trying to handle complex nested structures (use JSON)
295+ - ` list(reader) ` on large files - loads entire file into memory O(n)
296+ - Manual string splitting with ` split(',') ` - same O(k) complexity as the csv parser, but breaks on quoted fields that contain commas or newlines
297+ - Repeated small writes - buffer with batches for better I/O performance
0 commit comments