101101 end
102102
103103 it "performs OCR on .tiff files" do
104- tiff_file = File . join ( __dir__ , ".." , "fixtures" , "ocr_rgb_lzw.tif " )
104+ tiff_file = File . join ( __dir__ , ".." , "fixtures" , "rgb_lzw.tiff " )
105105 if File . exist? ( tiff_file )
106106 result = parser . parse_file ( tiff_file )
107107 expect ( result ) . to be_a ( String )
128128 Tempfile . create ( [ 'test' , '.json' ] ) do |file |
129129 file . write ( '{"test": true}' )
130130 file . rewind
131-
131+
132132 result = parser . parse_file ( file . path )
133133 expect ( result ) . to include ( '"test"' )
134134 expect ( result ) . to include ( 'true' )
141141 Tempfile . create ( [ 'test' , '.xml' ] ) do |file |
142142 file . write ( '<?xml version="1.0"?><root>content</root>' )
143143 file . rewind
144-
144+
145145 result = parser . parse_file ( file . path )
146146 expect ( result ) . to include ( 'content' )
147147 end
151151 Tempfile . create ( [ 'test' , '.html' ] ) do |file |
152152 file . write ( '<!DOCTYPE html><html><body>content</body></html>' )
153153 file . rewind
154-
154+
155155 result = parser . parse_file ( file . path )
156156 expect ( result ) . to include ( 'content' )
157157 end
163163 Tempfile . create ( [ 'test' , '.txt' ] ) do |file |
164164 file . write ( 'Plain text content' )
165165 file . rewind
166-
166+
167167 result = parser . parse_file ( file . path )
168168 expect ( result ) . to eq ( 'Plain text content' )
169169 end
173173 Tempfile . create ( [ 'test' , '.md' ] ) do |file |
174174 file . write ( '# Markdown content' )
175175 file . rewind
176-
176+
177177 result = parser . parse_file ( file . path )
178178 expect ( result ) . to eq ( '# Markdown content' )
179179 end
184184 content = "col1,col2\n val1,val2"
185185 file . write ( content )
186186 file . rewind
187-
187+
188188 result = parser . parse_file ( file . path )
189189 expect ( result ) . to eq ( content )
190190 end
196196 Tempfile . create ( [ 'test' , '.xyz' ] ) do |file |
197197 file . write ( 'Unknown format content' )
198198 file . rewind
199-
199+
200200 result = parser . parse_file ( file . path )
201201 expect ( result ) . to eq ( 'Unknown format content' )
202202 end
206206 Tempfile . create ( 'test_no_ext' ) do |file |
207207 file . write ( 'No extension content' )
208208 file . rewind
209-
209+
210210 result = parser . parse_file ( file . path )
211211 expect ( result ) . to eq ( 'No extension content' )
212212 end
326326 Tempfile . create ( [ 'test' , ext ] ) do |file |
327327 file . write ( content )
328328 file . rewind
329-
329+
330330 result = parser . parse_file_routed ( file . path )
331331 expect ( result ) . to include ( expected )
332332 end
357357 Tempfile . create ( [ 'test' , '.json' ] ) do |file |
358358 file . write ( '{"test": true}' )
359359 file . rewind
360-
360+
361361 # Ruby routing
362362 ruby_result = parser . parse_file_routed ( file . path )
363-
363+
364364 # Direct parse (uses Rust routing)
365365 rust_result = parser . parse_file ( file . path )
366-
366+
367367 expect ( ruby_result ) . to eq ( rust_result )
368368 end
369369 end
370370
371371 it "Ruby and Rust dispatch produce same results for bytes" do
372372 test_data = '{"key": "value"}'
373-
373+
374374 # Ruby routing
375375 ruby_result = parser . parse_bytes_routed ( test_data )
376-
376+
377377 # Direct parse (uses Rust routing)
378378 rust_result = parser . parse_bytes ( test_data . bytes )
379-
379+
380380 expect ( ruby_result ) . to eq ( rust_result )
381381 end
382382
383383 it "All three dispatch methods handle unknown formats consistently" do
384384 unknown_data = "Unknown format content"
385-
385+
386386 Tempfile . create ( [ 'test' , '.xyz' ] ) do |file |
387387 file . write ( unknown_data )
388388 file . rewind
389-
389+
390390 # All should route to text parser
391391 file_result = parser . parse_file ( file . path )
392392 routed_file_result = parser . parse_file_routed ( file . path )
393393 bytes_result = parser . parse_bytes ( unknown_data . bytes )
394394 routed_bytes_result = parser . parse_bytes_routed ( unknown_data )
395-
395+
396396 expect ( file_result ) . to eq ( unknown_data )
397397 expect ( routed_file_result ) . to eq ( unknown_data )
398398 expect ( bytes_result ) . to eq ( unknown_data )
419419 Tempfile . create ( [ 'corrupted' , '.pdf' ] ) do |file |
420420 file . write ( "Not a real PDF" )
421421 file . rewind
422-
422+
423423 # Should route to PDF parser but fail parsing
424424 expect { parser . parse_file ( file . path ) } . to raise_error ( StandardError )
425425 end
432432 Tempfile . create ( [ 'test' , '.json' ] ) do |file |
433433 file . write ( '{"fast": true}' )
434434 file . rewind
435-
435+
436436 start_time = Time . now
437437 result = parser . parse_file ( file . path )
438438 elapsed = Time . now - start_time
439-
439+
440440 expect ( result ) . to include ( '"fast"' )
441441 # Should be very fast for small files
442442 expect ( elapsed ) . to be < 0.1
443443 end
444444 end
445445 end
446- end
446+ end
0 commit comments