@@ -495,7 +495,7 @@ def test_general_api_returns_422_bad_pdf():
495495 response = client .post (
496496 MAIN_API_ROUTE , files = [("files" , (str (tmp .name ), open (tmp .name , "rb" ), "application/pdf" ))]
497497 )
498- assert response . json () == { "detail" : " File does not appear to be a valid PDF"}
498+ assert " File does not appear to be a valid PDF" in response . json ()[ "detail" ]
499499 assert response .status_code == 422
500500 tmp .close ()
501501
@@ -506,10 +506,56 @@ def test_general_api_returns_422_bad_pdf():
506506 files = [("files" , (str (test_file ), open (test_file , "rb" ), "application/pdf" ))],
507507 )
508508
509- assert response . json () == { "detail" : " File does not appear to be a valid PDF"}
509+ assert " File does not appear to be a valid PDF" in response . json ()[ "detail" ]
510510 assert response .status_code == 422
511511
512512
@pytest.mark.parametrize(
    ("pdf_name", "expected_error_message"),
    [
        (
            "failing-invalid.pdf",
            "File does not appear to be a valid PDF. Error: Stream has ended unexpectedly",
        ),
        (
            "failing-missing-root.pdf",
            "File does not appear to be a valid PDF. Error: Cannot find Root object in pdf",
        ),
        (
            "failing-missing-pages.pdf",
            "File does not appear to be a valid PDF. Error: Invalid object in /Pages",
        ),
    ],
)
@pytest.mark.parametrize("strategy", ["auto", "fast", "hi_res", "ocr_only"])
def test_general_api_returns_422_invalid_pdf(
    pdf_name: str, expected_error_message: str, strategy: str
):
    """
    Post a malformed PDF from sample-docs and check the rejection.

    Every combination of the listed sample files and partitioning strategies is
    exercised. Each request must come back with status 422 and a ``detail``
    value whose string form equals ``expected_error_message`` exactly.
    """
    # sample-docs sits three directory levels above this test module.
    base_dir = Path(__file__).parent.parent.parent
    broken_pdf = base_dir / "sample-docs" / pdf_name
    upload_name = str(broken_pdf)

    api_client = TestClient(app)
    with broken_pdf.open("rb") as pdf_stream:
        # No explicit content type is sent with the upload; only the file name
        # and the open stream are passed.
        upload = [("files", (upload_name, pdf_stream))]
        form_fields = {"strategy": strategy}
        response = api_client.post(MAIN_API_ROUTE, files=upload, data=form_fields)

    assert response.status_code == 422
    detail_text = str(response.json()["detail"])
    assert detail_text == expected_error_message
557+
558+
513559def test_general_api_returns_503 (monkeypatch ):
514560 """
515561 When available memory is below the minimum. return a 503, unless our origin ip is 10.{4,5}.x.x
@@ -939,13 +985,13 @@ def test_encrypted_pdf():
939985 writer .encrypt (user_password = "password123" )
940986 writer .write (temp_file .name )
941987
942- # Response should be 400
988+ # Response should be 422
943989 response = client .post (
944990 MAIN_API_ROUTE ,
945991 files = [("files" , (str (temp_file .name ), open (temp_file .name , "rb" ), "application/pdf" ))],
946992 )
947993 assert response .json () == {"detail" : "File is encrypted. Please decrypt it with password." }
948- assert response .status_code == 400
994+ assert response .status_code == 422
949995
950996 # This file is owner encrypted, i.e. readable with edit restrictions
951997 writer = PdfWriter ()
0 commit comments