44from typing import List
55
66from fastapi import FastAPI
7- from fastapi import UploadFile
7+ from fastapi import UploadFile , Form
88from fastapi .exceptions import HTTPException
99from fastapi .middleware .cors import CORSMiddleware
1010
@@ -116,7 +116,7 @@ def extract_img_text(attachment: UploadFile):
116116
117117
118118@app .post ("/ocr" )
119- def ocr (attachment : UploadFile , sychronous : bool = True ):
119+ def ocr (attachment : UploadFile , synchronous : bool = Form ( True ) ):
120120 """
121121 TODO: Support multiple attachments
122122 It could pass a PDF or an image.
@@ -145,7 +145,7 @@ def ocr(attachment: UploadFile, sychronous: bool = True):
145145 elif type_details .mime_type .startswith ('application/pdf' ):
146146 # Attempt extracting text using pdfminer.six or else through the image conversion -> OCR pipeline.
147147 extraction_function = extract_pdf_text_all
148- if sychronous is True :
148+ if synchronous is True :
149149 is_success , content = extraction_function (file_path = output_filename )
150150 if is_success is True :
151151 # Add one more step.
@@ -170,15 +170,23 @@ def ocr_result(key: str):
170170
171171
@app.post("/textract-ocr")
def textract_ocr(attachment: UploadFile, synchronous: bool = Form(True)):
    """
    Run text detection on an uploaded image.

    The upload is saved under ``/media/textract-ocr-files/`` and then either
    processed immediately or handed to the extraction queue.

    Parameters:
        attachment: The uploaded file. Only files whose detected MIME type
            starts with ``image`` are accepted.
        synchronous: Form field. When true (the default) the detection runs
            within the request and its content is returned; otherwise the
            job is enqueued and a link for fetching the result is returned.

    Returns:
        ``{"content": ...}`` for a successful synchronous run, or
        ``{"link": ...}`` pointing at ``/ocr-result/<hash>`` when enqueued.

    Raises:
        HTTPException: 400 when the upload is not an image, when the file
            name is missing or unusable, or when synchronous detection
            reports failure (the failure content becomes the detail).
    """
    # Imported locally so the handler does not depend on module-level imports
    # that are not visible from this part of the file.
    import hashlib
    import os

    type_details = identify_file_type(attachment.file)
    if not type_details.mime_type.startswith('image'):
        raise HTTPException(status_code=400, detail="Provide an image")

    # The file name is supplied by the client: keep only its final path
    # component so that names such as "../../x" cannot escape the media
    # directory.
    safe_name = os.path.basename(attachment.filename or "")
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Provide a valid file name")

    output_filename = f"/media/textract-ocr-files/{safe_name}"
    save_file(attachment.file, output_filename)
    # Rewind the upload stream after it has been written out.
    attachment.file.seek(0)

    if synchronous is not True:
        # Add it to a queue.
        enqueue_extraction(extraction_function=detect_text, file_path=output_filename)
        # The result link is keyed by the SHA-256 of the saved file path.
        path_hash = hashlib.sha256(output_filename.encode('utf-8')).hexdigest()
        base_url = os.environ.get("BASE_URL", "http://localhost:8000")
        return {"link": f"{base_url}/ocr-result/{path_hash}"}

    is_success, content = detect_text(output_filename)
    if is_success is not True:
        # On failure, `content` is passed back to the client as the detail.
        raise HTTPException(400, detail=content)
    return {"content": content}
0 commit comments