11import os
2- from fastapi import APIRouter ,UploadFile ,File
2+ from fastapi import APIRouter ,UploadFile ,File , Form
33from pdf2image import convert_from_path
44from google .cloud import vision
55from typing import List
2222
2323
2424def pdf_to_images_from_bytes (pdf_content , output_folder , file_name ):
25- s3_bucket_name = 'learnmateai'
26-
2725 # Save PDF content to a temporary file
2826 with tempfile .NamedTemporaryFile (delete = False ) as temp_file :
2927 temp_filename = temp_file .name
@@ -46,19 +44,14 @@ def pdf_to_images_from_bytes(pdf_content, output_folder, file_name):
4644 s3 .put_object (Body = image_bytes , Bucket = s3_bucket_name , Key = image_key )
4745
4846 image_paths .append (image_key )
49- noImg = i + 1
47+
48+ noImg = i + 1
5049 return image_paths , noImg
5150
5251
53- def convert (s ):
54- substring_to_remove = "Scanned by CamScanner"
55- s3_bucket_name = 'learnmateai'
56-
57-
58- prefix = s
59-
52+ def convert (prefix ,user ):
6053 # List files in the S3 bucket with the specified prefix
61- response = s3 .list_objects_v2 (Bucket = s3_bucket_name , Prefix = prefix )
54+ response = s3 .list_objects_v2 (Bucket = s3_bucket_name , Prefix = user + prefix )
6255
6356 # Extract the file names from the response
6457 files = [obj ['Key' ] for obj in response .get ('Contents' , [])]
@@ -67,16 +60,16 @@ def convert(s):
6760 for file_name in files :
6861 file_name = os .path .splitext (os .path .basename (file_name ))[0 ]
6962
70- print (f"converting { file_name } ...." )
63+ print (f"Converting { file_name } ...." )
64+
65+ # Delete existing files in the output folder
66+ output_folder = f'{ user } images/Notes_images/{ file_name } '
67+ delete_folder_objects (output_folder )
7168
7269 # Download the PDF file from S3
73- pdf_object = s3 .get_object (Bucket = s3_bucket_name , Key = f'{ prefix } { file_name } .pdf' )
70+ pdf_object = s3 .get_object (Bucket = s3_bucket_name , Key = f'{ user } { prefix } { file_name } .pdf' )
7471 pdf_content = pdf_object ['Body' ].read ()
7572
76- # Create the output folder in S3
77- output_folder = f'images/Notes_images/{ file_name } '
78- s3 .put_object (Body = '' , Bucket = s3_bucket_name , Key = f'{ output_folder } /' )
79-
8073 # Convert the PDF to images and save them in the output folder in S3
8174 image_paths , noImg = pdf_to_images_from_bytes (pdf_content , output_folder , file_name )
8275 print (noImg )
@@ -98,9 +91,11 @@ def convert(s):
9891 response = client .text_detection (image = content )
9992 texts = response .text_annotations [0 ]
10093 text = str (texts .description )
101- image_contents += text .replace (substring_to_remove , "" )
94+ if "Scanned by CamScanner" in text :
95+ text = text .replace ("Scanned by CamScanner" , "" )
96+ image_contents += text
10297
103- s3_key = f'notes_txt/{ file_name } .txt'
98+ s3_key = f'{ user } notes_txt/{ file_name } .txt'
10499
105100 # Upload the text content to S3
106101 s3 .put_object (
@@ -116,89 +111,114 @@ def convert(s):
116111 response .error .message ))
117112
118113
119- @router .get ("/notestotext" )
120- def NotesToText_handler ():
def delete_folder_objects(prefix):
    """Delete every object in the bucket whose key starts with ``prefix``.

    A single ``list_objects_v2`` call returns at most 1000 keys, so the
    listing is paginated; otherwise a large "folder" would only be partially
    cleared.

    Args:
        prefix: Key prefix to clear. An empty listing is a no-op.
    """
    paginator = s3.get_paginator('list_objects_v2')

    # Walk every page of the listing, not just the first one.
    for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=prefix):
        # 'Contents' is absent when a page has no matching keys.
        for obj in page.get('Contents', []):
            s3.delete_object(Bucket=s3_bucket_name, Key=obj['Key'])
121124
125+
@router.post("/filestotext2")
async def NotesToText_handler(user: str = Form(...)):
    """Regenerate the text output for one user's uploaded PDFs.

    Clears the user's image and text output prefixes, then runs ``convert``
    over the user's ``notes_pdf/`` and ``pyqs_pdf/`` prefixes, in that order.

    Args:
        user: Form field. With a trailing "/" appended it is used as the
            key prefix for everything this handler touches.

    Returns:
        The one-element set ``{"process completed"}``.
    """
    user_root = user + "/"

    # Delete existing files in the output folders
    for stale_folder in ('images/Notes_images/', 'notes_txt/'):
        delete_folder_objects(user_root + stale_folder)

    for pdf_prefix in ('notes_pdf/', 'pyqs_pdf/'):
        convert(pdf_prefix, user_root)

    # NOTE(review): braces around a lone string build a set, not a dict;
    # kept as-is so the response shape does not change.
    return {"process completed"}
127140
128141
@router.post("/notestotext_modwise")
async def upload_files1(files: List[UploadFile] = File(...), user: str = Form(...)):
    """Upload the given files to the bucket under ``<user>/notes_pdf/``.

    Args:
        files: Uploaded files; each is read fully into memory before upload.
        user: Form field used as the leading key segment.

    Returns:
        ``{"filenames": [...]}`` with the names uploaded, or an ``error``
        dict as soon as an upload raises ``NoCredentialsError``.
    """
    user = user + "/"
    uploaded = []

    for upload in files:
        payload = BytesIO(await upload.read())
        try:
            s3.upload_fileobj(
                payload,
                s3_bucket_name,
                user + "notes_pdf/" + upload.filename,
            )
        except NoCredentialsError:
            return {"error": "AWS credentials not found."}
        else:
            # Only record names whose upload went through.
            uploaded.append(upload.filename)

    return {"filenames": uploaded}
147160
161+
@router.post("/notestotext_syllabus")
async def upload_files2(files: List[UploadFile] = File(...), user: str = Form(...)):
    """Upload the given syllabus files to the bucket under the user's prefix.

    Args:
        files: Uploaded files; each is read fully into memory before upload.
        user: Form field used as the leading key segment.

    Returns:
        ``{"filenames": [...]}`` with the names uploaded, or an ``error``
        dict as soon as an upload raises ``NoCredentialsError``.
    """
    user = user + "/"
    uploaded = []

    for upload in files:
        payload = BytesIO(await upload.read())
        try:
            s3.upload_fileobj(
                payload,
                s3_bucket_name,
                # NOTE(review): the folder segment has a space before the
                # slash - confirm this is intended.
                user + "syllabus_txt /" + upload.filename,
            )
        except NoCredentialsError:
            return {"error": "AWS credentials not found."}
        else:
            # Only record names whose upload went through.
            uploaded.append(upload.filename)

    return {"filenames": uploaded}
165180
181+
@router.post("/notestotext_pyqs")
async def upload_files3(files: List[UploadFile] = File(...), user: str = Form(...)):
    """Upload the given files to the bucket under ``<user>/pyqs_pdf/``.

    Args:
        files: Uploaded files; each is read fully into memory before upload.
        user: Form field used as the leading key segment.

    Returns:
        ``{"filenames": [...]}`` with the names uploaded, or an ``error``
        dict as soon as an upload raises ``NoCredentialsError``.
    """
    user = user + "/"
    uploaded = []

    for upload in files:
        payload = BytesIO(await upload.read())
        try:
            s3.upload_fileobj(
                payload,
                s3_bucket_name,
                user + "pyqs_pdf/" + upload.filename,
            )
        except NoCredentialsError:
            return {"error": "AWS credentials not found."}
        else:
            # Only record names whose upload went through.
            uploaded.append(upload.filename)

    return {"filenames": uploaded}
183200
201+
@router.post("/notestotext_anythingelse")
async def upload_files4(files: List[UploadFile] = File(...), user: str = Form(...)):
    """Upload the given files to the bucket under ``<user>/anything_else/``.

    Args:
        files: Uploaded files; each is read fully into memory before upload.
        user: Form field used as the leading key segment.

    Returns:
        ``{"filenames": [...]}`` with the names uploaded, or an ``error``
        dict as soon as an upload raises ``NoCredentialsError``.
    """
    user = user + "/"
    uploaded = []

    for upload in files:
        payload = BytesIO(await upload.read())
        try:
            s3.upload_fileobj(
                payload,
                s3_bucket_name,
                user + "anything_else/" + upload.filename,
            )
        except NoCredentialsError:
            return {"error": "AWS credentials not found."}
        else:
            # Only record names whose upload went through.
            uploaded.append(upload.filename)

    return {"filenames": uploaded}
201220
221+
@router.get("/")
async def hello():
    """Return a fixed greeting for the router's root path."""
    # NOTE(review): braces around a lone string build a one-element set,
    # not a dict; kept as-is so the response does not change.
    greeting = {"Byte 404 rocks "}
    return greeting
0 commit comments