Skip to content

Commit 2194a7e

Browse files
committed
link
1 parent 9bf0999 commit 2194a7e

34 files changed

+311
-759
lines changed
Lines changed: 55 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import os
2-
from fastapi import APIRouter,UploadFile,File
2+
from fastapi import APIRouter,UploadFile,File,Form
33
from pdf2image import convert_from_path
44
from google.cloud import vision
55
from typing import List
@@ -22,8 +22,6 @@
2222

2323

2424
def pdf_to_images_from_bytes(pdf_content, output_folder, file_name):
25-
s3_bucket_name = 'learnmateai'
26-
2725
# Save PDF content to a temporary file
2826
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
2927
temp_filename = temp_file.name
@@ -46,19 +44,14 @@ def pdf_to_images_from_bytes(pdf_content, output_folder, file_name):
4644
s3.put_object(Body=image_bytes, Bucket=s3_bucket_name, Key=image_key)
4745

4846
image_paths.append(image_key)
49-
noImg = i+1
47+
48+
noImg = i + 1
5049
return image_paths, noImg
5150

5251

53-
def convert(s):
54-
substring_to_remove = "Scanned by CamScanner"
55-
s3_bucket_name = 'learnmateai'
56-
57-
58-
prefix = s
59-
52+
def convert(prefix,user):
6053
# List files in the S3 bucket with the specified prefix
61-
response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=prefix)
54+
response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=user+prefix)
6255

6356
# Extract the file names from the response
6457
files = [obj['Key'] for obj in response.get('Contents', [])]
@@ -67,16 +60,16 @@ def convert(s):
6760
for file_name in files:
6861
file_name = os.path.splitext(os.path.basename(file_name))[0]
6962

70-
print(f"converting {file_name}....")
63+
print(f"Converting {file_name}....")
64+
65+
# Delete existing files in the output folder
66+
output_folder = f'{user}images/Notes_images/{file_name}'
67+
delete_folder_objects(output_folder)
7168

7269
# Download the PDF file from S3
73-
pdf_object = s3.get_object(Bucket=s3_bucket_name, Key=f'{prefix}{file_name}.pdf')
70+
pdf_object = s3.get_object(Bucket=s3_bucket_name, Key=f'{user}{prefix}{file_name}.pdf')
7471
pdf_content = pdf_object['Body'].read()
7572

76-
# Create the output folder in S3
77-
output_folder = f'images/Notes_images/{file_name}'
78-
s3.put_object(Body='', Bucket=s3_bucket_name, Key=f'{output_folder}/')
79-
8073
# Convert the PDF to images and save them in the output folder in S3
8174
image_paths, noImg = pdf_to_images_from_bytes(pdf_content, output_folder, file_name)
8275
print(noImg)
@@ -98,9 +91,11 @@ def convert(s):
9891
response = client.text_detection(image=content)
9992
texts = response.text_annotations[0]
10093
text = str(texts.description)
101-
image_contents += text.replace(substring_to_remove, "")
94+
if "Scanned by CamScanner" in text:
95+
text = text.replace("Scanned by CamScanner", "")
96+
image_contents += text
10297

103-
s3_key = f'notes_txt/{file_name}.txt'
98+
s3_key = f'{user}notes_txt/{file_name}.txt'
10499

105100
# Upload the text content to S3
106101
s3.put_object(
@@ -116,89 +111,114 @@ def convert(s):
116111
response.error.message))
117112

118113

119-
@router.get("/notestotext")
120-
def NotesToText_handler():
114+
def delete_folder_objects(prefix):
115+
# List objects in the S3 bucket with the specified prefix
116+
response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=prefix)
117+
118+
# Extract the object keys from the response
119+
objects = [obj['Key'] for obj in response.get('Contents', [])]
120+
121+
# Delete each object
122+
for obj_key in objects:
123+
s3.delete_object(Bucket=s3_bucket_name, Key=obj_key)
121124

125+
126+
@router.post("/filestotext2")
127+
async def NotesToText_handler(user: str = Form(...)):
128+
user = user + "/"
122129
prefix = 'notes_pdf/'
123130
prefix2 = 'pyqs_pdf/'
124-
convert(prefix)
125-
convert(prefix2)
126-
131+
132+
# Delete existing files in the output folders
133+
delete_folder_objects(user+'images/Notes_images/')
134+
delete_folder_objects(user+'notes_txt/')
135+
136+
convert(prefix,user)
137+
convert(prefix2,user)
138+
139+
return {"process completed"}
127140

128141

129142
@router.post("/notestotext_modwise")
130-
async def upload_files1(files: List[UploadFile] = File(...)):
143+
async def upload_files1(files: List[UploadFile] = File(...), user: str = Form(...)):
131144
filenames = []
132-
145+
user = user + "/"
133146
for file in files:
134147
contents = await file.read()
135148
file_obj = BytesIO(contents)
136149
try:
137150
s3.upload_fileobj(
138151
file_obj,
139152
s3_bucket_name,
140-
"notes_pdf/" + file.filename,
153+
user + "notes_pdf/" + file.filename,
141154
)
142155
filenames.append(file.filename)
143156
except NoCredentialsError:
144157
return {"error": "AWS credentials not found."}
145158

146159
return {"filenames": filenames}
147160

161+
148162
@router.post("/notestotext_syllabus")
149-
async def upload_files2(files: List[UploadFile] = File(...)):
163+
async def upload_files2(files: List[UploadFile] = File(...), user: str = Form(...)):
150164
filenames = []
165+
user = user + "/"
151166
for file in files:
152167
contents = await file.read()
153168
file_obj = BytesIO(contents)
154169
try:
155170
s3.upload_fileobj(
156171
file_obj,
157172
s3_bucket_name,
158-
"syllabus_pdf/" + file.filename,
173+
user + "syllabus_txt/" + file.filename,
159174
)
160175
filenames.append(file.filename)
161176
except NoCredentialsError:
162177
return {"error": "AWS credentials not found."}
163178

164179
return {"filenames": filenames}
165180

181+
166182
@router.post("/notestotext_pyqs")
167-
async def upload_files3(files: List[UploadFile] = File(...)):
183+
async def upload_files3(files: List[UploadFile] = File(...), user: str = Form(...)):
168184
filenames = []
185+
user = user + "/"
169186
for file in files:
170187
contents = await file.read()
171188
file_obj = BytesIO(contents)
172189
try:
173190
s3.upload_fileobj(
174191
file_obj,
175192
s3_bucket_name,
176-
"pyqs_pdf/" + file.filename,
193+
user + "pyqs_pdf/" + file.filename,
177194
)
178195
filenames.append(file.filename)
179196
except NoCredentialsError:
180197
return {"error": "AWS credentials not found."}
181198

182199
return {"filenames": filenames}
183200

201+
184202
@router.post("/notestotext_anythingelse")
185-
async def upload_files4(files: List[UploadFile] = File(...)):
203+
async def upload_files4(files: List[UploadFile] = File(...), user: str = Form(...)):
186204
filenames = []
205+
user = user + "/"
187206
for file in files:
188207
contents = await file.read()
189208
file_obj = BytesIO(contents)
190209
try:
191210
s3.upload_fileobj(
192211
file_obj,
193212
s3_bucket_name,
194-
"anything_else/" + file.filename,
213+
user + "anything_else/" + file.filename,
195214
)
196215
filenames.append(file.filename)
197216
except NoCredentialsError:
198217
return {"error": "AWS credentials not found."}
199218

200219
return {"filenames": filenames}
201220

221+
202222
@router.get("/")
203223
async def hello():
204-
return {"hello nigga"}
224+
return {"Byte 404 rocks"}

Backend/Narrator.py

Lines changed: 0 additions & 150 deletions
This file was deleted.

0 commit comments

Comments
 (0)