Skip to content

Commit 4b83b98

Browse files
committed
Added possibility to import PDF files
1 parent 82a964a commit 4b83b98

4 files changed

Lines changed: 20 additions & 3 deletions

File tree

app/main.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ def main():
6161
st.success("Your summary is ready:")
6262
st.markdown(result)
6363

64+
st.download_button(
65+
label="Download summary",
66+
data=result,
67+
file_name="summary.txt",
68+
mime="text/plain"
69+
)
70+
6471

6572
if __name__ == "__main__":
6673
main()

app/src/file_processor.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
from typing import Optional
55
from docx import Document
6+
import fitz
67
#from tests import test_file_ext
78

89
class FileProcessor:
@@ -11,7 +12,8 @@ def __init__(self):
1112
self.file_type: None
1213
self.supported_types = {
1314
".txt": self._read_txt,
14-
".docx": self._read_docx
15+
".docx": self._read_docx,
16+
".pdf": self._read_pdf,
1517
}
1618
self.content = None
1719

@@ -37,6 +39,13 @@ def _read_docx(self) -> str:
3739
text.append(para.text)
3840
return "\n".join(text)
3941

42+
def _read_pdf(self) -> str:
    """Return the text of every page of the PDF in ``self.file``.

    The document is opened with ``fitz.open`` (PyMuPDF) and the result of
    ``page.get_text()`` for each page is concatenated in page order.

    Returns:
        The concatenated page text; an empty string when the document
        has no pages.
    """
    # Use the document as a context manager so it is closed even when
    # extracting a page raises; the original never closed it.
    with fitz.open(self.file) as doc:
        # Join once instead of growing a string with += per page.
        return "".join(page.get_text() for page in doc)
48+
4049

4150
def get_content(self) -> Optional[str]:
4251
if not self.file:

app/tests/test_file_ext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
@pytest.mark.parametrize("file_name, expected", [
99
(".jpeg", False),
10-
(".pdf", False),
10+
(".pdf", True),
1111
(".zip", False),
1212
(".txt", True),
1313
(".docx", True),

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ requests
55
typing
66
pytest
77
openai
8-
python-docx
8+
python-docx
9+
pyMuPDF

0 commit comments

Comments
 (0)