Skip to content

Commit 35a7aa5

Browse files
authored
Merge pull request #24 from AstraBert/feat/more-documents-formats
feat: support for more document formats for telegram upload
2 parents dfc5ae1 + 18a72db commit 35a7aa5

6 files changed

Lines changed: 118 additions & 13 deletions

File tree

packages/lobsterx/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "uv_build"
44

55
[project]
66
name = "lobsterx"
7-
version = "0.1.0-beta"
7+
version = "0.1.1-beta"
88
description = "Background AI assistant working as a Telegram bot, built specifically for document-related use cases"
99
readme = "README.md"
1010
requires-python = ">=3.11"

packages/lobsterx/src/lobsterx/bot.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ async def run_bot(log_level: str) -> None:
9191
application.add_handler(
9292
MessageHandler(filters.Document.PDF, handle_documents_tg)
9393
)
94+
application.add_handler(
95+
MessageHandler(filters.Document.DOCX, handle_documents_tg)
96+
)
97+
application.add_handler(
98+
MessageHandler(filters.Document.TXT, handle_documents_tg)
99+
)
94100
application.add_handler(MessageHandler(filters.TEXT, handle_prompt_tg))
95101
application.add_error_handler(cast(HandlerCallback, error_handler))
96102
if application.updater is not None:

packages/lobsterx/src/lobsterx/utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import functools
22
import json
33
import logging
4+
import mimetypes
45
import os
56
from typing import cast
67

@@ -85,12 +86,15 @@ def get_workflow() -> AgentWorkflow:
8586

8687

8788
def _get_file_name(document: Document) -> str:
89+
extension = (
90+
mimetypes.guess_extension(document.mime_type or "application/pdf") or ".pdf"
91+
)
8892
if document.file_name is None:
89-
return generate_name() + ".pdf"
93+
return generate_name() + extension
9094
else:
91-
if document.file_name.endswith(".pdf"):
95+
if document.file_name.endswith(extension):
9296
return document.file_name
93-
return document.file_name + ".pdf"
97+
return document.file_name + extension
9498

9599

96100
async def handle_documents(document: Document, context: CallbackContext) -> str:

packages/lobsterx/tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class TelegramUserMock:
5959
class TelegramDocumentMock:
6060
file_name: str | None
6161
file_id: str
62+
mime_type: str | None
6263

6364

6465
class ParsingMock:

packages/lobsterx/tests/test_utils.py

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
from unittest.mock import Mock, patch
55

66
import pytest
7+
from telegram import Document, User
8+
from telegram.ext import CallbackContext
9+
from workflows_acp.llm_wrapper import LLMWrapper
10+
from workflows_acp.llms.openai_llm import OpenAILLM
11+
from workflows_acp.tools.agentfs import load_all_files
12+
from workflows_acp.workflow import AgentWorkflow
13+
714
from lobsterx.constants import (
815
DATA_DIR,
916
DEFAULT_TO_AVOID,
@@ -23,12 +30,6 @@
2330
handle_prompt,
2431
start,
2532
)
26-
from telegram import Document, User
27-
from telegram.ext import CallbackContext
28-
from workflows_acp.llm_wrapper import LLMWrapper
29-
from workflows_acp.llms.openai_llm import OpenAILLM
30-
from workflows_acp.tools.agentfs import load_all_files
31-
from workflows_acp.workflow import AgentWorkflow
3233

3334
from .conftest import (
3435
AgentWorkflowMock,
@@ -79,7 +80,10 @@ async def test_handle_documents_success(
7980
to_avoid_files=DEFAULT_TO_AVOID_FILES,
8081
)
8182
document = cast(
82-
Document, TelegramDocumentMock(file_name="hello.pdf", file_id="123")
83+
Document,
84+
TelegramDocumentMock(
85+
file_name="hello.pdf", file_id="123", mime_type="application/pdf"
86+
),
8387
)
8488
callback_context = cast(CallbackContext, TelegramCallBackContextMock())
8589
result = await handle_documents(document, callback_context)
@@ -90,6 +94,93 @@ async def test_handle_documents_success(
9094
)
9195

9296

97+
@pytest.mark.asyncio
98+
async def test_handle_documents_no_name_mimetype(
99+
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
100+
) -> None:
101+
monkeypatch.chdir(tmp_path)
102+
(tmp_path / "test.txt").write_text("Hello world")
103+
await load_all_files(
104+
to_avoid_dirs=DEFAULT_TO_AVOID,
105+
to_avoid_files=DEFAULT_TO_AVOID_FILES,
106+
)
107+
document = cast(
108+
Document,
109+
TelegramDocumentMock(
110+
file_name=None,
111+
file_id="123",
112+
mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
113+
),
114+
)
115+
callback_context = cast(CallbackContext, TelegramCallBackContextMock())
116+
result = await handle_documents(document, callback_context)
117+
assert (
118+
result.startswith("Your file has been successfully downloaded at: ")
119+
and result.endswith(
120+
". Use this path to reference the file in your follow-up requests to the agent"
121+
)
122+
and ".docx" in result
123+
)
124+
125+
126+
@pytest.mark.asyncio
127+
async def test_handle_documents_no_name_no_mimetype(
128+
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
129+
) -> None:
130+
monkeypatch.chdir(tmp_path)
131+
(tmp_path / "test.txt").write_text("Hello world")
132+
await load_all_files(
133+
to_avoid_dirs=DEFAULT_TO_AVOID,
134+
to_avoid_files=DEFAULT_TO_AVOID_FILES,
135+
)
136+
document = cast(
137+
Document,
138+
TelegramDocumentMock(
139+
file_name=None,
140+
file_id="123",
141+
mime_type=None,
142+
),
143+
)
144+
callback_context = cast(CallbackContext, TelegramCallBackContextMock())
145+
result = await handle_documents(document, callback_context)
146+
assert (
147+
result.startswith("Your file has been successfully downloaded at: ")
148+
and result.endswith(
149+
". Use this path to reference the file in your follow-up requests to the agent"
150+
)
151+
and ".pdf" in result
152+
)
153+
154+
155+
@pytest.mark.asyncio
156+
async def test_handle_documents_no_extension(
157+
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
158+
) -> None:
159+
monkeypatch.chdir(tmp_path)
160+
(tmp_path / "test.txt").write_text("Hello world")
161+
await load_all_files(
162+
to_avoid_dirs=DEFAULT_TO_AVOID,
163+
to_avoid_files=DEFAULT_TO_AVOID_FILES,
164+
)
165+
document = cast(
166+
Document,
167+
TelegramDocumentMock(
168+
file_name="file",
169+
file_id="123",
170+
mime_type="text/plain",
171+
),
172+
)
173+
callback_context = cast(CallbackContext, TelegramCallBackContextMock())
174+
result = await handle_documents(document, callback_context)
175+
assert (
176+
result.startswith("Your file has been successfully downloaded at: ")
177+
and result.endswith(
178+
". Use this path to reference the file in your follow-up requests to the agent"
179+
)
180+
and "file.txt" in result
181+
)
182+
183+
93184
@pytest.mark.asyncio
94185
async def test_handle_documents_fail(
95186
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
@@ -101,7 +192,10 @@ async def test_handle_documents_fail(
101192
to_avoid_files=DEFAULT_TO_AVOID_FILES,
102193
)
103194
document = cast(
104-
Document, TelegramDocumentMock(file_name="hello.pdf", file_id="123")
195+
Document,
196+
TelegramDocumentMock(
197+
file_name="hello.pdf", file_id="123", mime_type="application/pdf"
198+
),
105199
)
106200
callback_context = cast(
107201
CallbackContext, TelegramCallBackContextMock(should_fail=True)

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)