Skip to content

Commit 01e70a7

Browse files
authored
Merge pull request #355 from open-edge-platform/update-branch
906 bug feedback on use case visual textual query driven document (#910)
2 parents ca753f5 + f253373 commit 01e70a7

2 files changed

Lines changed: 61 additions & 21 deletions

File tree

  • usecases/ai/visual-textual-query-driven-document-reasoning-engine

usecases/ai/visual-textual-query-driven-document-reasoning-engine/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ Ensure the GPU drivers are installed using the [`gpu_installer.sh`](https://gith
5151
## Installation
5252
1. **Install the necessary dependencies.**
5353
```bash
54-
apt-get update
55-
apt-get install -y python3-venv poppler-utils
54+
sudo apt update
55+
sudo apt install -y python3-venv poppler-utils
5656
```
5757

5858
2. **Create and activate a Python virtual environment:**

usecases/ai/visual-textual-query-driven-document-reasoning-engine/ui.py

Lines changed: 59 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,60 @@ def generate_gallery(
188188
return gallery
189189

190190

191+
def _answer_request_problem(query_text, component_image, embeddings):
    """Return the warning text for the first unmet precondition, or None if all are met."""
    if not query_text and not component_image:
        return "Both Query and Component Image are required. Please provide both before clicking Answer."
    if not query_text:
        return "Query is required. Please enter a query before clicking Answer."
    if not component_image:
        return "Component Image is required. Please upload an image before clicking Answer."
    # Plain truthiness covers both None and an empty container.
    if not embeddings:
        return "No PDF documents indexed. Please upload and index PDFs first before clicking Answer."
    return None


def handle_answer(
    query_text,
    component_image,
    component_description,
    embeddings,
    page_images,
    k,
):
    """Validate the Answer inputs, then run the full answer flow as a generator.

    Every yielded value is a 3-tuple ``(gallery, history, answer)``.

    Args:
        query_text: The user's query; must be non-empty.
        component_image: The uploaded component image; must be truthy.
        component_description: Passed to ``identify_component`` with the image.
        embeddings: Indexed PDF embeddings; must be non-empty.
        page_images: Passed through to ``generate_gallery``.
        k: Passed through to ``generate_gallery``.

    Yields:
        On invalid input, a single ``(None, gr.skip(), "")`` after a warning
        has been printed and raised through ``gr.Warning``. Otherwise
        ``(gallery, history_dataset, response_chunk)`` for each chunk produced
        by ``generate_response``, or one ``(gallery, history_dataset, "")`` if
        it produces no chunks.
    """
    # Validate inputs first - warn and return early instead of raising an exception.
    problem = _answer_request_problem(query_text, component_image, embeddings)
    if problem is not None:
        print(f"Warning: {problem}")
        gr.Warning(problem)
        # NOTE(review): gr.skip() is expected to leave the history output as it is - confirm against the Gradio version in use.
        yield None, gr.skip(), ""
        return

    # Step 1: Identify component
    component_name = visual_understanding_agent.identify_component(
        component_image, component_description
    )

    # Step 2: Generate gallery
    gallery = generate_gallery(
        embeddings,
        page_images,
        k,
        query_text,
        component_image,
        component_name,
    )

    # Step 3: Update history
    history_dataset = update_history(query_text, component_image)

    # Step 4: Stream the response; generate_response is a generator, so every
    # chunk is re-yielded together with the gallery and history.
    streamed_any = False
    for response_chunk in visual_understanding_agent.generate_response(query_text, gallery):
        streamed_any = True
        yield gallery, history_dataset, response_chunk

    # If the response generator produced nothing, still hand back the gallery
    # and history so the work done in steps 2 and 3 is not silently dropped.
    if not streamed_any:
        yield gallery, history_dataset, ""
243+
244+
191245
def build():
192246
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Ocean()) as demo:
193247
gr.Markdown(f"# {APP_TITLE}")
@@ -261,30 +315,16 @@ def build():
261315
outputs=[index_status, embeddings, page_images],
262316
)
263317
answer_button.click(
264-
lambda img, desc: (
265-
visual_understanding_agent.identify_component(img, desc) if img else ""
266-
),
267-
inputs=[component_image, component_description],
268-
outputs=[component_name],
269-
).then(
270-
generate_gallery,
318+
handle_answer,
271319
inputs=[
320+
query,
321+
component_image,
322+
component_description,
272323
embeddings,
273324
page_images,
274325
k,
275-
query,
276-
component_image,
277-
component_name,
278326
],
279-
outputs=[gallery],
280-
).then(
281-
update_history,
282-
inputs=[query, component_image],
283-
outputs=history.dataset,
284-
).then(
285-
visual_understanding_agent.generate_response,
286-
inputs=[query, gallery],
287-
outputs=[answer],
327+
outputs=[gallery, history.dataset, answer],
288328
)
289329

290330
return demo

0 commit comments

Comments
 (0)