1313from strands import Agent , tool
1414
1515from idp_common .assessment .strands_models import AssessmentOutput , BoundingBox
16- from idp_common .utils .grid_overlay import draw_bounding_boxes
16+ from idp_common .utils .grid_overlay import add_ruler_and_draw_boxes , add_ruler_edges
1717from idp_common .utils .strands_agent_tools .todo_list import (
1818 create_todo_list ,
1919 update_todo ,
@@ -37,10 +37,13 @@ class ViewImageInput(BaseModel):
3737
3838@tool
3939def submit_assessment (assessment : AssessmentOutput , agent : Agent ) -> str :
40+ """Submit the final assessment for a field."""
4041 # Validate assessment structure and return helpful errors
4142 validated_assessment = AssessmentOutput .model_validate (assessment )
4243
43- # Store in agent state
44+ # Store in agent state - coordinates are in 0-1000 document space
45+ # The ruler shows 0-1000 scale mapping to the document, so LLM coordinates
46+ # are already in document space and need no adjustment
4447 agent .state .set ("assessment_output" , validated_assessment .model_dump (mode = "json" ))
4548
4649 logger .info (
@@ -56,15 +59,15 @@ def create_view_image_tool(page_images: list[bytes], sorted_page_ids: list[str])
5659 Create a view_image tool that has access to page images.
5760
5861 Args:
59- page_images: List of page image bytes (with grid overlay already applied )
62+ page_images: List of raw page image bytes (without ruler overlay)
6063 sorted_page_ids: List of page IDs in sorted order
6164
6265 Returns:
6366 A Strands tool function for viewing images
6467 """
6568
6669 @tool
67- def view_image (input_data : ViewImageInput , agent : Agent ) -> dict :
70+ def view_image (input_data : ViewImageInput , agent : Agent ) -> dict [ str , Any ] :
6871 """
6972 View a specific page image, optionally highlighting a bounding box area.
7073
@@ -97,13 +100,13 @@ def view_image(input_data: ViewImageInput, agent: Agent) -> dict:
97100 f"Valid range: 0-{ len (page_images ) - 1 } "
98101 )
99102
100- # Get the base image (already has grid overlay)
101- img_bytes = page_images [view_input .image_index ]
103+ # Get the raw image (no ruler overlay yet )
104+ raw_img_bytes = page_images [view_input .image_index ]
102105 page_id = sorted_page_ids [view_input .image_index ]
103106
104- # If bounding box is specified, draw it on the image
107+ # Add ruler and optionally draw bounding box
105108 if view_input .bounding_box :
106- # Convert BoundingBox to dict format for draw_bounding_boxes
109+ # Convert BoundingBox to dict format
107110 bbox_dict = {
108111 "bbox" : [
109112 view_input .bounding_box .x1 ,
@@ -115,21 +118,19 @@ def view_image(input_data: ViewImageInput, agent: Agent) -> dict:
115118 "color" : "red" ,
116119 }
117120
118- # Draw the bounding box on the image (which has 30px margin for ruler)
119- # Let drawing errors propagate - if we can't draw, something is wrong
120- img_bytes = draw_bounding_boxes (
121- img_bytes ,
122- [bbox_dict ],
123- margin_offset = 30 ,
124- )
121+ # Add ruler overlay and draw bounding box in one step
122+ img_bytes = add_ruler_and_draw_boxes (raw_img_bytes , [bbox_dict ])
125123
126124 logger .debug (
127- "Drew bounding box on image" ,
125+ "Added ruler and drew bounding box on image" ,
128126 extra = {
129127 "image_index" : view_input .image_index ,
130128 "bbox" : bbox_dict ["bbox" ],
131129 },
132130 )
131+ else :
132+ # Just add ruler overlay (no bounding box)
133+ img_bytes = add_ruler_edges (raw_img_bytes )
133134
134135 logger .info (
135136 "Returning image to agent" ,
0 commit comments