@@ -136,56 +136,73 @@ def predict_tables(
136136 ) -> Sequence [TableStructurePrediction ]:
137137
138138 pages = list (pages )
139+ page_images : list [numpy .ndarray | None ] = []
139140 predictions : list [TableStructurePrediction ] = []
140141
141- for page in pages :
142- assert page ._backend is not None
143- if not page ._backend .is_valid ():
144- existing_prediction = page .predictions .tablestructure or TableStructurePrediction ()
145- page .predictions .tablestructure = existing_prediction
146- predictions .append (existing_prediction )
142+ table_images : list [numpy .ndarray ] = []
143+ table_clusters : list [Cluster ] = []
144+ cluster_page : list [int ] = []
145+
146+ for i , page in enumerate (pages ):
147+ table_prediction = page .predictions .tablestructure or TableStructurePrediction ()
148+ page .predictions .tablestructure = table_prediction
149+ predictions .append (table_prediction )
150+
151+ if (
152+ page ._backend is None
153+ or not page ._backend .is_valid ()
154+ or page .size is None
155+ or page .predictions .layout is None
156+ ):
157+ page_images .append (None )
147158 continue
148159
149- with TimeRecorder (conv_res , "table_structure" ):
150- assert page .predictions .layout is not None
151- assert page .size is not None
160+ clusters = [
161+ cluster
162+ for cluster in page .predictions .layout .clusters
163+ if cluster .label in [DocItemLabel .TABLE , DocItemLabel .DOCUMENT_INDEX ]
164+ ]
152165
153- table_prediction = TableStructurePrediction ()
154- page .predictions .tablestructure = table_prediction
166+ if not clusters :
167+ page_images .append (None )
168+ continue
155169
156- in_tables = [
157- cluster
158- for cluster in page .predictions .layout .clusters
159- if cluster .label in [DocItemLabel .TABLE , DocItemLabel .DOCUMENT_INDEX ]
160- ]
161- if not in_tables :
162- predictions .append (table_prediction )
163- continue
170+ page_image = numpy .asarray (page .get_image (scale = self .scale ))
171+
172+ page_images .append (page_image )
173+ cluster_page .extend ([i ] * len (clusters ))
174+ table_clusters .extend (clusters )
164175
165- page_image = numpy .asarray (page .get_image (scale = self .scale ))
176+ for cluster in clusters :
177+ bbox = cluster .bbox
166178
167- for table_cluster in in_tables :
168- bbox = table_cluster .bbox
179+ table_image = page_image [
180+ round (bbox .t * self .scale ) : round (bbox .b * self .scale ),
181+ round (bbox .l * self .scale ) : round (bbox .r * self .scale ),
182+ ]
169183
170- table_image = page_image [
171- round (bbox .t * self .scale ) : round (bbox .b * self .scale ),
172- round (bbox .l * self .scale ) : round (bbox .r * self .scale ),
173- ]
184+ table_images .append (table_image )
174185
175- table = self .pipeline ([table_image ], self .options .confidence_threshold )[0 ]
186+ if len (table_images ) == 0 :
187+ return predictions
176188
177- docling_table = build_docling_table (table , table_cluster , page , self .scale )
189+ with TimeRecorder (conv_res , "table_structure" ):
190+ tables = self .pipeline (table_images , self .options .confidence_threshold )
178191
179- table_prediction .table_map [table_cluster .id ] = docling_table
192+ for table , cluster , page_idx in zip (tables , table_clusters , cluster_page ):
193+ page = pages [page_idx ]
194+ assert page .predictions .tablestructure is not None
180195
181- if settings .debug .visualize_tables :
182- self .draw_table_and_cells (
183- conv_res ,
184- page ,
185- page .predictions .tablestructure .table_map .values (),
186- )
196+ docling_table = build_docling_table (table , cluster , page , self .scale )
197+
198+ page .predictions .tablestructure .table_map [cluster .id ] = docling_table
187199
188- predictions .append (table_prediction )
200+ if settings .debug .visualize_tables :
201+ self .draw_table_and_cells (
202+ conv_res ,
203+ page ,
204+ page .predictions .tablestructure .table_map .values (),
205+ )
189206
190207 return predictions
191208
0 commit comments