@@ -204,14 +204,24 @@ function blended_pairwise_conditional_gradient(
204204 local_gap = dot_away_vertex - dot_forward_vertex
205205 if ! lazy
206206 if t > 1
207- v = compute_extreme_point (lmo, gradient)
208- dual_gap = fast_dot (gradient, x) - fast_dot (gradient, v)
209- phi = dual_gap
207+ dot_x = fast_dot (gradient, x)
208+ (v, weak_gap) = if weak_separation
209+ # we need a separation point v
210+ # ⟨∇f(x), x-v⟩ ≥ local_gap * lazy_threshold
211+ # ⟨∇f(x), v⟩ ≤ ⟨∇f(x), x⟩ - local_gap * lazy_threshold
212+ threshold = dot_x - local_gap * lazy_threshold
213+ compute_weak_separation_point (lmo, gradient, threshold)
214+ else
215+ v = compute_extreme_point (lmo, gradient)
216+ (v, zero (phi))
217+ end
218+ dual_gap = dot_x - fast_dot (gradient, v)
219+ phi = dual_gap + weak_gap
210220 end
211221 end
212222 # minor modification from original paper for improved sparsity
213223 # (proof follows with minor modification when estimating the step)
214- if local_gap ≥ phi / lazy_tolerance
224+ if local_gap ≥ phi / lazy_tolerance # pairwise step
215225 d = muladd_memory_mode (memory_mode, d, a, v_local)
216226 vertex_taken = v_local
217227 gamma_max = a_lambda
0 commit comments