Speed up Expr * Expr (#1175)

Zeroto521 · Joao-Dionisio · web-flow · commit a03b00eddf38 · 2026-02-02T10:54:42.000Z
* Refactor Expr multiplication logic and Term operator
  Replaces Term.__add__ with Term.__mul__ and updates Expr.__mul__ to use more efficient Cython dict iteration and item access. This improves performance and correctness when multiplying expressions, especially for large term dictionaries.
* Optimize Term multiplication in expr.pxi
  Replaces the simple concatenation in Term.__mul__ with an efficient merge that maintains variable order based on pointer values. This improves performance and correctness when multiplying Term objects.
* Update CHANGELOG to reorder quicksum optimization entry
  Moved the 'Speed up MatrixExpr.sum(axis=...) via quicksum' entry from the Added section to the Changed section for better categorization and clarity.
* Update changelog with Expr multiplication speedup
  Added a new entry to the changelog noting the performance improvement for Expr * Expr operations.
* Fix Term class operator signature in type stub
  Corrects the Term class in scip.pyi to define __mul__ instead of __add__, updating the method signature to accept and return Term objects.
* Add tests for expression multiplication
  Introduces test_mul to verify correct string representations of multiplied expressions involving variables and constants.
* Update tests for Expr multiplication behavior
  Replaces Term with CONST import from pyscipopt.scip and adds new assertions in test_mul to verify multiplication involving constants and variables. Removes redundant CONST definition.
* Add test for commutativity in multiplication with zero
  Added an assertion to test that multiplying y by (x - x) yields the same zero term as (x - x) * y. This ensures correct handling of zero expressions in multiplication.
* Update changelog for Expr and Term multiplication improvements
  Documented performance enhancements for Expr * Expr and Term * Term operations, including use of C-level API and an O(n) algorithm. Also clarified method renaming from Term.__add__ to Term.__mul__.
* Add note about sorted vartuple requirement in Term
  Added a comment in the Term.__mul__ method to highlight that Term.vartuple must be sorted for correct merging. Suggests ensuring sorting in the Term constructor to avoid potential issues.
* Clarify algorithm complexity in changelog
  Updated the description of the Term * Term speedup to specify the use of an O(n) sort algorithm instead of Python's O(log(n)) sorted function.
* Correct complexity notation in changelog
  Updated the changelog to fix the time complexity notation for the Term * Term sort algorithm from O(log(n)) to O(n log(n)).
* Apply suggestion from @Joao-Dionisio
* Apply suggestion from @Joao-Dionisio
* Fix indentation in Expr multiplication logic
  Corrected the indentation of the isinstance(other, Expr) block in the Expr class to ensure proper execution flow during multiplication operations.
* Preserve zero-coefficient terms in Expr mul
  Do not skip terms with 0.0 coefficients when multiplying Expr objects: remove earlier zero-checks and compute product values inline in src/pyscipopt/expr.pxi. This causes zero-product terms to be retained in the resulting expression. Update tests (tests/test_expr.py) to expect the preserved zero-coefficient terms for cases like (x - x) * y and y * (x - x).

---------

Co-authored-by: João Dionísio <57299939+Joao-Dionisio@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,6 @@
 ### Added
 - Added automated script for generating type stubs
 - Include parameter names in type stubs
-- Speed up MatrixExpr.sum(axis=...) via quicksum
 - Added pre-commit hook for automatic stub regeneration (see .pre-commit-config.yaml)
 - Wrapped isObjIntegral() and test
 - Added structured_optimization_trace recipe for structured optimization progress tracking
@@ -20,8 +19,12 @@
 - Fixed segmentation fault when using Variable or Constraint objects after freeTransform() or Model destruction
 ### Changed
 - changed default value of enablepricing flag to True
+- Speed up MatrixExpr.sum(axis=...) via quicksum
 - Speed up MatrixExpr.add.reduce via quicksum
 - Speed up np.ndarray(..., dtype=np.float64) @ MatrixExpr
+- Speed up Expr * Expr via C-level API and Term * Term
+- Speed up Term * Term via an $O(n)$ merge of the two sorted vartuples instead of Python's $O(n\log(n))$ `sorted` function. `Term.__mul__` requires that `Term.vartuple` is sorted.
+- Rename `Term.__add__` to `Term.__mul__`, because this method is only used for Expr * Expr.
 - MatrixExpr and MatrixExprCons use `__array_ufunc__` protocol to control all numpy.ufunc inputs and outputs
 - Set `__array_priority__` for MatrixExpr and MatrixExprCons
 - changed addConsNode() and addConsLocal() to mirror addCons() and accept ExprCons instead of Constraint
diff --git a/src/pyscipopt/expr.pxi b/src/pyscipopt/expr.pxi
@@ -45,9 +45,10 @@
 import math
 from typing import TYPE_CHECKING
 
-from cpython.dict cimport PyDict_Next
+from cpython.dict cimport PyDict_Next, PyDict_GetItem
 from cpython.object cimport Py_TYPE
 from cpython.ref cimport PyObject
+from cpython.tuple cimport PyTuple_GET_ITEM
 from pyscipopt.scip cimport Variable, Solution
 
 import numpy as np
@@ -123,9 +124,41 @@ cdef class Term:
     def __len__(self):
         return len(self.vartuple)
 
-    def __add__(self, other):
-        both = self.vartuple + other.vartuple
-        return Term(*both)
+    def __mul__(self, Term other):
+        # NOTE: This merge algorithm requires both `Term.vartuple`s to be sorted
+        # by `Variable.ptr()`. This should be ensured in the constructor of Term.
+        cdef int n1 = len(self)
+        cdef int n2 = len(other)
+        if n1 == 0: return other
+        if n2 == 0: return self
+
+        cdef list vartuple = [None] * (n1 + n2)
+        cdef int i = 0, j = 0, k = 0
+        cdef Variable var1, var2
+        while i < n1 and j < n2:
+            var1 = <Variable>PyTuple_GET_ITEM(self.vartuple, i)
+            var2 = <Variable>PyTuple_GET_ITEM(other.vartuple, j)
+            if var1.ptr() <= var2.ptr():
+                vartuple[k] = var1
+                i += 1
+            else:
+                vartuple[k] = var2
+                j += 1
+            k += 1
+        while i < n1:
+            vartuple[k] = <Variable>PyTuple_GET_ITEM(self.vartuple, i)
+            i += 1
+            k += 1
+        while j < n2:
+            vartuple[k] = <Variable>PyTuple_GET_ITEM(other.vartuple, j)
+            j += 1
+            k += 1
+
+        cdef Term res = Term.__new__(Term)
+        res.vartuple = tuple(vartuple)
+        res.ptrtuple = tuple(v.ptr() for v in res.vartuple)
+        res.hashval = <Py_ssize_t>hash(res.ptrtuple)
+        return res
 
     def __repr__(self):
         return 'Term(%s)' % ', '.join([str(v) for v in self.vartuple])
@@ -248,16 +281,32 @@ cdef class Expr:
         if isinstance(other, np.ndarray):
             return other * self
 
+        cdef dict res = {}
+        cdef Py_ssize_t pos1 = <Py_ssize_t>0, pos2 = <Py_ssize_t>0
+        cdef PyObject *k1_ptr = NULL
+        cdef PyObject *v1_ptr = NULL
+        cdef PyObject *k2_ptr = NULL
+        cdef PyObject *v2_ptr = NULL
+        cdef PyObject *old_v_ptr = NULL
+        cdef Term child
+        cdef double prod_v
+
         if _is_number(other):
             f = float(other)
             return Expr({v:f*c for v,c in self.terms.items()})
+
         elif isinstance(other, Expr):
-            terms = {}
-            for v1, c1 in self.terms.items():
-                for v2, c2 in other.terms.items():
-                    v = v1 + v2
-                    terms[v] = terms.get(v, 0.0) + c1 * c2
-            return Expr(terms)
+            while PyDict_Next(self.terms, &pos1, &k1_ptr, &v1_ptr):
+                pos2 = <Py_ssize_t>0
+                while PyDict_Next(other.terms, &pos2, &k2_ptr, &v2_ptr):
+                    child = (<Term>k1_ptr) * (<Term>k2_ptr)
+                    prod_v = (<double>(<object>v1_ptr)) * (<double>(<object>v2_ptr))
+                    if (old_v_ptr := PyDict_GetItem(res, child)) != NULL:
+                        res[child] = <double>(<object>old_v_ptr) + prod_v
+                    else:
+                        res[child] = prod_v
+            return Expr(res)
+
         elif isinstance(other, GenExpr):
             return buildGenExprObj(self) * other
         else:
diff --git a/src/pyscipopt/scip.pyi b/src/pyscipopt/scip.pyi
@@ -2187,7 +2187,7 @@ class Term:
     ptrtuple: Incomplete
     vartuple: Incomplete
     def __init__(self, *vartuple: Incomplete) -> None: ...
-    def __add__(self, other: Incomplete) -> Incomplete: ...
+    def __mul__(self, other: Term) -> Term: ...
     def __eq__(self, other: object) -> bool: ...
     def __ge__(self, other: object) -> bool: ...
     def __getitem__(self, index: Incomplete) -> Incomplete: ...
diff --git a/tests/test_expr.py b/tests/test_expr.py
@@ -3,7 +3,7 @@
 import pytest
 
 from pyscipopt import Model, sqrt, log, exp, sin, cos
-from pyscipopt.scip import Expr, GenExpr, ExprCons, Term
+from pyscipopt.scip import Expr, GenExpr, ExprCons, CONST
 
 
 @pytest.fixture(scope="module")
@@ -14,7 +14,6 @@ def model():
     z = m.addVar("z")
     return m, x, y, z
 
-CONST = Term()
 
 def test_upgrade(model):
     m, x, y, z = model
@@ -220,6 +219,25 @@ def test_getVal_with_GenExpr():
         m.getVal(1 / z)
 
 
+def test_mul():
+    m = Model()
+    x = m.addVar(name="x")
+    y = m.addVar(name="y")
+
+    assert str(Expr({CONST: 1.0}) * x) == "Expr({Term(x): 1.0})"
+    assert str(y * Expr({CONST: -1.0})) == "Expr({Term(y): -1.0})"
+    assert str((x - x) * y) == "Expr({Term(x, y): 0.0})"
+    assert str(y * (x - x)) == "Expr({Term(x, y): 0.0})"
+    assert (
+        str((x + 1) * (y - 1))
+        == "Expr({Term(x, y): 1.0, Term(x): -1.0, Term(y): 1.0, Term(): -1.0})"
+    )
+    assert (
+        str((x + 1) * (x + 1) * y)
+        == "Expr({Term(x, x, y): 1.0, Term(x, y): 2.0, Term(y): 1.0})"
+    )
+
+
 def test_abs_abs_expr():
     m = Model()
     x = m.addVar(name="x")