threatcode · pull · Sep 16, 2025 · Sep 15, 2025 · Sep 15, 2025 · Sep 16, 2025
diff --git a/django/contrib/postgres/search.py b/django/contrib/postgres/search.py
@@ -1,3 +1,4 @@
+from django.db.backends.postgresql.psycopg_any import is_psycopg3
 from django.db.models import (
     CharField,
     Expression,
@@ -10,9 +11,45 @@
 )
 from django.db.models.expressions import CombinedExpression, register_combinable_fields
 from django.db.models.functions import Cast, Coalesce
+from django.utils.regex_helper import _lazy_re_compile
 
 from .utils import CheckPostgresInstalledMixin
 
+# quote_lexeme() is defined once per driver branch. Both variants escape the
+# value with psql_escape(), quote it, and return the quoted text as a str.
+# NOTE(review): psql_escape() returns None when nothing is left after escaping
+# (e.g. "!" or " ") -- confirm callers never pass such values here.
+if is_psycopg3:
+    from psycopg.adapt import Dumper
+
+    class UTF8Dumper(Dumper):
+        """Dumper that encodes the given text as UTF-8 bytes."""
+
+        def dump(self, obj):
+            return bytes(obj, "utf-8")
+
+    def quote_lexeme(value):
+        """Escape ``value``, quote it via UTF8Dumper, and return it as a str."""
+        return UTF8Dumper(str).quote(psql_escape(value)).decode()
+
+else:
+    from psycopg2.extensions import adapt
+
+    def quote_lexeme(value):
+        """Escape ``value``, quote it via psycopg2's adapt(), and return a str."""
+        adapter = adapt(psql_escape(value))
+        # Set the adapter's encoding explicitly before asking for the quoted form.
+        adapter.encoding = "utf-8"
+        return adapter.getquoted().decode()
+
+
+# Characters that psql_escape() replaces with a space: the single quote, NUL,
+# square brackets, parentheses, and the | & : * ! @ < > and backslash characters.
+spec_chars_re = _lazy_re_compile(r"['\0\[\]()|&:*!@<>\\]")
+# Runs of two or more whitespace characters, collapsed by normalize_spaces().
+multiple_spaces_re = _lazy_re_compile(r"\s{2,}")
+
+
+def normalize_spaces(val):
+    """
+    Strip ``val`` and collapse each run of whitespace into a single space.
+
+    Return None when nothing is left after stripping.
+    """
+    stripped = val.strip()
+    if stripped:
+        return multiple_spaces_re.sub(" ", stripped)
+    return None
+
+
+def psql_escape(query):
+    """
+    Replace every character matched by spec_chars_re with a space, then return
+    the result of normalize_spaces() (None if only whitespace is left).
+    """
+    without_special_chars = spec_chars_re.sub(" ", query)
+    return normalize_spaces(without_special_chars)
+
 
 class SearchVectorExact(Lookup):
     lookup_name = "exact"
@@ -205,6 +242,9 @@ def __init__(
         invert=False,
         search_type="plain",
     ):
+        if isinstance(value, LexemeCombinable):
+            search_type = "raw"
+
         self.function = self.SEARCH_TYPES.get(search_type)
         if self.function is None:
             raise ValueError("Unknown search_type argument '%s'." % search_type)
@@ -383,3 +423,104 @@ class TrigramWordSimilarity(TrigramWordBase):
 
 class TrigramStrictWordSimilarity(TrigramWordBase):
     function = "STRICT_WORD_SIMILARITY"
+
+
+class LexemeCombinable:
+    """
+    Mixin providing the ``&`` and ``|`` operators for lexeme expressions.
+
+    Both operands must be LexemeCombinable instances; the result is a
+    CombinedLexeme joining them with the matching connector.
+    """
+
+    BITAND = "&"
+    BITOR = "|"
+
+    def _combine(self, other, connector, reversed):
+        """
+        Return a CombinedLexeme of ``self`` and ``other`` joined by
+        ``connector``; ``other`` becomes the left-hand side when ``reversed``
+        is true. Raise TypeError if ``other`` isn't a LexemeCombinable.
+        """
+        if isinstance(other, LexemeCombinable):
+            lhs, rhs = (other, self) if reversed else (self, other)
+            return CombinedLexeme(lhs, connector, rhs)
+        raise TypeError(
+            "A Lexeme can only be combined with another Lexeme, "
+            f"got {other.__class__.__name__}."
+        )
+
+    # Combinable deliberately leaves these operators unimplemented to avoid
+    # confusion with Q. Here they are (ab)used for logical combination, which
+    # keeps the API consistent with other usage in Django.
+    def __and__(self, other):
+        return self._combine(other, self.BITAND, False)
+
+    def __rand__(self, other):
+        return self._combine(other, self.BITAND, True)
+
+    def __or__(self, other):
+        return self._combine(other, self.BITOR, False)
+
+    def __ror__(self, other):
+        return self._combine(other, self.BITOR, True)
+
+
+class Lexeme(LexemeCombinable, Value):
+    """
+    A single search term whose text is escaped before being used in a query.
+
+    ``value`` must be a string that is non-empty and still has content once
+    escaped. ``invert`` prefixes the term with ``!``, ``prefix`` adds the
+    ``*`` label, and ``weight`` adds one of the ``A``-``D`` labels.
+    """
+
+    _output_field = SearchQueryField()
+
+    def __init__(
+        self, value, output_field=None, *, invert=False, prefix=False, weight=None
+    ):
+        """
+        Validate the arguments and store the lexeme's modifiers.
+
+        Raise ValueError for an empty ``value``, for a ``value`` that is empty
+        once escaped, or for an invalid ``weight``; raise TypeError when
+        ``value`` isn't a string.
+        """
+        if value == "":
+            raise ValueError("Lexeme value cannot be empty.")
+
+        if not isinstance(value, str):
+            raise TypeError(
+                f"Lexeme value must be a string, got {value.__class__.__name__}."
+            )
+
+        # psql_escape() returns None when the value holds nothing but
+        # whitespace and special characters. quote_lexeme() would then pass
+        # None to the driver adapter while the query is compiled, so reject
+        # such values here with a clear error instead.
+        if psql_escape(value) is None:
+            raise ValueError(
+                "Lexeme value must contain at least one character other than "
+                "whitespace and search operators."
+            )
+
+        if weight is not None and (
+            not isinstance(weight, str) or weight.lower() not in {"a", "b", "c", "d"}
+        ):
+            raise ValueError(
+                f"Weight must be one of 'A', 'B', 'C', and 'D', got {weight!r}."
+            )
+
+        self.prefix = prefix
+        self.invert = invert
+        self.weight = weight
+        super().__init__(value, output_field=output_field)
+
+    def as_sql(self, compiler, connection):
+        """Return a "%s" placeholder and the quoted lexeme text as its parameter."""
+        param = quote_lexeme(self.value)
+        # Labels follow the quoted term after a colon: "*" first, then the weight.
+        label = ""
+        if self.prefix:
+            label += "*"
+        if self.weight:
+            label += self.weight
+
+        if label:
+            param = f"{param}:{label}"
+        if self.invert:
+            param = f"!{param}"
+
+        return "%s", (param,)
+
+    def __invert__(self):
+        """Return a copy of this lexeme with its ``invert`` flag toggled."""
+        cloned = self.copy()
+        cloned.invert = not self.invert
+        return cloned
+
+
+class CombinedLexeme(LexemeCombinable, CombinedExpression):
+    """Two lexeme expressions joined by a connector."""
+
+    _output_field = SearchQueryField()
+
+    def as_sql(self, compiler, connection):
+        """
+        Compile both sides, interpolate their parameters into a single
+        parenthesized string, and return it as the parameter of "%s".
+        """
+        lhs_sql, lhs_params = compiler.compile(self.lhs)
+        rhs_sql, rhs_params = compiler.compile(self.rhs)
+        template = f"({lhs_sql} {self.connector} {rhs_sql})"
+        return "%s", (template % (*lhs_params, *rhs_params),)
+
+    def __invert__(self):
+        """Return a negated copy, built by applying De Morgan's theorem."""
+        negated = self.copy()
+        negated.lhs = ~self.lhs
+        negated.rhs = ~self.rhs
+        if self.connector == self.BITOR:
+            negated.connector = self.BITAND
+        else:
+            negated.connector = self.BITOR
+        return negated
diff --git a/django/db/models/base.py b/django/db/models/base.py
@@ -1153,7 +1153,8 @@ def _save_table(
                     getattr(self, field.attname) if raw else field.pre_save(self, False)
                 )
                 if hasattr(value, "resolve_expression"):
-                    returning_fields.append(field)
+                    if field not in returning_fields:
+                        returning_fields.append(field)
                 elif field.db_returning:
                     returning_fields.remove(field)
             results = self._do_insert(
@@ -1357,7 +1358,7 @@ def _get_field_expression_map(self, meta, exclude=None):
         meta = meta or self._meta
         field_map = {}
         generated_fields = []
-        for field in meta.local_concrete_fields:
+        for field in meta.local_fields:
             if field.name in exclude:
                 continue
             if field.generated:
@@ -1368,7 +1369,19 @@ def _get_field_expression_map(self, meta, exclude=None):
                     continue
                 generated_fields.append(field)
                 continue
-            value = getattr(self, field.attname)
+            if (
+                isinstance(field.remote_field, ForeignObjectRel)
+                and field not in meta.local_concrete_fields
+            ):
+                value = tuple(
+                    getattr(self, from_field) for from_field in field.from_fields
+                )
+                if len(value) == 1:
+                    value = value[0]
+            elif field.concrete:
+                value = getattr(self, field.attname)
+            else:
+                continue
             if not value or not hasattr(value, "resolve_expression"):
                 value = Value(value, field)
             field_map[field.name] = value

diff --git a/django/db/models/query.py b/django/db/models/query.py
@@ -2333,8 +2333,8 @@ def normalize_prefetch_lookups(lookups, prefix=None):
 
 def prefetch_related_objects(model_instances, *related_lookups):
     """
-    Populate prefetched object caches for a list of model instances based on
-    the lookups/Prefetch instances given.
+    Populate prefetched object caches for an iterable of model instances based
+    on the lookups/Prefetch instances given.
     """
     if not model_instances:
         return  # nothing to do
@@ -2402,7 +2402,7 @@ def prefetch_related_objects(model_instances, *related_lookups):
             # We assume that objects retrieved are homogeneous (which is the
             # premise of prefetch_related), so what applies to first object
             # applies to all.
-            first_obj = obj_list[0]
+            first_obj = next(iter(obj_list))
             to_attr = lookup.get_current_to_attr(level)[0]
             prefetcher, descriptor, attr_found, is_fetched = get_prefetcher(
                 first_obj, through_attr, to_attr

diff --git a/docs/ref/contrib/postgres/search.txt b/docs/ref/contrib/postgres/search.txt
@@ -96,7 +96,7 @@ Examples:
 
 .. code-block:: pycon
 
-    >>> from django.contrib.postgres.search import SearchQuery
+    >>> from django.contrib.postgres.search import SearchQuery, Lexeme
     >>> SearchQuery("red tomato")  # two keywords
     >>> SearchQuery("tomato red")  # same results as above
     >>> SearchQuery("red tomato", search_type="phrase")  # a phrase
@@ -105,6 +105,7 @@ Examples:
     >>> SearchQuery(
     ...     "'tomato' ('red' OR 'green')", search_type="websearch"
     ... )  # websearch operators
+    >>> SearchQuery(Lexeme("tomato") & (Lexeme("red") | Lexeme("green")))  # Lexeme objects
 
 ``SearchQuery`` terms can be combined logically to provide more flexibility:
 
@@ -118,6 +119,10 @@ Examples:
 See :ref:`postgresql-fts-search-configuration` for an explanation of the
 ``config`` parameter.
 
+.. versionchanged:: 6.0
+
+    Support for passing :class:`Lexeme` objects as ``value`` was added.
+
 ``SearchRank``
 ==============
 
@@ -276,6 +281,53 @@ floats to :class:`SearchRank` as ``weights`` in the same order above:
     >>> rank = SearchRank(vector, query, weights=[0.2, 0.4, 0.6, 0.8])
     >>> Entry.objects.annotate(rank=rank).filter(rank__gte=0.3).order_by("-rank")
 
+``Lexeme``
+==========
+
+.. versionadded:: 6.0
+
+.. class:: Lexeme(value, output_field=None, *, invert=False, prefix=False, weight=None)
+
+``Lexeme`` objects allow search operators to be safely used with strings from
+an untrusted source. The content of each lexeme is escaped so that any
+operators that may exist in the string itself will not be interpreted.
+
+You can combine lexemes with other lexemes using the ``&`` and ``|`` operators
+and also negate them with the ``~`` operator. For example:
+
+.. code-block:: pycon
+
+    >>> from django.contrib.postgres.search import SearchQuery, SearchVector, Lexeme
+    >>> vector = SearchVector("body_text", "blog__tagline")
+    >>> Entry.objects.annotate(search=vector).filter(
+    ...     search=SearchQuery(Lexeme("fruit") & Lexeme("dessert"))
+    ... )
+    <QuerySet [<Entry: Apple Crumble Recipes>, <Entry: Banana Split Recipes>]>
+
+.. code-block:: pycon
+
+    >>> Entry.objects.annotate(search=vector).filter(
+    ...     search=SearchQuery(Lexeme("fruit") & Lexeme("dessert") & ~Lexeme("banana"))
+    ... )
+    <QuerySet [<Entry: Apple Crumble Recipes>]>
+
+``Lexeme`` objects also support term weighting and prefix matching. A prefix
+matches the beginning of a word, so ``za`` doesn't match ``Pizza`` below:
+
+.. code-block:: pycon
+
+    >>> Entry.objects.annotate(search=vector).filter(
+    ...     search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese"))
+    ... )
+    <QuerySet [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]>
+    >>> Entry.objects.annotate(search=vector).filter(
+    ...     search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese", weight="A"))
+    ... )
+    <QuerySet [<Entry: Pizza recipes>]>
+    >>> Entry.objects.annotate(search=vector).filter(
+    ...     search=SearchQuery(Lexeme("za", prefix=True))
+    ... )
+    <QuerySet []>
+
 Performance
 ===========
 

diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
@@ -4223,8 +4223,9 @@ Prefetches the given lookups on an iterable of model instances. This is useful
 in code that receives a list of model instances as opposed to a ``QuerySet``;
 for example, when fetching models from a cache or instantiating them manually.
 
-Pass an iterable of model instances (must all be of the same class) and the
-lookups or :class:`Prefetch` objects you want to prefetch for. For example:
+Pass an iterable of model instances (must all be of the same class and able to
+be iterated multiple times) and the lookups or :class:`Prefetch` objects you
+want to prefetch for. For example:
 
 .. code-block:: pycon
 

diff --git a/docs/releases/6.0.txt b/docs/releases/6.0.txt
@@ -171,6 +171,12 @@ Minor features
 :mod:`django.contrib.postgres`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+* The new :class:`Lexeme <django.contrib.postgres.search.Lexeme>` expression
+  for full text search provides fine-grained control over search terms.
+  ``Lexeme`` objects automatically escape their input and support logical
+  combination operators (``&``, ``|``, ``~``), prefix matching, and term
+  weighting.
+
 * Model fields, indexes, and constraints from :mod:`django.contrib.postgres`
   now include system checks to verify that ``django.contrib.postgres`` is an
   installed app.

diff --git a/tests/foreign_object/models/__init__.py b/tests/foreign_object/models/__init__.py
@@ -1,5 +1,5 @@
 from .article import Article, ArticleIdea, ArticleTag, ArticleTranslation, NewsArticle
-from .customers import Address, Contact, Customer
+from .customers import Address, Contact, Customer, CustomerTab
 from .empty_join import SlugPage
 from .person import Country, Friendship, Group, Membership, Person
 
@@ -12,6 +12,7 @@
     "Contact",
     "Country",
     "Customer",
+    "CustomerTab",
     "Friendship",
     "Group",
     "Membership",

diff --git a/tests/foreign_object/models/customers.py b/tests/foreign_object/models/customers.py
@@ -39,3 +39,22 @@ class Contact(models.Model):
         to_fields=["customer_id", "company"],
         from_fields=["customer_code", "company_code"],
     )
+
+
+class CustomerTab(models.Model):
+    # The check constraint below filters on ``customer``, a ForeignObject whose
+    # only from_field is the ``customer_id`` integer column.
+    # NOTE(review): presumably added to cover constraints that reference a
+    # ForeignObject -- confirm against the accompanying tests.
+    customer_id = models.IntegerField()
+    customer = models.ForeignObject(
+        Customer,
+        from_fields=["customer_id"],
+        to_fields=["id"],
+        on_delete=models.CASCADE,
+    )
+
+    class Meta:
+        # Only created on backends that support table check constraints.
+        required_db_features = {"supports_table_check_constraints"}
+        constraints = [
+            models.CheckConstraint(
+                condition=models.Q(customer__lt=1000),
+                name="customer_id_limit",
+            ),
+        ]