Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions django/contrib/postgres/search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.db.backends.postgresql.psycopg_any import is_psycopg3
from django.db.models import (
CharField,
Expression,
Expand All @@ -10,9 +11,45 @@
)
from django.db.models.expressions import CombinedExpression, register_combinable_fields
from django.db.models.functions import Cast, Coalesce
from django.utils.regex_helper import _lazy_re_compile

from .utils import CheckPostgresInstalledMixin

if is_psycopg3:
    from psycopg.adapt import Dumper

    class UTF8Dumper(Dumper):
        """Dumper whose dump() encodes the given string as UTF-8 bytes."""

        def dump(self, obj):
            return bytes(obj, "utf-8")

    def quote_lexeme(value):
        """
        Escape ``value`` with psql_escape() and return it as a quoted literal.

        NOTE(review): psql_escape() can return None (nothing left after
        escaping); that case isn't handled here -- confirm callers prevent it.
        """
        dumper = UTF8Dumper(str)
        quoted = dumper.quote(psql_escape(value))
        return quoted.decode()

else:
    from psycopg2.extensions import adapt

    def quote_lexeme(value):
        """
        Escape ``value`` with psql_escape() and return it as a quoted literal.

        NOTE(review): psql_escape() can return None (nothing left after
        escaping); that case isn't handled here -- confirm callers prevent it.
        """
        wrapped = adapt(psql_escape(value))
        # Set the encoding explicitly before asking for the quoted form.
        wrapped.encoding = "utf-8"
        quoted = wrapped.getquoted()
        return quoted.decode()


# Matches any single character that psql_escape() replaces with a space:
# quote, NUL, brackets, parentheses, |, &, :, *, !, @, <, > and backslash.
spec_chars_re = _lazy_re_compile(r"['\0\[\]()|&:*!@<>\\]")
# Matches a run of two or more whitespace characters; normalize_spaces()
# collapses each such run to one space.
multiple_spaces_re = _lazy_re_compile(r"\s{2,}")


def normalize_spaces(val):
    """
    Strip ``val`` and collapse each run of 2+ whitespace characters to a
    single space. Return None if nothing remains after stripping.
    """
    stripped = val.strip()
    if stripped:
        return multiple_spaces_re.sub(" ", stripped)
    return None


def psql_escape(query):
    """
    Substitute a space for every character matched by ``spec_chars_re``,
    then return the result of normalize_spaces() on the outcome (which is
    None when only whitespace is left).
    """
    return normalize_spaces(spec_chars_re.sub(" ", query))


class SearchVectorExact(Lookup):
lookup_name = "exact"
Expand Down Expand Up @@ -205,6 +242,9 @@ def __init__(
invert=False,
search_type="plain",
):
if isinstance(value, LexemeCombinable):
search_type = "raw"

self.function = self.SEARCH_TYPES.get(search_type)
if self.function is None:
raise ValueError("Unknown search_type argument '%s'." % search_type)
Expand Down Expand Up @@ -383,3 +423,104 @@ class TrigramWordSimilarity(TrigramWordBase):

class TrigramStrictWordSimilarity(TrigramWordBase):
    # Function name for this expression; presumably consumed by
    # TrigramWordBase (defined outside this view) when building SQL.
    function = "STRICT_WORD_SIMILARITY"


class LexemeCombinable:
    """
    Mixin adding ``&`` and ``|`` support that produces CombinedLexeme nodes.

    Both operands must be LexemeCombinable instances; anything else raises
    TypeError.
    """

    BITAND = "&"
    BITOR = "|"

    def _combine(self, other, connector, reversed):
        """
        Build a CombinedLexeme joining ``self`` and ``other`` with
        ``connector``. When ``reversed`` is true, ``other`` becomes the
        left-hand side.
        """
        if isinstance(other, LexemeCombinable):
            lhs, rhs = (other, self) if reversed else (self, other)
            return CombinedLexeme(lhs, connector, rhs)
        raise TypeError(
            "A Lexeme can only be combined with another Lexeme, "
            f"got {other.__class__.__name__}."
        )

    # Combinable leaves the bitwise operators unimplemented to avoid
    # confusion with Q objects. Here they are deliberately used for logical
    # combination, matching how the operators read elsewhere in Django.
    def __and__(self, other):
        return self._combine(other, self.BITAND, False)

    def __rand__(self, other):
        return self._combine(other, self.BITAND, True)

    def __or__(self, other):
        return self._combine(other, self.BITOR, False)

    def __ror__(self, other):
        return self._combine(other, self.BITOR, True)


class Lexeme(LexemeCombinable, Value):
    """
    A single escaped full text search term.

    ``value`` must be a non-empty string that still contains something after
    psql_escape() has replaced special characters and stripped whitespace.
    ``invert`` prefixes the rendered term with ``!``, ``prefix`` adds the
    ``*`` label, and ``weight`` (one of "A", "B", "C", "D", matched
    case-insensitively) adds a weight label.

    Raise ValueError for an empty or fully-escaped-away value or an invalid
    weight, and TypeError for a non-string value.
    """

    _output_field = SearchQueryField()

    def __init__(
        self, value, output_field=None, *, invert=False, prefix=False, weight=None
    ):
        if value == "":
            raise ValueError("Lexeme value cannot be empty.")

        if not isinstance(value, str):
            raise TypeError(
                f"Lexeme value must be a string, got {value.__class__.__name__}."
            )

        # psql_escape() returns None when only whitespace and/or special
        # characters were given. quote_lexeme() has no handling for that
        # (e.g. the psycopg 3 dumper would call bytes(None, "utf-8")), so
        # fail here with a clear message instead of at query compile time.
        if psql_escape(value) is None:
            raise ValueError(
                "Lexeme value must contain at least one character other than "
                f"whitespace and search operators, got {value!r}."
            )

        if weight is not None and (
            not isinstance(weight, str) or weight.lower() not in {"a", "b", "c", "d"}
        ):
            raise ValueError(
                f"Weight must be one of 'A', 'B', 'C', and 'D', got {weight!r}."
            )

        self.prefix = prefix
        self.invert = invert
        self.weight = weight
        super().__init__(value, output_field=output_field)

    def as_sql(self, compiler, connection):
        """
        Return a ``%s`` placeholder and a one-item params tuple holding the
        quoted term, with its optional ``:label`` suffix and ``!`` prefix.
        """
        term = quote_lexeme(self.value)
        # The label is "*" for prefix matching followed by the weight letter.
        label = ("*" if self.prefix else "") + (self.weight or "")
        if label:
            term = f"{term}:{label}"
        if self.invert:
            term = f"!{term}"
        return "%s", (term,)

    def __invert__(self):
        """Return a copy of this lexeme with the ``invert`` flag flipped."""
        cloned = self.copy()
        cloned.invert = not self.invert
        return cloned


class CombinedLexeme(LexemeCombinable, CombinedExpression):
    """Two lexeme expressions joined by the ``&`` or ``|`` connector."""

    _output_field = SearchQueryField()

    def as_sql(self, compiler, connection):
        """
        Compile both sides, interpolate their params into a parenthesized
        ``(lhs connector rhs)`` string, and return that string as the single
        parameter of a ``%s`` placeholder.
        """
        lhs_sql, lhs_params = compiler.compile(self.lhs)
        rhs_sql, rhs_params = compiler.compile(self.rhs)
        template = f"({lhs_sql} {self.connector} {rhs_sql})"
        # Params are merged into the text here so the whole combination is
        # handed on as one value.
        rendered = template % (*lhs_params, *rhs_params)
        return "%s", (rendered,)

    def __invert__(self):
        """Return the negation: swap the connector and invert both operands."""
        # De Morgan: ~(a & b) == ~a | ~b and ~(a | b) == ~a & ~b.
        negated = self.copy()
        if self.connector == self.BITOR:
            negated.connector = self.BITAND
        else:
            negated.connector = self.BITOR
        negated.lhs = ~self.lhs
        negated.rhs = ~self.rhs
        return negated
19 changes: 16 additions & 3 deletions django/db/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,8 @@ def _save_table(
getattr(self, field.attname) if raw else field.pre_save(self, False)
)
if hasattr(value, "resolve_expression"):
returning_fields.append(field)
if field not in returning_fields:
returning_fields.append(field)
elif field.db_returning:
returning_fields.remove(field)
results = self._do_insert(
Expand Down Expand Up @@ -1357,7 +1358,7 @@ def _get_field_expression_map(self, meta, exclude=None):
meta = meta or self._meta
field_map = {}
generated_fields = []
for field in meta.local_concrete_fields:
for field in meta.local_fields:
if field.name in exclude:
continue
if field.generated:
Expand All @@ -1368,7 +1369,19 @@ def _get_field_expression_map(self, meta, exclude=None):
continue
generated_fields.append(field)
continue
value = getattr(self, field.attname)
if (
isinstance(field.remote_field, ForeignObjectRel)
and field not in meta.local_concrete_fields
):
value = tuple(
getattr(self, from_field) for from_field in field.from_fields
)
if len(value) == 1:
value = value[0]
elif field.concrete:
value = getattr(self, field.attname)
else:
continue
if not value or not hasattr(value, "resolve_expression"):
value = Value(value, field)
field_map[field.name] = value
Expand Down
6 changes: 3 additions & 3 deletions django/db/models/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2333,8 +2333,8 @@ def normalize_prefetch_lookups(lookups, prefix=None):

def prefetch_related_objects(model_instances, *related_lookups):
"""
Populate prefetched object caches for a list of model instances based on
the lookups/Prefetch instances given.
Populate prefetched object caches for an iterable of model instances based
on the lookups/Prefetch instances given.
"""
if not model_instances:
return # nothing to do
Expand Down Expand Up @@ -2402,7 +2402,7 @@ def prefetch_related_objects(model_instances, *related_lookups):
# We assume that objects retrieved are homogeneous (which is the
# premise of prefetch_related), so what applies to first object
# applies to all.
first_obj = obj_list[0]
first_obj = next(iter(obj_list))
to_attr = lookup.get_current_to_attr(level)[0]
prefetcher, descriptor, attr_found, is_fetched = get_prefetcher(
first_obj, through_attr, to_attr
Expand Down
54 changes: 53 additions & 1 deletion docs/ref/contrib/postgres/search.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Examples:

.. code-block:: pycon

>>> from django.contrib.postgres.search import SearchQuery
>>> from django.contrib.postgres.search import SearchQuery, Lexeme
>>> SearchQuery("red tomato") # two keywords
>>> SearchQuery("tomato red") # same results as above
>>> SearchQuery("red tomato", search_type="phrase") # a phrase
Expand All @@ -105,6 +105,7 @@ Examples:
>>> SearchQuery(
... "'tomato' ('red' OR 'green')", search_type="websearch"
... ) # websearch operators
>>> SearchQuery(Lexeme("tomato") & (Lexeme("red") | Lexeme("green"))) # Lexeme objects

``SearchQuery`` terms can be combined logically to provide more flexibility:

Expand All @@ -118,6 +119,10 @@ Examples:
See :ref:`postgresql-fts-search-configuration` for an explanation of the
``config`` parameter.

.. versionchanged:: 6.0

:class:`Lexeme` objects were added.

``SearchRank``
==============

Expand Down Expand Up @@ -276,6 +281,53 @@ floats to :class:`SearchRank` as ``weights`` in the same order above:
>>> rank = SearchRank(vector, query, weights=[0.2, 0.4, 0.6, 0.8])
>>> Entry.objects.annotate(rank=rank).filter(rank__gte=0.3).order_by("-rank")

``Lexeme``
==========

.. versionadded:: 6.0

.. class:: Lexeme(value, output_field=None, *, invert=False, prefix=False, weight=None)

``Lexeme`` objects allow search operators to be safely used with strings from
an untrusted source. The content of each lexeme is escaped so that any
operators that may exist in the string itself will not be interpreted.

You can combine lexemes with other lexemes using the ``&`` and ``|`` operators
and also negate them with the ``~`` operator. For example:

.. code-block:: pycon

>>> from django.contrib.postgres.search import SearchQuery, SearchVector, Lexeme
>>> vector = SearchVector("body_text", "blog__tagline")
>>> Entry.objects.annotate(search=vector).filter(
... search=SearchQuery(Lexeme("fruit") & Lexeme("dessert"))
... )
<QuerySet [<Entry: Apple Crumble Recipes>, <Entry: Banana Split Recipes>]>

.. code-block:: pycon

>>> Entry.objects.annotate(search=vector).filter(
... search=SearchQuery(Lexeme("fruit") & Lexeme("dessert") & ~Lexeme("banana"))
... )
<QuerySet [<Entry: Apple Crumble Recipes>]>

``Lexeme`` objects also support term weighting and prefix matching:

.. code-block:: pycon

>>> Entry.objects.annotate(search=vector).filter(
... search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese"))
... )
<QuerySet [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]>
>>> Entry.objects.annotate(search=vector).filter(
... search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese", weight="A"))
... )
<QuerySet [<Entry: Pizza recipes>]>
>>> Entry.objects.annotate(search=vector).filter(
... search=SearchQuery(Lexeme("za", prefix=True))
... )
<QuerySet []>

Performance
===========

Expand Down
5 changes: 3 additions & 2 deletions docs/ref/models/querysets.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4223,8 +4223,9 @@ Prefetches the given lookups on an iterable of model instances. This is useful
in code that receives a list of model instances as opposed to a ``QuerySet``;
for example, when fetching models from a cache or instantiating them manually.

Pass an iterable of model instances (must all be of the same class) and the
lookups or :class:`Prefetch` objects you want to prefetch for. For example:
Pass an iterable of model instances (must all be of the same class and able to
be iterated multiple times) and the lookups or :class:`Prefetch` objects you
want to prefetch for. For example:

.. code-block:: pycon

Expand Down
6 changes: 6 additions & 0 deletions docs/releases/6.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ Minor features
:mod:`django.contrib.postgres`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* The new :class:`Lexeme <django.contrib.postgres.search.Lexeme>` expression
for full text search provides fine-grained control over search terms.
``Lexeme`` objects automatically escape their input and support logical
combination operators (``&``, ``|``, ``~``), prefix matching, and term
weighting.

* Model fields, indexes, and constraints from :mod:`django.contrib.postgres`
now include system checks to verify that ``django.contrib.postgres`` is an
installed app.
Expand Down
3 changes: 2 additions & 1 deletion tests/foreign_object/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .article import Article, ArticleIdea, ArticleTag, ArticleTranslation, NewsArticle
from .customers import Address, Contact, Customer
from .customers import Address, Contact, Customer, CustomerTab
from .empty_join import SlugPage
from .person import Country, Friendship, Group, Membership, Person

Expand All @@ -12,6 +12,7 @@
"Contact",
"Country",
"Customer",
"CustomerTab",
"Friendship",
"Group",
"Membership",
Expand Down
19 changes: 19 additions & 0 deletions tests/foreign_object/models/customers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,22 @@ class Contact(models.Model):
to_fields=["customer_id", "company"],
from_fields=["customer_code", "company_code"],
)


class CustomerTab(models.Model):
    """
    Model linked to Customer through a ForeignObject that maps the local
    ``customer_id`` integer column to ``Customer.id``, with a check
    constraint whose condition references the ``customer`` relation itself.
    """

    customer_id = models.IntegerField()
    customer = models.ForeignObject(
        Customer,
        on_delete=models.CASCADE,
        to_fields=["id"],
        from_fields=["customer_id"],
    )

    class Meta:
        constraints = [
            # The condition filters on the ForeignObject name rather than on
            # the customer_id column directly.
            models.CheckConstraint(
                name="customer_id_limit",
                condition=models.Q(customer__lt=1000),
            ),
        ]
        # Declares the database feature this model depends on.
        required_db_features = {"supports_table_check_constraints"}
Loading
Loading