Skip to content

Commit 0b175ad

Browse files
authored
Merge pull request #183 from plone/fix/4205-munge-search-term
Use munge_search_term from plone.base, fix multi-word wildcard
2 parents c0afea6 + 690693c commit 0b175ad

3 files changed

Lines changed: 11 additions & 123 deletions

File tree

news/4205.bugfix

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix multi-word search so all word parts get wildcard prefix matching, not just the last one.
2+
Use ``munge_search_term`` from ``plone.base.utils``, deprecate old import path.
3+
@jensens

src/plone/app/querystring/querybuilder.py

Lines changed: 8 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,70 +4,29 @@
44
from plone.app.querystring.interfaces import IParsedQueryIndexModifier
55
from plone.app.querystring.interfaces import IQueryModifier
66
from plone.app.querystring.interfaces import IQuerystringRegistryReader
7+
from plone.base.utils import munge_search_term as _munge_search_term
78
from plone.batching import Batch
89
from plone.registry.interfaces import IRegistry
910
from Products.CMFCore.utils import getToolByName
1011
from zope.component import getMultiAdapter
1112
from zope.component import getUtilitiesFor
1213
from zope.component import getUtility
14+
from zope.deferredimport import deprecated
1315
from zope.i18n import translate
1416
from zope.i18nmessageid import MessageFactory
1517
from zope.publisher.browser import BrowserView
1618

1719
import json
1820
import logging
19-
import re
2021

2122
logger = logging.getLogger("plone.app.querystring")
2223
_ = MessageFactory("plone")
2324

24-
# We should accept both a simple space, unicode u'\u0020', but also a
25-
# multi-space, so-called 'waji-kankaku', unicode u'\u3000'
26-
_MULTISPACE = "\u3000"
27-
_BAD_CHARS = ("?", "-", "+", "*", _MULTISPACE)
28-
29-
30-
def _quote_chars(s):
31-
# We need to quote parentheses when searching text indices
32-
if "(" in s:
33-
s = s.replace("(", '"("')
34-
if ")" in s:
35-
s = s.replace(")", '")"')
36-
if _MULTISPACE in s:
37-
s = s.replace(_MULTISPACE, " ")
38-
return s
39-
40-
41-
def _quote(term):
42-
# The terms and, or and not must be wrapped in quotes to avoid
43-
# being parsed as logical query atoms.
44-
if term.lower() in ("and", "or", "not"):
45-
term = '"%s"' % term
46-
return _quote_chars(term)
47-
48-
49-
def munge_search_term(query):
50-
original_query = query
51-
for char in _BAD_CHARS:
52-
query = query.replace(char, " ")
53-
54-
# extract quoted phrases first
55-
quoted_phrases = re.findall(r'"([^"]*)"', query)
56-
r = []
57-
for qp in quoted_phrases:
58-
# remove from original query
59-
query = query.replace(f'"{qp}"', "")
60-
# replace with cleaned leading/trailing whitespaces
61-
# and skip empty phrases
62-
clean_qp = qp.strip()
63-
if not clean_qp:
64-
continue
65-
r.append(f'"{clean_qp}"')
66-
67-
r += map(_quote, query.strip().split())
68-
r = " AND ".join(r)
69-
r = r + ("*" if r and not original_query.endswith('"') else "")
70-
return r
25+
deprecated(
26+
"Moved to plone.base.utils. Import from there instead "
27+
"(will be removed in Plone 7).",
28+
munge_search_term="plone.base.utils:munge_search_term",
29+
)
7130

7231

7332
class ContentListingView(BrowserView):
@@ -267,7 +226,7 @@ def filter_query(self, query):
267226
return query
268227

269228
def munge_search_term(self, q):
270-
return munge_search_term(q)
229+
return _munge_search_term(q)
271230

272231

273232
class RegistryConfiguration(BrowserView):

src/plone/app/querystring/tests/testQueryBuilder.py

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -333,80 +333,6 @@ def testQueryBuilderCustomQueryDoNotOverrideValues(self):
333333
self.assertEqual(len(results), 1)
334334
self.assertEqual(results[0].Title(), "Collectionstestpage 2")
335335

336-
def test_munge_search_term(self):
337-
from plone.app.querystring.querybuilder import _BAD_CHARS
338-
from plone.app.querystring.querybuilder import munge_search_term
339-
340-
search_term_tests = [
341-
(
342-
# search term
343-
"spam ham",
344-
"spam AND ham*",
345-
),
346-
(
347-
# quoted term
348-
'"spam ham"',
349-
'"spam ham"',
350-
),
351-
(
352-
# cleanup quoted terms
353-
'" spam ham "',
354-
'"spam ham"',
355-
),
356-
(
357-
# quoted term with inner parenthesis
358-
'"spam (ham)"',
359-
'"spam (ham)"',
360-
),
361-
(
362-
# quoted term with inner parenthesis
363-
'"spam" (ham)',
364-
'"spam" AND "("ham")"*',
365-
),
366-
(
367-
# quoted term with inner parenthesis
368-
'"(spam ham)"',
369-
'"(spam ham)"',
370-
),
371-
(
372-
# mixed cases
373-
"Spam hAm",
374-
"Spam AND hAm*",
375-
),
376-
(
377-
# mix quoting and unquoted
378-
'let\'s eat some "ham and eggs " without spam ',
379-
'"ham and eggs" AND let\'s AND eat AND some ' "AND without AND spam*",
380-
),
381-
(
382-
'test "Welcome" to "Plone" retest',
383-
'"Welcome" AND "Plone" AND test AND to AND retest*',
384-
),
385-
(
386-
# parentheses
387-
"spam (ham)",
388-
'spam AND "("ham")"*',
389-
),
390-
(
391-
# special keywords
392-
"spam or not ham and eggs",
393-
'spam AND "or" AND "not" AND ham AND "and" AND eggs*',
394-
),
395-
(
396-
# bad characters
397-
" ".join(_BAD_CHARS),
398-
"",
399-
),
400-
(
401-
# weird input
402-
'test ""Welcome" to "Plone"" retest',
403-
'"to" AND test AND WelcomePlone AND retest*',
404-
),
405-
]
406-
407-
for _in, _out in search_term_tests:
408-
self.assertEqual(munge_search_term(_in), _out)
409-
410336
def test_query_builder_unknown_sort(self):
411337
results = self.querybuilder(query=self.query, sort_on="unknown")
412338
self.assertEqual(len(results), 1)

0 commit comments

Comments
 (0)