This repository was archived by the owner on May 13, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsearches.py
More file actions
445 lines (370 loc) · 15.9 KB
/
searches.py
File metadata and controls
445 lines (370 loc) · 15.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
"""
Search support in the CDR
"""
from cdrapi.db import Query
from cdrapi.docs import Doc
class Search:
"""
From OCECDR-4255:
The existing search module implements an XQL parser built with
low-level lexical and grammar processors which are not directly
available to Python. While it would be possible to create a
compiled extension to replicate the existing XQL parser
functionality, that would involve a non-trivial level of effort,
and would compromise the goal of reducing dependencies on
programming expertise in C and C++. An analysis of the uses of the
search module shows that only a subset of the flexibility of the
supported XQL syntax is ever used, and it would be possible to
provide the required functionality without using XQL syntax. A
replacement API was implemented using assertion test strings which
can be easily parsed by the builtin string support in Python. Each
valid test assertion string contains exactly three tokens:
* a path, which can be one of
- CdrCtl/Title
- the xpath (starting with a single forward slash) for an
element or attribute, with /value or /int_val appended to
indicate which column of the query_term table should be
used for the test
* an operator (the same operators supported by the current XQL
parser; e.g. =, contains, begins, gt, etc.)
* a value to be used in the test; wildcards are added as
appropriate if the operator is "contains" or "begins"
The three tokens are separated by whitespace. The first two tokens
cannot contain whitespace, but there are no whitespace restrictions
on the value component of the test, which should not be enclosed
in quote marks.
The API also supports passing a list of document types which
can be used to narrow the results to documents of those types.
Making the specifying of a document type be a rule was considered,
but that would mean that you could only specify one document
type, or that we would have to move back toward the complexity
of XQL in order to distinguish between AND and OR groupings
and relationships for tests. This seemed like the best compromise
for allowing the query to pick up documents of more than one
document type, leaving the other tests to be implicitly ANDed
together.
The places which currently use the search module (and which will
therefore need to be modified) are significantly fewer than
original anticipated:
* Bin/UpdateSchemas.py
* DevTools/Utilities/DiffSchemas.py
* DevTools/Utilities/UpdateFilter.py
* DevTools/Utilities/UpdatePubControlDoc.py
* Inetpub/wwwroot/cgi-bin/cdr/post-schema.py
* XMetaL/DLL/SearchDialog.cpp
* lib/Python/RtfWriter.py
* lib/Python/cdrpub.py (just remove the calls to cdr.search();
XQL queries have never been used in the publishing control
documents)
"""
def __init__(self, session, *tests, **opts):
"""
Capture the session and options for this search request
Pass:
session - required reference to `Session` object
tests - one or more assertion strings; see explanation above
limit - optional integer keyword argument limiting number of results
doctypes - optional sequence of document types to be included
"""
self.__session = session
self.__tests = tests
self.__opts = opts
@property
def cursor(self):
"""
Give the `Search` object its own cursor
"""
if not hasattr(self, "_cursor"):
self._cursor = self.session.conn.cursor()
return self._cursor
@property
def limit(self):
"""
Optional throttle on the number of documents to return for the search
"""
if not hasattr(self, "_limit"):
self._limit = self.__opts.get("limit")
if self._limit:
try:
self._limit = int(self._limit)
except Exception:
raise Exception("limit must be integer")
return self._limit
@property
def doctypes(self):
"""
Optional sequence of document type names for restricting the search
"""
if not hasattr(self, "_doctypes"):
self._doctypes = self.__opts.get("doctypes") or []
if not isinstance(self._doctypes, (list, tuple)):
self._doctypes = [self._doctypes]
return self._doctypes
@property
def tests(self):
"""
Sequence of `Search.Test` objects containing the search logic
"""
if not hasattr(self, "_tests"):
tests = self.__tests or []
if not isinstance(tests, (list, tuple)):
tests = [tests]
self._tests = []
for test in tests:
if isinstance(test, bytes):
test = test.decode("utf-8")
if isinstance(test, str):
test = Search.Test(test)
if not isinstance(test, Search.Test):
message = "Search test must be string or Test object"
raise Exception(message)
self._tests.append(test)
return self._tests
@property
def logger(self):
"""
Object for recording what we do, borrowed from the `Session` object
"""
return self.session.logger
@property
def query(self):
"""
Assemble the database query object for performing the search
"""
# Use cached `Query` object if we've already done this.
if hasattr(self, "_query"):
return self._query
# Create a new `Query` object and apply any doctype filters.
query = Query("document d", "d.id").order("d.title")
if self.doctypes:
query.join("doc_type t", "t.id = d.doc_type")
if len(self.doctypes) == 1:
query.where(query.Condition("t.name", self.doctypes[0]))
else:
query.where(query.Condition("t.name", self.doctypes, "IN"))
# Apply the conditions for each of the `Test` objects.
n = 1
for test in self.tests:
# If the `Test` object specifies a column use the `query_term`
# table.
if test.column:
alias = f"qt{n:d}"
n += 1
query.join("query_term " + alias, alias + ".doc_id = d.id")
query.where(query.Condition(alias + ".path", test.path))
column = f"{alias}.{test.column}"
# Otherwise, the test is looking for matching title strings.
else:
column = "d.title"
# Construct and add a new `Condition` object to the query.
query.where(query.Condition(column, test.value, test.operator))
# If the caller doesn't want all the matching documents, apply the
# limit requested.
if self.limit:
query.limit(self.limit)
# Cache and return the `Query` object.
self._query = query
return query
@property
def session(self):
"""
`Session` for which this `Search` object was requested
"""
return self.__session
def run(self):
"""
Perform the search
Called by:
cdr.search()
client XML wrapper command CdrSearch
Return:
possibly empty sequence of `Doc` objects
"""
# All the heavy lifting is done in the `query` property.
self.session.log(f"Search.run({self.__tests!r}, {self.__opts!r})")
rows = self.query.execute(self.cursor).fetchall()
return [Doc(self.session, id=row.id) for row in rows]
class Test:
"""
Assertion to be tested while looking for matching documents
Attributes:
path - location of what we're looking for in the documents
operator - SQL operator to be applied for the assertion's test
value - string we're looking for
"""
# We support a number of aliases for the operators for backward
# compatibility.
OPS = {
"eq": "=", "=": "=",
"ne": "<>", "<>": "<>", "!=": "<>",
"lt": "<", "<": "<", "lte": "<=", "<=": "<=",
"gt": ">", ">": ">", "gte": ">=", ">=": ">="
}
def __init__(self, assertion):
"""
Parse the test's assertion string
Pass:
assertion - string in the form PATH OPERATOR VALUE
(see `Search` class documentation above
for more details)
"""
try:
path, operator, value = assertion.split(None, 2)
except Exception:
raise ValueError(f"invalid test assertion {assertion!r}")
assert path and value, "query test must have path and value"
if path.startswith("/"):
self.path, self.column = path.rsplit("/", 1)
if self.column not in ("value", "int_val"):
raise ValueError(f"invalid table column {self.path!r}")
elif path == "CdrCtl/Title":
self.path, self.column = path, None
else:
raise ValueError(f"unsupported path {path!r}")
if operator == "contains":
self.value = f"%{value}%"
self.operator = "LIKE"
elif operator == "begins":
self.value = f"{value}%"
self.operator = "LIKE"
else:
self.value = value
self.operator = self.OPS.get(operator)
if not self.operator:
raise ValueError(f"unsupported operator {operator!r}")
class QueryTermDef:
"""
Identification of a portion of documents to be indexed for searching
Attributes:
session - reference to object representing current login
path - string indicating a part of documents to be indexed;
paths beginning with a single forward slash character
are absolute paths (e.g., "/Summary/SummaryTitle");
paths beginning with a double forward slash are relative
(e.g., "//@cdr:ref")
rule - string naming custom rule to be applied (if any) or None;
as far as I know the only context in which this has been
specified is for unit testing, and even that has not
extended to the actual implementation of a custom rule,
but only population of and linking to the `query_term_rule`
table; from the documentation in tables.sql:
"Allows for future customization of the query support
mechanism, using more sophisticated index logic than
simply the text content of a single element. Syntax TBD."
From the original documentation in CdrSearch.cpp:
"Rules cannot be created through the CDR command interface.
They are inserted by the programmer implementing the
custom software behind the rule."
I don't really know (beyond these quotes) what the original
programmer had in mind, or what use cases were envisioned.
Property:
rule_id - integer for primary key into the `query_term_rule` table
if this definition has a custom rule to be applied;
otherwise None
"""
def __init__(self, session, path, rule=None):
"""
Wrap the caller's arguments as attributes of the object
Pass:
session - reference to object representing current login
path - document location of values to be indexed
rule - name of custom indexing rule to be applied (see notes above)
"""
self.session = session
self.path = path
self.rule = rule
@property
def rule_id(self):
"""
Primary key of row in the `query_term_rule` table (or None)
See notes above about the custom rule mechanism, which AFAIK
has never been used.
"""
if not hasattr(self, "_rule_id"):
if not self.rule:
self._rule_id = None
else:
query = Query("query_term_rule", "id")
query.where(query.Condition("name", self.rule))
row = query.execute(self.session.cursor).fetchone()
if not row:
raise Exception(f"Unknown query term rule: {self.rule}")
self._rule_id = row.id
return self._rule_id
def add(self):
"""
Store the new query term definition
Called by:
cdr.addQueryTermDef()
client XML wrapper command CdrAddQueryTermDef
"""
self.session.log(f"QueryTermDef.add({self.path!r}, {self.rule!r})")
if not self.session.can_do("ADD QUERY TERM DEF"):
message = "User not authorized to add query term definitions"
raise Exception(message)
if not self.path:
raise Exception("Missing required path")
query = Query("query_term_def", "COUNT(*) AS n")
query.where(query.Condition("path", self.path))
if query.execute(self.session.cursor).fetchone().n > 0:
raise Exception("Duplicate query term definition")
names = "path, term_rule"
values = self.path, self.rule_id
insert = f"INSERT INTO query_term_def ({names}) VALUES (?, ?)"
self.session.cursor.execute(insert, values)
self.session.conn.commit()
def delete(self):
"""
Drop the query term definition
Called by:
cdr.delQueryTermDef()
client XML wrapper command CdrDelQueryTermDef
"""
self.session.log(f"QueryTermDef.delete({self.path!r})")
if not self.session.can_do("DELETE QUERY TERM DEF"):
message = "User not authorized to delete query term definitions"
raise Exception(message)
if not self.path:
raise Exception("Missing required path")
delete = "DELETE FROM query_term_def WHERE path = ?"
self.session.cursor.execute(delete, (self.path,))
if self.session.cursor.rowcount != 1:
self.session.cursor.execute("ROLLBACK TRANSACTION")
raise Exception("Query term definition not found")
self.session.conn.commit()
@classmethod
def get_rules(cls, session):
"""
Find the available query term rules
Used by the user interface for managing search path definitions.
See notes above in the documentation for the `QueryTermDef` class
on the custom rule mechanism, which AFAIK has never been used.
Required positional argument:
session - reference to object for current login
Called by:
cdr.listQueryTermRules()
client XML wrapper command CdrListQueryTermRules
"""
session.log("QueryTermDef.get_rules()")
query = Query("query_term_rule", "name").order("name")
return [row.name for row in query.execute(session.cursor).fetchall()]
@classmethod
def get_definitions(cls, session):
"""
Fetch the list of CDR query term definitions
Required positional argument:
session - reference to object for current login
Called by:
cdr.listQueryTermDefs()
client XML wrapper command CdrListQueryTermDefs
Return:
sequence of `QueryTermDef` objects
"""
session.log("QueryTermDef.get_definitions()")
query = Query("query_term_def d", "d.path", "r.name")
query.outer("query_term_rule r", "r.id = d.term_rule")
query.order("d.path", "r.name")
definitions = []
for row in query.execute(session.cursor).fetchall():
definitions.append(QueryTermDef(session, row.path, row.name))
return definitions