-
Notifications
You must be signed in to change notification settings - Fork 214
Expand file tree
/
Copy pathschema.py
More file actions
364 lines (308 loc) · 11.8 KB
/
schema.py
File metadata and controls
364 lines (308 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import re
from datetime import datetime
from enum import Enum
from functools import cached_property
from typing import (
Generic,
Iterable,
List,
Optional,
Pattern,
Set,
Tuple,
TypeVar,
)
from elementary.utils.log import get_logger
from elementary.utils.pydantic_shim import BaseModel, Field, validator
from elementary.utils.time import DATETIME_FORMAT, convert_local_time_to_timezone
# Module-level logger, obtained from the project's get_logger helper for this module's name.
logger = get_logger(__name__)
class InvalidSelectorError(Exception):
    """Raised when a selector is not valid for the requested operation."""
class Status(str, Enum):
    """Result statuses recognised by the status filters.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    WARN = "warn"
    FAIL = "fail"
    SKIPPED = "skipped"
    ERROR = "error"
    RUNTIME_ERROR = "runtime error"
class ResourceType(str, Enum):
    """Kinds of resources that the resource-type filters can select.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    TEST = "test"
    MODEL = "model"
    SOURCE_FRESHNESS = "source_freshness"
class FilterType(str, Enum):
    """Comparison operators a filter can use.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    IS = "is"
    IS_NOT = "is_not"
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
class FilterFields(BaseModel):
    """Field values of one item that ``FiltersSchema.apply`` tests filters against."""

    tags: List[str] = []
    models: List[str] = []
    owners: List[str] = []
    statuses: List[str] = []
    resource_types: List[ResourceType] = []
    node_names: List[str] = []
    test_ids: List[str] = []

    @property
    def normalized_status(self) -> List[Status]:
        """Return ``statuses`` converted to ``Status`` members.

        Strings that do not equal any ``Status`` member are silently dropped.
        """
        known_statuses = list(Status)
        normalized: List[Status] = []
        for raw_status in self.statuses:
            if raw_status in known_statuses:
                normalized.append(Status(raw_status))
        return normalized
# Type variable for the kind of value a FilterSchema instance holds.
ValueT = TypeVar("ValueT")
# Operator groupings.
# NOTE(review): presumably ANY_OPERATORS pass when at least one tested value
# matches and ALL_OPERATORS require every tested value to pass — confirm
# against their usages, which are not visible in this file.
ANY_OPERATORS = [FilterType.IS, FilterType.CONTAINS]
ALL_OPERATORS = [FilterType.IS_NOT, FilterType.NOT_CONTAINS]
# Operators for which FilterSchema.apply_filter_on_values passes when there
# are no values to test.
NEGATIVE_OPERATORS = [FilterType.IS_NOT, FilterType.NOT_CONTAINS]
class FilterSchema(BaseModel, Generic[ValueT]):
    """A single filter: a list of values combined with a comparison operator.

    ``IS`` / ``IS_NOT`` compare by set membership, while ``CONTAINS`` /
    ``NOT_CONTAINS`` do a substring test against the lower-cased string form
    of each tested value.
    """

    # The filter's own values are OR-ed: matching any one of them is a match.
    values: List[ValueT]
    type: FilterType = FilterType.IS

    class Config:
        # Keep enum fields as their plain values so they serialize as such.
        use_enum_values = True

    @cached_property
    def _normalized_values(self) -> list[str]:
        """Lower-cased string form of every filter value (computed once)."""
        return [str(value).lower() for value in self.values]

    @cached_property
    def _values_set(self) -> Set[ValueT]:
        """The filter values as a set (computed once)."""
        return set(self.values)

    def get_matching_values(self, values: Iterable[ValueT]) -> Set[ValueT]:
        """Return the subset of ``values`` that satisfies this filter.

        For the negative operators (``IS_NOT`` / ``NOT_CONTAINS``) a single
        offending value rejects the whole input, so the result is either
        every distinct input value or an empty set.

        Raises:
            ValueError: if ``type`` is not one of the supported operators.
        """
        candidates = set(values)
        operator = self.type

        def _contains_filter_value(candidate) -> bool:
            # Substring test of any filter value against the lower-cased candidate.
            haystack = str(candidate).lower()
            return any(needle in haystack for needle in self._normalized_values)

        if operator == FilterType.IS:
            return candidates & self._values_set

        if operator == FilterType.IS_NOT:
            # Any overlap with the filter values disqualifies everything.
            if candidates & self._values_set:
                return set()
            return candidates

        if operator == FilterType.CONTAINS or operator == FilterType.NOT_CONTAINS:
            hits = {
                candidate
                for candidate in candidates
                if _contains_filter_value(candidate)
            }
            if operator == FilterType.CONTAINS:
                return hits
            # NOT_CONTAINS: one hit is enough to reject the whole input.
            return set() if hits else candidates

        raise ValueError(f"Unsupported filter type: {self.type}")

    def apply_filter_on_values(self, values: List[ValueT]) -> bool:
        """Return whether ``values`` pass this filter.

        An empty ``values`` list passes every negative operator; otherwise the
        filter passes when at least one value matches.
        """
        if not values and self.type in NEGATIVE_OPERATORS:
            return True
        matched = self.get_matching_values(values)
        return bool(matched)

    def apply_filter_on_value(self, value: ValueT) -> bool:
        """Return whether the single ``value`` passes this filter."""
        return self.apply_filter_on_values([value])
class StatusFilterSchema(FilterSchema[Status]):
    """``FilterSchema`` whose values are declared as ``Status`` members."""

    values: List[Status]
class ResourceTypeFilterSchema(FilterSchema[ResourceType]):
    """``FilterSchema`` whose values are declared as ``ResourceType`` members."""

    values: List[ResourceType]
def _get_default_statuses_filter() -> List[StatusFilterSchema]:
    """Build the default status filters.

    Returns:
        A new one-element list holding an ``IS`` filter on the fail, error,
        runtime error and warn statuses.
    """
    default_statuses = [
        Status.FAIL,
        Status.ERROR,
        Status.RUNTIME_ERROR,
        Status.WARN,
    ]
    default_filter = StatusFilterSchema(values=default_statuses, type=FilterType.IS)
    return [default_filter]
class FiltersSchema(BaseModel):
    """Invocation selectors plus per-field filters.

    The per-field filter lists (``tags``, ``owners``, ``models``, ``statuses``,
    ``resource_types``, ``test_ids``) and ``node_names`` are AND-ed together by
    :meth:`apply`.
    """

    selector: Optional[str] = None
    invocation_id: Optional[str] = None
    invocation_time: Optional[str] = None
    last_invocation: Optional[bool] = False
    node_names: List[str] = Field(default_factory=list)
    tags: List[FilterSchema] = Field(default_factory=list)
    owners: List[FilterSchema] = Field(default_factory=list)
    models: List[FilterSchema] = Field(default_factory=list)
    # The default value is built once, when the class body is evaluated.
    statuses: List[StatusFilterSchema] = Field(default=_get_default_statuses_filter())
    resource_types: List[ResourceTypeFilterSchema] = Field(default_factory=list)
    test_ids: List[FilterSchema[str]] = Field(default_factory=list)

    @validator("invocation_time", pre=True)
    def format_invocation_time(cls, invocation_time) -> Optional[str]:
        """Re-format an ISO 8601 invocation time using ``DATETIME_FORMAT``.

        The parsed datetime is passed through ``convert_local_time_to_timezone``
        before formatting. Falsy input yields ``None``.

        Raises:
            ValueError: re-raised (after logging) when parsing or formatting fails.
        """
        if not invocation_time:
            return None
        try:
            invocation_datetime = convert_local_time_to_timezone(
                datetime.fromisoformat(invocation_time)
            )
            return invocation_datetime.strftime(DATETIME_FORMAT)
        except ValueError as err:
            logger.error(
                f"Failed to parse invocation time filter: {err}\nPlease use a valid ISO 8601 format"
            )
            raise

    def validate_report_selector(self) -> None:
        """Check that ``selector`` mentions a selector type supported for reports.

        A missing or empty selector is accepted.

        Raises:
            InvalidSelectorError: if the selector contains none of the
                supported selector types.
        """
        # If we start supporting multiple selectors we need to change this logic
        if not self.selector:
            return
        valid_report_selectors = ["last_invocation", "invocation_id", "invocation_time"]
        if not any(
            selector_type in self.selector for selector_type in valid_report_selectors
        ):
            # A single formatted message: passing the selector as a second
            # positional argument made str(error) render as a tuple.
            raise InvalidSelectorError(
                f"Selector is invalid for report: {self.selector}"
            )

    @staticmethod
    def from_cli_params(
        cli_filters: Tuple[str, ...], cli_excludes: Tuple[str, ...]
    ) -> "FiltersSchema":
        """Build a ``FiltersSchema`` from CLI ``key:v1,v2`` filter strings.

        Args:
            cli_filters: strings turned into ``IS`` filters.
            cli_excludes: strings turned into ``IS_NOT`` filters.

        Returns:
            The resulting schema. Strings with an unsupported key are skipped
            with a warning. The default status filters are appended unless an
            ``IS`` status filter was supplied.

        Raises:
            ValueError: if a status or resource type value is not a valid
                ``Status`` / ``ResourceType``.
        """
        all_filters: List[Tuple[str, FilterType]] = [
            (cli_filter, FilterType.IS) for cli_filter in cli_filters
        ] + [(cli_exclude, FilterType.IS_NOT) for cli_exclude in cli_excludes]
        if not all_filters:
            return FiltersSchema()

        tags: List[FilterSchema] = []
        owners: List[FilterSchema] = []
        models: List[FilterSchema] = []
        statuses: List[StatusFilterSchema] = []
        resource_types: List[ResourceTypeFilterSchema] = []

        def _plain_filter(values, filter_type):
            # Values are kept as the raw strings from the CLI.
            return FilterSchema(values=values, type=filter_type)

        def _status_filter(values, filter_type):
            # Convert raw strings to Status members (ValueError if unknown).
            return StatusFilterSchema(
                values=[Status(status) for status in values],
                type=filter_type,
            )

        def _resource_type_filter(values, filter_type):
            # Convert raw strings to ResourceType members (ValueError if unknown).
            return ResourceTypeFilterSchema(
                values=[ResourceType(resource_type) for resource_type in values],
                type=filter_type,
            )

        # (pattern, destination list, builder). Order matters: the first
        # pattern that matches a filter string wins.
        handlers = (
            (re.compile(r"tags:(.*)"), tags, _plain_filter),
            (re.compile(r"owners:(.*)"), owners, _plain_filter),
            (re.compile(r"models:(.*)"), models, _plain_filter),
            (re.compile(r"statuses:(.*)"), statuses, _status_filter),
            (re.compile(r"resource_types:(.*)"), resource_types, _resource_type_filter),
        )

        for cli_filter, filter_type in all_filters:
            for pattern, destination, build_filter in handlers:
                matched_values = FiltersSchema._match_filter_regex(
                    filter_string=cli_filter, regex=pattern
                )
                if matched_values:
                    destination.append(build_filter(matched_values, filter_type))
                    break
            else:
                logger.warning(
                    f'Filter "{cli_filter.split(":")[0]}" is not supported - Skipping this filter ("{cli_filter}").'
                )

        # Without an explicit IS status filter, fall back to the defaults.
        if not any(status_filter.type == FilterType.IS for status_filter in statuses):
            statuses.extend(_get_default_statuses_filter())

        return FiltersSchema(
            tags=tags,
            owners=owners,
            models=models,
            statuses=statuses,
            resource_types=resource_types,
        )

    @staticmethod
    def _match_filter_regex(filter_string: str, regex: Pattern) -> List[str]:
        """Return the comma-separated values captured by ``regex`` group 1.

        Returns an empty list when ``regex`` does not match ``filter_string``.
        """
        match = regex.search(filter_string)
        return match.group(1).split(",") if match else []

    def to_selector_filter_schema(self) -> "SelectorFilterSchema":
        """Convert to the flatter ``SelectorFilterSchema``.

        Only the first value of the first tag / owner / model filter and the
        values of the first status / resource-type filter are carried over;
        filter operators are dropped. A first filter with no values yields
        ``None`` for that field.
        """

        def _first_value(filters):
            # First value of the first filter, or None when there is none.
            if filters and filters[0].values:
                return filters[0].values[0]
            return None

        return SelectorFilterSchema(
            selector=self.selector or None,
            invocation_id=self.invocation_id or None,
            invocation_time=self.invocation_time or None,
            last_invocation=self.last_invocation or False,
            node_names=self.node_names or None,
            tag=_first_value(self.tags),
            owner=_first_value(self.owners),
            model=_first_value(self.models),
            statuses=self.statuses[0].values if self.statuses else None,
            resource_types=(
                self.resource_types[0].values if self.resource_types else None
            ),
        )

    def apply(
        self,
        filter_fields: FilterFields,
    ) -> bool:
        """Return whether ``filter_fields`` pass every configured filter.

        Each filter list must pass in full (AND). ``node_names``, when set, is
        treated as one ``IS`` filter over ``filter_fields.node_names``.
        """
        per_field_checks = (
            (self.tags, filter_fields.tags),
            (self.models, filter_fields.models),
            (self.owners, filter_fields.owners),
            (self.statuses, filter_fields.normalized_status),
            (self.resource_types, filter_fields.resource_types),
        )
        for filter_schemas, field_values in per_field_checks:
            for filter_schema in filter_schemas:
                if not filter_schema.apply_filter_on_values(field_values):
                    return False

        if self.node_names:
            node_names_filter = FilterSchema(values=self.node_names, type=FilterType.IS)
            if not node_names_filter.apply_filter_on_values(filter_fields.node_names):
                return False

        return all(
            filter_schema.apply_filter_on_values(filter_fields.test_ids)
            for filter_schema in self.test_ids
        )
class SelectorFilterSchema(BaseModel):
    """Flattened filter form produced by ``FiltersSchema.to_selector_filter_schema``.

    Holds at most one tag, owner and model, and plain lists of statuses,
    resource types and node names.
    """

    selector: Optional[str] = None
    invocation_id: Optional[str] = None
    invocation_time: Optional[str] = None
    last_invocation: Optional[bool] = False
    tag: Optional[str] = None
    owner: Optional[str] = None
    model: Optional[str] = None
    # Defaults to the fail / error / runtime error / warn statuses.
    statuses: Optional[List[Status]] = [
        Status.FAIL,
        Status.ERROR,
        Status.RUNTIME_ERROR,
        Status.WARN,
    ]
    resource_types: Optional[List[ResourceType]] = None
    node_names: Optional[List[str]] = None
class WarehouseInfo(BaseModel):
    """Identifier and type of a warehouse, both required strings."""

    id: str
    type: str