Skip to content

Commit 6d80c67

Browse files
authored
python(feat): Try using faster yaml loader with fallback for older py… (#322)
1 parent 9ae5ff2 commit 6d80c67

5 files changed

Lines changed: 85 additions & 86 deletions

File tree

python/lib/sift_py/ingestion/config/yaml/load.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from pathlib import Path
22
from typing import Any, Dict, List, cast
33

4-
import yaml
5-
64
import sift_py.yaml.rule as rule_yaml
75
from sift_py.ingestion.config.yaml.error import YamlConfigError
86
from sift_py.ingestion.config.yaml.spec import (
@@ -11,7 +9,7 @@
119
)
1210
from sift_py.yaml.channel import ChannelConfigYamlSpec, _validate_channel, _validate_channel_anchor
1311
from sift_py.yaml.rule import RuleYamlSpec
14-
from sift_py.yaml.utils import _type_fqn
12+
from sift_py.yaml.utils import _type_fqn, try_fast_yaml_load
1513

1614
load_named_expression_modules = rule_yaml.load_named_expression_modules
1715

@@ -22,7 +20,7 @@ def read_and_validate(path: Path) -> TelemetryConfigYamlSpec:
2220
step will return an error whose source is the `yaml` package. Any errors that may occur during the
2321
validation step will return a `sift_py.ingestion.config.yaml.error.YamlConfigError`.
2422
"""
25-
raw_config = _read_yaml(path)
23+
raw_config = try_fast_yaml_load(path)
2624
return _validate_yaml(raw_config)
2725

2826

@@ -88,11 +86,6 @@ def _validate_yaml(raw_config: Dict[Any, Any]) -> TelemetryConfigYamlSpec:
8886
return cast(TelemetryConfigYamlSpec, raw_config)
8987

9088

91-
def _read_yaml(path: Path) -> Dict[Any, Any]:
92-
with open(path, "r") as f:
93-
return cast(Dict[Any, Any], yaml.safe_load(f.read()))
94-
95-
9689
def _validate_flow(val: Any):
9790
flow = cast(Dict[Any, Any], val)
9891

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
from pathlib import Path
2-
from typing import Any, Dict, List, cast
3-
4-
import yaml
2+
from typing import List
53

64
from sift_py.calculated_channels.config import CalculatedChannelConfig
75
from sift_py.ingestion.config.yaml.error import YamlConfigError
8-
from sift_py.yaml.utils import _handle_subdir
6+
from sift_py.yaml.utils import _handle_subdir, try_fast_yaml_load
97

108

119
def load_calculated_channels(paths: List[Path]) -> List[CalculatedChannelConfig]:
@@ -29,36 +27,35 @@ def update_calculated_channels(path: Path):
2927

3028
def _read_calculated_channels_yaml(path: Path) -> List[CalculatedChannelConfig]:
3129
calculated_channel_configs = []
32-
with open(path, "r") as f:
33-
channel_config_yaml = cast(Dict[str, Any], yaml.safe_load(f.read()))
34-
35-
calculated_channel_list = channel_config_yaml.get("calculated_channels", [])
36-
for calc_channel in calculated_channel_list:
37-
if not isinstance(calc_channel, dict):
30+
channel_config_yaml = try_fast_yaml_load(path)
31+
32+
calculated_channel_list = channel_config_yaml.get("calculated_channels", [])
33+
for calc_channel in calculated_channel_list:
34+
if not isinstance(calc_channel, dict):
35+
raise YamlConfigError(
36+
f"Expected 'calculated_channels' to be a list of dictionaries in yaml: '{path}'"
37+
)
38+
for channel_ref in calc_channel.get("channel_references", []):
39+
parsed_channel_refs = []
40+
if not isinstance(channel_ref, dict):
3841
raise YamlConfigError(
39-
f"Expected 'calculated_channels' to be a list of dictionaries in yaml: '{path}'"
42+
f"Expected 'channel_references' to be a list of dictionaries in yaml: '{path}'"
4043
)
41-
for channel_ref in calc_channel.get("channel_references", []):
42-
parsed_channel_refs = []
43-
if not isinstance(channel_ref, dict):
44-
raise YamlConfigError(
45-
f"Expected 'channel_references' to be a list of dictionaries in yaml: '{path}'"
46-
)
47-
if "channel_reference" not in channel_ref:
48-
for k, v in channel_ref.items():
49-
parsed_channel_refs.append(dict(channel_reference=k, channel_identifier=v))
50-
else:
51-
parsed_channel_refs.append(channel_ref)
52-
calc_channel["channel_references"] = parsed_channel_refs
53-
54-
if not isinstance(calculated_channel_list, list):
55-
raise YamlConfigError(f"Expected 'calculated_channels' to be a list in yaml: '{path}'")
44+
if "channel_reference" not in channel_ref:
45+
for k, v in channel_ref.items():
46+
parsed_channel_refs.append(dict(channel_reference=k, channel_identifier=v))
47+
else:
48+
parsed_channel_refs.append(channel_ref)
49+
calc_channel["channel_references"] = parsed_channel_refs
50+
51+
if not isinstance(calculated_channel_list, list):
52+
raise YamlConfigError(f"Expected 'calculated_channels' to be a list in yaml: '{path}'")
53+
54+
for calc_channel in calculated_channel_list:
55+
try:
56+
calc_channel_cfg = CalculatedChannelConfig(**calc_channel)
57+
calculated_channel_configs.append(calc_channel_cfg)
58+
except Exception as e:
59+
raise YamlConfigError(f"Error parsing calculated channel '{calc_channel}'") from e
5660

57-
for calc_channel in calculated_channel_list:
58-
try:
59-
calc_channel_cfg = CalculatedChannelConfig(**calc_channel)
60-
calculated_channel_configs.append(calc_channel_cfg)
61-
except Exception as e:
62-
raise YamlConfigError(f"Error parsing calculated channel '{calc_channel}'") from e
63-
64-
return calculated_channel_configs
61+
return calculated_channel_configs

python/lib/sift_py/yaml/report_templates.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from datetime import datetime
22
from pathlib import Path
3-
from typing import Any, Dict, List, cast
3+
from typing import List
44

5-
import yaml
65
from typing_extensions import NotRequired, TypedDict
76

87
from sift_py.ingestion.config.yaml.error import YamlConfigError
98
from sift_py.report_templates.config import ReportTemplateConfig
10-
from sift_py.yaml.utils import _handle_subdir
9+
from sift_py.yaml.utils import _handle_subdir, try_fast_yaml_load
1110

1211

1312
def load_report_templates(paths: List[Path]) -> List[ReportTemplateConfig]:
@@ -31,23 +30,22 @@ def update_report_templates(path: Path):
3130

3231
def _read_report_template_yaml(path: Path) -> List[ReportTemplateConfig]:
3332
report_templates = []
34-
with open(path, "r") as f:
35-
report_templates_yaml = cast(Dict[str, Any], yaml.safe_load(f.read()))
33+
report_templates_yaml = try_fast_yaml_load(path)
3634

37-
report_template_list = report_templates_yaml.get("report_templates")
38-
if not isinstance(report_template_list, list):
39-
raise YamlConfigError(
40-
f"Expected 'report_templates' to be a list in report template yaml: '{path}'"
41-
)
35+
report_template_list = report_templates_yaml.get("report_templates")
36+
if not isinstance(report_template_list, list):
37+
raise YamlConfigError(
38+
f"Expected 'report_templates' to be a list in report template yaml: '{path}'"
39+
)
4240

43-
for report_template in report_template_list:
44-
try:
45-
report_template_config = ReportTemplateConfig(**report_template)
46-
report_templates.append(report_template_config)
47-
except Exception as e:
48-
raise YamlConfigError(f"Error parsing report template '{report_template}'") from e
41+
for report_template in report_template_list:
42+
try:
43+
report_template_config = ReportTemplateConfig(**report_template)
44+
report_templates.append(report_template_config)
45+
except Exception as e:
46+
raise YamlConfigError(f"Error parsing report template '{report_template}'") from e
4947

50-
return report_templates
48+
return report_templates
5149

5250

5351
class ReportTemplateYamlSpec(TypedDict):

python/lib/sift_py/yaml/rule.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from pathlib import Path
55
from typing import Any, Dict, List, Literal, Union, cast
66

7-
import yaml
87
from typing_extensions import NotRequired, TypedDict
98

109
from sift_py.ingestion.config.yaml.error import YamlConfigError
@@ -13,7 +12,7 @@
1312
ChannelConfigYamlSpec,
1413
_validate_channel_reference,
1514
)
16-
from sift_py.yaml.utils import _handle_subdir, _type_fqn
15+
from sift_py.yaml.utils import _handle_subdir, _type_fqn, try_fast_yaml_load
1716

1817
_SUB_EXPRESSION_REGEX = re.compile(r"^\$[a-zA-Z_]+$")
1918

@@ -63,36 +62,34 @@ def update_rule_modules(rule_module_path: Path):
6362

6463

6564
def _read_named_expression_module_yaml(path: Path) -> Dict[str, str]:
66-
with open(path, "r") as f:
67-
named_expressions = cast(Dict[Any, Any], yaml.safe_load(f.read()))
65+
named_expressions = try_fast_yaml_load(path)
6866

69-
for key, value in named_expressions.items():
70-
if not isinstance(key, str):
71-
raise YamlConfigError(
72-
f"Expected '{key}' to be a string in named expression module '{path}'."
73-
)
74-
if not isinstance(value, str):
75-
raise YamlConfigError(
76-
f"Expected expression of '{key}' to be a string in named expression module '{path}'."
77-
)
67+
for key, value in named_expressions.items():
68+
if not isinstance(key, str):
69+
raise YamlConfigError(
70+
f"Expected '{key}' to be a string in named expression module '{path}'."
71+
)
72+
if not isinstance(value, str):
73+
raise YamlConfigError(
74+
f"Expected expression of '{key}' to be a string in named expression module '{path}'."
75+
)
7876

79-
return cast(Dict[str, str], named_expressions)
77+
return cast(Dict[str, str], named_expressions)
8078

8179

8280
def _read_rule_module_yaml(path: Path) -> List[RuleYamlSpec]:
83-
with open(path, "r") as f:
84-
module_rules = cast(Dict[Any, Any], yaml.safe_load(f.read()))
85-
rules = module_rules.get("rules")
86-
if not isinstance(rules, list):
87-
raise YamlConfigError(
88-
f"Expected '{rules}' to be a list in rule module yaml: '{path}'"
89-
f"{_type_fqn(RuleYamlSpec)}"
90-
)
81+
module_rules = try_fast_yaml_load(path)
82+
rules = module_rules.get("rules")
83+
if not isinstance(rules, list):
84+
raise YamlConfigError(
85+
f"Expected '{rules}' to be a list in rule module yaml: '{path}'"
86+
f"{_type_fqn(RuleYamlSpec)}"
87+
)
9188

92-
for rule in cast(List[Any], rules):
93-
_validate_rule(rule)
89+
for rule in cast(List[Any], rules):
90+
_validate_rule(rule)
9491

95-
return cast(List[RuleYamlSpec], rules)
92+
return cast(List[RuleYamlSpec], rules)
9693

9794

9895
def _validate_rule(val: Any):

python/lib/sift_py/yaml/utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from pathlib import Path
2-
from typing import Callable, Type
2+
from typing import Any, Callable, Dict, Type, cast
3+
4+
import yaml
35

46

57
def _handle_subdir(path: Path, file_handler: Callable):
@@ -13,3 +15,15 @@ def _handle_subdir(path: Path, file_handler: Callable):
1315

1416
def _type_fqn(typ: Type) -> str:
1517
return f"{typ.__module__}.{typ.__name__}"
18+
19+
20+
def try_fast_yaml_load(path: Path) -> Dict[Any, Any]:
    """
    Load the YAML file at `path` using the fastest safe loader that is available.

    `yaml.CSafeLoader` is preferred because it is faster than the pure-Python safe loader,
    but it is not built into the wheel for earlier versions of Python. When it is missing,
    `yaml.SafeLoader` (the loader used by `yaml.safe_load`) is used instead.

    The file is decoded as UTF-8 rather than the platform's locale encoding so that the same
    file parses identically on every platform.

    Errors raised while opening or parsing the file propagate to the caller unchanged.

    NOTE(review): an empty document presumably parses to `None` rather than a dict, despite
    the return annotation -- confirm that callers handle this.
    """
    # Resolve the loader once so both the fast and the fallback path share one load call.
    loader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
    with open(path, "r", encoding="utf-8") as f:
        return cast(Dict[Any, Any], yaml.load(f.read(), Loader=loader))

0 commit comments

Comments
 (0)