Skip to content

Commit 685ace7

Browse files
committed
_rules: Time and log ruleset reduction
Closes #4
1 parent 5cb6b65 commit 685ace7

File tree

2 files changed

+47
-14
lines changed

2 files changed

+47
-14
lines changed

https_everywhere/_rules.py

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import unicode_literals
22

33
import re
4+
from timeit import default_timer as timer
45

56
from cached_property import cached_property
67
from logging_helper import setup_logging
@@ -470,18 +471,22 @@ def _reduce_ruleset(ruleset):
470471
return True
471472

472473

473-
def _reduce_rules(rulesets, check=False):
474+
def _reduce_rules(rulesets, check=False, simplify=False):
474475
if isinstance(rulesets, dict):
475476
rulesets = rulesets["rulesets"]
476477

477-
if check and not expand_pattern:
478+
if (check or simplify) and not expand_pattern:
478479
logger.warning("Rule analysis and simplification only supported on Python 3")
479-
check = False
480+
check = simplify = False
480481

481482
mapping = {}
482483
domains = set()
483484
prefix_targets = set()
484485
suffix_targets = set()
486+
simplifications_performed = 0
487+
488+
logger.info("Importing HTTPSEverywhere rules")
489+
start = timer()
485490

486491
for ruleset in rulesets:
487492
orig_ruleset = ruleset.copy()
@@ -636,9 +641,11 @@ def _reduce_rules(rulesets, check=False):
636641
# Discard common data
637642
if rules == ONLY_FORCE_HTTPS_RULE_IN:
638643
rules = ONLY_FORCE_HTTPS_RULE_COMPILED
644+
ruleset = rules, exclusions
639645

640-
else:
646+
elif simplify:
641647
reduced_rules = []
648+
original_rule_count = len(rules)
642649
for item in rules:
643650
from_ = item["from"]
644651
if from_ in _FIXME_REJECT_PATTERNS:
@@ -659,20 +666,26 @@ def _reduce_rules(rulesets, check=False):
659666
if rules[-1] == FORCE_HTTPS_RULE:
660667
rules[-1] = FORCE_HTTPS_RULE_COMPILED
661668

662-
reduced_ruleset = _Ruleset(rules, exclusions, targets)
669+
reduced_ruleset = _Ruleset(rules, exclusions, targets)
663670

664-
if check:
665671
_reduce_ruleset(reduced_ruleset)
672+
final_rule_count = len(reduced_ruleset._rules)
673+
simplifications_performed += final_rule_count
666674

667-
ruleset = (reduced_ruleset._rules, reduced_ruleset._exclusions)
675+
ruleset = (reduced_ruleset._rules, reduced_ruleset._exclusions)
676+
677+
else:
678+
rules = [(item["from"], item["to"]) for item in rules]
679+
ruleset = rules, exclusions
668680

669681
if ruleset == ONLY_FORCE_HTTPS_RULE_COMPILED_NO_EXCEPTIONS:
670682
ruleset = ONLY_FORCE_HTTPS_RULE_COMPILED_NO_EXCEPTIONS
671683

672684
for target in targets:
673685
# https://github.com/EFForg/https-everywhere/issues/18897
674686
if (
675-
name == "Vox Media.com (resources)"
687+
check
688+
and name == "Vox Media.com (resources)"
676689
and target
677690
in [
678691
"voxmedia.com",
@@ -712,6 +725,17 @@ def _reduce_rules(rulesets, check=False):
712725
# TODO: re-enable or remove when new published ruleset is fixed
713726
# assert sorted(overlapping_prefixes) == _FIXME_MULTIPLE_RULEST_PREFIXES, sorted(overlapping_prefixes)
714727

728+
end = timer()
729+
elapsed = end - start
730+
simplifications_message = "; {} non-trivial simplifications".format(
731+
simplifications_performed
732+
)
733+
logger.info(
734+
"Finished importing HTTPSEverywhere rules after {:.2f}s{}".format(
735+
elapsed, simplifications_message if simplifications_performed else ""
736+
)
737+
)
738+
715739
return mapping
716740

717741

@@ -750,7 +774,7 @@ def _get_rulesets():
750774
global _DATA
751775
if not _DATA:
752776
data = fetch_update()
753-
_DATA = _reduce_rules(data)
777+
_DATA = _reduce_rules(data, simplify=True)
754778
return _DATA
755779

756780

tests/test_rules.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,16 @@
1919
https_everything_project = os.path.join(project_root, "..", "https-everywhere")
2020

2121
if not os.path.exists(https_everything_project):
22-
raise unittest.SkipTest("Clone https-everywhere beside https-everywhere-py to run rule tests")
22+
raise unittest.SkipTest(
23+
"Clone https-everywhere beside https-everywhere-py to run rule tests"
24+
)
2325

24-
https_everywhere_checker_root = os.path.join(https_everything_project, "test", "rules", "src")
26+
https_everywhere_checker_root = os.path.join(
27+
https_everything_project, "test", "rules", "src"
28+
)
2529
https_everywhere_checker_root_init = os.path.join(
26-
https_everywhere_checker_root, "https_everywhere_checker", "__init__.py")
30+
https_everywhere_checker_root, "https_everywhere_checker", "__init__.py"
31+
)
2732

2833
if not os.path.exists(https_everywhere_checker_root_init):
2934
with open(https_everywhere_checker_root_init, "w") as f:
@@ -37,7 +42,9 @@
3742
except ImportError as e:
3843
raise unittest.SkipTest("https_everywhere_checker not importable: {}".format(e))
3944

40-
rules_path = os.path.join(https_everything_project, *("src/chrome/content/rules".split("/")))
45+
rules_path = os.path.join(
46+
https_everything_project, *("src/chrome/content/rules".split("/"))
47+
)
4148
ruleset_file = os.path.join(rules_path, "default.rulesets")
4249

4350
if not os.path.exists(ruleset_file):
@@ -77,7 +84,9 @@ def _load_upstream_reduced_rulesets():
7784

7885
with open(ruleset_file) as f:
7986
_upstream_data = json.load(f)
80-
_upstream_reduced = _reduce_rules(_upstream_data, check=_run_check)
87+
_upstream_reduced = _reduce_rules(
88+
_upstream_data, check=_run_check, simplify=True
89+
)
8190
return _upstream_reduced
8291

8392

0 commit comments

Comments
 (0)