11from __future__ import unicode_literals
22
33import re
4+ from timeit import default_timer as timer
45
56from cached_property import cached_property
67from logging_helper import setup_logging
@@ -470,18 +471,22 @@ def _reduce_ruleset(ruleset):
470471 return True
471472
472473
473- def _reduce_rules (rulesets , check = False ):
474+ def _reduce_rules (rulesets , check = False , simplify = False ):
474475 if isinstance (rulesets , dict ):
475476 rulesets = rulesets ["rulesets" ]
476477
477- if check and not expand_pattern :
478+ if ( check or simplify ) and not expand_pattern :
478479 logger .warning ("Rule analysis and simplification only supported on Python 3" )
479- check = False
480+ check = simplify = False
480481
481482 mapping = {}
482483 domains = set ()
483484 prefix_targets = set ()
484485 suffix_targets = set ()
486+ simplifications_performed = 0
487+
488+ logger .info ("Importing HTTPSEverywhere rules" )
489+ start = timer ()
485490
486491 for ruleset in rulesets :
487492 orig_ruleset = ruleset .copy ()
@@ -636,9 +641,11 @@ def _reduce_rules(rulesets, check=False):
636641 # Discard common data
637642 if rules == ONLY_FORCE_HTTPS_RULE_IN :
638643 rules = ONLY_FORCE_HTTPS_RULE_COMPILED
644+ ruleset = rules , exclusions
639645
640- else :
646+ elif simplify :
641647 reduced_rules = []
648+ original_rule_count = len (rules )
642649 for item in rules :
643650 from_ = item ["from" ]
644651 if from_ in _FIXME_REJECT_PATTERNS :
@@ -659,20 +666,26 @@ def _reduce_rules(rulesets, check=False):
659666 if rules [- 1 ] == FORCE_HTTPS_RULE :
660667 rules [- 1 ] = FORCE_HTTPS_RULE_COMPILED
661668
662- reduced_ruleset = _Ruleset (rules , exclusions , targets )
669+ reduced_ruleset = _Ruleset (rules , exclusions , targets )
663670
664- if check :
665671 _reduce_ruleset (reduced_ruleset )
672+ final_rule_count = len (reduced_ruleset ._rules )
673+ simplifications_performed += original_rule_count - final_rule_count
666674
667- ruleset = (reduced_ruleset ._rules , reduced_ruleset ._exclusions )
675+ ruleset = (reduced_ruleset ._rules , reduced_ruleset ._exclusions )
676+
677+ else :
678+ rules = [(item ["from" ], item ["to" ]) for item in rules ]
679+ ruleset = rules , exclusions
668680
669681 if ruleset == ONLY_FORCE_HTTPS_RULE_COMPILED_NO_EXCEPTIONS :
670682 ruleset = ONLY_FORCE_HTTPS_RULE_COMPILED_NO_EXCEPTIONS
671683
672684 for target in targets :
673685 # https://github.com/EFForg/https-everywhere/issues/18897
674686 if (
675- name == "Vox Media.com (resources)"
687+ check
688+ and name == "Vox Media.com (resources)"
676689 and target
677690 in [
678691 "voxmedia.com" ,
@@ -712,6 +725,17 @@ def _reduce_rules(rulesets, check=False):
712725 # TODO: re-enable or remove when new published ruleset is fixed
713726 # assert sorted(overlapping_prefixes) == _FIXME_MULTIPLE_RULEST_PREFIXES, sorted(overlapping_prefixes)
714727
728+ end = timer ()
729+ elapsed = end - start
730+ simplifications_message = "; {} non-trivial simplifications" .format (
731+ simplifications_performed
732+ )
733+ logger .info (
734+ "Finished importing HTTPSEverywhere rules after {:.2f}s{}" .format (
735+ elapsed , simplifications_message if simplifications_performed else ""
736+ )
737+ )
738+
715739 return mapping
716740
717741
@@ -750,7 +774,7 @@ def _get_rulesets():
750774 global _DATA
751775 if not _DATA :
752776 data = fetch_update ()
753- _DATA = _reduce_rules (data )
777+ _DATA = _reduce_rules (data , simplify = True )
754778 return _DATA
755779
756780
0 commit comments