Skip to content

Commit be9531b

Browse files
committed
Implemented bulk replacing in files.
1 parent 9af8024 commit be9531b

File tree

9 files changed

+210
-5
lines changed

9 files changed

+210
-5
lines changed

https_everywhere/__main__.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from pathlib import Path
2+
3+
from plumbum import cli
4+
5+
from .core import CombinedReplacerFactory
6+
from .core.InBufferReplacer import InBufferReplacer
7+
from .core.InFileReplacer import InFileReplacer
8+
9+
from .replacers.HEReplacer import HEReplacer
10+
from .replacers.HSTSPreloadReplacer import HSTSPreloadReplacer
11+
12+
13+
class OurInBufferReplacer(InBufferReplacer):
    """In-buffer replacer wired to the HSTS-preload and HTTPS Everywhere replacers.

    ``preloads`` and ``heRulesets`` are forwarded by keyword to ``FACS``, which
    maps each keyword name to the replacer class built from its value.
    """

    __slots__ = ()

    # Keyword name -> replacer constructor.
    FACS = CombinedReplacerFactory(
        {"preloads": HSTSPreloadReplacer, "heRulesets": HEReplacer}
    )

    def __init__(self, preloads=None, heRulesets=None):
        # Keep this keyword order: the factory builds replacers in the order
        # the keywords arrive.
        options = {"preloads": preloads, "heRulesets": heRulesets}
        super().__init__(**options)
24+
25+
26+
class OurInFileReplacer(InFileReplacer):
    """File replacer that applies an ``OurInBufferReplacer`` to file contents."""

    def __init__(self, preloads=None, heRulesets=None):
        bufferReplacer = OurInBufferReplacer(preloads=preloads, heRulesets=heRulesets)
        super().__init__(bufferReplacer)
29+
30+
31+
class CLI(cli.Application):
    """HTTPSEverywhere-like URI rewriter"""


@CLI.subcommand("bulk")
class FileRewriteCLI(cli.Application):
    """Rewrites URIs in files."""

    def main(self, *filesOrDirs):
        # Rewrite URIs in every given file. Directories are walked recursively;
        # with no arguments the current directory is processed.
        import sys

        try:
            from tqdm import tqdm
        except ImportError:
            # tqdm is optional: fall back to a pass-through "progress bar".
            def tqdm(x):
                return x

        if not filesOrDirs:
            filesOrDirs = (".",)

        # Built lazily and only once: constructing the replacer with default
        # arguments is repeated work if done for every directory argument.
        repl = None

        for fileOrDir in tqdm(filesOrDirs):
            fileOrDir = Path(fileOrDir).absolute()
            if fileOrDir.is_dir():
                files = [el for el in fileOrDir.glob("**/*") if not el.is_dir()]
            else:
                files = [fileOrDir]

            if not files:
                continue

            if repl is None:
                repl = OurInFileReplacer()

            for f in files:
                try:
                    repl(f)
                except UnicodeDecodeError:
                    # A recursive walk picks up non-text files too; skip them
                    # instead of aborting the whole run.
                    print("Skipping (not valid text): {}".format(f), file=sys.stderr)


if __name__ == "__main__":
    CLI.run()

https_everywhere/adapter.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import unicode_literals
22

33
import urllib3
4-
from urllib3.util.url import parse_url
54

65
import requests
76
from requests.adapters import HTTPAdapter
@@ -12,6 +11,7 @@
1211
from ._chrome_preload_hsts import _preload_including_subdomains
1312
from ._mozilla_preload_hsts import _preload_remove_negative
1413
from ._util import _check_in
14+
from .replacers.HSTSPreloadReplacer import apply_HSTS_preload
1515

1616
PY2 = str != "".__class__
1717
if PY2:
@@ -154,10 +154,7 @@ def __init__(self, *args, **kwargs):
154154

155155
def get_redirect(self, url):
    """Return the HTTPS form of ``url`` when its host is HSTS-preloaded.

    For any other URL the decision is delegated to the parent class's
    ``get_redirect``.
    """
    if url.startswith("http://"):
        new_url = apply_HSTS_preload(url, self._domains)
        # apply_HSTS_preload hands back the input unchanged when the host is
        # not preloaded; returning that would bypass the parent's handling
        # and report the URL as a redirect to itself.
        if new_url != url:
            return new_url

    return super(PreloadHSTSAdapter, self).get_redirect(url)
163160

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import re
2+
3+
from urllib3.util.url import parse_url
4+
5+
from . import ReplaceContext, SingleURIReplacer
6+
7+
# Matches http:// and ftp:// URIs, with optional "user[:password]@" userinfo
# and an optional path.
uri_re = re.compile("(?:http|ftp):\\/\\/?((?:[\\w-]+)(?::[\\w-]+)?@)?[\\w\\.:()-]+(?:\\/[\\w\\.:()/-]*)?")
10+
11+
12+
class InBufferReplacer(SingleURIReplacer):
    """Rewrites every URI found in a text buffer.

    Subclasses set ``FACS`` to a factory. It is called with the constructor's
    keyword arguments to build the single-URI replacer that is applied to each
    match of ``uri_re``.
    """

    __slots__ = ("singleURIReplacer",)
    FACS = None

    def __init__(self, **kwargs):
        self.singleURIReplacer = self.__class__.FACS(**kwargs)

    def _rePlaceFunc(self, m):
        """Return the replacement text for the regex match ``m``."""
        uri = m.group(0)
        ctx = ReplaceContext(uri)
        self.singleURIReplacer(ctx)
        if ctx.count > 0:
            return ctx.res
        return uri

    def __call__(self, inputStr):
        """Rewrite all URIs in ``inputStr``.

        Returns a ``ReplaceContext`` whose ``res`` is the rewritten text and
        whose ``count`` is the number of URIs that were actually changed.
        """
        changed = 0

        def substitute(m):
            nonlocal changed
            replacement = self._rePlaceFunc(m)
            # re.subn would count every match, including URIs left untouched;
            # count only real modifications so callers can tell whether the
            # buffer changed.
            if replacement != m.group(0):
                changed += 1
            return replacement

        res = uri_re.sub(substitute, inputStr)
        return ReplaceContext(res, changed)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from pathlib import Path
2+
3+
4+
class InFileReplacer:
    """Applies an in-buffer replacer to the contents of a file, in place."""

    __slots__ = ("inBufferReplacer", "encoding")

    def __init__(self, inBufferReplacer, encoding="utf-8"):
        """Store the buffer replacer and the text encoding used for file I/O.

        ``inBufferReplacer`` is called with the file's text and must return an
        object exposing ``res`` (the new text) and ``count``.
        """
        self.inBufferReplacer = inBufferReplacer
        self.encoding = encoding

    def __call__(self, inputFilePath):
        """Rewrite ``inputFilePath`` if the replacer reports any replacements.

        The file is left untouched when ``count`` is falsy.
        """
        t = inputFilePath.read_text(encoding=self.encoding)
        ctx = self.inBufferReplacer(t)

        if ctx.count:
            # Write with the same encoding the file was read with; the
            # platform default may differ and would corrupt the file.
            inputFilePath.write_text(ctx.res, encoding=self.encoding)

https_everywhere/core/__init__.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from functools import partial
2+
3+
4+
class ReplaceContext:
    """Mutable state passed through a chain of replacers.

    ``res`` holds the current text, ``count`` the number of replacements
    recorded so far, and ``shouldStop`` a flag that makes a combined replacer
    stop running further children.
    """

    __slots__ = ("res", "shouldStop", "count")

    def __init__(self, res, count=0, shouldStop=False):
        self.res, self.count, self.shouldStop = res, count, shouldStop
11+
12+
13+
class SingleURIReplacer:
    """Interface for replacers; subclasses override both methods."""

    def __init__(self, arg):
        """Configure the replacer from ``arg``; must be overridden."""
        raise NotImplementedError()

    def __call__(self, ctx):
        """Apply the replacer to ``ctx``; must be overridden."""
        raise NotImplementedError()
19+
20+
21+
class CombinedReplacer(SingleURIReplacer):
    """Runs a sequence of replacers over the same context, in order."""

    __slots__ = ("children",)

    def __init__(self, children):
        self.children = children

    def __call__(self, ctx):
        """Apply each child to ``ctx`` until one sets ``ctx.shouldStop``.

        Returns the same ``ctx`` object.
        """
        for replacer in self.children:
            replacer(ctx)
            if ctx.shouldStop:
                return ctx
        return ctx
33+
34+
35+
class CombinedReplacerFactory:
    """Builds a ``CombinedReplacer`` from keyword arguments.

    ``args2Ctors`` maps a keyword name to the constructor invoked with that
    keyword's value.
    """

    __slots__ = ("args2Ctors", "ctor")

    def __init__(self, args2Ctors):
        self.args2Ctors = args2Ctors

    def _gen_replacers(self, kwargs):
        """Yield one replacer per keyword that has a registered constructor.

        Replacers come out in the iteration order of ``kwargs``; keywords with
        no (truthy) constructor registered are ignored.
        """
        lookup = self.args2Ctors.get
        for name in kwargs:
            ctor = lookup(name, None)
            if ctor:
                yield ctor(kwargs[name])

    def __call__(self, **kwargs):
        replacers = tuple(self._gen_replacers(kwargs))
        return CombinedReplacer(replacers)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from .. import _rules
2+
from .._rules import _get_rulesets, https_url_rewrite
3+
from ..core import SingleURIReplacer
4+
5+
6+
class HEReplacer(SingleURIReplacer):
    """Replacer that rewrites a URI through ``https_url_rewrite``."""

    __slots__ = ("rulesets",)

    def __init__(self, rulesets):
        """Use ``rulesets``; when it is ``None``, take ``_rules._DATA`` instead.

        ``_get_rulesets()`` is called first in that case (presumably to
        populate ``_rules._DATA`` -- confirm in ``_rules``).
        """
        if rulesets is not None:
            self.rulesets = rulesets
            return
        _get_rulesets()
        self.rulesets = _rules._DATA

    def __call__(self, ctx):
        """Rewrite ``ctx.res`` in place and bump ``ctx.count`` if it changed."""
        before = ctx.res
        after = https_url_rewrite(before, self.rulesets)
        ctx.res = after
        if before != after:
            ctx.count += 1
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from .._chrome_preload_hsts import _preload_including_subdomains as _get_preload_chrome
2+
from .._mozilla_preload_hsts import _preload_remove_negative as _get_preload_mozilla
3+
from ..core import SingleURIReplacer
4+
from urllib3.util.url import parse_url
5+
6+
from .._util import _check_in
7+
8+
9+
def apply_HSTS_preload(url, domains):
    """Return ``url`` upgraded to ``https`` when its host is in ``domains``.

    Only ``http`` URLs are upgraded. A URL with any other scheme, or with no
    scheme at all, is returned unchanged, as is an ``http`` URL whose host is
    not matched by ``_check_in(domains, host)``.
    """
    p = parse_url(url)
    scheme = p.scheme
    # HSTS is about upgrading plain HTTP. Other schemes (e.g. ftp://) must not
    # be turned into https://, and a missing scheme would break the slicing
    # below.
    if scheme is None or scheme.lower() != "http":
        return url
    if _check_in(domains, p.host):
        # Drop "<scheme>:" and keep everything from "//" onwards.
        return "https:" + url[len(scheme) + 1:]
    return url
15+
16+
17+
class HSTSPreloadReplacer(SingleURIReplacer):
    """Replacer that rewrites a URI through ``apply_HSTS_preload``."""

    __slots__ = ("preloads",)

    def __init__(self, preloads):
        """Use ``preloads``; when ``None``, combine the Mozilla and Chrome lists."""
        if preloads is not None:
            self.preloads = preloads
            return
        self.preloads = _get_preload_mozilla() | _get_preload_chrome()

    def __call__(self, ctx):
        """Rewrite ``ctx.res`` in place and bump ``ctx.count`` if it changed."""
        before = ctx.res
        after = apply_HSTS_preload(before, self.preloads)
        ctx.res = after
        if before != after:
            ctx.count += 1

https_everywhere/replacers/__init__.py

Whitespace-only changes.

setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,7 @@
6363
classifiers=classifiers.splitlines(),
6464
tests_require=["unittest-expander", "lxml", "tldextract", "regex"],
6565
# lxml is optional, needed for testing upstream rules
66+
entry_points = {
67+
"console_scripts": ["pyhttpeverywhere = https_everywhere.__main__:CLI"]
68+
}
6669
)

0 commit comments

Comments
 (0)