Skip to content

Commit 06093ac

Browse files
committed
filter mode http
1 parent d9ca462 commit 06093ac

5 files changed

Lines changed: 67 additions & 21 deletions

File tree

config.ini.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ html_403 = assets/403.html
1414

1515
[Filtering]
1616
no_filter = false
17+
filter_mode = local
1718
blocked_sites = config/blocked_sites.txt
1819
blocked_url = config/blocked_url.txt
1920

pyproxy.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@
4949
help="Path to the custom 403 Forbidden HTML page"
5050
)
5151
parser.add_argument("--no-filter", action="store_true", help="Disable URL and domain filtering")
52+
parser.add_argument(
53+
"--filter-mode",
54+
type=str,
55+
choices=["local", "http"],
56+
help="Filter list mode"
57+
)
5258
parser.add_argument(
5359
"--blocked-sites",
5460
type=str,
@@ -97,6 +103,11 @@
97103
if args.no_filter
98104
else config.getboolean('Filtering', 'no_filter', fallback=False)
99105
)
106+
filter_mode = (
107+
args.filter_mode
108+
if args.filter_mode
109+
else config.get('Filtering', 'filter_mode', fallback="local")
110+
)
100111
blocked_sites = (
101112
args.blocked_sites
102113
if args.blocked_sites
@@ -146,6 +157,7 @@
146157
block_log=block_log,
147158
html_403=html_403,
148159
no_filter=no_filter,
160+
filter_mode=filter_mode,
149161
no_logging_access=no_logging_access,
150162
no_logging_block=no_logging_block,
151163
ssl_inspect=ssl_inspect,

tests/test_filter.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ def test_load_blacklist(self, mock_file):
3333
- Ensures that the sites and URLs are correctly read and loaded into sets.
3434
- Verifies that the function handles the file content properly.
3535
"""
36-
blocked_sites, blocked_urls = load_blacklist("blocked_sites.txt", "blocked_urls.txt")
36+
blocked_sites, blocked_urls = load_blacklist(
37+
"blocked_sites.txt",
38+
"blocked_urls.txt",
39+
"local"
40+
)
3741

3842
self.assertIn("blocked.com", blocked_sites)
3943
self.assertIn("allowed.com/blocked", blocked_sites)

utils/filter.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,55 @@
1414
import time
1515
import sys
1616
import threading
17+
import requests
1718

18-
def load_blacklist(blocked_sites_path: str, blocked_url_path: str) -> set:
19+
def load_blacklist(blocked_sites_path: str, blocked_url_path: str, filter_mode: str) -> set:
1920
"""
20-
Loads blocked FQDNs or URLs from a file into a set for fast lookup.
21+
Loads blocked FQDNs or URLs from a file or URL into a set for fast lookup.
2122
2223
Args:
23-
blocked_sites_path (str): The path to the file containing blocked FQDNs.
24-
blocked_url_path (str): The path to the file containing blocked URLs.
24+
blocked_sites_path (str): The path or URL to the file containing blocked FQDNs.
25+
blocked_url_path (str): The path or URL to the file containing blocked URLs.
26+
filter_mode (str): Source of the lists: "local" reads them from files, "http" fetches them from URLs.
2527
2628
Returns:
2729
tuple: A pair of sets, (blocked sites, blocked URLs).
2830
"""
2931
blocked_sites = set()
3032
blocked_url = set()
3133

32-
with open(blocked_sites_path, 'r', encoding='utf-8') as f:
33-
for line in f:
34-
blocked_sites.add(line.strip())
35-
with open(blocked_url_path, 'r', encoding='utf-8') as f:
36-
for line in f:
37-
blocked_url.add(line.strip())
34+
def load_from_file(file_path: str) -> set:
35+
data = set()
36+
with open(file_path, 'r', encoding='utf-8') as f:
37+
for line in f:
38+
data.add(line.strip())
39+
return data
40+
41+
def load_from_http(url: str) -> set:
42+
data = set()
43+
try:
44+
response = requests.get(url, timeout=3)
45+
response.raise_for_status()
46+
for line in response.text.splitlines():
47+
data.add(line.strip())
48+
except requests.exceptions.RequestException as e:
49+
raise requests.exceptions.RequestException(f"Failed to load data from {url}: {e}")
50+
return data
51+
52+
if filter_mode == "local":
53+
blocked_sites = load_from_file(blocked_sites_path)
54+
blocked_url = load_from_file(blocked_url_path)
55+
elif filter_mode == "http":
56+
blocked_sites = load_from_http(blocked_sites_path)
57+
blocked_url = load_from_http(blocked_url_path)
3858

3959
return blocked_sites, blocked_url
4060

61+
# pylint: disable=too-many-locals
4162
def filter_process(
4263
queue: multiprocessing.Queue,
4364
result_queue: multiprocessing.Queue,
65+
filter_mode: str,
4466
blocked_sites_path: str,
4567
blocked_url_path: str
4668
) -> None:
@@ -51,13 +73,14 @@ def filter_process(
5173
queue (multiprocessing.Queue): A queue to receive URL/domain for checking.
5274
result_queue (multiprocessing.Queue): A queue to send back the result of
5375
the filtering (blocked or allowed).
76+
filter_mode (str): Filter list mode (local or http).
5477
blocked_sites_path (str): The path to the file containing blocked FQDNs.
5578
blocked_url_path (str): The path to the file containing blocked URLs.
5679
"""
5780
manager = multiprocessing.Manager()
5881
blocked_data = manager.dict({
59-
"sites": load_blacklist(blocked_sites_path, blocked_url_path)[0],
60-
"urls": load_blacklist(blocked_sites_path, blocked_url_path)[1],
82+
"sites": load_blacklist(blocked_sites_path, blocked_url_path, filter_mode)[0],
83+
"urls": load_blacklist(blocked_sites_path, blocked_url_path, filter_mode)[1],
6184
})
6285

6386
error_event = threading.Event()
@@ -67,7 +90,8 @@ def file_monitor() -> None:
6790
while True:
6891
new_blocked_sites, new_blocked_url = load_blacklist(
6992
blocked_sites_path,
70-
blocked_url_path
93+
blocked_url_path,
94+
filter_mode
7195
)
7296

7397
blocked_data["sites"] = new_blocked_sites

utils/proxy.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class ProxyServer:
3232
"""
3333
# pylint: disable=too-many-locals
3434
def __init__(self, host, port, debug, access_log, block_log,
35-
html_403, no_filter, no_logging_access, no_logging_block, ssl_inspect,
35+
html_403, no_filter, filter_mode, no_logging_access, no_logging_block, ssl_inspect,
3636
blocked_sites, blocked_url, inspect_ca_cert, inspect_ca_key, inspect_certs_folder):
3737
"""
3838
Initializes the ProxyServer instance with the provided configurations.
@@ -41,6 +41,7 @@ def __init__(self, host, port, debug, access_log, block_log,
4141
self.debug = debug
4242
self.html_403 = html_403
4343
self.no_filter = no_filter
44+
self.filter_mode = filter_mode
4445
self.no_logging_access = no_logging_access
4546
self.no_logging_block = no_logging_block
4647
self.ssl_inspect = ssl_inspect
@@ -58,6 +59,7 @@ def __init__(self, host, port, debug, access_log, block_log,
5859
if not self.no_logging_block:
5960
self.block_logger = configure_file_logger(block_log, "BlockLogger")
6061

62+
# pylint: disable=too-many-branches, too-many-statements
6163
def start(self):
6264
"""
6365
Starts the proxy server, initializes the filtering process if enabled,
@@ -71,6 +73,7 @@ def start(self):
7173
self.console_logger.debug("[*] debug = %s", self.debug)
7274
self.console_logger.debug("[*] html_403 = %s", self.html_403)
7375
self.console_logger.debug("[*] no_filter = %s", self.no_filter)
76+
self.console_logger.debug("[*] filter_mode = %s", self.filter_mode)
7477
self.console_logger.debug("[*] no_logging_access = %s", self.no_logging_access)
7578
self.console_logger.debug("[*] no_logging_block = %s", self.no_logging_block)
7679
self.console_logger.debug("[*] ssl_inspect = %s", self.ssl_inspect)
@@ -101,19 +104,21 @@ def start(self):
101104
except OSError as e:
102105
self.console_logger.debug("OS error deleting %s: %s", file_path, e)
103106

104-
if not os.path.exists(self.config_blocked_sites):
105-
with open(self.config_blocked_sites, "w", encoding='utf-8'):
106-
pass
107-
if not os.path.exists(self.config_blocked_url):
108-
with open(self.config_blocked_url, "w", encoding='utf-8'):
109-
pass
107+
if self.filter_mode == "local":
108+
if not os.path.exists(self.config_blocked_sites):
109+
with open(self.config_blocked_sites, "w", encoding='utf-8'):
110+
pass
111+
if not os.path.exists(self.config_blocked_url):
112+
with open(self.config_blocked_url, "w", encoding='utf-8'):
113+
pass
110114

111115
if not self.no_filter:
112116
self.filter_proc = multiprocessing.Process(
113117
target=filter_process,
114118
args=(
115119
self.queue,
116120
self.result_queue,
121+
self.filter_mode,
117122
self.config_blocked_sites,
118123
self.config_blocked_url
119124
)

0 commit comments

Comments
 (0)