Skip to content

Commit 27cf3cd

Browse files
committed
extract: Prohibit specific files from being extracted
* Known DMCA risks. Change-Id: I64d683b49b27537b4b87af80840561da7197a6dc
1 parent 7c97424 commit 27cf3cd

4 files changed

Lines changed: 105 additions & 0 deletions

File tree

extract_utils/args.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@
8888
'--download-sha256',
8989
help='SHA256 of the download',
9090
)
91+
parser.add_argument(
92+
'--allow-prohibited-files',
93+
action='store_true',
94+
help='Allow extraction of normally-prohibited files',
95+
)
9196

9297
parser.add_argument(
9398
'source',
@@ -120,6 +125,7 @@ def __init__(self, args: argparse.Namespace):
120125
self.section: Optional[str] = args.section
121126
self.download_dir: Optional[str] = args.download_dir
122127
self.download_sha256: Optional[str] = args.download_sha256
128+
self.allow_prohibited_files: bool = args.allow_prohibited_files
123129

124130
if self.download_dir is None and DOWNLOAD_DIR_ENV_KEY in os.environ:
125131
self.download_dir = os.environ[DOWNLOAD_DIR_ENV_KEY]

extract_utils/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ def process_modules(self, source: Source):
121121
self.__args.no_cleanup,
122122
self.__args.extract_factory,
123123
self.__args.section,
124+
self.__args.allow_prohibited_files,
124125
)
125126
if not copied:
126127
all_copied = False

extract_utils/module.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
postprocess_carriersettings_fn_impl,
5151
postprocess_fn_type,
5252
)
53+
from extract_utils.prohibited_files import check_prohibited_file
5354
from extract_utils.source import DiskSource, Source
5455
from extract_utils.tools import android_root
5556
from extract_utils.utils import (
@@ -1031,6 +1032,7 @@ def process_file(
10311032
vendor_path: str,
10321033
is_firmware: bool,
10331034
kang: bool,
1035+
allow_prohibited_files: bool = False,
10341036
) -> bool:
10351037
file_path = source.get_file_copy_path(file, vendor_path)
10361038

@@ -1083,6 +1085,9 @@ def process_file(
10831085
)
10841086
return False
10851087

1088+
if not allow_prohibited_files:
1089+
check_prohibited_file(file.dst, file_path)
1090+
10861091
if kang:
10871092
self.process_kanged_file(
10881093
file,
@@ -1108,6 +1113,7 @@ def process_proprietary_files(
11081113
backup_source: Source,
11091114
kang: bool,
11101115
extract_factory: bool,
1116+
allow_prohibited_files: bool = False,
11111117
) -> bool:
11121118
all_copied = True
11131119

@@ -1131,6 +1137,7 @@ def process_proprietary_files(
11311137
vendor_path,
11321138
is_firmware,
11331139
kang,
1140+
allow_prohibited_files=allow_prohibited_files,
11341141
)
11351142

11361143
if not copied:
@@ -1155,6 +1162,7 @@ def process(
11551162
no_cleanup: bool,
11561163
extract_factory: bool,
11571164
section: Optional[str],
1165+
allow_prohibited_files: bool = False,
11581166
):
11591167
with tempfile.TemporaryDirectory() as backup_dir:
11601168
# Kang is usually combined with section, but allow them separately
@@ -1171,4 +1179,5 @@ def process(
11711179
backup_source,
11721180
kang,
11731181
extract_factory,
1182+
allow_prohibited_files=allow_prohibited_files,
11741183
)

extract_utils/prohibited_files.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# SPDX-FileCopyrightText: The LineageOS Project
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import sys
5+
from fnmatch import fnmatch
6+
from pathlib import Path
7+
from typing import Callable, List
8+
9+
from extract_utils.utils import Color, color_print
10+
11+
"""
12+
Prohibited blob policy
13+
14+
This module blocks extraction of prohibited files including:
15+
16+
- Megvii / Face++ (face recognition, beautification, etc.)
17+
- SenseTime license files (e.g. license.lic)
18+
19+
These are disallowed due to licensing, redistribution restrictions,
20+
and more importantly DMCA takedown risk.
21+
22+
To extend this policy:
23+
- Add (fnmatch pattern, label, checker function) tuples to PROHIBITED_CHECKS
24+
"""
25+
26+
27+
def _check_sensetime(data: bytes) -> bool:
28+
return any(x in data for x in [b'com.sensetime', b'SenseTime'])
29+
30+
31+
def _check_megvii(data: bytes) -> bool:
32+
return any(x in data for x in [b'megface', b'megvii', b'MEGVII'])
33+
34+
35+
# Table of prohibited-file rules. Each entry is a
# (fnmatch pattern, label, content checker) triple. The pattern is matched
# against the lowercased basename; the checker is only given the file's
# bytes when the name matches, so non-matching files are never read.
PROHIBITED_CHECKS: List[tuple[str, str, Callable[[bytes], bool]]] = [
    ('*.lic', 'SenseTime', _check_sensetime),
] + [
    (pattern, 'Megvii/Face++', _check_megvii)
    for pattern in (
        'libmegface*',
        'libmegjpeg*',
        'libmegskeleton*',
        'libmegvii*',
        'libmgbeauty*',
        'libmgface*',
    )
]
46+
47+
48+
# Upper bound on how many leading bytes of a candidate file are scanned
# for vendor signatures.
_MAX_SCAN_BYTES = 4 * 1024 * 1024


def check_prohibited_file(dst: str, file_path: str) -> None:
    """Exit the process if the file matches a prohibited-file rule.

    The lowercased basename of ``dst`` is matched against each fnmatch
    pattern in ``PROHIBITED_CHECKS``. Only when the name matches is the
    file at ``file_path`` opened, and at most ``_MAX_SCAN_BYTES`` of its
    leading bytes are handed to the rule's checker.

    Args:
        dst: Destination path of the file; used for name matching and in
            the error message.
        file_path: Path the file contents are read from.

    Raises:
        SystemExit: With status 1, after printing the policy message, when
            a rule's checker reports a match.
    """
    display_name = Path(dst).name
    basename = display_name.lower()

    for pattern, label, checker in PROHIBITED_CHECKS:
        if not fnmatch(basename, pattern):
            continue

        try:
            # Context manager so the handle is closed even if read() fails.
            with open(file_path, 'rb') as blob:
                data = blob.read(_MAX_SCAN_BYTES)
        except OSError:
            # Best effort: a file that cannot be read is not treated as
            # prohibited by this rule.
            continue

        if not checker(data):
            continue

        color_print(
            f'ERROR: Prohibited file detected: {dst}',
            color=Color.RED,
        )
        color_print(
            f' Reason: {label} binary signature matched in {display_name}',
            color=Color.RED,
        )
        print()
        color_print('Policy violation:', color=Color.RED)
        print(
            """The following categories of files are not allowed:

- Megvii / Face++ related libraries and assets:
(e.g. lib*{M,m}eg*.so, lib*{M,m}g*.so, *{M,m}egvii*)

- SenseTime license artifacts:
(e.g. license.lic)

These files are not permitted in LineageOS repositories/builds.

Please look for available shims, or develop one to mitigate these dependencies.

To extract them anyway for a private/local build, re-run with:

extract-files.py --allow-prohibited-files [...]
"""
        )
        sys.exit(1)

0 commit comments

Comments
 (0)