Skip to content

Commit a86d623

Browse files
committed
use pacfiles instead of alpmfiles if available
Huge performance improvements. Before (alpmfiles): nvchecker.scope: Consumed 2.015s CPU time over 2.196s wall clock time, 942M memory peak, 113.4M read from disk. After (pacfiles): nvchecker.scope: Consumed 342ms CPU time over 361ms wall clock time, 38.6M memory peak, 12.2M read from disk. The figures above are for just one package. pacfiles also avoids OOM when running concurrently.
1 parent ebbf8cf commit a86d623

7 files changed

Lines changed: 131 additions & 13 deletions

File tree

lilac2/lilacyaml.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,15 @@ def load_lilac_yaml(dir: Path) -> dict[str, Any]:
6767

6868
return conf
6969

70-
def load_managed_lilacinfos(repodir: Path) -> tuple[LilacInfos, dict[str, ExcInfo]]:
70+
def load_managed_lilacinfos(
71+
repodir: Path, use_pacfiles: bool = False,
72+
) -> tuple[LilacInfos, dict[str, ExcInfo]]:
7173
infos: LilacInfos = {}
7274
errors = {}
7375

7476
for x in iter_pkgdir(repodir):
7577
try:
76-
info = load_lilacinfo(x)
78+
info = load_lilacinfo(x, use_pacfiles=use_pacfiles)
7779
if not info.managed:
7880
continue
7981
if info.time_limit_hours < 0:
@@ -84,10 +86,10 @@ def load_managed_lilacinfos(repodir: Path) -> tuple[LilacInfos, dict[str, ExcInf
8486

8587
return infos, errors
8688

87-
def load_lilacinfo(dir: Path) -> LilacInfo:
89+
def load_lilacinfo(dir: Path, use_pacfiles: bool = False) -> LilacInfo:
8890
yamlconf = load_lilac_yaml(dir)
8991
if update_on := yamlconf.get('update_on'):
90-
update_ons, throttle_info = parse_update_on(update_on)
92+
update_ons, throttle_info = parse_update_on(update_on, use_pacfiles=use_pacfiles)
9193
else:
9294
update_ons = []
9395
throttle_info = {}
@@ -114,6 +116,7 @@ def expand_alias_arg(value: str) -> str:
114116

115117
def parse_update_on(
116118
update_on: list[dict[str, Any]],
119+
use_pacfiles: bool = False,
117120
) -> tuple[NvEntries, dict[int, datetime.timedelta]]:
118121
ret_update: NvEntries = []
119122
ret_throttle = {}
@@ -149,6 +152,9 @@ def parse_update_on(
149152
if source == 'alpm' or source == 'alpmfiles':
150153
entry.setdefault('dbpath', str(PACMAN_DB_DIR))
151154

155+
if use_pacfiles and source == 'alpmfiles':
156+
entry['source'] = 'pacfiles'
157+
152158
ret_update.append(entry)
153159

154160
return ret_update, ret_throttle

lilac2/pkgbuild.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .const import _G, OFFICIAL_REPOS
1818
from .cmd import UNTRUSTED_PREFIX
1919
from .typing import PkgVers
20+
from .tools import has_pacfiles
2021

2122
logger = logging.getLogger(__name__)
2223
_official_packages: Dict[str, int] = {}
@@ -53,8 +54,11 @@ def _save_timed_dict(
5354
data_str = ''.join(f'{k} {v}\n' for k, v in data.items())
5455
safe_overwrite(str(path), data_str, mode='w')
5556

56-
def update_pacmandb(dbpath: Path, pacman_conf: Optional[str] = None,
57-
*, quiet: bool = False) -> None:
57+
def update_pacmandb(
58+
dbpath: Path, pacman_conf: Optional[str] = None, *,
59+
quiet: bool = False,
60+
update_pacfiles: bool = False,
61+
) -> None:
5862
stdout = subprocess.DEVNULL if quiet else None
5963

6064
for update_arg in ['-Sy', '-Fy']:
@@ -73,11 +77,18 @@ def update_pacmandb(dbpath: Path, pacman_conf: Optional[str] = None,
7377
else:
7478
p.check_returncode()
7579

76-
def update_data(pacman_conf: Optional[str],
77-
*, quiet: bool = False) -> None:
80+
if update_pacfiles:
81+
cmd = ['pacfiles', '--dbpath', dbpath, '--update-db']
82+
subprocess.check_call(cmd, stdout = stdout)
83+
84+
def update_data(
85+
pacman_conf: Optional[str], *,
86+
quiet: bool = False, update_pacfiles: bool = False,
87+
) -> None:
7888
from .const import PACMAN_DB_DIR
7989
dbpath = PACMAN_DB_DIR
80-
update_pacmandb(dbpath, pacman_conf, quiet=quiet)
90+
update_pacmandb(dbpath, pacman_conf,
91+
quiet=quiet, update_pacfiles=update_pacfiles)
8192

8293
now = int(time.time())
8394
deadline = now - 90 * 86400
@@ -177,4 +188,4 @@ def _get_package_version(srcinfo: List[str]) -> PkgVers:
177188
if __name__ == '__main__':
178189
import sys
179190
conf = sys.argv[1] if len(sys.argv) == 2 else None
180-
update_data(conf)
191+
update_data(conf, update_pacfiles=has_pacfiles())

lilac2/repo.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
from .mail import MailService
2121
from .packages import get_built_package_files
22-
from .tools import ansi_escape_re
22+
from .tools import ansi_escape_re, has_pacfiles
2323
from . import lilacyaml, intl
2424
from .typing import LilacMod, Maintainer, LilacInfos, LilacInfo
2525
from .nomypy import BuildResult # type: ignore
@@ -352,7 +352,8 @@ def manages(self, dep: Dependency) -> bool:
352352
return dep.pkgdir.name in self.lilacinfos
353353

354354
def load_managed_lilac_and_report(self) -> dict[str, tuple[str, ...]]:
355-
self.lilacinfos, errors = lilacyaml.load_managed_lilacinfos(self.repodir)
355+
self.lilacinfos, errors = lilacyaml.load_managed_lilacinfos(
356+
self.repodir, use_pacfiles=has_pacfiles())
356357
failed: dict[str, tuple[str, ...]] = {p: () for p in errors}
357358
l10n = intl.get_l10n('mail')
358359
for name, exc_info in errors.items():

lilac2/tools.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ def get_avail_memory() -> int:
5151
return int(l.split()[1]) * 1024
5252
return 10 * 1024 ** 3
5353

54+
# Cached result of the pacfiles probe; None means "not probed yet".
_HAS_PACFILES = None


def has_pacfiles() -> bool:
    """Report whether a working ``pacfiles`` command is available.

    The probe runs ``pacfiles --help`` at most once; its outcome is cached in
    the module-level ``_HAS_PACFILES`` so later calls do not spawn a process.

    Returns:
        True if ``pacfiles --help`` exited with status 0; False if it exited
        with any other status or could not be started at all.
    """
    global _HAS_PACFILES
    if _HAS_PACFILES is None:
        try:
            probe = subprocess.run(
                ['pacfiles', '--help'], stdout=subprocess.DEVNULL,
            )
        except OSError:
            # subprocess.run raises (e.g. FileNotFoundError) when the binary
            # is missing or not executable. That is exactly the "pacfiles is
            # unavailable" answer, not an error to propagate to callers.
            _HAS_PACFILES = False
        else:
            _HAS_PACFILES = probe.returncode == 0
    return _HAS_PACFILES
62+
5463
if __name__ == '__main__':
5564
cpu = get_running_task_cpu_ratio()
5665
mem = get_avail_memory()

lilac2/web.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from aiohttp import web
2+
3+
async def get_current(request):
    """Handle a request for the current build state (not implemented yet)."""
    raise NotImplementedError()
5+
6+
async def watch_update(request):
    """Handle a request to watch build updates (not implemented yet)."""
    raise NotImplementedError()
8+
9+
def setup_app(app):
    """Register this module's GET handlers on ``app.router``."""
    routes = (
        ('/building/current', get_current),
        ('/building/watch', watch_update),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)
12+
13+
def main():
    """Parse command-line options, configure logging and serve the web app.

    Options: ``--port`` (int, default 9008), ``--ip`` (default 127.0.0.1)
    and ``--loglevel`` (one of debug/info/warn/error, default info).
    """
    import argparse

    from .vendor.nicelogger import enable_pretty_logging

    argparser = argparse.ArgumentParser(
        description='HTTP services for build.archlinuxcn.org',
    )
    argparser.add_argument(
        '--port', default=9008, type=int,
        help='port to listen on',
    )
    argparser.add_argument(
        '--ip', default='127.0.0.1',
        help='address to listen on',
    )
    argparser.add_argument(
        '--loglevel', default='info',
        choices=['debug', 'info', 'warn', 'error'],
        help='log level',
    )
    opts = argparser.parse_args()

    # The logging helper is given the level name in upper case.
    level_name = opts.loglevel.upper()
    enable_pretty_logging(level_name)

    application = web.Application()
    setup_app(application)

    web.run_app(application, host=opts.ip, port=opts.port)
36+
37+
if __name__ == '__main__':
38+
main()

lilac2/workerman.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from .typing import PkgToBuild, Rusages
1111
from .cmd import git_pull_override
12+
from .tools import has_pacfiles
1213

1314
logger = logging.getLogger(__name__)
1415

@@ -154,7 +155,7 @@ def prepare_batch(
154155
) -> None:
155156
from . import pkgbuild
156157
logger.info('[%s] updating pacman databases', self.name)
157-
pkgbuild.update_data(pacman_conf)
158+
pkgbuild.update_data(pacman_conf, update_pacfiles=has_pacfiles())
158159

159160
@override
160161
def finish_batch(self) -> None:

nvchecker_source/pacfiles.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright (c) 2023 Pekka Ristola <pekkarr [at] protonmail [dot] com>, et al.
2+
3+
from asyncio import create_subprocess_exec
4+
from asyncio.subprocess import PIPE
5+
import re
6+
from typing import Tuple, List
7+
8+
from nvchecker.api import GetVersionError
9+
10+
async def get_files(info: Tuple[str, str]) -> List[str]:
    """List the files of a package by running the ``pacfiles`` command.

    Args:
        info: a ``(dbpath, pkg)`` pair; ``dbpath`` is passed to ``--dbpath``
            and ``pkg`` is the package argument.

    Returns:
        The lines of the command's standard output.

    Raises:
        GetVersionError: if the command exits with a non-zero status; the
            command line, decoded output streams and return code are attached.
    """
    dbpath, pkg = info
    # pacfiles is faster and memory-efficient
    cmd = ['pacfiles', '-Flq', '--dbpath', dbpath, pkg]

    proc = await create_subprocess_exec(*cmd, stdout=PIPE, stderr=PIPE)
    out, err = await proc.communicate()

    if proc.returncode != 0:
        raise GetVersionError(
            'pacfiles failed to get file list',
            pkg=pkg,
            cmd=cmd,
            stdout=out.decode(errors='replace'),
            stderr=err.decode(errors='replace'),
            returncode=proc.returncode,
        )

    return out.decode().splitlines()
29+
30+
async def get_version(name, conf, *, cache, **kwargs):
    """Derive a version from the first package file matching a regex.

    Reads from ``conf``: ``pkgname`` (required), ``repo`` (optional; when set
    the package is looked up as ``repo/pkgname``), ``dbpath`` (default
    ``/var/lib/pacman``), ``filename`` (required regex, at most one group) and
    ``strip_dir`` (default False; match only the part after the last ``/``).

    Returns:
        The regex's single group for the first file that fully matches, or
        the matched name itself when the regex has no group.

    Raises:
        GetVersionError: if the regex has more than one group or no file
            matches.
    """
    pkgname = conf['pkgname']
    repo = conf.get('repo')
    pkg = pkgname if repo is None else f'{repo}/{pkgname}'
    dbpath = conf.get('dbpath', '/var/lib/pacman')

    pattern = re.compile(conf['filename'])
    if pattern.groups > 1:
        raise GetVersionError('multi-group regex')
    strip_dir = conf.get('strip_dir', False)

    # The file list is fetched through the cache, keyed by (dbpath, pkg).
    files = await cache.get((dbpath, pkg), get_files)

    for path in files:
        if strip_dir:
            candidate = path.rsplit('/', 1)[-1]
        else:
            candidate = path
        found = pattern.fullmatch(candidate)
        if not found:
            continue
        captured = found.groups()
        if len(captured) > 0:
            return captured[0]
        return candidate

    raise GetVersionError('no file matches specified regex')
51+
52+

0 commit comments

Comments
 (0)