Skip to content

Commit 393477f

Browse files
tiran and claude authored
feat: Add dynamic symbol extraction (#11)
Extract exported and imported symbols from the .dynsym section, with version info from .gnu.version. The feature is opt-in via the include_symbols setting to avoid a performance impact when it is not needed.

New public types: SymbolBinding and SymbolType enums, and a SymbolInfo dataclass with slots. ELFInfo gains exported_symbols and imported_symbols fields (None when disabled).

Fixes: #8
Signed-off-by: Christian Heimes <cheimes@redhat.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b79dbab commit 393477f

4 files changed

Lines changed: 315 additions & 11 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ jobs:
4949
with:
5050
name: coverage-${{ matrix.python }}
5151
path: .coverage.*
52+
include-hidden-files: true
5253
if-no-files-found: ignore
5354

5455
ruff:

src/elfdeps/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55
"ELFError",
66
"ELFInfo",
77
"SOInfo",
8+
"SymbolBinding",
9+
"SymbolInfo",
10+
"SymbolType",
811
"analyze_dirtree",
912
"analyze_elffile",
1013
"analyze_file",
@@ -27,6 +30,9 @@
2730
ELFAnalyzeSettings,
2831
ELFInfo,
2932
SOInfo,
33+
SymbolBinding,
34+
SymbolInfo,
35+
SymbolType,
3036
analyze_elffile,
3137
analyze_file,
3238
)

src/elfdeps/_elfdeps.py

Lines changed: 131 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,21 @@
77
"""
88

99
import dataclasses
10+
import enum
1011
import os
1112
import pathlib
1213
import stat
14+
import typing
1315

1416
from elftools.elf.constants import VER_FLAGS
1517
from elftools.elf.dynamic import DynamicSection
1618
from elftools.elf.elffile import ELFFile
17-
from elftools.elf.gnuversions import GNUVerDefSection, GNUVerNeedSection
19+
from elftools.elf.gnuversions import (
20+
GNUVerDefSection,
21+
GNUVerNeedSection,
22+
GNUVerSymSection,
23+
)
24+
from elftools.elf.sections import SymbolTableSection
1825

1926
from ._fileinfo import (
2027
LD_PREFIX,
@@ -24,6 +31,65 @@
2431
)
2532

2633

34+
class SymbolBinding(str, enum.Enum):
    """ELF dynamic symbol binding (STB_*)

    Members are ``str`` instances, so each one compares equal to its
    lowercase value (``SymbolBinding.WEAK == "weak"``).
    """

    # Global symbol
    GLOBAL = "global"
    # Weak symbol
    WEAK = "weak"
40+
41+
class SymbolType(str, enum.Enum):
42+
"""ELF dynamic symbol type (STT_*)"""
43+
44+
NOTYPE = "notype" # Symbol type is unspecified
45+
OBJECT = "object" # Symbol is a data object
46+
FUNC = "func" # Symbol is a code object
47+
COMMON = "common" # Symbol is a common data object
48+
TLS = "tls" # Symbol is thread-local data object
49+
GNU_IFUNC = "ifunc" # Symbol is indirect code object
50+
51+
52+
# Lookup from the "STB_*" binding name read from a symbol's st_info to the
# public SymbolBinding member. Each key is "STB_" + the member name, which
# yields exactly "STB_GLOBAL" and "STB_WEAK"; any other binding name is
# absent from the map.
_SYMBOL_BINDING_MAP: dict[str, SymbolBinding] = {
    f"STB_{member.name}": member for member in SymbolBinding
}
56+
57+
# Lookup from the "STT_*" type name read from a symbol's st_info to the
# public SymbolType member. Each primary key is "STT_" + the member name
# (STT_NOTYPE, STT_OBJECT, STT_FUNC, STT_COMMON, STT_TLS, STT_GNU_IFUNC).
_SYMBOL_TYPE_MAP: dict[str, SymbolType] = {
    f"STT_{member.name}": member for member in SymbolType
} | {
    # STT_GNU_IFUNC and STT_LOOS constants have the same int value
    "STT_LOOS": SymbolType.GNU_IFUNC,
}
67+
68+
69+
@dataclasses.dataclass(frozen=True, slots=True)
class SymbolInfo:
    """Dynamic symbol information

    name: symbol name (e.g. ``printf``)
    version: version tag (e.g. ``GLIBC_2.34``) or ``None`` if the symbol
        has no version
    binding: symbol binding (global or weak)
    type: symbol type (func, object, etc.)

    Equality and hashing use ``(name, version, type)``; ``binding`` is
    excluded (``compare=False``). Ordering uses the same fields, with an
    unversioned symbol sorting before any versioned symbol of the same
    name.
    """

    name: str
    version: str | None
    binding: SymbolBinding = dataclasses.field(compare=False)
    type: SymbolType

    def _sort_key(self) -> tuple[str, bool, str, SymbolType]:
        """Return a tuple that is always comparable, even without a version"""
        # The dataclass-generated ordering (order=True) compares the raw
        # field tuple, so ``None < "GLIBC_2.34"`` raises TypeError as soon
        # as two symbols share a name and only one has a version. The
        # boolean puts ``None`` first and keeps ``None`` distinct from "";
        # when both versions are strings the order is unchanged.
        return (self.name, self.version is not None, self.version or "", self.type)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, SymbolInfo):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other: object) -> bool:
        if not isinstance(other, SymbolInfo):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, SymbolInfo):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, SymbolInfo):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __str__(self) -> str:
        # "name@version" for versioned symbols, bare name otherwise
        if self.version:
            return f"{self.name}@{self.version}"
        return self.name

    def __repr__(self) -> str:
        # repr deliberately matches str (compact "name@version" form)
        return str(self)
91+
92+
2793
@dataclasses.dataclass(frozen=True, order=True)
2894
class SOInfo:
2995
"""Shared object information
@@ -83,6 +149,8 @@ class ELFInfo:
83149
marker: str = ""
84150
# useful extras
85151
runpath: list[str] | None = None
152+
exported_symbols: list[SymbolInfo] | None = None
153+
imported_symbols: list[SymbolInfo] | None = None
86154

87155

88156
@dataclasses.dataclass(frozen=True)
@@ -94,6 +162,7 @@ class ELFAnalyzeSettings:
94162
filter_soname: exclude sonames that don't match 'lib*.so*'
95163
require_interp: add dependency on ELF interpreter
96164
unique: remove duplicates
165+
include_symbols: extract individual dynamic symbols
97166
98167
Flag for collections (analyze tree, tarfile, zipfile)
99168
@@ -105,6 +174,7 @@ class ELFAnalyzeSettings:
105174
filter_soname: bool = False
106175
require_interp: bool = False
107176
unique: bool = True
177+
include_symbols: bool = False
108178
ignore_suffix: set[str] | frozenset[str] = frozenset(
109179
{".py", ".md", ".rst", ".sh", ".txt"}
110180
)
@@ -170,9 +240,12 @@ def __init__(
170240
requires=[],
171241
provides=[],
172242
is_exec=is_exec,
243+
exported_symbols=[] if settings.include_symbols else None,
244+
imported_symbols=[] if settings.include_symbols else None,
173245
)
174246
self.settings: ELFAnalyzeSettings = settings
175247
self._seen: set[tuple[bool, SOInfo]] = set()
248+
self._version_map: dict[int, str] = {}
176249

177250
def process(self) -> ELFInfo:
178251
"""Process ELF file
@@ -186,6 +259,8 @@ def process(self) -> ELFInfo:
186259
self.info.is_dso = ehdr["e_type"] == "ET_DYN"
187260
self.info.interp = self.process_prog_headers()
188261
self.process_sections()
262+
if self.settings.include_symbols:
263+
self.process_symbols()
189264

190265
# For DSOs which use the .gnu_hash section and don't have a .hash
191266
# section, we need to ensure that we have a new enough glibc.
@@ -318,8 +393,10 @@ def process_verdef(self, sec: GNUVerDefSection) -> None:
318393
# aux entry of verdef with VER_FLG_BASE is the soname
319394
if verdef["vd_flags"] & VER_FLAGS.VER_FLG_BASE:
320395
soname = aux.name
321-
elif soname is not None and not self.settings.soname_only:
322-
self.add_provides(soname, version=aux.name)
396+
else:
397+
self._version_map.setdefault(verdef["vd_ndx"], aux.name)
398+
if soname is not None and not self.settings.soname_only:
399+
self.add_provides(soname, version=aux.name)
323400

324401
def process_verneed(self, sec: GNUVerNeedSection) -> None:
325402
"""Process GNU version need section
@@ -329,13 +406,10 @@ def process_verneed(self, sec: GNUVerNeedSection) -> None:
329406
for verneed, vernaux in sec.iter_versions():
330407
soname: str = verneed.name
331408
for aux in vernaux:
332-
if (
333-
aux.name
334-
and self.gen_requires
335-
and soname
336-
and not self.settings.soname_only
337-
):
338-
self.add_requires(soname, version=aux.name)
409+
if aux.name:
410+
self._version_map[aux["vna_other"]] = aux.name
411+
if self.gen_requires and soname and not self.settings.soname_only:
412+
self.add_requires(soname, version=aux.name)
339413

340414
def process_dynamic(self, sec: DynamicSection) -> None:
341415
"""Process dynamic tags section
@@ -375,3 +449,50 @@ def process_prog_headers(self) -> str | None:
375449
return interp
376450
else:
377451
return None
452+
453+
def process_symbols(self) -> None:
    """Extract individual dynamic symbols from .dynsym

    Appends a ``SymbolInfo`` to ``self.info.imported_symbols`` for every
    undefined symbol (``st_shndx == "SHN_UNDEF"``) and to
    ``self.info.exported_symbols`` for every other symbol. Symbols are
    skipped when they have no name, non-default visibility, or a
    binding/type that is not in the lookup maps.

    Version names come from ``self._version_map``, so the version
    definition/need sections must have been processed before this runs.
    Returns without doing anything when the file has no usable ``.dynsym``
    symbol table.
    """
    dynsym_sec = self.elffile.get_section_by_name(".dynsym")
    # A section called ".dynsym" is not guaranteed to be a symbol table
    # object (presumably the case for debug-only files whose section
    # contents were stripped -- TODO confirm). typing.cast() performs no
    # runtime check, so test the type instead of failing later with
    # AttributeError on iter_symbols().
    if not isinstance(dynsym_sec, SymbolTableSection):
        return
    versym_candidate = self.elffile.get_section_by_name(".gnu.version")
    versym_sec = (
        versym_candidate
        if isinstance(versym_candidate, GNUVerSymSection)
        else None
    )
    # Internal invariant: both lists are created in __init__ whenever
    # include_symbols is enabled, which is the only case process() calls us.
    assert self.info.exported_symbols is not None
    assert self.info.imported_symbols is not None
    exported = self.info.exported_symbols
    imported = self.info.imported_symbols
    version_map = self._version_map
    # Low 15 bits of a .gnu.version entry are the version index; the top
    # bit is the "hidden" flag in the GNU symbol versioning ABI.
    version_index_mask: typing.Final = 0x7FFF
    for i, sym in enumerate(dynsym_sec.iter_symbols()):
        name: str = sym.name
        if not name:
            continue
        # skip non-default visibility (internal, hidden, protected)
        if sym["st_other"]["visibility"] != "STV_DEFAULT":
            continue
        binding = _SYMBOL_BINDING_MAP.get(sym["st_info"]["bind"])
        if binding is None:
            continue
        sym_type = _SYMBOL_TYPE_MAP.get(sym["st_info"]["type"])
        if sym_type is None:
            continue
        version: str | None = None
        if versym_sec is not None:
            try:
                ndx = versym_sec.get_symbol(i)["ndx"]
            except (IndexError, KeyError):
                # best effort: a missing versym entry means "no version"
                ndx = None
            # non-integer values (presumably symbolic VER_NDX_* names)
            # carry no entry in the version map
            if isinstance(ndx, int):
                version = version_map.get(ndx & version_index_mask)
        sym_info = SymbolInfo(
            name=name,
            version=version,
            binding=binding,
            type=sym_type,
        )
        if sym["st_shndx"] == "SHN_UNDEF":
            imported.append(sym_info)
        else:
            exported.append(sym_info)

0 commit comments

Comments
 (0)