import subprocess
import os
import sys
import re

### helper functions

def list_to_dict(f, l):
    """dict with elements of list as keys & as values transformed by f"""
    d = {}
    for el in l:
        d[el] = f(el)
    return d
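
# For example (illustrative):
#   list_to_dict(len, ["a", "bb"]) == {"a": 1, "bb": 2}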


def dict_remove_empty(d):
    """remove keys that have [] or {} as values"""
    new = {}
    for k, v in d.items():
        if not (v == [] or v == {}):
            new[k] = v
    return new


def identity(x):
    """identity function"""
    return x


def const(x):
    """(curried) constant function"""
    def f(y):
        return x
    return f


def memoized(cache, f, arg):
    """Memoizes a call to `f` with `arg` in the dict `cache`.

    Modifies the cache dict in place."""
    if arg in cache:
        return cache[arg]
    else:
        res = f(arg)
        cache[arg] = res
        return res
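
# Illustrative: repeated calls with the same argument hit the cache
# instead of calling f again:
#   cache = {}
#   memoized(cache, len, "abc")  # calls len("abc"), stores 3
#   memoized(cache, len, "abc")  # returns 3 from cache, len is not called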


### IO functions that find elf dependencies

# matches one line of the dynamic section output:
# a field key (e.g. NEEDED) followed by its value
_field_matcher = re.compile(b" ([A-Z0-9_]+) +(.*)$")


def read_dynamic_fields(elf_path):
    """Read the dynamic header fields from an elf binary.

    Args:
      elf_path: path to the elf binary (either absolute or relative to pwd)

    Returns:
      a list [(field_key, field_value)] where field_keys could appear multiple
      times (for example there's usually more than one NEEDED field).
    """
    res = subprocess.check_output([
        # force locale to C for stable output
        "env", "LC_ALL=C",
        "objdump",
        # specifying the section brings execution time down from 150ms to 10ms
        "--section=.dynamic",
        "--all-headers",
        elf_path
    ])
    to_end = res.split(b"Dynamic Section:\n")[1]
    # to first empty line
    dyn_section = to_end[: 1 + to_end.find(b"\n\n")]

    def read_dynamic_field(s):
        """return (field_key, field_value)"""
        return _field_matcher.match(s).groups()

    return list(map(read_dynamic_field, dyn_section.splitlines(True)))
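
# The returned list has this shape (the values are hypothetical;
# the exact fields depend on the binary):
#   [(b"NEEDED", b"libc.so.6"), (b"RUNPATH", b"$ORIGIN/../lib")]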


def __query_dynamic_fields(df, key):
    """takes a list of dynamic field tuples (key and value),
    where keys can appear multiple times, and returns a list of all
    values with the given key (in stable order)."""
    return [v for k, v in df if k == key]


def parse_runpath_dirs(elf_path, elf_dynamic_fields):
    """Parse the DT_RUNPATH entry from a list of elf dynamic fields.

    Returns:
      a list of
      { path: unmodified string from DT_RUNPATH
      , absolute_path: fully normalized, absolute path to dir }
    """
    fields = __query_dynamic_fields(elf_dynamic_fields, b"RUNPATH")
    if fields == []:
        return []
    assert len(fields) == 1
    val = fields[0]
    origin = os.path.dirname(elf_path)
    return [{ 'path': path,
              'absolute_path': os.path.abspath(path.replace("$ORIGIN", origin)) }
            for path in val.decode().strip(":").split(":")
            if path != ""]
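
# Illustrative: for a (hypothetical) elf at /opt/app/bin/tool whose
# DT_RUNPATH is "$ORIGIN/../lib", this returns
#   [{'path': '$ORIGIN/../lib', 'absolute_path': '/opt/app/lib'}]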


def parse_needed(elf_dynamic_fields):
    """Returns the list of DT_NEEDED entries for elf"""
    return [n.decode() for n in __query_dynamic_fields(elf_dynamic_fields, b"NEEDED")]


### Main utility

# cannot find dependency
LDD_MISSING = "MISSING"
# don't know how to search for dependency
LDD_UNKNOWN = "DUNNO"
# list of all errors for easy branching
LDD_ERRORS = [ LDD_MISSING, LDD_UNKNOWN ]


def _ldd(elf_cache, f, elf_path):
    """Same as `ldd` (below), except for an additional `elf_cache` argument,
    which is a dict needed for memoizing elf files that were already read.
    This is done because the elf reading operation is quite expensive
    and many files are referenced multiple times (e.g. glib.so)."""

    def search(rdirs, elf_libname):
        """search for elf_libname in the runpath dirs
        and return either the found entry or LDD_MISSING"""
        res = LDD_MISSING
        for rdir in rdirs:
            potential_path = os.path.join(rdir['absolute_path'], elf_libname)
            if os.path.exists(potential_path):
                res = {
                    'item': potential_path,
                    'found_in': rdir,
                }
                break
        return res

    def recurse(search_res):
        """Unfold the subtree of ELF dependencies for a `search` result"""
        if search_res == LDD_MISSING:
            return LDD_MISSING
        else:
            # we keep all other fields in search_res the same,
            # just item is the one that does the recursion.
            # This is the part that would normally be done by fmap.
            search_res['item'] = _ldd(elf_cache, f, search_res['item'])
            return search_res

    # (GNU) ld.so resolves any symlinks before searching for dependencies
    elf_realpath = os.path.realpath(elf_path)

    # memoized uses the cache to not repeat the I/O action
    # for the same elf files (same path)
    dyn_fields = memoized(
        elf_cache, read_dynamic_fields, elf_realpath
    )
    rdirs = parse_runpath_dirs(elf_realpath, dyn_fields)
    all_needed = parse_needed(dyn_fields)

    # if there are no runpath dirs we don't know where to search
    if rdirs == []:
        needed = list_to_dict(const(LDD_UNKNOWN), all_needed)
    else:
        needed = list_to_dict(
            lambda name: recurse(search(rdirs, name)),
            all_needed
        )

    result = {
        'runpath_dirs': rdirs,
        'needed': needed
    }
    # Here, f is applied to the result of the previous level of recursion
    return f(result)


def ldd(f, elf_path):
    """Follows DT_NEEDED ELF headers for elf by searching through the DT_RUNPATH dirs.

    DependencyInfo :
      { needed : dict(string, union(
          LDD_MISSING, LDD_UNKNOWN,
          {
            # the needed dependency
            item : a,
            # where the dependency was found
            found_in : RunpathDir
          }))
      # all runpath directories that were searched
      , runpath_dirs : [ RunpathDir ] }

    Args:
      f: DependencyInfo -> a
         modifies the results of each level
      elf_path: path to ELF file, either absolute or relative to current working dir

    Returns: a
    """
    elf_cache = {}
    return _ldd(elf_cache, f, elf_path)
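
# Illustrative usage (the binary path is hypothetical):
#   deps = ldd(identity, "./bin/myprog")
# Passing `identity` returns the full DependencyInfo tree unchanged.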


### Functions to pass to ldd

# Only use the current layer

def remove_matching_needed(d, re_matcher_absolute_path=None, re_matcher_path=None):
    """Destructively removes needed values from d['needed']
    if they match the given regex matcher.
    Doesn't remove LDD_ERRORS."""
    def pred(v):
        """return true if match"""
        if v in LDD_ERRORS:
            return False
        found_in = v['found_in']
        abs_match = re_matcher_absolute_path.match(found_in['absolute_path']) \
            if re_matcher_absolute_path else False
        match = re_matcher_path.match(found_in['path']) \
            if re_matcher_path else False
        return bool(abs_match or match)

    d['needed'] = {
        k: v for k, v in d['needed'].items()
        if not pred(v)
    }
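
# Illustrative: a function to pass to `ldd` that drops dependencies found
# in nix store paths (the regex and the binary path are examples):
#   def drop_store_paths(d):
#       remove_matching_needed(d, re.compile(r"/nix/store"))
#       return d
#   ldd(drop_store_paths, "./bin/myprog")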


def remove_matching_runpaths(d, re_matcher):
    """Destructively removes runpaths from d['runpath_dirs']
    if they match the given regex matcher."""
    d['runpath_dirs'] = [
        runp for runp in d['runpath_dirs']
        if not re_matcher.match(runp['absolute_path'])
    ]
    return d


def non_existing_runpaths(d):
    """Return a list of runpath_dirs that do not exist in the file system."""
    return [
        runp for runp in d['runpath_dirs']
        if not os.path.exists(runp['absolute_path'])
    ]
def unused_runpaths(d):
|
|
"""Return a list of runpath_dirs that were not used to find NEEDED dependencies."""
|
|
used = set()
|
|
for k, v in d['needed'].items():
|
|
if not v in LDD_ERRORS:
|
|
used.add(v['found_in']['absolute_path'])
|
|
return [
|
|
u for u in d['runpath_dirs']
|
|
if u['absolute_path'] not in used
|
|
]
|
|
|
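
# Illustrative: `unused_runpaths` can be passed to `ldd` directly to get
# the unused runpath dirs of the top-level binary (the path is hypothetical):
#   ldd(unused_runpaths, "./bin/myprog")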


# Also use the results of sub-layers

def collect_unused_runpaths(d):
    """Like `unused_runpaths`, but creates a deduplicated list of all unused runpaths
    for its dependencies instead of just returning them for the current layer.

    Returns:
      a dict of two fields:
      `mine` contains the unused runpath entries of the current binary under scrutiny
      `others` contains a flat dict of all .sos with unused runpath entries and a list of them for each .so
    """
    used = set()
    given = set(r['absolute_path'] for r in d['runpath_dirs'])
    prev = {}
    # TODO: use `unused_runpaths` here
    for k, v in d['needed'].items():
        if v not in LDD_ERRORS:
            used.add(v['found_in']['absolute_path'])
            prev[k] = v['item']
    unused = [
        u for u in given.difference(used)
        # leave out nix storepaths
        if not u.startswith("/nix/store")
    ]

    # Each layer doesn't know its own name,
    # so we return a list of unused dirs for this layer ('mine')
    # and a dict of all previous layers combined (name to list)
    def combine_unused(deps):
        res = {}
        for name, dep in deps.items():
            res.update(dep['others'])
            res[name] = dep['mine']
        return res

    return {
        'mine': unused,
        'others': combine_unused(prev),
    }
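
# Illustrative usage (the binary path is hypothetical):
#   report = ldd(collect_unused_runpaths, "./bin/myprog")
#   report['mine']    # unused runpath entries of the binary itself
#   report['others']  # dict from .so name to its unused runpath entries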