tools: symalyzer

Signed-off-by: David Lamparter <equinox@diac24.net>
This commit is contained in:
David Lamparter 2019-11-27 23:19:10 +01:00
parent 45e69fa8f7
commit ba7eb55ec6
3 changed files with 734 additions and 0 deletions

4
.gitignore vendored
View File

@ -33,6 +33,10 @@
/Makefile
/Makefile.in
/symalyzer_report.html
/jquery-3.4.1.min.js
/jquery-3.4.1.min.js.tmp
### autoconf/automake subdir stuff
.deps

347
tools/symalyzer.html Normal file
View File

@ -0,0 +1,347 @@
<html>
<!--
- 2019 by David Lamparter, placed in public domain
-->
<head>
<title>Symalyzer report</title>
<style type="text/css">
html {
margin:auto;
max-width:70em;
font-family:Fira Sans, sans-serif;
}
dl {
display:grid;
grid-template-columns: 1.4em 1.4em 1fr 1fr;
grid-auto-rows: auto;
}
dt.dir {
background-color:#ff8;
color:#000;
border:1px solid #000;
border-bottom:2px solid #000;
font-size:14pt;
padding:2pt 15pt;
margin:0pt;
margin-top:10pt;
grid-column:1 / -1;
}
dt.file {
background-color:#ffa;
color:#000;
border-bottom:1px solid #000;
font-size:12pt;
padding:2pt 15pt;
margin:5pt 0pt;
grid-column:1 / -1;
}
dt.file.filehidden {
background-color:#ffc;
font-size:10pt;
padding:0.5pt 15pt;
margin-bottom:-5pt;
}
dd {
display:inline-block;
vertical-align:middle;
margin:0;
}
dd.symtype {
grid-column:1;
border:1px solid #666;
text-align:center;
}
dd.symklass {
grid-column:2;
border:1px solid #666;
text-align:center;
}
dd.symname {
grid-column:3;
font-family:monospace;
padding:0 0.5em;
padding-top:2px;
border-bottom:1px dashed #ccc;
}
dd.symloc {
grid-column:4;
padding:0 0.5em;
border-bottom:1px dashed #ccc;
}
.symloc-unknown {
font-style:italic;
color:#aaa;
}
.symtype.sym-static {
background-color:#cf4;
color:#000;
}
.symtype.sym-extrastatic {
background-color:#fe8;
color:#000;
}
.symtype.sym-liblocal {
background-color:#fc6;
color:#000;
}
.symklass.symk-T {
background-color:#ddd;
color:#000;
}
.symklass.symk-B,
.symklass.symk-C,
.symklass.symk-D {
background-color:#faa;
color:#000;
}
.symklass.symk-R {
background-color:#fd8;
color:#000;
}
.symtype.sym-api {
background-color:#d9f;
color:#000;
}
.symname.sym-api,
.symloc.sym-api {
background-color:#f8e8ff;
}
dt.file.dirhidden,
dd.dirhidden {
display:none;
}
dd.filehidden {
display:none;
}
dd.symhidden {
display:none;
}
ul {
font-size:10pt;
}
li {
margin-bottom:6pt;
text-indent:-2.5em;
margin-left:2.5em;
}
code {
background-color:#eee;
color:#060;
text-decoration:underline;
}
b.symtype,
b.symklass {
display:inline-block;
text-align:center;
border:1px solid #666;
width:1.4em;
text-indent:0;
}
</style>
<script src="jquery-3.4.1.min.js"></script>
<script>
/*
 * Show (visible === true) or hide (visible === false) a directory heading
 * and everything listed under it.  "Under it" means all following siblings
 * up to the first one that is neither a <dd> nor a <dt class="file">.
 * Hiding is done purely by putting the "dirhidden" class on each element.
 */
function dirtoggle(elem, visible) {
    var hide = !visible;

    elem.toggleClass("dirhidden", hide);
    for (var sib = elem.next(); sib.is("dd, dt.file"); sib = sib.next()) {
        sib.toggleClass("dirhidden", hide);
    }
}
/*
 * Show (visible === true) or hide (visible === false) a file heading and
 * the run of <dd> siblings immediately following it.  Hiding is done purely
 * by putting the "filehidden" class on each element.
 */
function filetoggle(elem, visible) {
    var hide = !visible;

    elem.toggleClass("filehidden", hide);
    for (var sib = elem.next(); sib.is("dd"); sib = sib.next()) {
        sib.toggleClass("filehidden", hide);
    }
}
/*
 * Show (visible === true) or hide (visible === false) one symbol row.
 * elem is the row's first cell; the following siblings carrying one of the
 * classes symklass / symname / symloc are treated as the rest of the row.
 * Hiding is done purely by putting the "symhidden" class on each cell.
 */
function symtoggle(elem, visible) {
    var hide = !visible;

    elem.toggleClass("symhidden", hide);
    for (var cell = elem.next();
         cell.is(".symklass, .symname, .symloc");
         cell = cell.next()) {
        cell.toggleClass("symhidden", hide);
    }
}
$(document).ready(function(){
    /* un-hide every symbol row whose type cell is not "A" and whose class
     * cell text occurs in klassChars */
    function showByKlass(klassChars) {
        $("dd.symtype").each(function(){
            var typeCell = $(this);
            if (typeCell.text() === "A") {
                return;
            }
            if (klassChars.indexOf(typeCell.next().text()) >= 0) {
                symtoggle(typeCell, true);
            }
        });
    }

    /* apply one of the toggle functions to every element matching selector */
    function toggleAll(selector, toggleFn, visible) {
        $(selector).each(function(){
            toggleFn($(this), visible);
        });
    }

    /* directory headings: click flips the state; all start out collapsed */
    $("dt.dir").each(function(){
        var heading = $(this);
        heading.click(function(){
            dirtoggle(heading, heading.is(".dirhidden"));
        });
        dirtoggle(heading, false);
    });

    /* file headings: click flips the state; unlike directories they are
     * not collapsed on load */
    $("dt.file").each(function(){
        var heading = $(this);
        heading.click(function(){
            filetoggle(heading, heading.is(".filehidden"));
        });
    });

    $("#f_hide_all").click(function(){
        toggleAll("dt.file", filetoggle, false);
    });
    $("#f_show_all").click(function(){
        toggleAll("dt.file", filetoggle, true);
    });
    $("#s_show_all").click(function(){
        toggleAll("dd.symtype", symtoggle, true);
    });
    $("#s_hide_all").click(function(){
        toggleAll("dd.symtype", symtoggle, false);
    });
    $("#s_show_vars").click(function(){
        showByKlass("BbCDdGgnRrSs");
    });
    $("#s_show_funcs").click(function(){
        showByKlass("Tt");
    });
    $("#s_show_api").click(function(){
        toggleAll("dd.sym-api", symtoggle, true);
    });

    /* the button table is display:none in the markup, so it only appears
     * when this script actually ran */
    $("#jsbuttons").show();
});
</script>
</head>
<body>
<table style="display:none" id="jsbuttons">
<tr><td>Files</td><td>
<button type="button" id="f_hide_all">Hide all</button>
<button type="button" id="f_show_all">Show all</button>
</td></tr>
<tr><td>Symbols</td><td>
<button type="button" id="s_hide_all">Hide all</button>
<button type="button" id="s_show_all">Show all</button><br>
<button type="button" id="s_show_vars">Show variables</button>
<button type="button" id="s_show_funcs">Show functions</button>
<button type="button" id="s_show_api">Show module/API usage</button>
</td></tr>
</table>
<div style="display:grid;grid-template-columns:1fr 1fr;">
<ul>
<li><b class="symtype sym-static">S</b> means the symbol is not used outside its own file.
It could either be completely unused or used locally. It might be appropriate to make it
<code>static</code>.</li>
<li><b class="symtype sym-extrastatic">Z</b> means the symbol is not used outside its own file,
and it's not visible to the outside of the library or daemon (i.e. ELF hidden linkage.)
It could still be completely unused, or used within the library. It might be appropriate to make it
<code>static</code>.</li>
<li><b class="symtype sym-liblocal">L</b> means the symbol is used from other files in the library,
but not from outside. It might be appropriate to make it <code>DSO_LOCAL</code>.</li>
<li><b class="symtype sym-api">A</b> means the symbol is used from some other file, most likely a
loadable module. Note this is only flagged for symbols in executable files, not libraries.</li>
</ul>
<ul>
<li><b class="symklass symk-T">T</b> are normal functions ("program <u>T</u>ext")</li>
<li style="text-indent:0;margin-left:0">
<b class="symklass symk-B">B</b> (<u>B</u>SS),<br>
<b class="symklass symk-C">C</b> (<u>C</u>ommon),<br>
<b class="symklass symk-D">D</b> (<u>D</u>ata)<br>
are various types of writable global variables</li>
<li><b class="symklass symk-R">R</b> are read-only global variables ("<u>R</u>odata")</li>
</ul>
</div>
<dl>
{%- for subdir, subreport in dirgroups.items()|sort %}
<dt class="dir">{{ subdir }}</dt>
{%- for obj, reports in subreport.items()|sort %}
<dt class="file">{{ obj }}</dt>
{%- for report in reports|sort %}
{#- <dd class="{{ report.idlong }}"> #}
<dd class="sym-{{ report.idlong }} symtype" title="{{ report.title }}">{{ report.idshort }}</dd>
<dd class="sym-{{ report.idlong }} symk-{{ report.sym.klass }} symklass" title="{{ klasses.get(report.sym.klass, '???') }}">{{ report.sym.klass }}</dd>
<dd class="sym-{{ report.idlong }} symname">{{ report.sym.name }}</dd>
{% if report.sym.loc %}
<dd class="sym-{{ report.idlong }} symloc">{{ report.sym.loc }}</dd>
{% else %}
<dd class="sym-{{ report.idlong }} symloc symloc-unknown">unknown</dd>
{% endif %}
{#- </dd> #}
{%- endfor %}
{%- endfor %}
{%- endfor %}
</dl>
</body>
</html>

383
tools/symalyzer.py Executable file
View File

@ -0,0 +1,383 @@
#!/usr/bin/python3
#
# 2019 by David Lamparter, placed in public domain
#
# This tool generates a report of possibly unused symbols in the build. It's
# particularly useful for libfrr to find bitrotting functions that aren't even
# used anywhere anymore.
#
# Note that the tool can't distinguish between "a symbol is completely unused"
# and "a symbol is used only in its file" since file-internal references are
# invisible in nm output. However, the compiler will warn you if a static
# symbol is unused.
#
# This tool has only been tested on Linux; it probably needs `nm` from GNU
# binutils (as opposed to BSD `nm`). It could use pyelftools instead, but that
# would be a lot of extra work.
#
# This is a developer tool, please don't put it in any packages :)
import sys, os, subprocess
import re
from collections import namedtuple
class MakeVars(object):
    '''
    Caching lookup of Makefile variables.

    makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile
    '''

    def __init__(self):
        # variable name -> value, filled in by getvars()
        self._data = dict()

    def getvars(self, varlist):
        '''
        get a batch list of variables from make.  faster than individual calls.

        Runs `make -s VARFD=<fd> shvar-<NAME> ...` in the current directory,
        handing make the write end of a pipe, and caches every NAME=value
        row that comes back on the read end.
        '''
        rdfd, wrfd = os.pipe()
        shvars = ['shvar-%s' % s for s in varlist]
        try:
            make = subprocess.Popen(['make', '-s', 'VARFD=%d' % wrfd] + shvars,
                                    pass_fds = [wrfd])
        except Exception:
            # nobody will ever read from the pipe, don't leak its read end
            os.close(rdfd)
            raise
        finally:
            # our copy of the write end has to go away in any case; while it
            # is open the read below would never see EOF
            os.close(wrfd)

        # read() on the buffered reader returns everything up to EOF
        with os.fdopen(rdfd, 'rb') as rdf:
            data = rdf.read()
        make.wait()

        self._parse_output(data)

    def _parse_output(self, data):
        '''
        store the NAME=value rows from raw make output (bytes) in the cache

        Rows without a "=" (e.g. when make printed nothing at all) are
        ignored.  The first and last character of each value are dropped;
        presumably these are quotes added by the shvar-% make rule.
        '''
        for row in data.decode('US-ASCII').strip().split('\n'):
            if '=' not in row:
                continue
            k, v = row.split('=', 1)
            self._data[k] = v[1:-1]

    def __getitem__(self, k):
        '''
        return the value of make variable k, asking make if it is not cached

        Raises KeyError if make did not report the variable.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k]

    def get(self, k, defval = None):
        '''
        like __getitem__, but return defval if the variable is empty or
        was not reported by make
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data.get(k) or defval
# One row of `nm -f sysv` output, prefixed with the link target and the
# object file the row was read from, and suffixed with the source location.
SymRowBase = namedtuple('SymRow', ['target', 'object', 'name', 'address', 'klass', 'typ', 'size', 'line', 'section', 'loc'])


class SymRow(SymRowBase):
    '''
    wrapper around a line of `nm` output
    '''

    # matches targets that are libraries (".../libfoo.so" or ".../libfoo.la")
    lib_re = re.compile(r'/lib[^/]+\.(so|la)$')

    def is_global(self):
        '''
        True if the nm class letter denotes a global symbol: any uppercase
        letter, or one of the lowercase letters u, v, w.
        '''
        # exact membership test; a substring test against 'uvw' would also
        # accept the empty string
        return self.klass.isupper() or self.klass in ('u', 'v', 'w')

    def scope(self):
        '''
        return the target name for symbols in non-library targets, or None
        ("global") for symbols in library targets
        '''
        if self.lib_re.search(self.target) is None:
            return self.target
        # "global"
        return None

    def is_export(self):
        '''
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        '''
        if self.name in ('main', 'frr_inet_ntop', '_libfrr_version'):
            return True
        return self.name.startswith(('_hook_', 'qobj_t_'))
class Symbols(dict):
    '''
    dict of all symbols in all libs & executables

    Maps symbol name -> Symbols.Symbol.  Call load() once per link target,
    then evaluate() to fill in `extsyms` (names that are referenced but
    never defined) and `report` (object file -> list of ReportSym).
    '''

    # header line nm prints in front of each file's symbol table
    from_re = re.compile(r'^Symbols from (.*?):$')
    # libtool object / archive names: "dir/foo.lo", "dir/libfoo.la"
    lt_re = re.compile(r'^(.*/)([^/]+)\.l[oa]$')

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        '''
        base class for report entries; subclasses only set idshort, idlong
        and title.  Entries sort by symbol name.
        '''
        def __init__(self, sym):
            self.sym = sym

        def __repr__(self):
            return '<%-25s %-40s [%s]>' % (self.__class__.__name__ + ':', self.sym.name, self.sym.loc)

        def __lt__(self, other):
            return self.sym.name.__lt__(other.sym.name)

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = 'Z'
        idlong = 'extrastatic'
        title = "symbol is local to library, but only used in its source file (make static?)"

    class ReportSymCouldBeStatic(ReportSym):
        idshort = 'S'
        idlong = 'static'
        title = "symbol is only used in its source file (make static?)"

    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = 'L'
        idlong = 'liblocal'
        title = "symbol is only used inside of library"

    class ReportSymModuleAPI(ReportSym):
        idshort = 'A'
        idlong = 'api'
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        '''
        everything known about one symbol name: its definitions, grouped by
        scope, and the rows that reference it
        '''
        def __init__(self, name):
            super().__init__()
            self.name = name
            # scope (as returned by row.scope()) -> list of defining rows
            self.defs = {}
            # rows where the symbol is undefined, i.e. used
            self.refs = []

        def process(self, row):
            '''
            file one nm row as either a reference or a definition
            '''
            scope = row.scope()
            if row.section == '*UND*':
                self.refs.append(row)
            else:
                self.defs.setdefault(scope, []).append(row)

        def evaluate(self, out):
            '''
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            `out` is the owning Symbols instance; results are added to
            out.extsyms and out.report.
            '''
            if not self.defs:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [symdef for symdef in symdefs if symdef.section == '*COM*']
                proper_defs = [symdef for symdef in symdefs if symdef.section != '*COM*']

                if len(proper_defs) > 1:
                    # more than one real definition in the same scope; dump
                    # what is known and give up on this symbol entirely
                    print(self.name, ' DUPLICATE')
                    print('\tD: %s %s' % (scopename, '\n\t\t'.join([repr(s) for s in symdefs])))
                    for syms in self.refs:
                        print('\tR: %s' % (syms, ))
                    return

                if proper_defs:
                    primary_def = proper_defs[0]
                elif common_defs:
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                # defined in a non-library target, but referenced from a
                # different target that is a libtool library (".la")
                if scopename is not None and any(
                        ref.target != primary_def.target and ref.target.endswith('.la')
                        for ref in self.refs):
                    outobj = out.report.setdefault(primary_def.object, [])
                    outobj.append(out.ReportSymModuleAPI(primary_def))

                if not self.refs:
                    # no other object file uses this symbol at all
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(out.ReportSymCouldBeStaticAlreadyLocal(primary_def))
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol that is exported, yet every reference comes
                    # from the defining target itself
                    if all(ref.target == primary_def.target for ref in self.refs):
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))

    def evaluate(self):
        '''
        (re)build `extsyms` and `report` from all symbols loaded so far
        '''
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    def load(self, target, files):
        '''
        read the symbol tables for one link target by running `nm`

        target -- name of the library/executable as listed in the Makefile
        files  -- object files (possibly libtool ".lo" names) linked into it
        '''
        def libtoolmustdie(fn):
            # "dir/foo.lo" -> "dir/.libs/foo.o"; anything else is unchanged
            m = self.lt_re.match(fn)
            if m is None:
                return fn
            return m.group(1) + '.libs/' + m.group(2) + '.o'

        def libtooltargetmustdie(fn):
            # "dir/libfoo.la" -> "dir/.libs/libfoo.so",
            # "dir/prog"      -> "dir/.libs/prog"
            m = self.lt_re.match(fn)
            if m is None:
                a, b = fn.rsplit('/', 1)
                return '%s/.libs/%s' % (a, b)
            return m.group(1) + '.libs/' + m.group(2) + '.so'

        files = list(set([libtoolmustdie(fn) for fn in files]))

        def parse_nm_output(text):
            # generator: yields one SymRow per global symbol in `text`
            filename = None
            # source locations are reported relative to the parent of the
            # directory this script lives in
            path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

            for line in text.split('\n'):
                if line.strip() == '':
                    continue
                m = self.from_re.match(line)
                if m is not None:
                    filename = m.group(1)
                    continue
                # symbol rows are "|"-separated; the column header line is
                # not.  (Testing for a "Name" prefix instead would also
                # drop real symbols whose name starts with "Name".)
                if '|' not in line:
                    continue

                items = [i.strip() for i in line.split('|')]
                loc = None
                # with `nm -l`, "file:line" follows the last column after a tab
                if '\t' in items[-1]:
                    items[-1], loc = items[-1].split('\t', 1)
                    fn, lno = loc.rsplit(':', 1)
                    fn = os.path.relpath(fn, path_rel_to)
                    loc = '%s:%s' % (fn, lno)

                # address and size are hex, and may be empty
                items[1] = int(items[1] if items[1] != '' else '0', 16)
                items[4] = int(items[4] if items[4] != '' else '0', 16)
                items.append(loc)
                row = SymRow(target, filename, *items)

                if row.section == '.group' or row.name == '_GLOBAL_OFFSET_TABLE_':
                    continue
                if not row.is_global():
                    continue

                yield row

        visible_syms = set()

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = libtooltargetmustdie(target)
        nmlib = subprocess.Popen(['nm', '-l', '-g', '--defined-only', '-f', 'sysv', libfile], stdout = subprocess.PIPE)
        out = nmlib.communicate()[0].decode('US-ASCII')

        for row in parse_nm_output(out):
            visible_syms.add(row.name)

        nm = subprocess.Popen(['nm', '-l', '-f', 'sysv'] + files, stdout = subprocess.PIPE)
        out = nm.communicate()[0].decode('US-ASCII')

        for row in parse_nm_output(out):
            # SymRow is a tuple subclass; `visible` is attached as a plain
            # instance attribute and read back in Symbol.evaluate()
            row.visible = row.name in visible_syms
            # look up first, so a Symbol is only constructed for new names
            sym = self.get(row.name)
            if sym is None:
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)
def write_html_report(syms):
    '''
    render syms.report into symalyzer_report.html in the current directory

    Uses the symalyzer.html Jinja2 template located next to this script.
    If jinja2 is not installed, a message is printed to stderr and nothing
    is written.  The report loads jquery-3.4.1.min.js from its own
    directory; if that file does not exist yet, a download is attempted
    (best effort -- a failure only produces a message on stderr).
    '''
    try:
        import jinja2
    except ImportError:
        sys.stderr.write('jinja2 could not be imported, not writing HTML report!\n')
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template('symalyzer.html')

    # directory -> {object file -> reports}, with libtool's ".libs/" path
    # component dropped from the directory name
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname, _ = fn.replace('.libs/', '').rsplit('/', 1)
        dirgroups.setdefault(dirname, {})[fn] = reports

    # tooltip texts for the nm symbol class letters
    klasses = {
        'T': 'code / plain old regular function (Text)',
        'D': 'global variable, read-write, with nonzero initializer (Data)',
        'B': 'global variable, read-write, with zero initializer (BSS)',
        'C': 'global variable, read-write, with zero initializer (Common)',
        'R': 'global variable, read-only (Rodata)',
    }

    # write to a temporary name and rename, so an existing report is never
    # replaced by a half-written one
    with open('symalyzer_report.html.tmp', 'w') as fd:
        fd.write(template.render(dirgroups = dirgroups, klasses = klasses))
    os.rename('symalyzer_report.html.tmp', 'symalyzer_report.html')

    jquery = 'jquery-3.4.1.min.js'
    if os.path.exists(jquery):
        return

    url = 'https://code.jquery.com/' + jquery
    failmsg = 'failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n'
    sys.stderr.write(
        'trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n' % (url))

    # the report itself is already complete at this point, so neither a
    # missing `requests` module nor a network error should end in a traceback
    try:
        import requests
    except ImportError:
        sys.stderr.write(failmsg)
        return
    try:
        r = requests.get(url)
    except requests.RequestException:
        sys.stderr.write(failmsg)
        return
    if r.status_code != 200:
        sys.stderr.write(failmsg)
        return

    # store the bytes exactly as served instead of re-encoding the text
    with open(jquery + '.tmp', 'wb') as fd:
        fd.write(r.content)
    # move the finished download to its final name
    os.rename(jquery + '.tmp', jquery)
    sys.stderr.write('done.\n')
def automake_escape(s):
    '''
    turn a target name into the prefix automake uses for its per-target
    variables (e.g. "lib/libfrr.la" -> "lib_libfrr_la", as in
    lib_libfrr_la_OBJECTS)

    automake replaces every character other than letters, digits, "_" and
    "@" with an underscore, so "-" and "+" need replacing as well as "."
    and "/".
    '''
    return re.sub(r'[^A-Za-z0-9_@]', '_', s)
if __name__ == '__main__':
    mv = MakeVars()

    # both files only exist in the top directory of a configured and built tree
    in_build_tree = os.path.exists('config.version') and os.path.exists('lib/.libs/libfrr.so')
    if not in_build_tree:
        sys.stderr.write('please execute this script in the root directory of an FRR build tree\n')
        sys.stderr.write('./configure && make need to have completed successfully\n')
        sys.exit(1)

    # collect every program and library automake builds (minus tools/ssd)
    amtargets = ['bin_PROGRAMS', 'sbin_PROGRAMS', 'lib_LTLIBRARIES', 'module_LTLIBRARIES']
    mv.getvars(amtargets)
    targets = []
    for amtarget in amtargets:
        for item in mv[amtarget].strip().split():
            if item != 'tools/ssd':
                targets.append(item)

    # static archives (*.a) named in a target's LDADD contribute object
    # files too; fetch all the variables in batches up front
    mv.getvars(['%s_LDADD' % automake_escape(t) for t in targets])
    ldobjs = list(targets)
    for t in targets:
        for item in mv['%s_LDADD' % automake_escape(t)].strip().split():
            if not item.startswith('-') and item.endswith('.a'):
                ldobjs.append(item)

    mv.getvars(['%s_OBJECTS' % automake_escape(o) for o in ldobjs])

    syms = Symbols()

    for t in targets:
        objs = mv['%s_OBJECTS' % automake_escape(t)].strip().split()
        for item in mv['%s_LDADD' % automake_escape(t)].strip().split():
            if not item.startswith('-') and item.endswith('.a'):
                objs.extend(mv['%s_OBJECTS' % automake_escape(item)].strip().split())

        sys.stderr.write('processing %s...\n' % t)
        sys.stderr.flush()
        syms.load(t, objs)

    syms.evaluate()

    # plain-text report on stdout, then the HTML version
    for obj, reports in sorted(syms.report.items()):
        print('%s:' % obj)
        for report in reports:
            print('\t%r' % report)

    write_html_report(syms)