topogen: add per router logging

TopoRouters now create a logger (which logs to /tmp/{router_name}.log)
on start to record all commands and events that it goes through. All log
messages contain timestamps that may be used in the future to:
(1) correlate commands call with events
(2) benchmark/time command speed
This commit is contained in:
Rafael Zalamena 2017-07-07 10:18:25 -03:00 committed by Donald Sharp
parent 6c131bd303
commit 13e1fc493b
2 changed files with 96 additions and 14 deletions

View File

@ -42,6 +42,7 @@ import os
import sys import sys
import json import json
import ConfigParser import ConfigParser
import glob
from mininet.net import Mininet from mininet.net import Mininet
from mininet.log import setLogLevel from mininet.log import setLogLevel
@ -81,14 +82,21 @@ class Topogen(object):
CONFIG_SECTION = 'topogen' CONFIG_SECTION = 'topogen'
def __init__(self, cls): def __init__(self, cls, modname='unnamed'):
"""
Topogen initialization function, takes the following arguments:
* `cls`: the topology class that is child of mininet.topo
* `modname`: module name must be a unique name to identify logs later.
"""
self.config = None self.config = None
self.topo = None self.topo = None
self.net = None self.net = None
self.gears = {} self.gears = {}
self.routern = 1 self.routern = 1
self.switchn = 1 self.switchn = 1
self.modname = modname
self._init_topo(cls) self._init_topo(cls)
logger.info('loading topology: {}'.format(self.modname))
@staticmethod @staticmethod
def _mininet_reset(): def _mininet_reset():
@ -213,13 +221,16 @@ class Topogen(object):
""" """
# If log_level is not specified use the configuration. # If log_level is not specified use the configuration.
if log_level is None: if log_level is None:
log_level = self.config.get('topogen', 'verbosity') log_level = self.config.get(self.CONFIG_SECTION, 'verbosity')
# Set python logger level # Set python logger level
logger_config.set_log_level(log_level) logger_config.set_log_level(log_level)
# Run mininet # Run mininet
setLogLevel(log_level) if log_level == 'debug':
setLogLevel(log_level)
logger.info('starting topology: {}'.format(self.modname))
self.net.start() self.net.start()
def start_router(self, router=None): def start_router(self, router=None):
@ -239,6 +250,7 @@ class Topogen(object):
def stop_topology(self): def stop_topology(self):
"Stops the network topology" "Stops the network topology"
logger.info('stopping topology: {}'.format(self.modname))
self.net.stop() self.net.stop()
def mininet_cli(self): def mininet_cli(self):
@ -252,6 +264,27 @@ class Topogen(object):
CLI(self.net) CLI(self.net)
def is_memleak_enabled(self):
"Returns `True` if memory leak report is enable, otherwise `False`."
memleak_file = os.environ.get('TOPOTESTS_CHECK_MEMLEAK')
if memleak_file is None:
return False
return True
def report_memory_leaks(self, testname=None):
"Run memory leak test and reports."
if not self.is_memleak_enabled():
return
# If no name was specified, use the test module name
if testname is None:
testname = self.modname
router_list = self.routers().values()
for router in router_list:
router.report_memory_leaks(self.modname)
# #
# Topology gears (equipment) # Topology gears (equipment)
# #
@ -306,6 +339,9 @@ class TopoGear(object):
else: else:
operation = 'down' operation = 'down'
logger.info('setting node "{}" link "{}" to state "{}"'.format(
self.name, myif, operation
))
return self.run('ip link set dev {} {}'.format(myif, operation)) return self.run('ip link set dev {} {}'.format(myif, operation))
def peer_link_enable(self, myif, enabled=True): def peer_link_enable(self, myif, enabled=True):
@ -399,6 +435,18 @@ class TopoRouter(TopoGear):
params['privateDirs'] = self.PRIVATE_DIRS params['privateDirs'] = self.PRIVATE_DIRS
self.options['memleak_path'] = params.get('memleak_path', None) self.options['memleak_path'] = params.get('memleak_path', None)
# Create new log directory
self.logdir = '/tmp/topotests/{}'.format(self.tgen.modname)
# Clean up before starting new log files: avoids removing just created
# log files.
self._prepare_tmpfiles()
# Propagate the router log directory
params['logdir'] = self.logdir
# Open router log file
logfile = '{}/{}.log'.format(self.logdir, name)
self.logger = logger_config.get_logger(name=name, target=logfile)
self.tgen.topo.addNode(self.name, cls=self.cls, **params) self.tgen.topo.addNode(self.name, cls=self.cls, **params)
def __str__(self): def __str__(self):
@ -406,6 +454,18 @@ class TopoRouter(TopoGear):
gear += ' TopoRouter<>' gear += ' TopoRouter<>'
return gear return gear
def _prepare_tmpfiles(self):
# Create directories if they don't exist
try:
os.makedirs(self.logdir, 0755)
except OSError:
pass
# Try to find relevant old logfiles in /tmp and delete them
map(os.remove, glob.glob('{}/*{}*.log'.format(self.logdir, self.name)))
# Remove old core files
map(os.remove, glob.glob('{}/{}*.dmp'.format(self.logdir, self.name)))
def load_config(self, daemon, source=None): def load_config(self, daemon, source=None):
""" """
Loads daemon configuration from the specified source Loads daemon configuration from the specified source
@ -415,12 +475,14 @@ class TopoRouter(TopoGear):
TopoRouter.RD_PIM. TopoRouter.RD_PIM.
""" """
daemonstr = self.RD.get(daemon) daemonstr = self.RD.get(daemon)
self.logger.info('loading "{}" configuration: {}'.format(daemonstr, source))
self.tgen.net[self.name].loadConf(daemonstr, source) self.tgen.net[self.name].loadConf(daemonstr, source)
def check_router_running(self): def check_router_running(self):
""" """
Run a series of checks and returns a status string. Run a series of checks and returns a status string.
""" """
self.logger.info('checking if daemons are running')
return self.tgen.net[self.name].checkRouterRunning() return self.tgen.net[self.name].checkRouterRunning()
def start(self): def start(self):
@ -431,8 +493,17 @@ class TopoRouter(TopoGear):
* Configure interfaces * Configure interfaces
* Start daemons (e.g. FRR/Quagga) * Start daemons (e.g. FRR/Quagga)
""" """
self.logger.debug('starting')
return self.tgen.net[self.name].startRouter() return self.tgen.net[self.name].startRouter()
def stop(self):
"""
Stop router:
* Kill daemons
"""
self.logger.debug('stopping')
return self.tgen.net[self.name].stopRouter()
def vtysh_cmd(self, command, isjson=False): def vtysh_cmd(self, command, isjson=False):
""" """
Runs the provided command string in the vty shell and returns a string Runs the provided command string in the vty shell and returns a string
@ -447,6 +518,8 @@ class TopoRouter(TopoGear):
vtysh_command = 'vtysh -c "{}" 2>/dev/null'.format(command) vtysh_command = 'vtysh -c "{}" 2>/dev/null'.format(command)
output = self.run(vtysh_command) output = self.run(vtysh_command)
self.logger.info('\nvtysh command => {}\nvtysh output <= {}'.format(
command, output))
if isjson is False: if isjson is False:
return output return output
@ -473,6 +546,9 @@ class TopoRouter(TopoGear):
res = self.run(vtysh_command) res = self.run(vtysh_command)
os.unlink(fname) os.unlink(fname)
self.logger.info('\nvtysh command => "{}"\nvtysh output <= "{}"'.format(
vtysh_command, res))
return res return res
def report_memory_leaks(self, testname): def report_memory_leaks(self, testname):
@ -485,10 +561,10 @@ class TopoRouter(TopoGear):
""" """
memleak_file = os.environ.get('TOPOTESTS_CHECK_MEMLEAK') or self.options['memleak_path'] memleak_file = os.environ.get('TOPOTESTS_CHECK_MEMLEAK') or self.options['memleak_path']
if memleak_file is None: if memleak_file is None:
print "SKIPPED check on Memory leaks: Disabled (TOPOTESTS_CHECK_MEMLEAK undefined)"
return return
self.tgen.net[self.name].stopRouter() self.stop()
self.logger.info('running memory leak report')
self.tgen.net[self.name].report_memory_leaks(memleak_file, testname) self.tgen.net[self.name].report_memory_leaks(memleak_file, testname)
class TopoSwitch(TopoGear): class TopoSwitch(TopoGear):

View File

@ -255,6 +255,7 @@ class Router(Node):
def __init__(self, name, **params): def __init__(self, name, **params):
super(Router, self).__init__(name, **params) super(Router, self).__init__(name, **params)
self.logdir = params.get('logdir', '/tmp')
self.daemondir = None self.daemondir = None
self.routertype = 'frr' self.routertype = 'frr'
self.daemons = {'zebra': 0, 'ripd': 0, 'ripngd': 0, 'ospfd': 0, self.daemons = {'zebra': 0, 'ripd': 0, 'ripngd': 0, 'ospfd': 0,
@ -309,7 +310,7 @@ class Router(Node):
# Enable coredumps # Enable coredumps
assert_sysctl(self, 'kernel.core_uses_pid', 1) assert_sysctl(self, 'kernel.core_uses_pid', 1)
assert_sysctl(self, 'fs.suid_dumpable', 2) assert_sysctl(self, 'fs.suid_dumpable', 2)
corefile = '/tmp/{0}_%e_core-sig_%s-pid_%p.dmp'.format(self.name) corefile = '{}/{}_%e_core-sig_%s-pid_%p.dmp'.format(self.logdir, self.name)
assert_sysctl(self, 'kernel.core_pattern', corefile) assert_sysctl(self, 'kernel.core_pattern', corefile)
self.cmd('ulimit -c unlimited') self.cmd('ulimit -c unlimited')
# Set ownership of config files # Set ownership of config files
@ -359,6 +360,7 @@ class Router(Node):
# Disable integrated-vtysh-config # Disable integrated-vtysh-config
self.cmd('echo "no service integrated-vtysh-config" >> /etc/%s/vtysh.conf' % self.routertype) self.cmd('echo "no service integrated-vtysh-config" >> /etc/%s/vtysh.conf' % self.routertype)
self.cmd('chown %s:%svty /etc/%s/vtysh.conf' % (self.routertype, self.routertype, self.routertype)) self.cmd('chown %s:%svty /etc/%s/vtysh.conf' % (self.routertype, self.routertype, self.routertype))
# TODO remove the following lines after all tests are migrated to Topogen.
# Try to find relevant old logfiles in /tmp and delete them # Try to find relevant old logfiles in /tmp and delete them
map(os.remove, glob.glob("/tmp/*%s*.log" % self.name)) map(os.remove, glob.glob("/tmp/*%s*.log" % self.name))
# Remove old core files # Remove old core files
@ -391,8 +393,8 @@ class Router(Node):
# Start Zebra first # Start Zebra first
if self.daemons['zebra'] == 1: if self.daemons['zebra'] == 1:
zebra_path = os.path.join(self.daemondir, 'zebra') zebra_path = os.path.join(self.daemondir, 'zebra')
self.cmd('{0} > /tmp/{1}-zebra.out 2> /tmp/{1}-zebra.err &'.format( self.cmd('{0} > {1}/{2}-zebra.out 2> {1}/{2}-zebra.err &'.format(
zebra_path, self.name zebra_path, self.logdir, self.name
)) ))
self.waitOutput() self.waitOutput()
logger.debug('{}: {} zebra started'.format(self, self.routertype)) logger.debug('{}: {} zebra started'.format(self, self.routertype))
@ -407,8 +409,8 @@ class Router(Node):
continue continue
daemon_path = os.path.join(self.daemondir, daemon) daemon_path = os.path.join(self.daemondir, daemon)
self.cmd('{0} > /tmp/{1}-{2}.out 2> /tmp/{1}-{2}.err &'.format( self.cmd('{0} > {1}/{2}-{3}.out 2> {1}/{2}-{3}.err &'.format(
daemon_path, self.name, daemon daemon_path, self.logdir, self.name, daemon
)) ))
self.waitOutput() self.waitOutput()
logger.debug('{}: {} {} started'.format(self, self.routertype, daemon)) logger.debug('{}: {} {} started'.format(self, self.routertype, daemon))
@ -417,7 +419,7 @@ class Router(Node):
def getStdOut(self, daemon): def getStdOut(self, daemon):
return self.getLog('out', daemon) return self.getLog('out', daemon)
def getLog(self, log, daemon): def getLog(self, log, daemon):
return self.cmd('cat /tmp/%s-%s.%s' % (self.name, daemon, log) ) return self.cmd('cat {}/{}-{}.{}'.format(self.logdir, self.name, daemon, log))
def checkRouterRunning(self): def checkRouterRunning(self):
"Check if router daemons are running and collect crashinfo they don't run" "Check if router daemons are running and collect crashinfo they don't run"
@ -432,7 +434,8 @@ class Router(Node):
if (self.daemons[daemon] == 1) and not (daemon in daemonsRunning): if (self.daemons[daemon] == 1) and not (daemon in daemonsRunning):
sys.stderr.write("%s: Daemon %s not running\n" % (self.name, daemon)) sys.stderr.write("%s: Daemon %s not running\n" % (self.name, daemon))
# Look for core file # Look for core file
corefiles = glob.glob("/tmp/%s_%s_core*.dmp" % (self.name, daemon)) corefiles = glob.glob('{}/{}_{}_core*.dmp'.format(
self.logdir, self.name, daemon))
if (len(corefiles) > 0): if (len(corefiles) > 0):
daemon_path = os.path.join(self.daemondir, daemon) daemon_path = os.path.join(self.daemondir, daemon)
backtrace = subprocess.check_output([ backtrace = subprocess.check_output([
@ -442,8 +445,11 @@ class Router(Node):
sys.stderr.write("%s\n" % backtrace) sys.stderr.write("%s\n" % backtrace)
else: else:
# No core found - If we find matching logfile in /tmp, then print last 20 lines from it. # No core found - If we find matching logfile in /tmp, then print last 20 lines from it.
if os.path.isfile("/tmp/%s-%s.log" % (self.name, daemon)): if os.path.isfile('{}/{}-{}.log'.format(self.logdir, self.name, daemon)):
log_tail = subprocess.check_output(["tail -n20 /tmp/%s-%s.log 2> /dev/null" % (self.name, daemon)], shell=True) log_tail = subprocess.check_output([
"tail -n20 {}/{}-{}.log 2> /dev/null".format(
self.logdir, self.name, daemon)
], shell=True)
sys.stderr.write("\nFrom %s %s %s log file:\n" % (self.routertype, self.name, daemon)) sys.stderr.write("\nFrom %s %s %s log file:\n" % (self.routertype, self.name, daemon))
sys.stderr.write("%s\n" % log_tail) sys.stderr.write("%s\n" % log_tail)