topotests: stop wasting time at exit

... and clean up zombie child processes

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
David Lamparter 2020-07-15 18:48:18 +02:00
parent ba5410e32f
commit f033a78a99

View File

@@ -35,6 +35,7 @@ import tempfile
import platform import platform
import difflib import difflib
import time import time
import signal
from lib.topolog import logger from lib.topolog import logger
from copy import deepcopy from copy import deepcopy
@@ -450,6 +451,10 @@ def pid_exists(pid):
if pid <= 0: if pid <= 0:
return False return False
try:
os.waitpid(pid, os.WNOHANG)
except:
pass
try: try:
os.kill(pid, 0) os.kill(pid, 0)
except OSError as err: except OSError as err:
@@ -1021,8 +1026,8 @@ class Router(Node):
os.system("chmod -R go+rw /tmp/topotests") os.system("chmod -R go+rw /tmp/topotests")
# Return count of running daemons # Return count of running daemons
def countDaemons(self): def listDaemons(self):
numRunning = 0 ret = []
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype) rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
errors = "" errors = ""
if re.search(r"No such file or directory", rundaemons): if re.search(r"No such file or directory", rundaemons):
@@ -1031,12 +1036,11 @@ class Router(Node):
for d in StringIO.StringIO(rundaemons): for d in StringIO.StringIO(rundaemons):
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip() daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
if daemonpid.isdigit() and pid_exists(int(daemonpid)): if daemonpid.isdigit() and pid_exists(int(daemonpid)):
numRunning += 1 ret.append(os.path.basename(d.rstrip().rsplit(".", 1)[0]))
return numRunning return ret
def stopRouter(self, wait=True, assertOnError=True, minErrorVersion="5.1"): def stopRouter(self, wait=True, assertOnError=True, minErrorVersion="5.1"):
# Stop Running FRR Daemons # Stop Running FRR Daemons
numRunning = 0
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype) rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
errors = "" errors = ""
if re.search(r"No such file or directory", rundaemons): if re.search(r"No such file or directory", rundaemons):
@@ -1045,24 +1049,36 @@ class Router(Node):
for d in StringIO.StringIO(rundaemons): for d in StringIO.StringIO(rundaemons):
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip() daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
if daemonpid.isdigit() and pid_exists(int(daemonpid)): if daemonpid.isdigit() and pid_exists(int(daemonpid)):
daemonname = os.path.basename(d.rstrip().rsplit(".", 1)[0])
logger.info( logger.info(
"{}: stopping {}".format( "{}: stopping {}".format(
self.name, os.path.basename(d.rstrip().rsplit(".", 1)[0]) self.name, daemonname
) )
) )
self.cmd("kill -TERM %s" % daemonpid) try:
self.waitOutput() os.kill(int(daemonpid), signal.SIGTERM)
if pid_exists(int(daemonpid)): except OSError as err:
numRunning += 1 if err.errno == errno.ESRCH:
logger.error("{}: {} left a dead pidfile (pid={})".format(self.name, daemonname, daemonpid))
else:
logger.info("{}: {} could not kill pid {}: {}".format(self.name, daemonname, daemonpid, str(err)))
if wait and numRunning > 0: if not wait:
counter = 5 return errors
while counter > 0 and numRunning > 0:
sleep(2, "{}: waiting for daemons stopping".format(self.name)) running = self.listDaemons()
numRunning = self.countDaemons()
if running:
sleep(0.1, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
running = self.listDaemons()
counter = 20
while counter > 0 and running:
sleep(0.5, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
running = self.listDaemons()
counter -= 1 counter -= 1
if wait and numRunning > 0: if running:
# 2nd round of kill if daemons didn't exit # 2nd round of kill if daemons didn't exit
for d in StringIO.StringIO(rundaemons): for d in StringIO.StringIO(rundaemons):
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip() daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
@@ -1077,13 +1093,15 @@ class Router(Node):
self.waitOutput() self.waitOutput()
self.cmd("rm -- {}".format(d.rstrip())) self.cmd("rm -- {}".format(d.rstrip()))
if wait: if not wait:
errors = self.checkRouterCores(reportOnce=True) return errors
if self.checkRouterVersion("<", minErrorVersion):
# ignore errors in old versions errors = self.checkRouterCores(reportOnce=True)
errors = "" if self.checkRouterVersion("<", minErrorVersion):
if assertOnError and len(errors) > 0: # ignore errors in old versions
assert "Errors found - details follow:" == 0, errors errors = ""
if assertOnError and len(errors) > 0:
assert "Errors found - details follow:" == 0, errors
return errors return errors
def removeIPs(self): def removeIPs(self):