mirror of
https://git.proxmox.com/git/mirror_frr
synced 2025-06-15 13:30:21 +00:00
topotests: stop wasting time at exit
... and clean up zombie child processes

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
parent
ba5410e32f
commit
f033a78a99
@ -35,6 +35,7 @@ import tempfile
|
|||||||
import platform
|
import platform
|
||||||
import difflib
|
import difflib
|
||||||
import time
|
import time
|
||||||
|
import signal
|
||||||
|
|
||||||
from lib.topolog import logger
|
from lib.topolog import logger
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
@ -450,6 +451,10 @@ def pid_exists(pid):
|
|||||||
|
|
||||||
if pid <= 0:
|
if pid <= 0:
|
||||||
return False
|
return False
|
||||||
|
try:
|
||||||
|
os.waitpid(pid, os.WNOHANG)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
os.kill(pid, 0)
|
os.kill(pid, 0)
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
@ -1021,8 +1026,8 @@ class Router(Node):
|
|||||||
os.system("chmod -R go+rw /tmp/topotests")
|
os.system("chmod -R go+rw /tmp/topotests")
|
||||||
|
|
||||||
# Return count of running daemons
|
# Return list of names of running daemons
|
||||||
def countDaemons(self):
|
def listDaemons(self):
|
||||||
numRunning = 0
|
ret = []
|
||||||
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
|
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
|
||||||
errors = ""
|
errors = ""
|
||||||
if re.search(r"No such file or directory", rundaemons):
|
if re.search(r"No such file or directory", rundaemons):
|
||||||
@ -1031,12 +1036,11 @@ class Router(Node):
|
|||||||
for d in StringIO.StringIO(rundaemons):
|
for d in StringIO.StringIO(rundaemons):
|
||||||
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
||||||
if daemonpid.isdigit() and pid_exists(int(daemonpid)):
|
if daemonpid.isdigit() and pid_exists(int(daemonpid)):
|
||||||
numRunning += 1
|
ret.append(os.path.basename(d.rstrip().rsplit(".", 1)[0]))
|
||||||
return numRunning
|
return ret
|
||||||
|
|
||||||
def stopRouter(self, wait=True, assertOnError=True, minErrorVersion="5.1"):
|
def stopRouter(self, wait=True, assertOnError=True, minErrorVersion="5.1"):
|
||||||
# Stop Running FRR Daemons
|
# Stop Running FRR Daemons
|
||||||
numRunning = 0
|
|
||||||
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
|
rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
|
||||||
errors = ""
|
errors = ""
|
||||||
if re.search(r"No such file or directory", rundaemons):
|
if re.search(r"No such file or directory", rundaemons):
|
||||||
@ -1045,24 +1049,36 @@ class Router(Node):
|
|||||||
for d in StringIO.StringIO(rundaemons):
|
for d in StringIO.StringIO(rundaemons):
|
||||||
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
||||||
if daemonpid.isdigit() and pid_exists(int(daemonpid)):
|
if daemonpid.isdigit() and pid_exists(int(daemonpid)):
|
||||||
|
daemonname = os.path.basename(d.rstrip().rsplit(".", 1)[0])
|
||||||
logger.info(
|
logger.info(
|
||||||
"{}: stopping {}".format(
|
"{}: stopping {}".format(
|
||||||
self.name, os.path.basename(d.rstrip().rsplit(".", 1)[0])
|
self.name, daemonname
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.cmd("kill -TERM %s" % daemonpid)
|
try:
|
||||||
self.waitOutput()
|
os.kill(int(daemonpid), signal.SIGTERM)
|
||||||
if pid_exists(int(daemonpid)):
|
except OSError as err:
|
||||||
numRunning += 1
|
if err.errno == errno.ESRCH:
|
||||||
|
logger.error("{}: {} left a dead pidfile (pid={})".format(self.name, daemonname, daemonpid))
|
||||||
|
else:
|
||||||
|
logger.info("{}: {} could not kill pid {}: {}".format(self.name, daemonname, daemonpid, str(err)))
|
||||||
|
|
||||||
if wait and numRunning > 0:
|
if not wait:
|
||||||
counter = 5
|
return errors
|
||||||
while counter > 0 and numRunning > 0:
|
|
||||||
sleep(2, "{}: waiting for daemons stopping".format(self.name))
|
running = self.listDaemons()
|
||||||
numRunning = self.countDaemons()
|
|
||||||
|
if running:
|
||||||
|
sleep(0.1, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
|
||||||
|
running = self.listDaemons()
|
||||||
|
|
||||||
|
counter = 20
|
||||||
|
while counter > 0 and running:
|
||||||
|
sleep(0.5, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
|
||||||
|
running = self.listDaemons()
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
if wait and numRunning > 0:
|
if running:
|
||||||
# 2nd round of kill if daemons didn't exit
|
# 2nd round of kill if daemons didn't exit
|
||||||
for d in StringIO.StringIO(rundaemons):
|
for d in StringIO.StringIO(rundaemons):
|
||||||
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
|
||||||
@ -1077,13 +1093,15 @@ class Router(Node):
|
|||||||
self.waitOutput()
|
self.waitOutput()
|
||||||
self.cmd("rm -- {}".format(d.rstrip()))
|
self.cmd("rm -- {}".format(d.rstrip()))
|
||||||
|
|
||||||
if wait:
|
if not wait:
|
||||||
errors = self.checkRouterCores(reportOnce=True)
|
return errors
|
||||||
if self.checkRouterVersion("<", minErrorVersion):
|
|
||||||
# ignore errors in old versions
|
errors = self.checkRouterCores(reportOnce=True)
|
||||||
errors = ""
|
if self.checkRouterVersion("<", minErrorVersion):
|
||||||
if assertOnError and len(errors) > 0:
|
# ignore errors in old versions
|
||||||
assert "Errors found - details follow:" == 0, errors
|
errors = ""
|
||||||
|
if assertOnError and len(errors) > 0:
|
||||||
|
assert "Errors found - details follow:" == 0, errors
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
def removeIPs(self):
|
def removeIPs(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user