lib: Allow zclient do-over of connect on initial attempt

When a protocol is attempting to connect to the zebra daemon
through its socket and the initial attempt fails, give it a
few more attempts before giving up and leaving the daemon in
a bizarre state.

This problem was found by Ashley Penney, and Ashley was of
immense help in debugging and testing the fix for this issue.

Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Tested-by: Ashley Penney <apenney@ntoggle.com>
This commit is contained in:
Donald Sharp 2016-01-22 10:46:08 -05:00
parent 385f703b16
commit 4ecc09d394

View File

@ -202,6 +202,7 @@ zclient_socket(void)
ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv)); ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
if (ret < 0) if (ret < 0)
{ {
zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
close (sock); close (sock);
return -1; return -1;
} }
@ -237,6 +238,7 @@ zclient_socket_un (const char *path)
ret = connect (sock, (struct sockaddr *) &addr, len); ret = connect (sock, (struct sockaddr *) &addr, len);
if (ret < 0) if (ret < 0)
{ {
zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
close (sock); close (sock);
return -1; return -1;
} }
@ -443,11 +445,23 @@ zclient_start (struct zclient *zclient)
if (zclient->t_connect) if (zclient->t_connect)
return 0; return 0;
if (zclient_socket_connect(zclient) < 0) /*
* If we fail to connect to the socket on initialization,
* let's wait a second and see if we can reconnect,
* because if we don't connect, we never attempt to
* reconnect. On startup, if zebra is slow, we
* can get into this situation.
*/
while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5)
{ {
if (zclient_debug) if (zclient_debug)
zlog_debug ("zclient connection fail"); zlog_debug ("zclient connection fail");
zclient->fail++; zclient->fail++;
sleep (1);
}
if (zclient->sock < 0)
{
zclient_event (ZCLIENT_CONNECT, zclient); zclient_event (ZCLIENT_CONNECT, zclient);
return -1; return -1;
} }