From 20814a39865249adb0f3b6975fc1a5e45c5e6507 Mon Sep 17 00:00:00 2001 From: Dominik Csapak Date: Fri, 29 Apr 2022 12:04:52 +0200 Subject: [PATCH] proxmox-backup-proxy: stop accept() loop on daemon shutdown On reload the old process hands over to the new process but needs to keep running until all its worker tasks are finished to avoid breaking an in-progress action like an xterm.js web shell or a backup creation/restore. During that wait time the receiving channel was already closed, but the TCP socket accept listener was still left active by mistake. That, paired with `SO_REUSEPORT` being set on the underlying socket, made the kernel choose either the old or the new process for new incoming connections; both still listened for them, after all, and reuse-port + multiple processes is often used as a load-balancing mechanism. As the old proxy accepted connections but didn't process them anymore, one could observe sporadic connection failures on any API call, or rather on any new connection to the proxy, depending on which process got it assigned. The fix is to stop accepting new connections once we shut down, so poll the shutdown_future too during accept and just exit the accept-loop on shutdown. Note: Neither this part of the code nor other parts that could influence it were changed at all in recent times, so it's still unresolved why it pops up only now. 
Signed-off-by: Dominik Csapak Co-authored-by: Wolfgang Bumiller [ T: add more (root cause) info and reword a bit ] Signed-off-by: Thomas Lamprecht --- src/bin/proxmox-backup-proxy.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs index 744a93f9..6a305b7c 100644 --- a/src/bin/proxmox-backup-proxy.rs +++ b/src/bin/proxmox-backup-proxy.rs @@ -394,14 +394,18 @@ async fn accept_connection( sender: tokio::sync::mpsc::Sender, ) { let accept_counter = Arc::new(()); + let mut shutdown_future = proxmox_rest_server::shutdown_future().fuse(); loop { - let (sock, peer) = match listener.accept().await { - Ok(conn) => conn, - Err(err) => { - eprintln!("error accepting tcp connection: {}", err); - continue; - } + let (sock, peer) = select! { + res = listener.accept().fuse() => match res { + Ok(conn) => conn, + Err(err) => { + eprintln!("error accepting tcp connection: {}", err); + continue; + } + }, + _ = shutdown_future => break, }; sock.set_nodelay(true).unwrap();