From 2492083e3704617fcb8243080e18dd56ad954c5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?=
Date: Wed, 20 Nov 2024 19:55:04 +0100
Subject: [PATCH] push: reduce initial capacity of known chunks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

one million chunks is a bit much: since each chunk represents 1-2MB
(dynamic) to 4MB (fixed) of input data, that would mean 1-4TB of
re-used input data in a single snapshot.

64k chunks still represent 64-256GB of input data, which should be
plenty (and for such big snapshots with lots of re-used chunks, growing
the allocation of the HashSet should not be the bottleneck), and 64k is
also the default capacity used for pulling.

Signed-off-by: Fabian Grünbichler
---
 src/server/push.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/server/push.rs b/src/server/push.rs
index cb6afbe8..8a36a6d6 100644
--- a/src/server/push.rs
+++ b/src/server/push.rs
@@ -781,7 +781,7 @@ pub(crate) async fn push_snapshot(
     };
 
     // Avoid double upload penalty by remembering already seen chunks
-    let known_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 1024)));
+    let known_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64 * 1024)));
 
     for entry in source_manifest.files() {
         let mut path = backup_dir.full_path();
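
For context on why the smaller initial value is safe: the argument to
HashSet::with_capacity is only an up-front reservation, not an upper bound,
so the set keeps growing as more chunk digests are inserted. A minimal
standalone sketch demonstrating this (not part of the patch; the [u8; 32]
digest element type is assumed here purely for illustration):

use std::collections::HashSet;
use std::sync::{Arc, Mutex};

fn main() {
    // with_capacity only reserves space up front; it does not cap the set.
    let known_chunks: Arc<Mutex<HashSet<[u8; 32]>>> =
        Arc::new(Mutex::new(HashSet::with_capacity(64 * 1024)));

    let mut set = known_chunks.lock().unwrap();
    println!("initial capacity: {}", set.capacity());

    // Inserting more digests than the initial capacity just triggers
    // amortized reallocation, so snapshots re-using more than 64k chunks
    // are still handled correctly.
    for i in 0u32..(128 * 1024) {
        let mut digest = [0u8; 32];
        digest[..4].copy_from_slice(&i.to_le_bytes());
        set.insert(digest);
    }
    println!("capacity after growth: {}", set.capacity());
}

Running this prints an initial capacity of at least 65536 and a larger
capacity after the inserts; a snapshot re-using more than 64k chunks only
pays a few amortized reallocations rather than hitting any limit.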