diff --git a/Cargo.toml b/Cargo.toml
index e70a37cb..31835de5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ exclude = [ "build", "debian", "tests/catar_data/test_symlink/symlink1"]
 [workspace]
 members = [
     "pbs-buildcfg",
+    "pbs-datastore",
     "pbs-runtime",
     "pbs-tools",
 ]
@@ -94,6 +95,7 @@ proxmox-http = { version = "0.2.1", features = [ "client", "http-helpers", "websocket" ] }
 proxmox-openid = "0.6.0"
 
 pbs-buildcfg = { path = "pbs-buildcfg" }
+pbs-datastore = { path = "pbs-datastore" }
 pbs-runtime = { path = "pbs-runtime" }
 pbs-tools = { path = "pbs-tools" }
diff --git a/Makefile b/Makefile
index 684d1f6e..58678ce7 100644
--- a/Makefile
+++ b/Makefile
@@ -32,6 +32,7 @@ RESTORE_BIN := \
 
 SUBCRATES := \
 	pbs-buildcfg \
+	pbs-datastore \
 	pbs-runtime \
 	pbs-tools
diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
new file mode 100644
index 00000000..aca6bbad
--- /dev/null
+++ b/pbs-datastore/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "pbs-datastore"
+version = "0.1.0"
+authors = ["Proxmox Support Team <support@proxmox.com>"]
+edition = "2018"
+description = "low level pbs data storage access"
+
+[dependencies]
+anyhow = "1.0"
+crc32fast = "1"
+endian_trait = { version = "0.6", features = [ "arrays" ] }
+nix = "0.19.1"
+openssl = "0.10"
+serde = { version = "1.0", features = ["derive"] }
+zstd = { version = "0.6", features = [ "bindgen" ] }
+
+pathpatterns = "0.1.2"
+pxar = { version = "0.10.1", features = [ "tokio-io" ] }
+
+proxmox = { version = "0.11.5", default-features = false, features = [ "api-macro" ] }
+
+pbs-tools = { path = "../pbs-tools" }
diff --git a/src/backup/catalog.rs b/pbs-datastore/src/catalog.rs
similarity index 96%
rename from src/backup/catalog.rs
rename to pbs-datastore/src/catalog.rs
index a307f9d8..26003bf8 100644
--- a/src/backup/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -9,12 +9,27 @@ use anyhow::{bail, format_err, Error};
 use pathpatterns::{MatchList, MatchType};
 use proxmox::tools::io::ReadExt;
 
-use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
-use crate::pxar::catalog::BackupCatalogWriter;
+use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
+
+/// Trait for writing file list catalogs.
+///
+/// A file list catalog simply stores a directory tree. Such catalogs may be used as an index to
+/// do fast searches for files.
+pub trait BackupCatalogWriter {
+    fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
+    fn end_directory(&mut self) -> Result<(), Error>;
+    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
+    fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
+    fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
+    fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
+    fn add_char_device(&mut self, name: &CStr) -> Result<(), Error>;
+    fn add_fifo(&mut self, name: &CStr) -> Result<(), Error>;
+    fn add_socket(&mut self, name: &CStr) -> Result<(), Error>;
+}
 
 #[repr(u8)]
 #[derive(Copy,Clone,PartialEq)]
-pub(crate) enum CatalogEntryType {
+pub enum CatalogEntryType {
     Directory = b'd',
     File = b'f',
     Symlink = b'l',
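
For orientation, here is a minimal toy implementer of the trait as it lands in the new crate; it is only a sketch (the real implementer is the binary catalog writer in this file) that prints the tree instead of encoding it:

```rust
use std::ffi::CStr;

use anyhow::Error;
use pbs_datastore::catalog::BackupCatalogWriter;

/// Toy catalog writer: prints entries as an indented tree.
struct TreePrinter {
    depth: usize,
}

impl TreePrinter {
    fn print_name(&self, name: &CStr) {
        println!("{}{}", "  ".repeat(self.depth), name.to_string_lossy());
    }
}

impl BackupCatalogWriter for TreePrinter {
    fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
        self.print_name(name);
        self.depth += 1; // everything until end_directory() is nested below
        Ok(())
    }
    fn end_directory(&mut self) -> Result<(), Error> {
        self.depth -= 1;
        Ok(())
    }
    fn add_file(&mut self, name: &CStr, _size: u64, _mtime: i64) -> Result<(), Error> {
        self.print_name(name);
        Ok(())
    }
    // The remaining entry types carry no extra payload in this toy example.
    fn add_symlink(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
    fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
    fn add_block_device(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
    fn add_char_device(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
    fn add_fifo(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
    fn add_socket(&mut self, name: &CStr) -> Result<(), Error> { self.print_name(name); Ok(()) }
}
```

The pxar create code drives such a writer while walking the source directory, which is why the trait moves together with the catalog format into `pbs-datastore` (see the `src/pxar/create.rs` hunk below).
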
diff --git a/src/backup/checksum_reader.rs b/pbs-datastore/src/checksum_reader.rs
similarity index 100%
rename from src/backup/checksum_reader.rs
rename to pbs-datastore/src/checksum_reader.rs
diff --git a/src/backup/checksum_writer.rs b/pbs-datastore/src/checksum_writer.rs
similarity index 100%
rename from src/backup/checksum_writer.rs
rename to pbs-datastore/src/checksum_writer.rs
diff --git a/src/backup/chunker.rs b/pbs-datastore/src/chunker.rs
similarity index 100%
rename from src/backup/chunker.rs
rename to pbs-datastore/src/chunker.rs
diff --git a/src/backup/crypt_config.rs b/pbs-datastore/src/crypt_config.rs
similarity index 99%
rename from src/backup/crypt_config.rs
rename to pbs-datastore/src/crypt_config.rs
index e8d69e82..8d681d58 100644
--- a/src/backup/crypt_config.rs
+++ b/pbs-datastore/src/crypt_config.rs
@@ -1,6 +1,6 @@
 //! Wrappers for OpenSSL crypto functions
 //!
-//! We use this to encrypt and decryprt data chunks. Cipher is
+//! We use this to encrypt and decrypt data chunks. Cipher is
 //! AES_256_GCM, which is fast and provides authenticated encryption.
 //!
 //! See the Wikipedia article for [Authenticated
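
For reference, the authenticated-encryption scheme named in that doc comment can be exercised directly with the `openssl` crate (already a dependency of the new crate). This is a standalone illustration of AES-256-GCM, not the `CryptConfig` API itself; key and IV here are dummy values:

```rust
use openssl::symm::{decrypt_aead, encrypt_aead, Cipher};

fn main() -> Result<(), openssl::error::ErrorStack> {
    let cipher = Cipher::aes_256_gcm();
    let key = [0x42u8; 32]; // 256-bit key; use real random key material in practice
    let iv = [0x01u8; 16];  // per-chunk IV; must never repeat for the same key
    let mut tag = [0u8; 16]; // GCM authentication tag, stored next to the ciphertext

    let ciphertext = encrypt_aead(cipher, &key, Some(&iv), b"", b"chunk data", &mut tag)?;

    // Decryption fails with an error if either data or tag were tampered with —
    // that is the "authenticated" part of authenticated encryption.
    let plaintext = decrypt_aead(cipher, &key, Some(&iv), b"", &ciphertext, &tag)?;
    assert_eq!(plaintext, b"chunk data");
    Ok(())
}
```
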
diff --git a/src/backup/crypt_reader.rs b/pbs-datastore/src/crypt_reader.rs
similarity index 99%
rename from src/backup/crypt_reader.rs
rename to pbs-datastore/src/crypt_reader.rs
index 8bf15cfd..20e219b5 100644
--- a/src/backup/crypt_reader.rs
+++ b/pbs-datastore/src/crypt_reader.rs
@@ -1,7 +1,8 @@
-use anyhow::{bail, Error};
 use std::sync::Arc;
 use std::io::{Read, BufRead};
 
+use anyhow::{bail, Error};
+
 use super::CryptConfig;
 
 pub struct CryptReader<T> {
diff --git a/src/backup/crypt_writer.rs b/pbs-datastore/src/crypt_writer.rs
similarity index 98%
rename from src/backup/crypt_writer.rs
rename to pbs-datastore/src/crypt_writer.rs
index a75f91b2..f99bca88 100644
--- a/src/backup/crypt_writer.rs
+++ b/pbs-datastore/src/crypt_writer.rs
@@ -1,7 +1,8 @@
-use anyhow::{Error};
 use std::sync::Arc;
 use std::io::Write;
 
+use anyhow::Error;
+
 use super::CryptConfig;
 
 pub struct CryptWriter<W> {
diff --git a/src/backup/data_blob.rs b/pbs-datastore/src/data_blob.rs
similarity index 99%
rename from src/backup/data_blob.rs
rename to pbs-datastore/src/data_blob.rs
index 2c36ac9a..c582e6b7 100644
--- a/src/backup/data_blob.rs
+++ b/pbs-datastore/src/data_blob.rs
@@ -1,6 +1,7 @@
-use anyhow::{bail, Error};
 use std::convert::TryInto;
 
+use anyhow::{bail, Error};
+
 use proxmox::tools::io::{ReadExt, WriteExt};
 
 use super::file_formats::*;
diff --git a/pbs-datastore/src/data_blob_reader.rs b/pbs-datastore/src/data_blob_reader.rs
new file mode 100644
index 00000000..8b37dbe7
--- /dev/null
+++ b/pbs-datastore/src/data_blob_reader.rs
@@ -0,0 +1,177 @@
+use std::io::{BufReader, Read};
+use std::sync::Arc;
+
+use anyhow::{bail, format_err, Error};
+use proxmox::tools::io::ReadExt;
+
+use crate::checksum_reader::ChecksumReader;
+use crate::crypt_config::CryptConfig;
+use crate::crypt_reader::CryptReader;
+use crate::file_formats::{self, DataBlobHeader};
+
+enum BlobReaderState<'reader, R: Read> {
+    Uncompressed {
+        expected_crc: u32,
+        csum_reader: ChecksumReader<R>,
+    },
+    Compressed {
+        expected_crc: u32,
+        decompr: zstd::stream::read::Decoder<'reader, BufReader<ChecksumReader<R>>>,
+    },
+    Encrypted {
+        expected_crc: u32,
+        decrypt_reader: CryptReader<BufReader<ChecksumReader<R>>>,
+    },
+    EncryptedCompressed {
+        expected_crc: u32,
+        decompr: zstd::stream::read::Decoder<
+            'reader,
+            BufReader<CryptReader<BufReader<ChecksumReader<R>>>>,
+        >,
+    },
+}
+
+/// Read data blobs
+pub struct DataBlobReader<'reader, R: Read> {
+    state: BlobReaderState<'reader, R>,
+}
+
+// zstd_safe::DCtx is not sync but we are, since
+// the only public interface is on mutable reference
+unsafe impl<R: Read> Sync for DataBlobReader<'_, R> {}
+
+impl<R: Read> DataBlobReader<'_, R> {
+    pub fn new(mut reader: R, config: Option<Arc<CryptConfig>>) -> Result<Self, Error> {
+        let head: DataBlobHeader = unsafe { reader.read_le_value()? };
+        match head.magic {
+            file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0 => {
+                let expected_crc = u32::from_le_bytes(head.crc);
+                let csum_reader = ChecksumReader::new(reader, None);
+                Ok(Self {
+                    state: BlobReaderState::Uncompressed {
+                        expected_crc,
+                        csum_reader,
+                    },
+                })
+            }
+            file_formats::COMPRESSED_BLOB_MAGIC_1_0 => {
+                let expected_crc = u32::from_le_bytes(head.crc);
+                let csum_reader = ChecksumReader::new(reader, None);
+
+                let decompr = zstd::stream::read::Decoder::new(csum_reader)?;
+                Ok(Self {
+                    state: BlobReaderState::Compressed {
+                        expected_crc,
+                        decompr,
+                    },
+                })
+            }
+            file_formats::ENCRYPTED_BLOB_MAGIC_1_0 => {
+                let config = config
+                    .ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
+                let expected_crc = u32::from_le_bytes(head.crc);
+                let mut iv = [0u8; 16];
+                let mut expected_tag = [0u8; 16];
+                reader.read_exact(&mut iv)?;
+                reader.read_exact(&mut expected_tag)?;
+                let csum_reader = ChecksumReader::new(reader, None);
+                let decrypt_reader = CryptReader::new(
+                    BufReader::with_capacity(64 * 1024, csum_reader),
+                    iv,
+                    expected_tag,
+                    config,
+                )?;
+                Ok(Self {
+                    state: BlobReaderState::Encrypted {
+                        expected_crc,
+                        decrypt_reader,
+                    },
+                })
+            }
+            file_formats::ENCR_COMPR_BLOB_MAGIC_1_0 => {
+                let config = config
+                    .ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
+                let expected_crc = u32::from_le_bytes(head.crc);
+                let mut iv = [0u8; 16];
+                let mut expected_tag = [0u8; 16];
+                reader.read_exact(&mut iv)?;
+                reader.read_exact(&mut expected_tag)?;
+                let csum_reader = ChecksumReader::new(reader, None);
+                let decrypt_reader = CryptReader::new(
+                    BufReader::with_capacity(64 * 1024, csum_reader),
+                    iv,
+                    expected_tag,
+                    config,
+                )?;
+                let decompr = zstd::stream::read::Decoder::new(decrypt_reader)?;
+                Ok(Self {
+                    state: BlobReaderState::EncryptedCompressed {
+                        expected_crc,
+                        decompr,
+                    },
+                })
+            }
+            _ => bail!("got wrong magic number {:?}", head.magic),
+        }
+    }
+
+    pub fn finish(self) -> Result<R, Error> {
+        match self.state {
+            BlobReaderState::Uncompressed {
+                csum_reader,
+                expected_crc,
+            } => {
+                let (reader, crc, _) = csum_reader.finish()?;
+                if crc != expected_crc {
+                    bail!("blob crc check failed");
+                }
+                Ok(reader)
+            }
+            BlobReaderState::Compressed {
+                expected_crc,
+                decompr,
+            } => {
+                let csum_reader = decompr.finish().into_inner();
+                let (reader, crc, _) = csum_reader.finish()?;
+                if crc != expected_crc {
+                    bail!("blob crc check failed");
+                }
+                Ok(reader)
+            }
+            BlobReaderState::Encrypted {
+                expected_crc,
+                decrypt_reader,
+            } => {
+                let csum_reader = decrypt_reader.finish()?.into_inner();
+                let (reader, crc, _) = csum_reader.finish()?;
+                if crc != expected_crc {
+                    bail!("blob crc check failed");
+                }
+                Ok(reader)
+            }
+            BlobReaderState::EncryptedCompressed {
+                expected_crc,
+                decompr,
+            } => {
+                let decrypt_reader = decompr.finish().into_inner();
+                let csum_reader = decrypt_reader.finish()?.into_inner();
+                let (reader, crc, _) = csum_reader.finish()?;
+                if crc != expected_crc {
+                    bail!("blob crc check failed");
+                }
+                Ok(reader)
+            }
+        }
+    }
+}
+
+impl<R: Read> Read for DataBlobReader<'_, R> {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
+        match &mut self.state {
+            BlobReaderState::Uncompressed { csum_reader, .. } => csum_reader.read(buf),
+            BlobReaderState::Compressed { decompr, .. } => decompr.read(buf),
+            BlobReaderState::Encrypted { decrypt_reader, .. } => decrypt_reader.read(buf),
+            BlobReaderState::EncryptedCompressed { decompr, .. } => decompr.read(buf),
+        }
+    }
+}
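
A quick usage sketch for the reader (hypothetical blob path, no encryption key): the magic number selects the decoding stack, the blob payload is read through the ordinary `Read` interface, and `finish()` verifies the CRC stored in the header once the stream is consumed:

```rust
use std::fs::File;
use std::io::Read;

use anyhow::Error;
use pbs_datastore::data_blob_reader::DataBlobReader;

fn main() -> Result<(), Error> {
    let file = File::open("/path/to/some.blob")?; // hypothetical blob file

    // `None`: no CryptConfig available, so encrypted blobs are rejected
    // with "unable to read encrypted blob without key".
    let mut reader = DataBlobReader::new(file, None)?;

    let mut payload = Vec::new();
    reader.read_to_end(&mut payload)?;

    // Consumes the reader, checks the expected CRC, and hands back the inner File.
    let _file = reader.finish()?;
    println!("decoded {} bytes", payload.len());
    Ok(())
}
```
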
diff --git a/src/backup/data_blob_writer.rs b/pbs-datastore/src/data_blob_writer.rs
similarity index 51%
rename from src/backup/data_blob_writer.rs
rename to pbs-datastore/src/data_blob_writer.rs
index 82bd14c6..200aac1c 100644
--- a/src/backup/data_blob_writer.rs
+++ b/pbs-datastore/src/data_blob_writer.rs
@@ -1,15 +1,26 @@
-use anyhow::{Error};
-use std::sync::Arc;
-use std::io::{Write, Seek, SeekFrom};
+use anyhow::Error;
 use proxmox::tools::io::WriteExt;
+use std::io::{Seek, SeekFrom, Write};
+use std::sync::Arc;
 
-use super::*;
+use crate::checksum_writer::ChecksumWriter;
+use crate::crypt_config::CryptConfig;
+use crate::crypt_writer::CryptWriter;
+use crate::file_formats::{self, DataBlobHeader, EncryptedDataBlobHeader};
 
 enum BlobWriterState<'writer, W: Write> {
-    Uncompressed { csum_writer: ChecksumWriter<W> },
-    Compressed { compr: zstd::stream::write::Encoder<'writer, ChecksumWriter<W>> },
-    Encrypted { crypt_writer: CryptWriter<ChecksumWriter<W>> },
-    EncryptedCompressed { compr: zstd::stream::write::Encoder<'writer, CryptWriter<ChecksumWriter<W>>> },
+    Uncompressed {
+        csum_writer: ChecksumWriter<W>,
+    },
+    Compressed {
+        compr: zstd::stream::write::Encoder<'writer, ChecksumWriter<W>>,
+    },
+    Encrypted {
+        crypt_writer: CryptWriter<ChecksumWriter<W>>,
+    },
+    EncryptedCompressed {
+        compr: zstd::stream::write::Encoder<'writer, CryptWriter<ChecksumWriter<W>>>,
+    },
 }
 
 /// Data blob writer
@@ -17,33 +28,45 @@ pub struct DataBlobWriter<'writer, W: Write> {
     state: BlobWriterState<'writer, W>,
 }
 
-impl<W: Write + Seek> DataBlobWriter<'_, W> {
-
+impl<W: Write + Seek> DataBlobWriter<'_, W> {
     pub fn new_uncompressed(mut writer: W) -> Result<Self, Error> {
         writer.seek(SeekFrom::Start(0))?;
-        let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
+        let head = DataBlobHeader {
+            magic: file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0,
+            crc: [0; 4],
+        };
         unsafe {
             writer.write_le_value(head)?;
         }
         let csum_writer = ChecksumWriter::new(writer, None);
-        Ok(Self { state: BlobWriterState::Uncompressed { csum_writer }})
+        Ok(Self {
+            state: BlobWriterState::Uncompressed { csum_writer },
+        })
     }
 
     pub fn new_compressed(mut writer: W) -> Result<Self, Error> {
-        writer.seek(SeekFrom::Start(0))?;
-        let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
+        writer.seek(SeekFrom::Start(0))?;
+        let head = DataBlobHeader {
+            magic: file_formats::COMPRESSED_BLOB_MAGIC_1_0,
+            crc: [0; 4],
+        };
         unsafe {
             writer.write_le_value(head)?;
         }
         let csum_writer = ChecksumWriter::new(writer, None);
         let compr = zstd::stream::write::Encoder::new(csum_writer, 1)?;
-        Ok(Self { state: BlobWriterState::Compressed { compr }})
+        Ok(Self {
+            state: BlobWriterState::Compressed { compr },
+        })
     }
 
     pub fn new_encrypted(mut writer: W, config: Arc<CryptConfig>) -> Result<Self, Error> {
         writer.seek(SeekFrom::Start(0))?;
         let head = EncryptedDataBlobHeader {
-            head: DataBlobHeader { magic: ENCRYPTED_BLOB_MAGIC_1_0, crc: [0; 4] },
+            head: DataBlobHeader {
+                magic: file_formats::ENCRYPTED_BLOB_MAGIC_1_0,
+                crc: [0; 4],
+            },
             iv: [0u8; 16],
             tag: [0u8; 16],
         };
@@ -52,14 +75,22 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
         }
         let csum_writer = ChecksumWriter::new(writer, None);
-        let crypt_writer = CryptWriter::new(csum_writer, config)?;
-        Ok(Self { state: BlobWriterState::Encrypted { crypt_writer }})
+        let crypt_writer = CryptWriter::new(csum_writer, config)?;
+        Ok(Self {
+            state: BlobWriterState::Encrypted { crypt_writer },
+        })
     }
 
-    pub fn new_encrypted_compressed(mut writer: W, config: Arc<CryptConfig>) -> Result<Self, Error> {
+    pub fn new_encrypted_compressed(
+        mut writer: W,
+        config: Arc<CryptConfig>,
+    ) -> Result<Self, Error> {
         writer.seek(SeekFrom::Start(0))?;
         let head = EncryptedDataBlobHeader {
-            head: DataBlobHeader { magic: ENCR_COMPR_BLOB_MAGIC_1_0, crc: [0; 4] },
+            head: DataBlobHeader {
+                magic: file_formats::ENCR_COMPR_BLOB_MAGIC_1_0,
+                crc: [0; 4],
+            },
             iv: [0u8; 16],
             tag: [0u8; 16],
         };
@@ -68,9 +99,11 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
         }
         let csum_writer = ChecksumWriter::new(writer, None);
-        let crypt_writer = CryptWriter::new(csum_writer, config)?;
+        let crypt_writer = CryptWriter::new(csum_writer, config)?;
         let compr = zstd::stream::write::Encoder::new(crypt_writer, 1)?;
-        Ok(Self { state: BlobWriterState::EncryptedCompressed { compr }})
+        Ok(Self {
+            state: BlobWriterState::EncryptedCompressed { compr },
+        })
     }
 
     pub fn finish(self) -> Result<W, Error> {
@@ -78,7 +111,10 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
             BlobWriterState::Uncompressed { csum_writer } => {
                 // write CRC
                 let (mut writer, crc, _) = csum_writer.finish()?;
-                let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
+                let head = DataBlobHeader {
+                    magic: file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0,
+                    crc: crc.to_le_bytes(),
+                };
 
                 writer.seek(SeekFrom::Start(0))?;
                 unsafe {
@@ -91,7 +127,10 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
                 let csum_writer = compr.finish()?;
                 let (mut writer, crc, _) = csum_writer.finish()?;
 
-                let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
+                let head = DataBlobHeader {
+                    magic: file_formats::COMPRESSED_BLOB_MAGIC_1_0,
+                    crc: crc.to_le_bytes(),
+                };
 
                 writer.seek(SeekFrom::Start(0))?;
                 unsafe {
@@ -105,8 +144,12 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
                 let (mut writer, crc, _) = csum_writer.finish()?;
 
                 let head = EncryptedDataBlobHeader {
-                    head: DataBlobHeader { magic: ENCRYPTED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
-                    iv, tag,
+                    head: DataBlobHeader {
+                        magic: file_formats::ENCRYPTED_BLOB_MAGIC_1_0,
+                        crc: crc.to_le_bytes(),
+                    },
+                    iv,
+                    tag,
                 };
                 writer.seek(SeekFrom::Start(0))?;
                 unsafe {
@@ -120,8 +163,12 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
                 let (mut writer, crc, _) = csum_writer.finish()?;
 
                 let head = EncryptedDataBlobHeader {
-                    head: DataBlobHeader { magic: ENCR_COMPR_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
-                    iv, tag,
+                    head: DataBlobHeader {
+                        magic: file_formats::ENCR_COMPR_BLOB_MAGIC_1_0,
+                        crc: crc.to_le_bytes(),
+                    },
+                    iv,
+                    tag,
                 };
                 writer.seek(SeekFrom::Start(0))?;
                 unsafe {
@@ -133,39 +180,30 @@ impl<W: Write + Seek> DataBlobWriter<'_, W> {
     }
 }
 
-impl<W: Write + Seek> Write for DataBlobWriter<'_, W> {
-
+impl<W: Write + Seek> Write for DataBlobWriter<'_, W> {
     fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
         match self.state {
-            BlobWriterState::Uncompressed { ref mut csum_writer } => {
-                csum_writer.write(buf)
-            }
-            BlobWriterState::Compressed { ref mut compr } => {
-                compr.write(buf)
-            }
-            BlobWriterState::Encrypted { ref mut crypt_writer } => {
-                crypt_writer.write(buf)
-            }
-            BlobWriterState::EncryptedCompressed { ref mut compr } => {
-                compr.write(buf)
-            }
+            BlobWriterState::Uncompressed {
+                ref mut csum_writer,
+            } => csum_writer.write(buf),
+            BlobWriterState::Compressed { ref mut compr } => compr.write(buf),
+            BlobWriterState::Encrypted {
+                ref mut crypt_writer,
+            } => crypt_writer.write(buf),
+            BlobWriterState::EncryptedCompressed { ref mut compr } => compr.write(buf),
         }
     }
 
     fn flush(&mut self) -> Result<(), std::io::Error> {
         match self.state {
-            BlobWriterState::Uncompressed { ref mut csum_writer } => {
-                csum_writer.flush()
-            }
-            BlobWriterState::Compressed { ref mut compr } => {
-                compr.flush()
-            }
-            BlobWriterState::Encrypted { ref mut crypt_writer } => {
-                crypt_writer.flush()
-            }
-            BlobWriterState::EncryptedCompressed { ref mut compr } => {
-                compr.flush()
-            }
+            BlobWriterState::Uncompressed {
+                ref mut csum_writer,
+            } => csum_writer.flush(),
+            BlobWriterState::Compressed { ref mut compr } => compr.flush(),
+            BlobWriterState::Encrypted {
+                ref mut crypt_writer,
+            } => crypt_writer.flush(),
+            BlobWriterState::EncryptedCompressed { ref mut compr } => compr.flush(),
        }
    }
}
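
The writer side mirrors the reader: a placeholder header with a zeroed CRC is written first, data flows through the chosen stack, and `finish()` seeks back to offset 0 to patch in the real CRC (hence the `Write + Seek` bound). A minimal sketch with a hypothetical output path:

```rust
use std::fs::File;
use std::io::Write;

use anyhow::Error;
use pbs_datastore::data_blob_writer::DataBlobWriter;

fn main() -> Result<(), Error> {
    // File implements Write + Seek, as required for patching the header.
    let file = File::create("/tmp/example.blob")?; // hypothetical output path
    let mut writer = DataBlobWriter::new_compressed(file)?;
    writer.write_all(b"some config file contents")?;

    // Flushes the zstd stream, computes the CRC via the ChecksumWriter,
    // then rewrites the header at offset 0 and returns the inner File.
    let _file = writer.finish()?;
    Ok(())
}
```
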
diff --git a/src/backup/file_formats.rs b/pbs-datastore/src/file_formats.rs
similarity index 100%
rename from src/backup/file_formats.rs
rename to pbs-datastore/src/file_formats.rs
diff --git a/src/backup/index.rs b/pbs-datastore/src/index.rs
similarity index 100%
rename from src/backup/index.rs
rename to pbs-datastore/src/index.rs
diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs
new file mode 100644
index 00000000..0a24176d
--- /dev/null
+++ b/pbs-datastore/src/lib.rs
@@ -0,0 +1,199 @@
+//! This module implements the data storage and access layer.
+//!
+//! # Data formats
+//!
+//! PBS splits large files into chunks, and stores them deduplicated using
+//! a content addressable storage format.
+//!
+//! Backup snapshots are stored as folders containing a manifest file and
+//! potentially one or more index or blob files.
+//!
+//! The manifest contains hashes of all other files and can be signed by
+//! the client.
+//!
+//! Blob files contain data directly. They are used for config files and
+//! the like.
+//!
+//! Index files are used to reconstruct an original file. They contain a
+//! list of SHA256 checksums. The `DynamicIndex*` format is able to deal
+//! with dynamic chunk sizes (CT and host backups), whereas the
+//! `FixedIndex*` format is an optimization to store a list of equal sized
+//! chunks (VMs, whole block devices).
+//!
+//! A chunk is defined as a binary blob, which is stored inside a
+//! [ChunkStore](struct.ChunkStore.html) instead of the backup directory
+//! directly, and can be addressed by its SHA256 digest.
+//!
+//!
+//! # Garbage Collection (GC)
+//!
+//! Deleting backups is as easy as deleting the corresponding .idx files.
+//! However, this does not free up any storage, because those files just
+//! contain references to chunks.
+//!
+//! To free up some storage, we run a garbage collection process at
+//! regular intervals. The collector uses a mark and sweep approach. In
+//! the first phase, it scans all .idx files to mark used chunks. The
+//! second phase then removes all unmarked chunks from the store.
+//!
+//! The locking mechanisms mentioned below make sure that we are the only
+//! process running GC. We still want to be able to create backups during
+//! GC, so there may be multiple backup threads/tasks running, either
+//! started before GC, or while GC is running.
+//!
+//! ## `atime` based GC
+//!
+//! The idea here is to mark chunks by updating the `atime` (access
+//! timestamp) on the chunk file. This is quite simple and does not need
+//! additional RAM.
+//!
+//! One minor problem is that recent Linux versions use the `relatime`
+//! mount flag by default for performance reasons (and we want that). When
+//! enabled, `atime` data is written to the disk only if the file has been
+//! modified since the `atime` data was last updated (`mtime`), or if the
+//! file was last accessed more than a certain amount of time ago (by
+//! default 24h). So we may only delete chunks with `atime` older than 24
+//! hours.
+//!
+//! Another problem arises from running backups. The mark phase does not
+//! find any chunks from those backups, because there is no .idx file for
+//! them (created after the backup). Chunks created or touched by those
+//! backups may have an `atime` as old as the start time of those backups.
+//! Please note that the backup start time may predate the GC start time.
+//! So we may only delete chunks older than the start time of those
+//! running backup jobs, which might be more than 24h back (this is the
+//! reason why ProcessLocker exclusive locks only have to be exclusive
+//! between processes, since within one we can determine the age of the
+//! oldest shared lock).
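
The resulting sweep cutoff can be condensed into a few lines. This is only an illustration of the rule described above (hypothetical helper, not the actual GC code, which derives these values from ProcessLocker's oldest shared lock and the GC start time):

```rust
use std::time::{Duration, SystemTime};

/// Chunks with an `atime` older than the returned instant may be swept.
fn sweep_cutoff(
    gc_start: SystemTime,
    oldest_running_backup: Option<SystemTime>,
) -> SystemTime {
    // `relatime` only guarantees that atime updates older than ~24h hit the disk.
    let relatime_safe = gc_start - Duration::from_secs(24 * 3600);
    match oldest_running_backup {
        // A running backup may have touched chunks with atimes as old as its start.
        Some(start) => relatime_safe.min(start),
        None => relatime_safe,
    }
}
```
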
+//!
+//! ## Store `marks` in RAM using a HASH
+//!
+//! Might be better. Under investigation.
+//!
+//!
+//! # Locking
+//!
+//! Since PBS allows multiple potentially interfering operations at the
+//! same time (e.g. garbage collect, prune, multiple backup creations
+//! (only in separate groups), forget, ...), these need to lock against
+//! each other in certain scenarios. There is no overarching global lock
+//! though, instead always the finest grained lock possible is used,
+//! because running these operations concurrently is treated as a feature
+//! on its own.
+//!
+//! ## Inter-process Locking
+//!
+//! We need to be able to restart the proxmox-backup service daemons, so
+//! that we can update the software without rebooting the host. But such
+//! restarts must not abort running backup jobs, so we need to keep the
+//! old service running until those jobs are finished. This implies that
+//! we need some kind of locking for modifying chunks and indices in the
+//! ChunkStore.
+//!
+//! Please note that it is perfectly valid to have multiple
+//! parallel ChunkStore writers, even when they write the same chunk
+//! (because the chunk would have the same name and the same data, and
+//! writes are completed atomically via a rename). The only problem is
+//! garbage collection, because we need to avoid deleting chunks which are
+//! still referenced.
+//!
+//! To do this we use the
+//! [ProcessLocker](../tools/struct.ProcessLocker.html).
+//!
+//! ### ChunkStore-wide
+//!
+//! * Create Index Files:
+//!
+//!   Acquire shared lock for ChunkStore.
+//!
+//!   Note: When creating .idx files, we create a temporary .tmp file,
+//!   then do an atomic rename.
+//!
+//! * Garbage Collect:
+//!
+//!   Acquire exclusive lock for ChunkStore. If we have
+//!   already a shared lock for the ChunkStore, try to upgrade that
+//!   lock.
+//!
+//! Exclusive locks only work _between processes_. It is valid to have an
+//! exclusive and one or more shared locks held within one process. Writing
+//! chunks within one process is synchronized using the gc_mutex.
+//!
+//! On server restart, we stop any running GC in the old process to avoid
+//! having the exclusive lock held for too long.
+//!
+//! ## Locking table
+//!
+//! The table below shows all operations that play a role in locking, and which
+//! mechanisms are used to make their concurrent usage safe.
+//!
+//! | starting ><br>v during | read index file | create index file | GC mark | GC sweep | update manifest | forget | prune | create backup | verify | reader api |
+//! |-|-|-|-|-|-|-|-|-|-|-|
+//! | **read index file** | / | / | / | / | / | mmap stays valid, oldest_shared_lock prevents GC | see forget column | / | / | / |
+//! | **create index file** | / | / | / | / | / | / | / | /, happens at the end, after all chunks are touched | /, only happens without a manifest | / |
+//! | **GC mark** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | /, GC only cares about index files, not manifests | tells GC about removed chunks | see forget column | /, index files don't exist yet | / | / |
+//! | **GC sweep** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | / | /, chunks already marked | see forget column | chunks get touched; chunk_store.mutex; oldest PL lock | / | / |
+//! | **update manifest** | / | / | / | / | update_manifest lock | update_manifest lock, remove dir under lock | see forget column | /, "write manifest" happens at the end | /, can call "write manifest", see that column | / |
+//! | **forget** | / | / | removed_during_gc mutex is held during unlink | marking done, doesn't matter if forgotten now | update_manifest lock, forget waits for lock | /, unlink is atomic | causes forget to fail, but that's OK | running backup has snapshot flock | /, potentially detects missing folder | shared snap flock |
+//! | **prune** | / | / | see forget row | see forget row | see forget row | causes warn in prune, but no error | see forget column | running and last non-running can't be pruned | see forget row | shared snap flock |
+//! | **create backup** | / | only time this happens, thus has snapshot flock | / | chunks get touched; chunk_store.mutex; oldest PL lock | no lock, but cannot exist beforehand | snapshot flock, can't be forgotten | running and last non-running can't be pruned | snapshot group flock, only one running per group | /, won't be verified since manifest missing | / |
+//! | **verify** | / | / | / | / | see "update manifest" row | /, potentially detects missing folder | see forget column | / | /, but useless ("update manifest" protects itself) | / |
+//! | **reader api** | / | / | / | /, open snap can't be forgotten, so ref must exist | / | prevented by shared snap flock | prevented by shared snap flock | / | / | /, lock is shared |
+//!
+//! * / = no interaction
+//! * shared/exclusive from POV of 'starting' process
+
+use anyhow::{format_err, Error};
+
+// Note: .pcat1 => Proxmox Catalog Format version 1
+pub const CATALOG_NAME: &str = "catalog.pcat1.didx";
+
+#[macro_export]
+macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
+    () => {
+        "proxmox-backup-protocol-v1"
+    };
+}
+
+#[macro_export]
+macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
+    () => {
+        "proxmox-backup-reader-protocol-v1"
+    };
+}
+
+/// Unix system user used by proxmox-backup-proxy
+pub const BACKUP_USER_NAME: &str = "backup";
+/// Unix system group used by proxmox-backup-proxy
+pub const BACKUP_GROUP_NAME: &str = "backup";
+
+/// Return User info for the 'backup' user (``getpwnam_r(3)``)
+pub fn backup_user() -> Result<nix::unistd::User, Error> {
+    nix::unistd::User::from_name(BACKUP_USER_NAME)?
+        .ok_or_else(|| format_err!("Unable to lookup backup user."))
+}
+
+/// Return Group info for the 'backup' group (``getgrnam(3)``)
+pub fn backup_group() -> Result<nix::unistd::Group, Error> {
+    nix::unistd::Group::from_name(BACKUP_GROUP_NAME)?
+        .ok_or_else(|| format_err!("Unable to lookup backup group."))
+}
+
+pub mod catalog;
+pub mod checksum_reader;
+pub mod checksum_writer;
+pub mod chunker;
+pub mod crypt_config;
+pub mod crypt_reader;
+pub mod crypt_writer;
+pub mod data_blob;
+pub mod data_blob_reader;
+pub mod data_blob_writer;
+pub mod file_formats;
+pub mod index;
+
+pub use checksum_reader::ChecksumReader;
+pub use checksum_writer::ChecksumWriter;
+pub use chunker::Chunker;
+pub use crypt_config::{CryptConfig, CryptMode};
+pub use crypt_reader::CryptReader;
+pub use crypt_writer::CryptWriter;
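
As a usage sketch, the two lookup helpers resolve to uid/gid values, e.g. for fixing ownership of a datastore directory (hypothetical path; `nix::unistd::chown` is shown only for illustration, this is not code from this patch):

```rust
use anyhow::Error;

fn fix_datastore_ownership() -> Result<(), Error> {
    let user = pbs_datastore::backup_user()?;   // getpwnam_r(3) lookup
    let group = pbs_datastore::backup_group()?; // getgrnam(3) lookup

    // Hypothetical datastore path; chown it to backup:backup.
    nix::unistd::chown("/datastore/example", Some(user.uid), Some(group.gid))?;
    Ok(())
}
```
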
diff --git a/src/api2/types/mod.rs b/src/api2/types/mod.rs
index 9d24f620..c9ba6db6 100644
--- a/src/api2/types/mod.rs
+++ b/src/api2/types/mod.rs
@@ -7,12 +7,13 @@ use proxmox::api::{api, schema::*};
 use proxmox::const_regex;
 use proxmox::{IPRE, IPRE_BRACKET, IPV4RE, IPV6RE, IPV4OCTET, IPV6H16, IPV6LS32};
 
+use pbs_datastore::catalog::CatalogEntryType;
+
 use crate::{
     backup::{
         CryptMode,
         Fingerprint,
         DirEntryAttribute,
-        CatalogEntryType,
     },
     server::UPID,
     config::acl::Role,
diff --git a/src/backup/data_blob_reader.rs b/src/backup/data_blob_reader.rs
deleted file mode 100644
index 11a4613e..00000000
--- a/src/backup/data_blob_reader.rs
+++ /dev/null
@@ -1,125 +0,0 @@
-use anyhow::{bail, format_err, Error};
-use std::sync::Arc;
-use std::io::{Read, BufReader};
-use proxmox::tools::io::ReadExt;
-
-use super::*;
-
-enum BlobReaderState<'reader, R: Read> {
-    Uncompressed { expected_crc: u32, csum_reader: ChecksumReader<R> },
-    Compressed { expected_crc: u32, decompr: zstd::stream::read::Decoder<'reader, BufReader<ChecksumReader<R>>> },
-    Encrypted { expected_crc: u32, decrypt_reader: CryptReader<BufReader<ChecksumReader<R>>> },
-    EncryptedCompressed { expected_crc: u32, decompr: zstd::stream::read::Decoder<'reader, BufReader<CryptReader<BufReader<ChecksumReader<R>>>>> },
-}
-
-/// Read data blobs
-pub struct DataBlobReader<'reader, R: Read> {
-    state: BlobReaderState<'reader, R>,
-}
-
-// zstd_safe::DCtx is not sync but we are, since
-// the only public interface is on mutable reference
-unsafe impl<R: Read> Sync for DataBlobReader<'_, R> {}
-
-impl<R: Read> DataBlobReader<'_, R> {
-
-    pub fn new(mut reader: R, config: Option<Arc<CryptConfig>>) -> Result<Self, Error> {
-
-        let head: DataBlobHeader = unsafe { reader.read_le_value()? };
-        match head.magic {
-            UNCOMPRESSED_BLOB_MAGIC_1_0 => {
-                let expected_crc = u32::from_le_bytes(head.crc);
-                let csum_reader = ChecksumReader::new(reader, None);
-                Ok(Self { state: BlobReaderState::Uncompressed { expected_crc, csum_reader }})
-            }
-            COMPRESSED_BLOB_MAGIC_1_0 => {
-                let expected_crc = u32::from_le_bytes(head.crc);
-                let csum_reader = ChecksumReader::new(reader, None);
-
-                let decompr = zstd::stream::read::Decoder::new(csum_reader)?;
-                Ok(Self { state: BlobReaderState::Compressed { expected_crc, decompr }})
-            }
-            ENCRYPTED_BLOB_MAGIC_1_0 => {
-                let config = config.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
-                let expected_crc = u32::from_le_bytes(head.crc);
-                let mut iv = [0u8; 16];
-                let mut expected_tag = [0u8; 16];
-                reader.read_exact(&mut iv)?;
-                reader.read_exact(&mut expected_tag)?;
-                let csum_reader = ChecksumReader::new(reader, None);
-                let decrypt_reader = CryptReader::new(BufReader::with_capacity(64*1024, csum_reader), iv, expected_tag, config)?;
-                Ok(Self { state: BlobReaderState::Encrypted { expected_crc, decrypt_reader }})
-            }
-            ENCR_COMPR_BLOB_MAGIC_1_0 => {
-                let config = config.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
-                let expected_crc = u32::from_le_bytes(head.crc);
-                let mut iv = [0u8; 16];
-                let mut expected_tag = [0u8; 16];
-                reader.read_exact(&mut iv)?;
-                reader.read_exact(&mut expected_tag)?;
-                let csum_reader = ChecksumReader::new(reader, None);
-                let decrypt_reader = CryptReader::new(BufReader::with_capacity(64*1024, csum_reader), iv, expected_tag, config)?;
-                let decompr = zstd::stream::read::Decoder::new(decrypt_reader)?;
-                Ok(Self { state: BlobReaderState::EncryptedCompressed { expected_crc, decompr }})
-            }
-            _ => bail!("got wrong magic number {:?}", head.magic)
-        }
-    }
-
-    pub fn finish(self) -> Result<R, Error> {
-        match self.state {
-            BlobReaderState::Uncompressed { csum_reader, expected_crc } => {
-                let (reader, crc, _) = csum_reader.finish()?;
-                if crc != expected_crc {
-                    bail!("blob crc check failed");
-                }
-                Ok(reader)
-            }
-            BlobReaderState::Compressed { expected_crc, decompr } => {
-                let csum_reader = decompr.finish().into_inner();
-                let (reader, crc, _) = csum_reader.finish()?;
-                if crc != expected_crc {
-                    bail!("blob crc check failed");
-                }
-                Ok(reader)
-            }
-            BlobReaderState::Encrypted { expected_crc, decrypt_reader } => {
-                let csum_reader = decrypt_reader.finish()?.into_inner();
-                let (reader, crc, _) = csum_reader.finish()?;
-                if crc != expected_crc {
-                    bail!("blob crc check failed");
-                }
-                Ok(reader)
-            }
-            BlobReaderState::EncryptedCompressed { expected_crc, decompr } => {
-                let decrypt_reader = decompr.finish().into_inner();
-                let csum_reader = decrypt_reader.finish()?.into_inner();
-                let (reader, crc, _) = csum_reader.finish()?;
-                if crc != expected_crc {
-                    bail!("blob crc check failed");
-                }
-                Ok(reader)
-            }
-        }
-    }
-}
-
-impl<R: Read> Read for DataBlobReader<'_, R> {
-
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
-        match &mut self.state {
-            BlobReaderState::Uncompressed { csum_reader, .. } => {
-                csum_reader.read(buf)
-            }
-            BlobReaderState::Compressed { decompr, .. } => {
-                decompr.read(buf)
-            }
-            BlobReaderState::Encrypted { decrypt_reader, .. } => {
-                decrypt_reader.read(buf)
-            }
-            BlobReaderState::EncryptedCompressed { decompr, .. } => {
-                decompr.read(buf)
-            }
-        }
-    }
-}
diff --git a/src/backup/mod.rs b/src/backup/mod.rs
index 7bf29a5a..3340ae55 100644
--- a/src/backup/mod.rs
+++ b/src/backup/mod.rs
@@ -178,44 +178,36 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
     }
 }
 
-mod file_formats;
-pub use file_formats::*;
-
-mod manifest;
-pub use manifest::*;
-
-mod crypt_config;
-pub use crypt_config::*;
+pub use pbs_datastore::catalog::*;
+pub use pbs_datastore::catalog;
+pub use pbs_datastore::file_formats::*;
+pub use pbs_datastore::file_formats;
+pub use pbs_datastore::index::*;
+pub use pbs_datastore::index;
+pub use pbs_datastore::crypt_config;
+pub use pbs_datastore::crypt_config::*;
+pub use pbs_datastore::crypt_reader;
+pub use pbs_datastore::crypt_reader::*;
+pub use pbs_datastore::crypt_writer;
+pub use pbs_datastore::crypt_writer::*;
+pub use pbs_datastore::data_blob;
+pub use pbs_datastore::data_blob::*;
+pub use pbs_datastore::data_blob_reader;
+pub use pbs_datastore::data_blob_reader::*;
+pub use pbs_datastore::data_blob_writer;
+pub use pbs_datastore::data_blob_writer::*;
+pub use pbs_datastore::checksum_reader;
+pub use pbs_datastore::checksum_reader::*;
+pub use pbs_datastore::checksum_writer;
+pub use pbs_datastore::checksum_writer::*;
+pub use pbs_datastore::chunker;
+pub use pbs_datastore::chunker::*;
 
 mod key_derivation;
 pub use key_derivation::*;
 
-mod crypt_reader;
-pub use crypt_reader::*;
-
-mod crypt_writer;
-pub use crypt_writer::*;
-
-mod checksum_reader;
-pub use checksum_reader::*;
-
-mod checksum_writer;
-pub use checksum_writer::*;
-
-mod chunker;
-pub use chunker::*;
-
-mod data_blob;
-pub use data_blob::*;
-
-mod data_blob_reader;
-pub use data_blob_reader::*;
-
-mod data_blob_writer;
-pub use data_blob_writer::*;
-
-mod catalog;
-pub use catalog::*;
+mod manifest;
+pub use manifest::*;
 
 mod chunk_stream;
 pub use chunk_stream::*;
@@ -229,9 +221,6 @@ pub use read_chunk::*;
 mod chunk_store;
 pub use chunk_store::*;
 
-mod index;
-pub use index::*;
-
 mod fixed_index;
 pub use fixed_index::*;
diff --git a/src/bin/proxmox-backup-client.rs b/src/bin/proxmox-backup-client.rs
index 6ec0a805..b110763e 100644
--- a/src/bin/proxmox-backup-client.rs
+++ b/src/bin/proxmox-backup-client.rs
@@ -30,6 +30,8 @@ use proxmox::{
 };
 use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
 
+use pbs_datastore::catalog::BackupCatalogWriter;
+
 use proxmox_backup::tools::{
     self,
     StdChannelWriter,
@@ -38,7 +40,6 @@ use proxmox_backup::tools::{
 use proxmox_backup::api2::types::*;
 use proxmox_backup::api2::version;
 use proxmox_backup::client::*;
-use proxmox_backup::pxar::catalog::*;
 use proxmox_backup::backup::{
     archive_type,
     decrypt_key,
diff --git a/src/pxar/catalog.rs b/src/pxar/catalog.rs
deleted file mode 100644
index 02b57dfa..00000000
--- a/src/pxar/catalog.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-//! Trait for file list catalog
-//!
-//! A file list catalog simply store a directory tree. Such catalogs
-//! may be used as index to do a fast search for files.
-
-use anyhow::{Error};
-use std::ffi::CStr;
-
-pub trait BackupCatalogWriter {
-    fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
-    fn end_directory(&mut self) -> Result<(), Error>;
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
-    fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
-    fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
-    fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
-    fn add_char_device(&mut self, name: &CStr) -> Result<(), Error>;
-    fn add_fifo(&mut self, name: &CStr) -> Result<(), Error>;
-    fn add_socket(&mut self, name: &CStr) -> Result<(), Error>;
-}
diff --git a/src/pxar/create.rs b/src/pxar/create.rs
index 011e2d8e..a3ce3565 100644
--- a/src/pxar/create.rs
+++ b/src/pxar/create.rs
@@ -24,9 +24,9 @@ use proxmox::sys::error::SysError;
 use proxmox::tools::fd::RawFdNum;
 use proxmox::tools::vec;
 
+use pbs_datastore::catalog::BackupCatalogWriter;
 use pbs_tools::fs;
 
-use crate::pxar::catalog::BackupCatalogWriter;
 use crate::pxar::metadata::errno_is_unsupported;
 use crate::pxar::Flags;
 use crate::pxar::tools::assert_single_path_component;
diff --git a/src/pxar/mod.rs b/src/pxar/mod.rs
index 13eb9bd4..6ad913dc 100644
--- a/src/pxar/mod.rs
+++ b/src/pxar/mod.rs
@@ -47,7 +47,6 @@
 //! (user, group, acl, ...) because this is already defined by the
 //! linked `ENTRY`.
 
-pub mod catalog;
 pub(crate) mod create;
 pub(crate) mod dir_stack;
 pub(crate) mod extract;