From 66a5ebd000a4ee75bdb62ebfd9a40a0b0ac4859d Mon Sep 17 00:00:00 2001 From: iequidoo Date: Tue, 25 Jul 2023 20:37:48 -0300 Subject: [PATCH] feat: Use random filename suffixes for blobstorage (#4309) Recently there was an accident with a chatbot that replaced its avatar set from the command line with an unrelated avatar of a contact. Both the `selfavatar` setting and the contact avatar `i` param pointed to `$BLOBDIR/avatar.png` at the time it was detected. How this happened is unclear, but it is possible that `avatar.png` was removed, unmounted or otherwise not detected by the core, and the core stored avatar received from the contact as `avatar.png`, while `selfavatar` config still pointed to `$BLOBDIR/avatar.png`. Such bugs are unavoidable even if the core itself has no bugs as we cannot rely on blobdir not reside on the faulty network filesystem, being incorrectly backed up and restored etc., so we should assume that files may be randomly removed. Then there may be dangling `$BLOBDIR/...` references in the database which may accidentally point to unrelated files, could even be an `avatar.png` file sent to the bot in private. To prevent such bugs, we add random filename suffixes for the blobdir objects. Thanks to the added Param::Filename these random suffixes aren't sent over the network. --- deltachat-ffi/deltachat.h | 3 +- python/src/deltachat/message.py | 3 +- python/tests/test_3_offline.py | 17 +++-- src/blob.rs | 109 ++++++++++++++++++-------------- src/chat.rs | 7 +- src/config.rs | 8 +-- src/constants.rs | 3 + src/message.rs | 3 + src/mimeparser.rs | 7 +- 9 files changed, 93 insertions(+), 67 deletions(-) diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h index 8d12301618..0a8b3b2a6f 100644 --- a/deltachat-ffi/deltachat.h +++ b/deltachat-ffi/deltachat.h @@ -4095,7 +4095,8 @@ char* dc_msg_get_subject (const dc_msg_t* msg); * * Typically files are associated with images, videos, audios, documents. * Plain text messages do not have a file. - * File name may be mangled. To obtain the original attachment filename use dc_msg_get_filename(). + * The file name is mangled -- a random suffix is added before the extension. This suffix is + * preserved across calls. To obtain the original attachment filename use dc_msg_get_filename(). * * @memberof dc_msg_t * @param msg The message object. diff --git a/python/src/deltachat/message.py b/python/src/deltachat/message.py index 9ffd980c19..491a6520d9 100644 --- a/python/src/deltachat/message.py +++ b/python/src/deltachat/message.py @@ -108,7 +108,7 @@ def set_html(self, html_text): @props.with_doc def filename(self): - """filename if there was an attachment, otherwise empty string.""" + """file path if there was an attachment, otherwise empty string.""" return from_dc_charpointer(lib.dc_msg_get_file(self._dc_msg)) def set_file(self, path, mime_type=None): @@ -121,7 +121,6 @@ def set_file(self, path, mime_type=None): @props.with_doc def basename(self) -> str: """basename of the attachment if it exists, otherwise empty string.""" - # FIXME, it does not return basename return from_dc_charpointer(lib.dc_msg_get_filename(self._dc_msg)) @props.with_doc diff --git a/python/tests/test_3_offline.py b/python/tests/test_3_offline.py index 20a64e6e8e..74f3fbf62c 100644 --- a/python/tests/test_3_offline.py +++ b/python/tests/test_3_offline.py @@ -444,27 +444,30 @@ def test_message_image(self, chat1, data, lp): assert msg.filemime == "image/png" @pytest.mark.parametrize( - ("fn", "typein", "typeout"), + ("stem", "ext", "typein", "typeout"), [ - ("r", None, "application/octet-stream"), - ("r.txt", None, "text/plain"), - ("r.txt", "text/plain", "text/plain"), - ("r.txt", "image/png", "image/png"), + ("r", "", None, "application/octet-stream"), + ("r", ".txt", None, "text/plain"), + ("r", ".txt", "text/plain", "text/plain"), + ("r", ".txt", "image/png", "image/png"), ], ) - def test_message_file(self, chat1, data, lp, fn, typein, typeout): + def test_message_file(self, chat1, data, lp, stem, ext, typein, typeout): lp.sec("sending file") + fn = stem + ext fp = data.get_path(fn) msg = chat1.send_file(fp, typein) assert msg assert msg.id > 0 assert msg.is_file() assert os.path.exists(msg.filename) - assert msg.filename.endswith(msg.basename) + assert msg.filename.endswith(ext) + assert msg.basename == fn assert msg.filemime == typeout msg2 = chat1.send_file(fp, typein) assert msg2 != msg assert msg2.filename != msg.filename + assert msg2.basename == fn def test_create_contact(self, acfactory): ac1 = acfactory.get_pseudo_configured_account() diff --git a/src/blob.rs b/src/blob.rs index f9435c90b6..f58050f410 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -19,7 +19,7 @@ use tokio::{fs, io}; use tokio_stream::wrappers::ReadDirStream; use crate::config::Config; -use crate::constants::{self, MediaQuality}; +use crate::constants::{self, MediaQuality, BLOB_CREATE_ATTEMPTS}; use crate::context::Context; use crate::events::EventType; use crate::log::LogExt; @@ -80,10 +80,9 @@ impl<'a> BlobObject<'a> { stem: &str, ext: &str, ) -> Result<(String, fs::File)> { - const MAX_ATTEMPT: u32 = 16; let mut attempt = 0; - let mut name = format!("{stem}{ext}"); loop { + let name = format!("{}-{:016x}{}", stem, rand::random::(), ext); attempt += 1; let path = dir.join(&name); match fs::OpenOptions::new() @@ -94,12 +93,10 @@ impl<'a> BlobObject<'a> { { Ok(file) => return Ok((name, file)), Err(err) => { - if attempt >= MAX_ATTEMPT { + if attempt >= BLOB_CREATE_ATTEMPTS { return Err(err).context("failed to create file"); } else if attempt == 1 && !dir.exists() { fs::create_dir_all(dir).await.log_err(context).ok(); - } else { - name = format!("{}-{}{}", stem, rand::random::(), ext); } } } @@ -743,6 +740,7 @@ fn add_white_bg(img: &mut DynamicImage) { #[cfg(test)] mod tests { use fs::File; + use regex::Regex; use super::*; use crate::chat::{self, create_group_chat, ProtectionStatus}; @@ -762,32 +760,43 @@ mod tests { async fn test_create() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap(); - let fname = t.get_blobdir().join("foo"); + let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_file_name())); + let fname = t.get_blobdir().join(blob.as_file_name()); let data = fs::read(fname).await.unwrap(); assert_eq!(data, b"hello"); - assert_eq!(blob.as_name(), "$BLOBDIR/foo"); - assert_eq!(blob.to_abs_path(), t.get_blobdir().join("foo")); + assert_eq!( + blob.as_name(), + "$BLOBDIR/".to_string() + blob.as_file_name() + ); + assert_eq!( + blob.to_abs_path(), + t.get_blobdir().join(blob.as_file_name()) + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_lowercase_ext() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/foo.txt"); + let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_name())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_as_file_name() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - assert_eq!(blob.as_file_name(), "foo.txt"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_file_name())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_as_rel_path() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - assert_eq!(blob.as_rel_path(), Path::new("foo.txt")); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -802,30 +811,30 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_dup() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let foo_path = t.get_blobdir().join("foo.txt"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + + let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); + let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); - BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); - let mut dir = fs::read_dir(t.get_blobdir()).await.unwrap(); - while let Ok(Some(dirent)) = dir.next_entry().await { - let fname = dirent.file_name(); - if fname == foo_path.file_name().unwrap() { - assert_eq!(fs::read(&foo_path).await.unwrap(), b"hello"); - } else { - let name = fname.to_str().unwrap(); - assert!(name.starts_with("foo")); - assert!(name.ends_with(".txt")); - } - } + + let blob = BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); + let foo_path2 = t.get_blobdir().join(blob.as_file_name()); + assert!(foo_path2.exists()); + + assert!(foo_path != foo_path2); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_double_ext_preserved() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.tar.gz", b"hello") + let blob = BlobObject::create(&t, "foo.tar.gz", b"hello") .await .unwrap(); - let foo_path = t.get_blobdir().join("foo.tar.gz"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap(); + assert!(re.is_match(blob.as_file_name())); + let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); BlobObject::create(&t, "foo.tar.gz", b"world") .await @@ -859,7 +868,8 @@ mod tests { let src = t.dir.path().join("src"); fs::write(&src, b"boo").await.unwrap(); let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/src"); + let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -880,7 +890,8 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/external"); + let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -891,6 +902,7 @@ mod tests { let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_from_name_long() { let t = TestContext::new().await; @@ -899,10 +911,10 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!( - blob.as_name(), - "$BLOBDIR/autocrypt-setup-message-4137848473.html" - ); + let re = + Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$") + .unwrap(); + assert!(re.is_match(blob.as_name())); } #[test] @@ -994,19 +1006,21 @@ mod tests { let avatar_src = t.dir.path().join("avatar.jpg"); let avatar_bytes = include_bytes!("../test-data/image/avatar1000x1000.jpg"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.jpg"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap(); + assert!(re.is_match(&avatar_blob)); + let avatar_blob = Path::new(&avatar_blob); assert!(avatar_blob.exists()); assert!(fs::metadata(&avatar_blob).await.unwrap().len() < avatar_bytes.len() as u64); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); check_image_size(avatar_src, 1000, 1000); check_image_size( - &avatar_blob, + avatar_blob, constants::BALANCED_AVATAR_SIZE, constants::BALANCED_AVATAR_SIZE, ); @@ -1016,7 +1030,7 @@ mod tests { file.metadata().await.unwrap().len() } - let mut blob = BlobObject::new_from_path(&t, &avatar_blob).await.unwrap(); + let mut blob = BlobObject::new_from_path(&t, avatar_blob).await.unwrap(); let maybe_sticker = &mut false; let strict_limits = true; blob.recode_to_size( @@ -1028,8 +1042,8 @@ mod tests { strict_limits, ) .unwrap(); - assert!(file_size(&avatar_blob).await <= 3000); - assert!(file_size(&avatar_blob).await > 2000); + assert!(file_size(avatar_blob).await <= 3000); + assert!(file_size(avatar_blob).await > 2000); tokio::task::block_in_place(move || { let img = image::open(avatar_blob).unwrap(); assert!(img.width() > 130); @@ -1069,18 +1083,19 @@ mod tests { let avatar_src = t.dir.path().join("avatar.png"); let avatar_bytes = include_bytes!("../test-data/image/avatar64x64.png"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.png"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); - assert!(avatar_blob.exists()); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap(); + assert!(re.is_match(&avatar_blob)); + assert!(Path::new(&avatar_blob).exists()); assert_eq!( fs::metadata(&avatar_blob).await.unwrap().len(), avatar_bytes.len() as u64 ); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/src/chat.rs b/src/chat.rs index 33818350b2..ec434e2259 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -4641,6 +4641,7 @@ mod tests { use crate::message::delete_msgs; use crate::receive_imf::receive_imf; use crate::test_utils::{sync, TestContext, TestContextManager}; + use regex::Regex; use strum::IntoEnumIterator; use tokio::fs; @@ -7278,9 +7279,11 @@ mod tests { // the file bob receives should not contain BIDI-control characters assert_eq!( - Some("$BLOBDIR/harmless_file.txt.exe"), - msg.param.get(Param::File), + msg.param.get(Param::Filename).unwrap(), + "harmless_file.txt.exe" ); + let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap(); + assert!(re.is_match(msg.param.get(Param::File).unwrap())); Ok(()) } diff --git a/src/config.rs b/src/config.rs index bbe74021ec..6c52f0c43c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1018,10 +1018,10 @@ mod tests { // this. let self_chat = alice0.get_self_chat().await; let self_chat_avatar_path = self_chat.get_profile_image(&alice0).await?.unwrap(); - assert_eq!( - self_chat_avatar_path, - alice0.get_blobdir().join("icon-saved-messages.png") - ); + assert_eq!(self_chat_avatar_path.extension().unwrap(), "png"); + let self_chat_avatar = fs::read(self_chat_avatar_path).await?; + let expected_avatar = include_bytes!("../assets/icon-saved-messages.png").to_vec(); + assert_eq!(self_chat_avatar, expected_avatar); assert!(alice1 .get_config(Config::Selfavatar) .await? diff --git a/src/constants.rs b/src/constants.rs index deb7d9cdff..95e947d0df 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -211,6 +211,9 @@ pub(crate) const DC_FOLDERS_CONFIGURED_KEY: &str = "folders_configured"; // this value can be increased if the folder configuration is changed and must be redone on next program start pub(crate) const DC_FOLDERS_CONFIGURED_VERSION: i32 = 4; +// Maximum attemps to create a blob file. +pub(crate) const BLOB_CREATE_ATTEMPTS: u32 = 2; + // If more recipients are needed in SMTP's `RCPT TO:` header, the recipient list is split into // chunks. This does not affect MIME's `To:` header. Can be overwritten by setting // `max_smtp_rcpt_to` in the provider db. diff --git a/src/message.rs b/src/message.rs index 9f6ee2c5dc..e9c07cacd7 100644 --- a/src/message.rs +++ b/src/message.rs @@ -604,6 +604,9 @@ impl Message { } /// Returns the full path to the file associated with a message. + /// + /// The filename is mangled -- a random suffix is added before the extension. This suffix is + /// preserved across calls. pub fn get_file(&self, context: &Context) -> Option { self.param.get_path(Param::File, context).unwrap_or(None) } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index e0a1b4594e..3358144c86 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -2290,6 +2290,7 @@ mod tests { #![allow(clippy::indexing_slicing)] use mailparse::ParsedMail; + use regex::Regex; use super::*; use crate::{ @@ -3852,10 +3853,8 @@ Message. mime_message.parts[0].msg, "this is a classic email – I attached the .EML file".to_string() ); - assert_eq!( - mime_message.parts[0].param.get(Param::File), - Some("$BLOBDIR/.eml") - ); + let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap(); + assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap())); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string()));