From 8c9d39a794f2ee02b3c1eb73f074b4b4a78a5127 Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Mon, 4 Nov 2019 11:12:17 -0800 Subject: [PATCH] Allow more characters in API Key names This change allows for more characters to appear in our key names. Specifically, "." and "/" are valid in Kubernetes labels and taints, but were disallowed by the existing API system. The size of this change reflects some of the complexity of keys: * Their names are used commonly in discussions, documentation, user data, and code. We want the names to be convenient to reference. * Their naming has security considerations. The character set of key names might not line up with the allowed character set in the data store implementation, for example dots and slashes in the filesystem. * They're used everywhere in API-related code, so while we add functionality and security, we can't make them hard to use. Here are the changes at 30,000 feet: * Special characters in key names are percent-encoded ("URL-encoded") when used as filenames on the filesystem. For example, "%2E" for "." * Key names are "TOML-quoted" elsewhere. This fits usage in user data, and is relatively intuitive. For example: settings.kubernetes.node-labels."group.name" This key name leads to "group.name" being a single segment instead of two, and therefore "group.name" being the name of a Kubernetes label. * Key name segments in API requests and responses don't need to be quoted because they're already separated structurally, for example: {"settings": {"kubernetes": {"node-labels": {"group.name": "value"}}}} * No existing key names or file paths have to change, because keys without special characters are still valid in both schemes. Here are the changes at 10,000 feet: * Key names are treated as a series of segments, rather than a single string. This allows for more precise specification and testing of names. * Key names are parsed and encoded from segments, rather than checking them with a regex. 
* Key name stripping / appending is done more carefully, by segment. * Internal functions (for example in the data store) take Keys rather than strings, where feasible, to improve specificity. * Some, like to_pairs_with_prefix, continue to take strings, when it makes a simple use case more friendly. * migrations continue to use strings so they don't have to link to the data store, and to make writing them simpler. * Keys no longer Deref to string, AsRef, etc., to reduce the possibility of a mixup resulting in mistaken/double quoting. Other implementation notes: * `get_prefix` had the `strip_prefix` parameter removed because it was no longer used, and would have had to change to taking segments/Key anyway. --- workspaces/Cargo.lock | 3 +- workspaces/api/apiserver/Cargo.toml | 3 +- .../src/datastore/deserialization/error.rs | 20 +- .../src/datastore/deserialization/pairs.rs | 214 ++++--- .../api/apiserver/src/datastore/error.rs | 8 +- .../api/apiserver/src/datastore/filesystem.rs | 172 ++++-- workspaces/api/apiserver/src/datastore/key.rs | 547 +++++++++++++++--- .../api/apiserver/src/datastore/memory.rs | 17 +- workspaces/api/apiserver/src/datastore/mod.rs | 39 +- .../src/datastore/serialization/mod.rs | 5 +- .../src/datastore/serialization/pairs.rs | 139 +++-- .../api/apiserver/src/server/controller.rs | 72 +-- workspaces/api/apiserver/src/server/mod.rs | 3 +- .../migration-helpers/src/datastore.rs | 54 +- .../migration/migration-helpers/src/error.rs | 3 +- .../migration/migration-helpers/src/lib.rs | 16 +- workspaces/api/servicedog/src/main.rs | 28 +- workspaces/api/storewolf/src/main.rs | 21 +- workspaces/api/sundog/src/main.rs | 25 +- 19 files changed, 972 insertions(+), 417 deletions(-) diff --git a/workspaces/Cargo.lock b/workspaces/Cargo.lock index 5a18715704e..821903b2e60 100644 --- a/workspaces/Cargo.lock +++ b/workspaces/Cargo.lock @@ -273,12 +273,11 @@ dependencies = [ "actix-web 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", "base64 
0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", "cargo-readme 3.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "simplelog 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/workspaces/api/apiserver/Cargo.toml b/workspaces/api/apiserver/Cargo.toml index e7c08a44c41..e9f7cc10adb 100644 --- a/workspaces/api/apiserver/Cargo.toml +++ b/workspaces/api/apiserver/Cargo.toml @@ -9,9 +9,8 @@ build = "build.rs" [dependencies] actix-web = { version = "1.0.5", default-features = false, features = ["uds"] } base64 = "0.10" -lazy_static = "1.2" log = "0.4" -regex = "1.1" +percent-encoding = "2.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" simplelog = "0.7" diff --git a/workspaces/api/apiserver/src/datastore/deserialization/error.rs b/workspaces/api/apiserver/src/datastore/deserialization/error.rs index c08c7e122c8..7562691a430 100644 --- a/workspaces/api/apiserver/src/datastore/deserialization/error.rs +++ b/workspaces/api/apiserver/src/datastore/deserialization/error.rs @@ -1,7 +1,7 @@ use serde::de; use snafu::{IntoError, NoneError as NoSource, Snafu}; -use crate::datastore::ScalarError; +use crate::datastore::{Error as DataStoreError, ScalarError}; /// Potential errors from deserialization. 
#[derive(Debug, Snafu)] @@ -18,6 +18,24 @@ pub enum Error { "Data store deserializer must be used on a struct, or you must give a prefix" ))] BadRoot {}, + + #[snafu(display( + "Removal of prefix '{}' from key '{}' failed: {}", + prefix, + name, + source + ))] + StripPrefix { + prefix: String, + name: String, + source: DataStoreError, + }, + + #[snafu(display("Prefix '{}' is not a valid key: {}", prefix, source))] + InvalidPrefix { + prefix: String, + source: DataStoreError, + }, } pub type Result = std::result::Result; diff --git a/workspaces/api/apiserver/src/datastore/deserialization/pairs.rs b/workspaces/api/apiserver/src/datastore/deserialization/pairs.rs index 8f3ecce4a19..f989a163e04 100644 --- a/workspaces/api/apiserver/src/datastore/deserialization/pairs.rs +++ b/workspaces/api/apiserver/src/datastore/deserialization/pairs.rs @@ -35,11 +35,11 @@ use serde::de::{value::MapDeserializer, IntoDeserializer, Visitor}; use serde::{forward_to_deserialize_any, Deserialize}; use snafu::ResultExt; use std::borrow::Borrow; -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::hash::Hash; use super::{error, Error, Result}; -use crate::datastore::{deserializer_for_scalar, ScalarDeserializer, KEY_SEPARATOR}; +use crate::datastore::{deserializer_for_scalar, Key, KeyType, ScalarDeserializer}; /// This is the primary interface to deserialization. We turn the input map into the requested /// output type, assuming all non-Option fields are provided, etc. @@ -50,18 +50,14 @@ use crate::datastore::{deserializer_for_scalar, ScalarDeserializer, KEY_SEPARATO /// The BuildHasher bound on the input HashMap lets you use a HashMap with any hashing /// implementation. This is just an implementation detail and not something you have to specify /// about your input HashMap - any HashMap using string-like key/value types is fine. 
-pub fn from_map<'de, S1, S2, T, BH>(map: &'de HashMap) -> Result +pub fn from_map<'de, K, S, T, BH>(map: &'de HashMap) -> Result where - S1: Borrow + Eq + Hash, - S2: AsRef, + K: Borrow + Eq + Hash, + S: AsRef, T: Deserialize<'de>, BH: std::hash::BuildHasher, { - let de = CompoundDeserializer::new( - map, - map.keys().map(|s| s.borrow().to_string()).collect(), - None, - ); + let de = CompoundDeserializer::new(map, map.keys().map(|s| s.borrow().clone()).collect(), None); trace!("Deserializing keys: {:?}", de.keys); T::deserialize(de) } @@ -78,20 +74,26 @@ where /// /// This isn't necessary for structs because serde knows the struct's name, so we /// can strip it automatically. -pub fn from_map_with_prefix<'de, S1, S2, T, BH>( +pub fn from_map_with_prefix<'de, K, S, T, BH>( prefix: Option, - map: &'de HashMap, + map: &'de HashMap, ) -> Result where - S1: Borrow + Eq + Hash, - S2: AsRef, + K: Borrow + Eq + Hash, + S: AsRef, T: Deserialize<'de>, BH: std::hash::BuildHasher, { + let key_prefix = match prefix { + None => None, + Some(ref p) => { + Some(Key::new(KeyType::Data, p).context(error::InvalidPrefix { prefix: p })?) + } + }; let de = CompoundDeserializer::new( map, - map.keys().map(|s| s.borrow().to_string()).collect(), - prefix, + map.keys().map(|s| s.borrow().clone()).collect(), + key_prefix, ); trace!( "Deserializing keys with prefix {:?}: {:?}", @@ -105,15 +107,15 @@ where /// key name and a deserializer for it on each iteration, i.e. for each field. Based on whether /// the key name has a dot, we know if we need to recurse again or just deserialize a final value, /// which we represent as the two arms of the enum. 
-enum ValueDeserializer<'de, S1, S2, BH> { +enum ValueDeserializer<'de, K, S, BH> { Scalar(ScalarDeserializer<'de>), - Compound(CompoundDeserializer<'de, S1, S2, BH>), + Compound(CompoundDeserializer<'de, K, S, BH>), } -impl<'de, S1, S2, BH> serde::de::Deserializer<'de> for ValueDeserializer<'de, S1, S2, BH> +impl<'de, K, S, BH> serde::de::Deserializer<'de> for ValueDeserializer<'de, K, S, BH> where - S1: Borrow + Eq + Hash, - S2: AsRef, + K: Borrow + Eq + Hash, + S: AsRef, BH: std::hash::BuildHasher, { type Error = Error; @@ -163,10 +165,10 @@ where } } -impl<'de, S1, S2, BH> IntoDeserializer<'de, Error> for ValueDeserializer<'de, S1, S2, BH> +impl<'de, K, S, BH> IntoDeserializer<'de, Error> for ValueDeserializer<'de, K, S, BH> where - S1: Borrow + Eq + Hash, - S2: AsRef, + K: Borrow + Eq + Hash, + S: AsRef, BH: std::hash::BuildHasher, { type Deserializer = Self; @@ -178,26 +180,26 @@ where /// CompoundDeserializer is our main structure that drives serde's MapDeserializer and stores the /// state we need to understand the recursive structure of the output. -struct CompoundDeserializer<'de, S1, S2, BH> { +struct CompoundDeserializer<'de, K, S, BH> { /// A reference to the input data we're deserializing. - map: &'de HashMap, + map: &'de HashMap, /// The keys that we need to consider in this iteration. Starts out the same as the keys /// of the input map, but on recursive calls it's only the keys that are relevant to the /// sub-struct we're handling, with the duplicated prefix (the 'path') removed. - keys: HashSet, + keys: HashSet, /// The path tells us where we are in our recursive structures. 
- path: Option, + path: Option, } -impl<'de, S1, S2, BH> CompoundDeserializer<'de, S1, S2, BH> +impl<'de, K, S, BH> CompoundDeserializer<'de, K, S, BH> where BH: std::hash::BuildHasher, { fn new( - map: &'de HashMap, - keys: HashSet, - path: Option, - ) -> CompoundDeserializer<'de, S1, S2, BH> { + map: &'de HashMap, + keys: HashSet, + path: Option, + ) -> CompoundDeserializer<'de, K, S, BH> { CompoundDeserializer { map, keys, path } } } @@ -206,10 +208,10 @@ fn bad_root() -> Result { error::BadRoot.fail() } -impl<'de, S1, S2, BH> serde::de::Deserializer<'de> for CompoundDeserializer<'de, S1, S2, BH> +impl<'de, K, S, BH> serde::de::Deserializer<'de> for CompoundDeserializer<'de, K, S, BH> where - S1: Borrow + Eq + Hash, - S2: AsRef, + K: Borrow + Eq + Hash, + S: AsRef, BH: std::hash::BuildHasher, { type Error = Error; @@ -230,8 +232,15 @@ where // MapDeserializer.) if !name.is_empty() { trace!("Path before name check: {:?}", self.path); - self.path - .get_or_insert_with(|| name.to_lowercase().to_owned()); + if self.path.is_none() { + self.path = Some( + // to_lowercase handles the discrepancy between key naming and struct naming; + // this initial 'path' creation is the only place we take the struct name from + // serde, per above comment. + Key::from_segments(KeyType::Data, &[name.to_lowercase()]) + .context(error::InvalidPrefix { prefix: name })?, + ); + } trace!("Path after name check: {:?}", self.path); } @@ -241,11 +250,16 @@ where // handing it to the MapDeserializer. (Our real customer is the one specifying the // dotted keys, and we always use the struct name there for clarity.) 
trace!("Keys before path strip: {:?}", self.keys); - self.keys = self - .keys - .iter() - .map(|k| k.replacen(&format!("{}.", path.to_lowercase()), "", 1)) - .collect(); + let mut new_keys = HashSet::new(); + for key in self.keys { + new_keys.insert(key.strip_prefix_segments(&path.segments()).context( + error::StripPrefix { + prefix: path.name(), + name: key.name(), + }, + )?); + } + self.keys = new_keys; trace!("Keys after path strip: {:?}", self.keys); } @@ -259,42 +273,71 @@ where // give it an iterator that yields (key, deserializer) pairs. The nested deserializers // have the appropriate 'path' and a subset of 'keys' so they can do their job. visitor.visit_map(MapDeserializer::new(self.keys.iter().filter_map(|key| { - let struct_name = key.split('.').next().unwrap(); - trace!("Visiting key '{}', struct name '{}'", key, struct_name); - - // If we have a path, add a separator, otherwise start with an empty string. - let old_path = self - .path - .as_ref() - .map(|s| s.clone() + KEY_SEPARATOR) - .unwrap_or_default(); - trace!("Old path: {}", &old_path); - let new_path = old_path + struct_name; - trace!("New path: {}", &new_path); - - if key.contains('.') { + let mut segments: VecDeque<_> = key.segments().clone().into(); + // Inside this filter_map closure, we can't return early from the outer function, so we + // log an error and skip the key. Errors in this path are generally logic errors + // rather than user errors, so this isn't so bad. + let struct_name = match segments.pop_front() { + Some(s) => s, + None => { + error!("Logic error - Key segments.pop_front failed, empty Key?"); + return None; + } + }; + trace!("Visiting key '{}', struct name '{}'", key, &struct_name); + + // At the top level (None path) we start with struct_name as Key, otherwise append + // struct_name. 
+ trace!("Old path: {:?}", &self.path); + let path = match self.path { + None => match Key::from_segments(KeyType::Data, &[&struct_name]) { + Ok(key) => key, + Err(e) => { + error!( + "Tried to construct invalid key from struct name '{}', skipping: {}", + &struct_name, e + ); + return None; + } + }, + Some(ref old_path) => match old_path.append_segments(&[&struct_name]) { + Ok(key) => key, + Err(e) => { + error!( + "Appending '{}' to existing key '{}' resulted in invalid key, skipping: {}", + old_path, &struct_name, e + ); + return None; + } + } + }; + trace!("New path: {}", &path); + + if !segments.is_empty() { if structs_done.contains(&struct_name) { // We've handled this structure with a recursive call, so we're done. - trace!("Already handled struct '{}', skipping", struct_name); + trace!("Already handled struct '{}', skipping", &struct_name); None } else { // Otherwise, mark it, and recurse. - structs_done.insert(struct_name); + structs_done.insert(struct_name.clone()); // Subset the keys so the recursive call knows what it needs to handle - // only things starting with the new path. 
- let dotted_prefix = struct_name.to_owned() + KEY_SEPARATOR; let keys = self .keys .iter() - .filter(|new_key| new_key.starts_with(&dotted_prefix)) - .map(|new_key| new_key[dotted_prefix.len()..].to_owned()) + .filter(|new_key| new_key.starts_with_segments(&[&struct_name])) + // Remove the prefix - should always work, but log and skip the key otherwise + .filter_map(|new_key| new_key + .strip_prefix(&struct_name) + .map_err(|e| error!("Key starting with segment '{}' couldn't remove it as prefix: {}", &struct_name, e)).ok()) .collect(); // And here's what MapDeserializer expects, the key and deserializer for it trace!( "Recursing for struct '{}' with keys: {:?}", - struct_name, + &struct_name, keys ); Some(( @@ -302,7 +345,7 @@ where ValueDeserializer::Compound(CompoundDeserializer::new( self.map, keys, - Some(new_path), + Some(path), )), )) } @@ -310,12 +353,12 @@ where // No dot, so we have a scalar; hand the data to a scalar deserializer. trace!( "Key '{}' is scalar, getting '{}' from input to deserialize", - key, - new_path + struct_name, + path ); - let val = self.map.get(&new_path)?; + let val = self.map.get(&path)?; Some(( - key.to_owned(), + struct_name, ValueDeserializer::Scalar(deserializer_for_scalar(val.as_ref())), )) } @@ -365,12 +408,19 @@ where #[cfg(test)] mod test { use super::{from_map, from_map_with_prefix}; - use crate::datastore::deserialization::Error; + use crate::datastore::{deserialization::Error, Key, KeyType}; use maplit::hashmap; use serde::Deserialize; use std::collections::HashMap; + // Helper macro for making a data Key for testing whose name we know is valid. + macro_rules! key { + ($name:expr) => { + Key::new(KeyType::Data, $name).unwrap() + }; + } + #[derive(Debug, Deserialize, PartialEq)] struct A { id: Option, @@ -396,7 +446,7 @@ mod test { #[test] fn basic_struct_works() { let c: C = from_map(&hashmap! 
{ - "c.boolean".to_string() => "true".to_string(), + key!("c.boolean") => "true".to_string(), }) .unwrap(); assert_eq!(c, C { boolean: true }); @@ -405,14 +455,14 @@ mod test { #[test] fn deep_struct_works() { let a: A = from_map(&hashmap! { - "a.id".to_string() => "1".to_string(), - "a.name".to_string() => "\"it's my name\"".to_string(), - "a.list".to_string() => "[1,2, 3, 4]".to_string(), - "a.map.a".to_string() => "\"answer is always map\"".to_string(), - "a.nested.a".to_string() => "\"quite nested\"".to_string(), - "a.nested.b".to_string() => "false".to_string(), - "a.nested.c".to_string() => "null".to_string(), - "a.nested.d.boolean".to_string() => "true".to_string(), + key!("a.id") => "1".to_string(), + key!("a.name") => "\"it's my name\"".to_string(), + key!("a.list") => "[1,2, 3, 4]".to_string(), + key!("a.map.a") => "\"answer is always map\"".to_string(), + key!("a.nested.a") => "\"quite nested\"".to_string(), + key!("a.nested.b") => "false".to_string(), + key!("a.nested.c") => "null".to_string(), + key!("a.nested.d.boolean") => "true".to_string(), }) .unwrap(); assert_eq!( @@ -437,8 +487,8 @@ mod test { #[test] fn map_doesnt_work_at_root() { let a: Result, Error> = from_map(&hashmap! { - "a".to_string() => "\"it's a\"".to_string(), - "b".to_string() => "\"it's b\"".to_string(), + key!("a") => "\"it's a\"".to_string(), + key!("b") => "\"it's b\"".to_string(), }); a.unwrap_err(); } @@ -446,7 +496,7 @@ mod test { #[test] fn map_works_at_root_with_prefix() { let map = &hashmap! { - "x.boolean".to_string() => "true".to_string() + key!("x.boolean") => "true".to_string() }; let x: HashMap = from_map_with_prefix(Some("x".to_string()), map).unwrap(); assert_eq!( @@ -465,7 +515,7 @@ mod test { #[test] fn disallowed_data_type() { let bad: Result = from_map(&hashmap! 
{ - "id".to_string() => "42".to_string(), + key!("id") => "42".to_string(), }); bad.unwrap_err(); } diff --git a/workspaces/api/apiserver/src/datastore/error.rs b/workspaces/api/apiserver/src/datastore/error.rs index 14d9b0f9901..124f7b53b95 100644 --- a/workspaces/api/apiserver/src/datastore/error.rs +++ b/workspaces/api/apiserver/src/datastore/error.rs @@ -51,12 +51,8 @@ pub enum Error { ))] ListedMetaNotPresent { meta_key: String, data_key: String }, - #[snafu(display( - "Key name '{}' has invalid format, should match regex: {}", - name, - pattern - ))] - InvalidKey { name: String, pattern: regex::Regex }, + #[snafu(display("Key name '{}' has invalid format: {}", name, msg))] + InvalidKey { name: String, msg: String }, #[snafu(display("Key name beyond maximum length {}: {}", name, max))] KeyTooLong { name: String, max: usize }, diff --git a/workspaces/api/apiserver/src/datastore/filesystem.rs b/workspaces/api/apiserver/src/datastore/filesystem.rs index 31e09a0ea0a..37e76d14839 100644 --- a/workspaces/api/apiserver/src/datastore/filesystem.rs +++ b/workspaces/api/apiserver/src/datastore/filesystem.rs @@ -4,6 +4,7 @@ //! Data is kept in files with paths resembling the keys, e.g. a/b/c for a.b.c, and metadata is //! kept in a suffixed file next to the data, e.g. a/b/c.meta for metadata "meta" about a.b.c +use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC}; use snafu::{ensure, OptionExt, ResultExt}; use std::collections::{HashMap, HashSet}; use std::fs; @@ -11,10 +12,17 @@ use std::io; use std::path::{self, Path, PathBuf}; use walkdir::{DirEntry, WalkDir}; -use super::key::{Key, KeyType, KEY_SEPARATOR}; +use super::key::{Key, KeyType}; use super::{error, Committed, DataStore, Result}; -const METADATA_KEY_PREFIX: char = '.'; +const METADATA_KEY_PREFIX: &str = "."; + +// This describes the set of characters we encode when making the filesystem path for a given key. +// Any non-ASCII characters, plus these ones, will be encoded. 
+// We start off very strict (anything not alphanumeric) and remove characters we'll allow. +// To make inspecting the filesystem easier, we allow any filesystem-safe characters that are +// allowed in a Key. +const ENCODE_CHARACTERS: &AsciiSet = &NON_ALPHANUMERIC.remove(b'_').remove(b'-'); #[derive(Debug)] pub struct FilesystemDataStore { @@ -42,8 +50,10 @@ impl FilesystemDataStore { fn data_path(&self, key: &Key, committed: Committed) -> Result { let base_path = self.base_path(committed); - // turn dot-separated key into slash-separated path suffix - let path_suffix = key.replace(KEY_SEPARATOR, &path::MAIN_SEPARATOR.to_string()); + // Encode key segments so they're filesystem-safe + let encoded: Vec<_> = key.segments().iter().map(encode_path_component).collect(); + // Join segments with filesystem separator to get path underneath data store + let path_suffix = encoded.join(&path::MAIN_SEPARATOR.to_string()); // Make path from base + prefix // FIXME: canonicalize requires that the full path exists. We know our Key is checked @@ -54,7 +64,7 @@ impl FilesystemDataStore { // Confirm no path traversal outside of base ensure!( path != *base_path && path.starts_with(base_path), - error::PathTraversal { name: key.as_ref() } + error::PathTraversal { name: key.name() } ); Ok(path) @@ -67,22 +77,57 @@ impl FilesystemDataStore { data_key: &Key, committed: Committed, ) -> Result { - let data_path = self.data_path(data_key, committed)?; - let data_path_str = data_path.to_str().expect("Key paths must be UTF-8"); + let path = self.data_path(data_key, committed)?; + + // We want to add to the existing file name, not create new path components (directories), + // so we use a string type rather than a path type. + let mut path_str = path.into_os_string(); + + // Key names have quotes as necessary to identify segments with special characters, so + // we don't think "a.b" is actually two segments, for example. 
+ // Metadata keys only have a single segment, and we encode that as a single path + // component, so we don't need the quotes in the filename. + let raw_key_name = metadata_key.segments().get(0).context(error::Internal { + msg: "metadata key with no segments", + })?; - let segments: Vec<&str> = data_path_str.rsplitn(2, path::MAIN_SEPARATOR).collect(); - let (basename, dirname) = match segments.len() { - 2 => (segments[0], segments[1]), - _ => panic!("Grave error with path generation; invalid base path?"), - }; + let encoded_meta = encode_path_component(raw_key_name); + path_str.push(METADATA_KEY_PREFIX); + path_str.push(encoded_meta); - let filename = basename.to_owned() + &METADATA_KEY_PREFIX.to_string() + metadata_key; - Ok(Path::new(dirname).join(filename)) + Ok(path_str.into()) } } // Filesystem helpers +/// Encodes a string so that it's safe to use as a filesystem path component. +fn encode_path_component>(segment: S) -> String { + let encoded = utf8_percent_encode(segment.as_ref(), ENCODE_CHARACTERS); + encoded.to_string() +} + +/// Decodes a path component, removing the encoding that's applied to make it filesystem-safe. +fn decode_path_component(segment: S, path: P) -> Result +where + S: AsRef, + P: AsRef, +{ + let segment = segment.as_ref(); + + percent_decode_str(segment) + .decode_utf8() + // Get back a plain String. + .map(|cow| cow.into_owned()) + // decode_utf8 will only fail if someone messed with the filesystem contents directly + // and created a filename that contains percent-encoded bytes that are invalid UTF-8. + .ok() + .context(error::Corruption { + path: path.as_ref(), + msg: format!("invalid UTF-8 in encoded segment '{}'", segment), + }) +} + /// Helper for reading a key from the filesystem. Returns Ok(None) if the file doesn't exist /// rather than erroring. 
fn read_file_for_key(key: &Key, path: &Path) -> Result> { @@ -93,7 +138,7 @@ fn read_file_for_key(key: &Key, path: &Path) -> Result> { return Ok(None); } - Err(e).context(error::KeyRead { key: key.as_ref() }) + Err(e).context(error::KeyRead { key: key.name() }) } } } @@ -132,24 +177,48 @@ struct KeyPath { } impl KeyPath { - fn new(path: &Path) -> Result { + /// Given a DirEntry, gives you a KeyPath if it's a valid path to a key. Specifically, we return + /// Ok(Some(Key)) if it seems like a datastore key. Returns Ok(None) if it doesn't seem like a + /// datastore key, e.g. a directory, or if it's a file otherwise invalid as a key. Returns Err if + /// we weren't able to check. + fn from_entry>( + entry: &DirEntry, + strip_path_prefix: P, + ) -> Result> { + if !entry.file_type().is_file() { + trace!("Skipping non-file entry: {}", entry.path().display()); + return Ok(None); + } + + let key_path_raw = entry + .path() + .strip_prefix(strip_path_prefix) + .context(error::Path)?; + // If from_path doesn't think this is an OK key, we'll return Ok(None), otherwise the KeyPath + Ok(Self::from_path(key_path_raw).ok()) + } + + fn from_path(path: &Path) -> Result { let path_str = path.to_str().context(error::Corruption { msg: "Non-UTF8 path", path, })?; - let mut segments = path_str.splitn(2, '.'); - - // Split the data and metadata parts - let data_key_raw = segments.next().context(error::Internal { + // Split the data and metadata parts. + // Any dots in key names are encoded. 
+ let mut keys = path_str.splitn(2, '.'); + let data_key_raw = keys.next().context(error::Internal { msg: "KeyPath given empty path", })?; // Turn the data path into a dotted key - let data_key_str = data_key_raw.replace("/", KEY_SEPARATOR); - let data_key = Key::new(KeyType::Data, data_key_str)?; + let data_segments = data_key_raw + .split(path::MAIN_SEPARATOR) + .map(|s| decode_path_component(s, path)) + .collect::>>()?; + let data_key = Key::from_segments(KeyType::Data, &data_segments)?; // If we have a metadata portion, make that a Key too - let metadata_key = match segments.next() { + let metadata_key = match keys.next() { Some(meta_key_str) => Some(Key::new(KeyType::Meta, meta_key_str)?), None => None, }; @@ -168,25 +237,6 @@ impl KeyPath { } } -/// Given a DirEntry, gives you a KeyPath if it's a valid path to a key. Specifically, we return -/// Ok(Some(Key)) if it seems like a datastore key. Returns Ok(None) if it doesn't seem like a -/// datastore key, e.g. a directory, or if it's a file otherwise invalid as a key. Returns Err if -/// we weren't able to check. -fn key_path_for_entry>( - entry: &DirEntry, - strip_path_prefix: P, -) -> Result> { - if !entry.file_type().is_file() { - trace!("Skipping non-file entry: {}", entry.path().display()); - return Ok(None); - } - - let path = entry.path(); - let key_path_raw = path.strip_prefix(strip_path_prefix).context(error::Path)?; - // If KeyPath doesn't think this is an OK key, we'll return Ok(None), otherwise the KeyPath - Ok(KeyPath::new(key_path_raw).ok()) -} - /// Helper to walk through the filesystem to find populated keys of the given type, starting with /// the given prefix. Each item in the returned set is a KeyPath representing a data or metadata /// key. @@ -238,8 +288,8 @@ fn find_populated_key_paths>( // For anything we find, confirm it matches the user's filters, and add it to results. 
for entry in walker { let entry = entry.context(error::ListKeys)?; - if let Some(kp) = key_path_for_entry(&entry, &base)? { - if !kp.data_key.as_ref().starts_with(prefix.as_ref()) { + if let Some(kp) = KeyPath::from_entry(&entry, &base)? { + if !kp.data_key.name().starts_with(prefix.as_ref()) { trace!( "Discarded {:?} key whose data_key '{}' doesn't start with prefix '{}'", kp.key_type(), @@ -309,7 +359,7 @@ impl DataStore for FilesystemDataStore { // If the user requested specific metadata, move to the next key unless it matches. if let Some(name) = metadata_key_name { - if name.as_ref() != meta_key.name() { + if name.as_ref() != meta_key.name() { continue; } } @@ -358,12 +408,8 @@ impl DataStore for FilesystemDataStore { return Ok(Default::default()); } - // Turn String keys of pending data into Key keys, for return - let try_pending_keys: Result<HashSet<Key>> = pending_data - .keys() - .map(|s| Key::new(KeyType::Data, s)) - .collect(); - let pending_keys = try_pending_keys?; + // Save Keys for return value + let pending_keys: HashSet<Key> = pending_data.keys().cloned().collect(); // Apply changes to live debug!("Writing pending keys to live"); @@ -398,7 +444,7 @@ impl DataStore for FilesystemDataStore { #[cfg(test)] mod test { - use super::{Committed, FilesystemDataStore, Key, KeyType}; + use super::*; #[test] fn data_path() { @@ -428,4 +474,26 @@ mod test { .unwrap(); assert_eq!(live.into_os_string(), "/base/live/a/b/c.my-metadata"); } + + #[test] + fn encode_path_component_works() { + assert_eq!(encode_path_component("a-b_42"), "a-b_42"); + assert_eq!(encode_path_component("a.b"), "a%2Eb"); + assert_eq!(encode_path_component("a/b"), "a%2Fb"); + assert_eq!(encode_path_component("a b%c<d>e"), "a%20b%25c%3Cd%3Ee"); + } + + #[test] + fn decode_path_component_works() { + assert_eq!(decode_path_component("a-b_42", "").unwrap(), "a-b_42"); + assert_eq!(decode_path_component("a%2Eb", "").unwrap(), "a.b"); + assert_eq!(decode_path_component("a%2Fb", "").unwrap(), "a/b"); + assert_eq!( 
decode_path_component("a%20b%25c%3Cd%3Ee", "").unwrap(), "a b%c<d>e" ); // Invalid UTF-8 decode_path_component("%C3%28", "").unwrap_err(); } } diff --git a/workspaces/api/apiserver/src/datastore/key.rs b/workspaces/api/apiserver/src/datastore/key.rs index 712bd911d8d..7e5a55d258c 100644 --- a/workspaces/api/apiserver/src/datastore/key.rs +++ b/workspaces/api/apiserver/src/datastore/key.rs @@ -1,41 +1,20 @@ // Note: this only allows reading and writing UTF-8 keys and values; is that OK? -use lazy_static::lazy_static; -use regex::Regex; use serde::{Serialize, Serializer}; -use std::borrow::Borrow; +use snafu::ensure; use std::fmt; -use std::ops::Deref; +use std::hash::{Hash, Hasher}; use super::{error, Result}; -pub const KEY_SEPARATOR: &str = "."; - -/// String that can be used in a regex to validate segments of key names. -/// The character set was chosen to match TOML for ease of serialization. -pub const KEY_SEGMENT_STR: &str = "[a-zA-Z0-9_-]+"; +pub const KEY_SEPARATOR: char = '.'; +// String refs are more convenient for some Rust functions +pub const KEY_SEPARATOR_STR: &str = "."; /// Maximum key name length matches the maximum filename length of 255; if we need to have longer /// keys (up to 4096) we could make prefixes not count against this limit. const MAX_KEY_NAME_LENGTH: usize = 255; -lazy_static! { - /// Pattern to validate a single key name segment, e.g. between separators. - pub(crate) static ref KEY_SEGMENT: Regex = Regex::new( - &format!(r"^{segment}$", segment=KEY_SEGMENT_STR) - ).unwrap(); - - /// Pattern to validate a user-specified data key. - // Optional dot-separated prefix segments, with at least one final segment. - pub(crate) static ref DATA_KEY: Regex = Regex::new( - &format!(r"^(?P({segment}\.)*)(?P{segment})$", segment=KEY_SEGMENT_STR) - ).unwrap(); - - /// Pattern to validate a user-specified metadata key. - // No prefixes, just one name segment, so we reuse the regex. 
- pub(crate) static ref METADATA_KEY: Regex = KEY_SEGMENT.clone(); -} - /// KeyType represents whether we want to check a Key as a data key or metadata key. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum KeyType { @@ -46,70 +25,347 @@ pub enum KeyType { /// A Key is a pointer into the datastore with a convenient name. Their names are simply dotted /// strings ("a.b.c") with the dots implying hierarchy, so "a.b.c" and "a.b.d" are probably /// related. -// Note: it's important that Key only has the name String, or that it otherwise hashes the same as -// the name String, so that deserializing with from_map behaves the same whether we have a map -// whose keys are Strings or Keys containing those Strings. If we wanted to store KeyType in the -// Key, for example, we'd probably want to ensure we always deserialize with String or Key maps, -// rather than allowing both, so we don't have subtle error conditions involving missing data. -// (We probably don't want Data and Meta keys hashing the same, so customizing Hash is bad.) -#[derive(Clone, Debug, Eq, PartialEq, Hash)] +/// +/// Keys that need to include dots in the name can quote that segment of the name, for example the +/// key a."b.c".d has three segments: "a", "b.c", and "d". +#[derive(Clone, Debug)] pub struct Key { name: String, + segments: Vec, } impl Key { - pub fn new>(key_type: KeyType, name: S) -> Result { + /// Returns a list of the segments that make up the key name. + /// + /// Examples: + /// * a.b.c -> ["a", "b", "c"] + /// * "a.b".c -> ["a.b", "c"] + pub fn segments(&self) -> &Vec { + &self.segments + } + + /// Returns the name of the key. + /// + /// If you created the Key using with_segments(), the segments are quoted as necessary to + /// handle special characters. 
Examples: + /// * created with segments ["a", "b", "c"] -> a.b.c + /// * created with segments ["a.b", "c"] -> "a.b".c + pub fn name(&self) -> &String { + &self.name + } + + /// Creates a Key of the given type from the given name. + /// + /// If there are special characters in the name, like "." which is used as a separator, + /// then you should quote that segment, for example: a."b.c".d to represent three segments + /// "a", "b.c", and "d". If possible, you should use `Key::from_segments` instead, to more + /// accurately represent the individual segments. + pub fn new>(key_type: KeyType, name: S) -> Result { + let segments = Self::parse_name_segments(&name)?; + + Self::check_key(key_type, &name, &segments)?; + + Ok(Self { + name: name.as_ref().to_string(), + segments, + }) + } + + /// Creates a Key of the given type from the given name segments. + /// + /// For example, passing &["a", "b.c", "c"] will create a key named: a."b.c".c + pub fn from_segments(key_type: KeyType, segments: &[S]) -> Result + where + S: AsRef, + { + let name = Self::encode_name_segments(&segments)?; + + Self::check_key(key_type, &name, &segments)?; + + Ok(Self { + name, + segments: segments.iter().map(|s| s.as_ref().into()).collect(), + }) + } + + /// Removes the given prefix from the key name, returning a new Key. + /// + /// This is intended to remove key name segments from the beginning of the name, therefore + /// this only makes sense for Data keys, not Meta keys. A Data key will be returned. + /// + /// You should not include an ending separator (dot), it will be removed for you. + /// + /// If the key name does not begin with the given prefix, the returned key will be + /// identical. + /// + /// Fails if the new key would be invalid, e.g. if the prefix is the entire key. 
+ pub(super) fn strip_prefix(&self, prefix: S) -> Result + where + S: AsRef, + { + let prefix = prefix.as_ref(); + ensure!( + prefix != self.name, + error::InvalidKey { + name: "", + msg: format!("strip_prefix of '{}' matches key", prefix) + } + ); + + let strip = prefix.to_string() + "."; + + // Check starts_with so we don't replace in the middle of the string... + let name = if self.name.starts_with(&strip) { + self.name.replacen(&strip, "", 1) + } else { + self.name.clone() + }; + + Self::new(KeyType::Data, name) + } + + /// Removes the given key segments from the beginning of the key, returning a new Key. + /// + /// This only makes sense for Data keys because Meta keys only have one segment. A Data key + /// will be returned. + /// + /// If the key does not begin with all of the given segments, no segments will be removed, + /// so the returned key will be identical. + /// + /// Fails if the new key would be invalid, e.g. if the given segments are the entire key. + pub(super) fn strip_prefix_segments(&self, prefix: &[S]) -> Result + where + S: AsRef, + { + // We walk through the given prefix segments, looking for anything that doesn't match + // our segments, at which point we know we're going to return an unchanged key. + for (i, theirs) in prefix.iter().enumerate() { + match self.segments().get(i) { + // If we run out of our segments, the prefix is longer than the existing key, + // and therefore can't match; we return an unchanged key. + None => return Ok(self.clone()), + Some(ours) => { + // Difference found; return an unchanged key. + if ours != theirs.as_ref() { + return Ok(self.clone()); + } + } + } + } + + // No differences were found, so we remove the given segments. + Self::from_segments(KeyType::Data, &self.segments[prefix.len()..]) + } + + /// Adds the given segments to the key name, returning a new Key. 
+ /// + /// The given segments should not be quoted even if they contain the separator character; + /// using a segment list allows us to be precise about the distinction between segments. + /// + /// Fails if the new key would be invalid, e.g. the suffix contains invalid characters. + pub(super) fn append_segments(&self, segments: &[S]) -> Result + where + S: AsRef, + { + let our_segments = self.segments().iter().map(|s| s.as_ref()); + let their_segments = segments.iter().map(|s| s.as_ref()); + + let new_segments: Vec<_> = our_segments.chain(their_segments).collect(); + Self::from_segments(KeyType::Data, &new_segments) + } + + /// Adds the given key's name to this key name and returns a new Key. + /// + /// This is done precisely using each key's segments, so handling of separators and quoting + /// is automatic. + /// + /// Fails if the new key would be invalid, e.g. too long. + pub(super) fn append_key(&self, key: &Key) -> Result { + let our_segments = self.segments().iter(); + let their_segments = key.segments().iter(); + + let new_segments: Vec<_> = our_segments.chain(their_segments).collect(); + Self::from_segments(KeyType::Data, &new_segments) + } + + /// Additional safety checks for parsed or generated keys. 
+ fn check_key(key_type: KeyType, name: S1, segments: &[S2]) -> Result<()> + where + S1: AsRef, + S2: AsRef, + { let name = name.as_ref(); - if name.len() > MAX_KEY_NAME_LENGTH { - return error::KeyTooLong { + + ensure!( + name.len() <= MAX_KEY_NAME_LENGTH, + error::KeyTooLong { name, max: MAX_KEY_NAME_LENGTH, } - .fail(); + ); + + match key_type { + KeyType::Data => { + ensure!( + segments.len() >= 1, + error::InvalidKey { + name, + msg: "data keys must have at least one segment", + } + ); + } + KeyType::Meta => { + ensure!( + segments.len() == 1, + error::InvalidKey { + name, + msg: "meta keys may only have one segment", + } + ); + } } - let name_pattern = match key_type { - KeyType::Data => &*DATA_KEY, - KeyType::Meta => &*METADATA_KEY, - }; + Ok(()) + } + + /// Determines whether a character is acceptable within a segment of a key name. This is + /// separate from quoting; if a character isn't valid, it isn't valid quoted, either. + fn valid_character(c: char) -> bool { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '/' => true, + _ => false, + } + } - if !name_pattern.is_match(name) { - return error::InvalidKey { + /// Given a key name, returns a list of its name segments, separated by KEY_SEPARATOR. + /// Respects quoting of segments so they can contain dots. + /// + /// Examples: + /// * a.b.c -> ["a", "b", "c"] + /// * "a.b".c -> ["a.b", "c"] + fn parse_name_segments>(name: S) -> Result> { + let name = name.as_ref(); + + ensure!( + !name.is_empty(), + error::InvalidKey { name, - pattern: name_pattern.clone(), + msg: "cannot be empty", + } + ); + + // The full list of name segments we're going to return. + let mut segments = Vec::new(); + // The current name segment we're checking. 
+ let mut segment = String::new(); + // Track whether we're inside a quoted section of the key name + let mut in_quotes = false; + + // Walk through each character, looking for quotes or separators to update state + for c in name.chars() { + if c == '"' { + // Quotes don't go into the name segments, so we just flip the flag. + in_quotes = !in_quotes; + } else if c == KEY_SEPARATOR { + if in_quotes { + // If we see a separator inside quotes, it's just like any other character. + segment.push(c); + } else { + // If we see a separator outside quotes, it should be ending a segment. + // Segments can't be empty. + ensure!( + !segment.is_empty(), + error::InvalidKey { + name, + msg: "empty key segment", + } + ); + // Save the segment we just saw and start a new one. + segments.push(segment); + segment = String::new(); + } + } else { + // Not a special character; make sure it's a valid part of a name segment. + if Self::valid_character(c) { + segment.push(c); + } else { + return error::InvalidKey { + name, + msg: format!("invalid character in key: '{}'", c), + } + .fail(); + } } - .fail(); } - let copy = name.to_string(); - Ok(Key { name: copy }) - } -} + ensure!( + !in_quotes, + error::InvalidKey { + name, + msg: "unbalanced quotes", + } + ); + ensure!( + !segment.is_empty(), + error::InvalidKey { + name, + msg: "ends with separator", + } + ); -// These trait implementations let you treat a Key like a string most of the time. + // Push final segment (keys don't end with a dot, which is when we normally push) + segments.push(segment); -impl Deref for Key { - type Target = str; - fn deref(&self) -> &Self::Target { - &self.name + trace!("Parsed key name '{}' to segments {:?}", name, segments); + Ok(segments) } -} -impl Borrow for Key { - fn borrow(&self) -> &String { - &self.name - } -} + /// Given a list of key name segments, encodes them into a name string. Any segments with + /// special characters (like the separator) are quoted. 
+ fn encode_name_segments>(segments: &[S]) -> Result { + let segments: Vec<_> = segments.iter().map(|s| s.as_ref()).collect(); + let mut outputs = Vec::new(); + + // Check whether we need quoting for each segment. + for segment in segments.iter() { + for chr in segment.chars() { + ensure!( + chr == KEY_SEPARATOR || Self::valid_character(chr), + error::InvalidKey { + // Give an understandable key name in the error, even if it's invalid + name: segments.join("."), + msg: format!("Segment '{}' contains invalid character '{}'", segment, chr), + } + ); + } -impl Borrow for Key { - fn borrow(&self) -> &str { - &self.name + if segment.chars().any(|c| c == KEY_SEPARATOR) { + // Includes separator; quote the segment. + outputs.push(format!("\"{}\"", segment)); + } else { + // No special characters, no escaping needed. + outputs.push(segment.to_string()); + } + } + + // Join the (possibly quoted) segments with our separator. + let name = outputs.join(KEY_SEPARATOR_STR); + trace!("Encoded key '{}' from segments {:?}", name, segments); + Ok(name) } -} -impl AsRef for Key { - fn as_ref(&self) -> &str { - &self.name + pub fn starts_with_segments(&self, segments: &[S]) -> bool + where + S: AsRef, + { + if self.segments.len() < segments.len() { + return false; + } + + let ours = self.segments()[0..segments.len()].iter(); + let theirs = segments.iter().map(|s| s.as_ref()); + + ours.zip(theirs).all(|(a, b)| a == b) } } @@ -130,9 +386,22 @@ impl Serialize for Key { } } +// The segments are our source of truth. +impl PartialEq for Key { + fn eq(&self, other: &Key) -> bool { + self.segments == other.segments + } +} +impl Eq for Key {} +impl Hash for Key { + fn hash(&self, state: &mut H) { + self.segments.hash(state); + } +} + #[cfg(test)] mod test { - use super::{Key, KeyType, DATA_KEY, KEY_SEGMENT, MAX_KEY_NAME_LENGTH, METADATA_KEY}; + use super::{Key, KeyType, MAX_KEY_NAME_LENGTH}; // Helper macro for testing conditions that apply to both data and metadata keys macro_rules! 
data_and_meta { @@ -148,15 +417,41 @@ mod test { } #[test] - fn nested_data_key_ok() { + fn dotted_data_key_ok() { assert!(Key::new(KeyType::Data, "a.b.c.d.e.f.g").is_ok()); } #[test] - fn nested_metadata_key_fails() { + fn dotted_metadata_key_fails() { assert!(Key::new(KeyType::Meta, "a.b.c.d.e.f.g").is_err()); } + #[test] + fn quoted_data_key_ok() { + let name = "a.\"b.c\".d"; + let key = Key::new(KeyType::Data, name).unwrap(); + assert_eq!(key.name(), name); + assert_eq!(key.segments(), &["a", "b.c", "d"]); + } + + #[test] + fn quoted_metadata_key_ok() { + // Metadata keys can only have one segment, but it can be quoted + let name = "\"b.c\""; + let key = Key::new(KeyType::Data, name).unwrap(); + assert_eq!(key.name(), name); + assert_eq!(key.segments(), &["b.c"]); + } + + #[test] + fn from_segments() { + let name = "a.\"b.c\".d"; + let segments = &["a", "b.c", "d"]; + let key = Key::from_segments(KeyType::Data, segments).unwrap(); + assert_eq!(key.name(), name); + assert_eq!(key.segments(), segments); + } + #[test] fn key_with_special_chars_ok() { data_and_meta!(|t| assert!(Key::new(t, "a-b_c").is_ok())); @@ -188,23 +483,111 @@ mod test { } #[test] - fn segment_regex() { - assert!(KEY_SEGMENT.is_match("abcd123_-")); - assert!(!KEY_SEGMENT.is_match("abcd.123")); - assert!(!KEY_SEGMENT.is_match("!")); + fn strip_prefix_ok() { + // Remove plain prefix + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = "a.b"; + assert_eq!(key.strip_prefix(prefix).unwrap().name(), "c.d"); + + // Don't remove non-matching prefix; no change + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = "x.y"; + assert_eq!(key.strip_prefix(prefix).unwrap().name(), "a.b.c.d"); + + // Don't remove prefix that doesn't match whole quoted segment + let key = Key::new(KeyType::Data, "a.\"b.c\".d").unwrap(); + let prefix = "a.b"; + assert_eq!(key.strip_prefix(prefix).unwrap().name(), "a.\"b.c\".d"); + + // Do remove prefix that does match whole quoted segment + 
let key = Key::new(KeyType::Data, "a.\"b.c\".d").unwrap(); + let prefix = "a.\"b.c\""; + assert_eq!(key.strip_prefix(prefix).unwrap().name(), "d"); + } + + #[test] + fn strip_prefix_err() { + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = "a.b.c.d"; + key.strip_prefix(prefix).unwrap_err(); + } + + #[test] + fn strip_prefix_segments_ok() { + // Remove plain prefix + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = &["a", "b"]; + assert_eq!(key.strip_prefix_segments(prefix).unwrap().name(), "c.d"); + + // Don't remove non-matching prefix; no change + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = &["x", "y"]; + assert_eq!(key.strip_prefix_segments(prefix).unwrap().name(), "a.b.c.d"); + + // Don't remove prefix that doesn't match whole quoted segment + let key = Key::new(KeyType::Data, "a.\"b.c\".d").unwrap(); + let prefix = &["a", "b"]; + assert_eq!( + key.strip_prefix_segments(prefix).unwrap().name(), + "a.\"b.c\".d" + ); + + // Do remove prefix that does match whole quoted segment + let key = Key::new(KeyType::Data, "a.\"b.c\".d").unwrap(); + let prefix = &["a", "b.c"]; + assert_eq!(key.strip_prefix_segments(prefix).unwrap().name(), "d"); + } + + #[test] + fn strip_prefix_segments_err() { + let key = Key::new(KeyType::Data, "a.b.c.d").unwrap(); + let prefix = &["a", "b", "c", "d"]; + key.strip_prefix_segments(prefix).unwrap_err(); + } + + #[test] + fn append_segments_ok() { + let key = Key::new(KeyType::Data, "a.b").unwrap(); + let new = key.append_segments(&["x"]).unwrap(); + assert_eq!(new.name(), "a.b.x"); + + let new = key.append_segments(&["x.y"]).unwrap(); + assert_eq!(new.name(), "a.b.\"x.y\""); + + let new = key.append_segments(&["x", "y"]).unwrap(); + assert_eq!(new.name(), "a.b.x.y"); + } + + #[test] + fn append_segments_err() { + let key = Key::new(KeyType::Data, "a.b").unwrap(); + key.append_segments(&["@"]).unwrap_err(); + } + + #[test] + fn append_key_ok() { + let key = 
Key::new(KeyType::Data, "a.b").unwrap(); + let key2 = Key::new(KeyType::Data, "c.d").unwrap(); + let new = key.append_key(&key2).unwrap(); + assert_eq!(new.name(), "a.b.c.d"); + + let key2 = Key::new(KeyType::Data, "\"c.d\"").unwrap(); + let new = key.append_key(&key2).unwrap(); + assert_eq!(new.name(), "a.b.\"c.d\""); } #[test] - fn metadata_regex() { - assert!(METADATA_KEY.is_match("abcd123_-")); - assert!(!METADATA_KEY.is_match("abcd.123")); - assert!(!METADATA_KEY.is_match("!")); + fn append_key_err() { + let long_key = Key::new(KeyType::Data, "a".repeat(MAX_KEY_NAME_LENGTH)).unwrap(); + let key2 = Key::new(KeyType::Data, "b").unwrap(); + long_key.append_key(&key2).unwrap_err(); } #[test] - fn data_regex() { - assert!(DATA_KEY.is_match("abcd123_-")); - assert!(DATA_KEY.is_match("abcd.123")); - assert!(!DATA_KEY.is_match("!")); + fn starts_with_segments() { + let key = Key::new(KeyType::Data, "a.b").unwrap(); + assert!(key.starts_with_segments(&["a"])); + assert!(!key.starts_with_segments(&["\"a.b\""])); + assert!(!key.starts_with_segments(&["a."])); } } diff --git a/workspaces/api/apiserver/src/datastore/memory.rs b/workspaces/api/apiserver/src/datastore/memory.rs index 47aee8fc681..70d3fb9915f 100644 --- a/workspaces/api/apiserver/src/datastore/memory.rs +++ b/workspaces/api/apiserver/src/datastore/memory.rs @@ -3,7 +3,6 @@ //! Mimics some of the decisions made for FilesystemDataStore, e.g. metadata being committed //! immediately. -use std::borrow::Borrow; use std::collections::{HashMap, HashSet}; use std::mem; @@ -54,7 +53,7 @@ impl DataStore for MemoryDataStore { Ok(dataset .keys() // Make sure the data keys start with the given prefix. - .filter(|k| k.starts_with(prefix.as_ref())) + .filter(|k| k.name().starts_with(prefix.as_ref())) .cloned() .collect()) } @@ -71,7 +70,7 @@ impl DataStore for MemoryDataStore { let mut result = HashMap::new(); for (data_key, meta_map) in self.metadata.iter() { // Confirm data key matches requested prefix. 
- if !data_key.starts_with(prefix.as_ref()) { + if !data_key.name().starts_with(prefix.as_ref()) { continue; } @@ -79,7 +78,7 @@ impl DataStore for MemoryDataStore { for (meta_key, _value) in meta_map { // Confirm metadata key matches requested name, if any. if let Some(name) = metadata_key_name { - if name.as_ref() != meta_key.as_ref() { + if name.as_ref() != meta_key.name() { continue; } } @@ -95,8 +94,7 @@ impl DataStore for MemoryDataStore { } fn get_key(&self, key: &Key, committed: Committed) -> Result> { - let map_key: &str = key.borrow(); - Ok(self.dataset(committed).get(map_key).cloned()) + Ok(self.dataset(committed).get(key).cloned()) } fn set_key>(&mut self, key: &Key, value: S, committed: Committed) -> Result<()> { @@ -106,15 +104,14 @@ impl DataStore for MemoryDataStore { } fn key_populated(&self, key: &Key, committed: Committed) -> Result { - let map_key: &str = key.borrow(); - Ok(self.dataset(committed).contains_key(map_key)) + Ok(self.dataset(committed).contains_key(key)) } fn get_metadata_raw(&self, metadata_key: &Key, data_key: &Key) -> Result> { - let metadata_for_data = self.metadata.get(data_key.as_ref()); + let metadata_for_data = self.metadata.get(data_key); // If we have a metadata entry for this data key, then we can try fetching the requested // metadata key, otherwise we'll return early with Ok(None). 
- let result = metadata_for_data.and_then(|m| m.get(metadata_key.as_ref())); + let result = metadata_for_data.and_then(|m| m.get(metadata_key)); Ok(result.cloned()) } diff --git a/workspaces/api/apiserver/src/datastore/mod.rs b/workspaces/api/apiserver/src/datastore/mod.rs index b75bb68296b..baf5c3776d4 100644 --- a/workspaces/api/apiserver/src/datastore/mod.rs +++ b/workspaces/api/apiserver/src/datastore/mod.rs @@ -19,7 +19,7 @@ pub mod serialization; pub use error::{Error, Result}; pub use filesystem::FilesystemDataStore; -pub use key::{Key, KeyType, KEY_SEPARATOR}; +pub use key::{Key, KeyType, KEY_SEPARATOR, KEY_SEPARATOR_STR}; use serde::{Deserialize, Serialize}; use snafu::OptionExt; @@ -66,17 +66,14 @@ pub trait DataStore { /// earlier in the tree, if more specific values are not found later. fn get_metadata(&self, metadata_key: &Key, data_key: &Key) -> Result> { let mut result = Ok(None); - let mut current_path = String::with_capacity(data_key.len()); + let mut current_path = Vec::new(); // Walk through segments of the data key in order, returning the last metadata we find - for component in data_key.split(KEY_SEPARATOR) { - if !current_path.is_empty() { - current_path.push_str(KEY_SEPARATOR); - } - current_path.push_str(component); + for component in data_key.segments() { + current_path.push(component); - let data_key = Key::new(KeyType::Data, ¤t_path).unwrap_or_else(|_| { - unreachable!("Prefix of Key failed to make Key: {}", current_path) + let data_key = Key::from_segments(KeyType::Data, ¤t_path).unwrap_or_else(|_| { + unreachable!("Prefix of Key failed to make Key: {:?}", current_path) }); if let Some(md) = self.get_metadata_raw(metadata_key, &data_key)? { @@ -107,15 +104,13 @@ pub trait DataStore { /// /// Implementers can replace the default implementation if there's a faster way than setting /// each key individually. 
- fn set_keys(&mut self, pairs: &HashMap, committed: Committed) -> Result<()> + fn set_keys(&mut self, pairs: &HashMap, committed: Committed) -> Result<()> where - S1: AsRef, - S2: AsRef, + S: AsRef, { - for (key_str, value) in pairs { - trace!("Setting data key {}", key_str.as_ref()); - let key = Key::new(KeyType::Data, key_str)?; - self.set_key(&key, value, committed)?; + for (key, value) in pairs { + trace!("Setting data key {}", key.name()); + self.set_key(key, value, committed)?; } Ok(()) } @@ -140,7 +135,7 @@ pub trait DataStore { trace!("Pulling value from datastore for key: {}", key); let value = self .get_key(&key, committed)? - .context(error::ListedKeyNotPresent { key: key.as_ref() })?; + .context(error::ListedKeyNotPresent { key: key.name() })?; result.insert(key, value); } @@ -172,7 +167,7 @@ pub trait DataStore { // If the user requested specific metadata, move to the next key unless it // matches. if let Some(name) = metadata_key_name { - if name.as_ref() != meta_key.as_ref() { + if name.as_ref() != meta_key.name() { continue; } } @@ -185,8 +180,8 @@ pub trait DataStore { ); let value = self.get_metadata(&meta_key, &data_key)?.context( error::ListedMetaNotPresent { - meta_key: meta_key.as_ref(), - data_key: data_key.as_ref(), + meta_key: meta_key.name(), + data_key: data_key.name(), }, )?; @@ -253,8 +248,8 @@ mod test { let v1 = "memvalue1".to_string(); let v2 = "memvalue2".to_string(); let data = hashmap!( - &k1 => &v1, - &k2 => &v2, + k1.clone() => &v1, + k2.clone() => &v2, ); m.set_keys(&data, Committed::Pending).unwrap(); diff --git a/workspaces/api/apiserver/src/datastore/serialization/mod.rs b/workspaces/api/apiserver/src/datastore/serialization/mod.rs index 0a72e9f0597..af14fe3caf1 100644 --- a/workspaces/api/apiserver/src/datastore/serialization/mod.rs +++ b/workspaces/api/apiserver/src/datastore/serialization/mod.rs @@ -50,10 +50,7 @@ impl ser::Serializer for &MapKeySerializer { fn serialize_str(self, value: &str) -> Result { // Make sure 
string is valid as a key. - // Note: we check as a metadata key here, because metadata keys are simpler - no dotted - // components, just one. We wouldn't want dotted components in a single map key because - // it would falsely imply nesting. - let key = Key::new(KeyType::Meta, value) + let key = Key::from_segments(KeyType::Data, &[value]) .map_err(|e| { debug!("MapKeySerializer got invalid key name: {}", value); error::InvalidKey { msg: format!("{}", e) }.into_error(NoSource) diff --git a/workspaces/api/apiserver/src/datastore/serialization/pairs.rs b/workspaces/api/apiserver/src/datastore/serialization/pairs.rs index 328408de2d1..eaa88803171 100644 --- a/workspaces/api/apiserver/src/datastore/serialization/pairs.rs +++ b/workspaces/api/apiserver/src/datastore/serialization/pairs.rs @@ -8,18 +8,18 @@ //! to be sure we support the various forms of input/output we care about. use serde::{ser, Serialize}; -use snafu::{OptionExt, ResultExt}; +use snafu::{IntoError, NoneError as NoSource, OptionExt, ResultExt}; use std::collections::HashMap; use super::{error, Error, MapKeySerializer, Result}; -use crate::datastore::{serialize_scalar, ScalarError, KEY_SEPARATOR}; +use crate::datastore::{serialize_scalar, Key, KeyType, ScalarError}; /// This is the primary interface to our serialization. We turn anything implementing Serialize /// into pairs of datastore keys and serialized values. For example, a nested struct like this: /// Settings -> DockerSettings -> bridge_ip = u64 /// would turn into a key of "settings.docker-settings.bridge-ip" and a serialized String /// representing the u64 data. -pub fn to_pairs(value: &T) -> Result> { +pub fn to_pairs(value: &T) -> Result> { let mut output = HashMap::new(); let serializer = Serializer::new(&mut output, None); value.serialize(serializer)?; @@ -28,12 +28,21 @@ pub fn to_pairs(value: &T) -> Result> { /// Like to_pairs, but lets you add an arbitrary prefix to the resulting keys. 
A separator will /// automatically be added after the prefix. -pub fn to_pairs_with_prefix( - prefix: String, - value: &T, -) -> Result> { +pub fn to_pairs_with_prefix(prefix: S, value: &T) -> Result> +where + S: AsRef, + T: Serialize, +{ + let prefix = prefix.as_ref(); + let prefix_key = Key::new(KeyType::Data, prefix).map_err(|e| { + error::InvalidKey { + msg: format!("Prefix '{}' not valid as Key: {}", prefix, e), + } + .into_error(NoSource) + })?; + let mut output = HashMap::new(); - let serializer = Serializer::new(&mut output, Some(prefix)); + let serializer = Serializer::new(&mut output, Some(prefix_key)); value.serialize(serializer)?; Ok(output) } @@ -50,16 +59,18 @@ pub fn to_pairs_with_prefix( /// /// (We could handle lists as proper compound structures by improving the data store such that it /// can store unnamed sub-components, perhaps by using a visible index ("a.b.c[0]", "a.b.c[1]"). +/// It's more common to use a HashMap in the model, and then to use named keys instead of indexes, +/// which works fine.) struct Serializer<'a> { - output: &'a mut HashMap, - prefix: Option, + output: &'a mut HashMap, + prefix: Option, // This is temporary storage for serializing maps, because serde gives us keys and values // separately. See the SerializeMap implementation below. - key: Option, + key: Option, } impl<'a> Serializer<'a> { - fn new(output: &'a mut HashMap, prefix: Option) -> Self { + fn new(output: &'a mut HashMap, prefix: Option) -> Self { Self { output, prefix, @@ -70,7 +81,7 @@ impl<'a> Serializer<'a> { /// This helps us handle the cases where we have to have an existing prefix in order to output a /// value. It creates an explanatory error if the given prefix is None. 
-fn expect_prefix(maybe_prefix: Option, value: &str) -> Result { +fn expect_prefix(maybe_prefix: Option, value: &str) -> Result { maybe_prefix.context(error::MissingPrefix { value }) } @@ -139,10 +150,17 @@ impl<'a> ser::Serializer for Serializer<'a> { trace!("Serializing struct '{}' at prefix {:?}", name, self.prefix); // If we already have a prefix, use it - could be because we're in a nested struct, or the // user gave a prefix. Otherwise, use the given name - this is a top-level struct. - let prefix = self.prefix.or_else(|| { - trace!("Had no prefix, starting with struct name: {}", name); - Some(name.to_string()) - }); + let prefix = match self.prefix { + p @ Some(_) => p, + None => { + trace!("Had no prefix, starting with struct name: {}", name); + let key = Key::from_segments(KeyType::Data, &[&name]) + .map_err(|e| error::InvalidKey { + msg: format!("struct '{}' not valid as Key: {}", name, e) + }.into_error(NoSource))?; + Some(key) + } + }; Ok(Serializer::new(self.output, prefix)) } @@ -186,11 +204,19 @@ impl<'a> ser::Serializer for Serializer<'a> { } /// Helper that combines the existing prefix, if any, with a separator and the new key. -fn dotted_prefix(old_prefix: Option, key: String) -> String { +fn key_append_or_create(old_prefix: &Option, key: &Key) -> Result { if let Some(old_prefix) = old_prefix { - old_prefix + KEY_SEPARATOR + &key + old_prefix.append_key(&key).map_err(|e| { + error::InvalidKey { + msg: format!( + "appending '{}' to '{}' is invalid as Key: {}", + key, old_prefix, e + ), + } + .into_error(NoSource) + }) } else { - key + Ok(key.clone()) } } @@ -214,10 +240,20 @@ impl<'a> ser::SerializeMap for Serializer<'a> { where T: ?Sized + Serialize, { - // Store the key to use later in serialize_value. trace!("Serializing map key at prefix {:?}", self.prefix); + // We're given a serializable thing; need to serialize it to get a string we can work with. 
let key_str = key.serialize(&MapKeySerializer::new())?; - self.key = Some(dotted_prefix(self.prefix.clone(), key_str)); + // It should be valid as a Key. + // Note: we use 'new', not 'from_segments', because we just serialized into a string, + // meaning it's in quoted form. + let key = Key::new(KeyType::Data, &key_str).map_err(|e| { + error::InvalidKey { + msg: format!("serialized map key '{}' not valid as Key: {}", &key_str, e), + } + .into_error(NoSource) + })?; + // Store the key to use later in serialize_value. + self.key = Some(key_append_or_create(&self.prefix, &key)?); Ok(()) } @@ -255,16 +291,23 @@ impl<'a> ser::SerializeStruct for Serializer<'a> { type Ok = (); type Error = Error; - fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + fn serialize_field(&mut self, key_str: &'static str, value: &T) -> Result<()> where T: ?Sized + Serialize, { - let new_root = dotted_prefix(self.prefix.clone(), key.to_string()); + let key = Key::from_segments(KeyType::Data, &[&key_str]).map_err(|e| { + error::InvalidKey { + msg: format!("struct field '{}' not valid as Key: {}", key_str, e), + } + .into_error(NoSource) + })?; + + let new_root = key_append_or_create(&self.prefix, &key)?; trace!( "Recursively serializing struct with new root '{}' from prefix '{:?}' and key '{}'", new_root, self.prefix, - key + &key ); value.serialize(Serializer::new(self.output, Some(new_root))) } @@ -287,13 +330,13 @@ impl<'a> ser::SerializeStruct for Serializer<'a> { /// serialization steps, and then at the end, deserialize the strings back into a list of the /// original type, and serialize the entire list. Sorry. 
struct FlatSerializer<'a> { - output: &'a mut HashMap, - prefix: String, + output: &'a mut HashMap, + prefix: Key, list: Vec, } impl<'a> FlatSerializer<'a> { - fn new(output: &'a mut HashMap, prefix: String) -> Self { + fn new(output: &'a mut HashMap, prefix: Key) -> Self { FlatSerializer { output, prefix, @@ -340,9 +383,17 @@ impl<'a> ser::SerializeSeq for FlatSerializer<'a> { #[cfg(test)] mod test { use super::{to_pairs, to_pairs_with_prefix}; + use crate::datastore::{Key, KeyType}; use maplit::hashmap; use serde::Serialize; + // Helper macro for making a data Key for testing whose name we know is valid. + macro_rules! key { + ($name:expr) => { + Key::new(KeyType::Data, $name).unwrap() + }; + } + #[derive(PartialEq, Serialize)] struct A { id: u8, @@ -365,8 +416,8 @@ mod test { assert_eq!( keys, hashmap!( - "B.list".to_string() => "[3,4,5]".to_string(), - "B.boolean".to_string() => "true".to_string(), + key!("B.list") => "[3,4,5]".to_string(), + key!("B.boolean") => "true".to_string(), ) ); } @@ -389,9 +440,9 @@ mod test { assert_eq!( keys, hashmap!( - "A.b.list".to_string() => "[5,6,7]".to_string(), - "A.b.boolean".to_string() => "true".to_string(), - "A.id".to_string() => "42".to_string(), + key!("A.b.list") => "[5,6,7]".to_string(), + key!("A.b.boolean") => "true".to_string(), + key!("A.id") => "42".to_string(), ) ); } @@ -399,17 +450,17 @@ mod test { #[test] fn map() { let m = hashmap!( - "A".to_string() => hashmap!( - "id".to_string() => 42, - "ie".to_string() => 43, + key!("A") => hashmap!( + key!("id") => 42, + key!("ie") => 43, ), ); - let keys = to_pairs_with_prefix("map".to_string(), &m).unwrap(); + let keys = to_pairs_with_prefix("map", &m).unwrap(); assert_eq!( keys, hashmap!( - "map.A.id".to_string() => "42".to_string(), - "map.A.ie".to_string() => "43".to_string(), + key!("map.A.id") => "42".to_string(), + key!("map.A.ie") => "43".to_string(), ) ); } @@ -417,17 +468,17 @@ mod test { #[test] fn map_no_root() { let m = hashmap!( - "A".to_string() => 
hashmap!( - "id".to_string() => 42, - "ie".to_string() => 43, + key!("A") => hashmap!( + key!("id") => 42, + key!("ie") => 43, ), ); let keys = to_pairs(&m).unwrap(); assert_eq!( keys, hashmap!( - "A.id".to_string() => "42".to_string(), - "A.ie".to_string() => "43".to_string(), + key!("A.id") => "42".to_string(), + key!("A.ie") => "43".to_string(), ) ); } diff --git a/workspaces/api/apiserver/src/server/controller.rs b/workspaces/api/apiserver/src/server/controller.rs index 6157334cc0e..cd6762f595b 100644 --- a/workspaces/api/apiserver/src/server/controller.rs +++ b/workspaces/api/apiserver/src/server/controller.rs @@ -18,14 +18,8 @@ use crate::server::error::{self, Result}; /// Build a Settings based on pending data in the datastore; the Settings will be empty if there /// are no pending settings. pub(crate) fn get_pending_settings(datastore: &D) -> Result { - get_prefix( - datastore, - Committed::Pending, - "settings.", - None as Option<&str>, - None, - ) - .map(|maybe_settings| maybe_settings.unwrap_or_else(Settings::default)) + get_prefix(datastore, Committed::Pending, "settings.", None) + .map(|maybe_settings| maybe_settings.unwrap_or_else(Settings::default)) } /// Delete any settings that have been received but not committed @@ -37,16 +31,10 @@ pub(crate) fn delete_pending_settings(datastore: &mut D) -> Result /// Build a Settings based on the data in the datastore. Errors if no settings are found. pub(crate) fn get_settings(datastore: &D, committed: Committed) -> Result { - get_prefix( - datastore, - committed, - "settings.", - None as Option<&str>, - None, - ) - .transpose() - // None is not OK here - we always have *some* settings - .context(error::MissingData { prefix: "settings" })? + get_prefix(datastore, committed, "settings.", None) + .transpose() + // None is not OK here - we always have *some* settings + .context(error::MissingData { prefix: "settings" })? } /// Build a Settings based on the data in the datastore that begins with the given prefix. 
@@ -56,7 +44,7 @@ pub(crate) fn get_settings_prefix>( committed: Committed, ) -> Result { let prefix = "settings.".to_string() + prefix.as_ref(); - get_prefix(datastore, committed, &prefix, None as Option<&str>, None) + get_prefix(datastore, committed, &prefix, None) .transpose() // None is OK here - they could ask for a prefix we don't have .unwrap_or_else(|| Ok(Settings::default())) @@ -68,7 +56,6 @@ pub(crate) fn get_services(datastore: &D) -> Result { datastore, Committed::Live, "services.", - None as Option<&str>, Some("services".to_string()), ) .transpose() @@ -82,7 +69,6 @@ pub(crate) fn get_configuration_files(datastore: &D) -> Result, Some("configuration-files".to_string()), ) .transpose() @@ -93,26 +79,23 @@ pub(crate) fn get_configuration_files(datastore: &D) -> Result( +/// into the desired type. map_prefix should be the prefix to remove if you're deserializing into +/// a map; see docs on from_map_with_prefix. Returns Err if we couldn't pull expected data; +/// returns Ok(None) if we found there were no populated keys. 
+fn get_prefix( datastore: &D, committed: Committed, - find_prefix: S1, - strip_prefix: Option, + find_prefix: S, map_prefix: Option, ) -> Result> where D: DataStore, T: DeserializeOwned, - S1: AsRef, - S2: AsRef, + S: AsRef, { let find_prefix = find_prefix.as_ref(); - let mut data = datastore + let data = datastore .get_prefix(find_prefix, committed) .with_context(|| error::DataStore { op: format!("get_prefix '{}' for {:?}", find_prefix, committed), @@ -121,23 +104,6 @@ where return Ok(None); } - if let Some(ref strip_prefix) = strip_prefix { - data = data - .into_iter() - .map(|(mut key, value)| { - let strip_prefix = strip_prefix.as_ref(); - if key.starts_with(strip_prefix) { - let stripped = &key[strip_prefix.len()..]; - trace!("Stripped prefix of key, result: {}", stripped); - key = Key::new(KeyType::Data, &stripped).unwrap_or_else(|_| { - unreachable!("Stripping prefix of Key failed to make Key: {}", stripped) - }); - } - (key, value) - }) - .collect(); - } - from_map_with_prefix(map_prefix, &data).context(error::Deserialization { given: find_prefix }) } @@ -162,7 +128,7 @@ pub(crate) fn get_settings_keys( // TODO: confirm we want to skip requested keys if not populated, or error None => continue, }; - data.insert(key_str.to_string(), value); + data.insert(key, value); } let settings = from_map(&data).context(error::Deserialization { @@ -273,10 +239,8 @@ pub(crate) fn get_metadata_for_data_keys>( Err(_) => continue, }; trace!("Deserializing scalar from metadata"); - let value: Value = - deserialize_scalar::<_, ScalarError>(&value_str).context(error::InvalidMetadata { - key: md_key.as_ref(), - })?; + let value: Value = deserialize_scalar::<_, ScalarError>(&value_str) + .context(error::InvalidMetadata { key: md_key.name() })?; result.insert(data_key.to_string(), value); } @@ -302,7 +266,7 @@ pub(crate) fn get_metadata_for_all_data_keys>( trace!("Deserializing scalar from metadata"); let value: Value = deserialize_scalar::<_, ScalarError>(&value_str).context( 
error::InvalidMetadata { - key: meta_key.as_ref(), + key: meta_key.name(), }, )?; result.insert(data_key.to_string(), value); diff --git a/workspaces/api/apiserver/src/server/mod.rs b/workspaces/api/apiserver/src/server/mod.rs index c3d79442e71..a28c13818fc 100644 --- a/workspaces/api/apiserver/src/server/mod.rs +++ b/workspaces/api/apiserver/src/server/mod.rs @@ -197,7 +197,8 @@ fn commit_and_apply_settings(data: web::Data) -> Result( .get_prefix("", committed) .context(error::GetData { committed })?; - // Deserialize values to Value so there's a consistent input type. (We can't specify item - // types because we'd have to know the model structure.) let mut data = HashMap::new(); - for (data_key, value) in raw_data.into_iter() { - let value = deserialize_scalar(&value).context(error::Deserialize { input: value })?; - data.insert(data_key, value); + for (data_key, value_str) in raw_data.into_iter() { + // Store keys with just their name, rather than the full Key, so that migrations are easier + // to write, and we don't tie migrations to any specific data store version. Migrations + // shouldn't need to link against data store code. + let key_name = data_key.name(); + // Deserialize values to Value so there's a consistent input type. (We can't specify item + // types because we'd have to know the model structure.) 
+ let value = + deserialize_scalar(&value_str).context(error::Deserialize { input: value_str })?; + data.insert(key_name.clone(), value); } // Metadata isn't committed, it goes live immediately, so we only populate the metadata @@ -36,11 +43,16 @@ pub(crate) fn get_input_data( .get_metadata_prefix("", &None as &Option<&str>) .context(error::GetMetadata)?; for (data_key, meta_map) in raw_metadata.into_iter() { - let data_entry = metadata.entry(data_key).or_insert_with(HashMap::new); - for (metadata_key, value) in meta_map.into_iter() { - let value = - deserialize_scalar(&value).context(error::Deserialize { input: value })?; - data_entry.insert(metadata_key, value); + // See notes above about storing key Strings and Values. + let data_key_name = data_key.name(); + let data_entry = metadata + .entry(data_key_name.clone()) + .or_insert_with(HashMap::new); + for (metadata_key, value_str) in meta_map.into_iter() { + let metadata_key_name = metadata_key.name(); + let value = deserialize_scalar(&value_str) + .context(error::Deserialize { input: value_str })?; + data_entry.insert(metadata_key_name.clone(), value); } } } @@ -58,7 +70,12 @@ pub(crate) fn set_output_data( ) -> Result<()> { // Prepare serialized data let mut data = HashMap::new(); - for (data_key, raw_value) in &input.data { + for (data_key_name, raw_value) in &input.data { + // See notes above about storing key Strings and Values. 
+ let data_key = Key::new(KeyType::Data, data_key_name).context(error::InvalidKey { + key_type: KeyType::Data, + key: data_key_name, + })?; let value = serialize_scalar(raw_value).context(error::Serialize)?; data.insert(data_key, value); } @@ -71,8 +88,17 @@ pub(crate) fn set_output_data( .context(error::DataStoreWrite)?; // Set metadata in a loop (currently no batch API) - for (data_key, meta_map) in &input.metadata { - for (metadata_key, raw_value) in meta_map.into_iter() { + for (data_key_name, meta_map) in &input.metadata { + let data_key = Key::new(KeyType::Data, data_key_name).context(error::InvalidKey { + key_type: KeyType::Data, + key: data_key_name, + })?; + for (metadata_key_name, raw_value) in meta_map.into_iter() { + let metadata_key = + Key::new(KeyType::Meta, metadata_key_name).context(error::InvalidKey { + key_type: KeyType::Meta, + key: metadata_key_name, + })?; let value = serialize_scalar(&raw_value).context(error::Serialize)?; datastore .set_metadata(&metadata_key, &data_key, value) diff --git a/workspaces/api/migration/migration-helpers/src/error.rs b/workspaces/api/migration/migration-helpers/src/error.rs index 6bb0bcb96ce..e0228447d6a 100644 --- a/workspaces/api/migration/migration-helpers/src/error.rs +++ b/workspaces/api/migration/migration-helpers/src/error.rs @@ -40,8 +40,9 @@ pub enum Error { #[snafu(display("Migration requires missing key: {}", key))] MissingData { key: String }, - #[snafu(display("Migration used invalid key '{}': {}", key, source))] + #[snafu(display("Migration used invalid {:?} key '{}': {}", key_type, key, source))] InvalidKey { + key_type: datastore::KeyType, key: String, source: datastore::Error, }, diff --git a/workspaces/api/migration/migration-helpers/src/lib.rs b/workspaces/api/migration/migration-helpers/src/lib.rs index 3611b135ceb..dbdb00ecaa0 100644 --- a/workspaces/api/migration/migration-helpers/src/lib.rs +++ b/workspaces/api/migration/migration-helpers/src/lib.rs @@ -20,7 +20,7 @@ use std::env; use 
std::fmt; use apiserver::datastore::{Committed, Value}; -pub use apiserver::datastore::{DataStore, FilesystemDataStore, Key, KeyType}; +pub use apiserver::datastore::{DataStore, FilesystemDataStore}; use args::parse_args; use datastore::{get_input_data, set_output_data}; @@ -50,19 +50,19 @@ pub trait Migration { fn backward(&mut self, input: MigrationData) -> Result; } -/// Mapping of metadata key to arbitrary value. Each data key can have a Metadata describing its -/// metadata keys. -pub type Metadata = HashMap; +/// Mapping of metadata key name to arbitrary value. Each data key can have a Metadata describing +/// its metadata keys. +pub type Metadata = HashMap; /// MigrationData holds all data that can be migrated in a migration, and serves as the input and /// output format of migrations. A serde Value type is used to hold the arbitrary data of each /// key because we can't represent types when they could change in the migration. #[derive(Debug)] pub struct MigrationData { - /// Mapping of data keys to their arbitrary values. - pub data: HashMap, - /// Mapping of data keys to their metadata. - pub metadata: HashMap, + /// Mapping of data key names to their arbitrary values. + pub data: HashMap, + /// Mapping of data key names to their metadata. 
+ pub metadata: HashMap, } /// Returns the default settings for a given path so you can easily replace a given section of the diff --git a/workspaces/api/servicedog/src/main.rs b/workspaces/api/servicedog/src/main.rs index a99f69367b3..9fbe13ce4f8 100644 --- a/workspaces/api/servicedog/src/main.rs +++ b/workspaces/api/servicedog/src/main.rs @@ -26,6 +26,7 @@ use std::process::{self, Command}; use std::str::FromStr; use apiserver::datastore::serialization::to_pairs_with_prefix; +use apiserver::datastore::{Key, KeyType}; use apiserver::model; // FIXME Get from configuration in the future @@ -39,7 +40,7 @@ mod error { use snafu::Snafu; use std::process::{Command, Output}; - use apiserver::datastore::serialization; + use apiserver::datastore::{self, serialization}; #[derive(Debug, Snafu)] #[snafu(visibility = "pub(super)")] @@ -74,6 +75,12 @@ mod error { #[snafu(display("Error serializing settings: {} ", source))] SerializeSettings { source: serialization::Error }, + #[snafu(display("Unable to create key '{}': {}", key, source))] + InvalidKey { + key: String, + source: datastore::Error, + }, + #[snafu(display( "Unknown value for '{}': got '{}', expected 'true' or 'false'", setting, @@ -82,7 +89,7 @@ mod error { UnknownSettingState { setting: String, state: String }, #[snafu(display("Setting '{}' does not exist in the data store", setting))] - NonexistentSettting { setting: String }, + NonexistentSetting { setting: String }, #[snafu(display("Failed to execute '{:?}': {}", command, source))] ExecutionFailure { @@ -134,14 +141,15 @@ impl SettingState { // We can then serialize this structure to a map of // dotted.key.setting -> value. Using this map we can get the setting value. // FIXME remove this when we have an API client. 
-fn query_setting_value(setting: S) -> Result +fn query_setting_value(key_str: S) -> Result where S: AsRef, { - let setting = setting.as_ref(); - debug!("Querying the API for setting: {}", setting); + let key_str = key_str.as_ref(); + let key = Key::new(KeyType::Data, key_str).context(error::InvalidKey { key: key_str })?; + debug!("Querying the API for setting: {}", key_str); - let uri = format!("{}?keys={}", API_SETTINGS_URI, setting); + let uri = format!("{}?keys={}", API_SETTINGS_URI, key_str); let (code, response_body) = apiclient::raw_request(DEFAULT_API_SOCKET, &uri, "GET", None) .context(error::APIRequest { method: "GET", @@ -163,15 +171,15 @@ where // Serialize the Settings struct into key/value pairs. This builds the dotted // string representation of the setting - let setting_keypair = to_pairs_with_prefix("settings".to_string(), &settings) - .context(error::SerializeSettings)?; + let setting_keypair = + to_pairs_with_prefix("settings", &settings).context(error::SerializeSettings)?; debug!("Retrieved setting keypair: {:#?}", &setting_keypair); // (Hopefully) get the value from the map using the dotted string supplied // to the function Ok(setting_keypair - .get(setting) - .context(error::NonexistentSettting { setting: setting })? + .get(&key) + .context(error::NonexistentSetting { setting: key_str })? .to_string()) } diff --git a/workspaces/api/storewolf/src/main.rs b/workspaces/api/storewolf/src/main.rs index 462f4ef9906..2d5b76f8c06 100644 --- a/workspaces/api/storewolf/src/main.rs +++ b/workspaces/api/storewolf/src/main.rs @@ -338,20 +338,17 @@ fn populate_default_datastore>( // before serializing so we have full dotted keys like // "settings.foo.bar" and not just "foo.bar". We use a HashMap // to rebuild the nested structure. 
- let def_settings = to_pairs_with_prefix("settings".to_string(), &def_settings_table) - .context(error::Serialization { + let def_settings = to_pairs_with_prefix("settings", &def_settings_table).context( + error::Serialization { given: "default settings", - })?; + }, + )?; // For each of the default settings, check if it exists in the // datastore. If not, add it to the map of settings to write let mut settings_to_write = HashMap::new(); for (key, val) in def_settings { - let setting = Key::new(KeyType::Data, &key).context(error::InvalidKey { - key_type: KeyType::Data, - key: key.to_string(), - })?; - if !existing_data.contains(&setting) { + if !existing_data.contains(&key) { settings_to_write.insert(key, val); } } @@ -432,12 +429,8 @@ fn populate_default_datastore>( let mut other_defaults_to_write = HashMap::new(); if !defaults.is_empty() { - for (key, val) in defaults.iter() { - let data_key = Key::new(KeyType::Data, &key).context(error::InvalidKey { - key_type: KeyType::Data, - key, - })?; - if !existing_data.contains(&data_key) { + for (key, val) in defaults { + if !existing_data.contains(&key) { other_defaults_to_write.insert(key, val); } } diff --git a/workspaces/api/sundog/src/main.rs b/workspaces/api/sundog/src/main.rs index b8144fbc7e1..e8c71763ef7 100644 --- a/workspaces/api/sundog/src/main.rs +++ b/workspaces/api/sundog/src/main.rs @@ -21,7 +21,7 @@ use std::process; use std::str::{self, FromStr}; use apiserver::datastore::serialization::to_pairs_with_prefix; -use apiserver::datastore::{self, deserialization}; +use apiserver::datastore::{self, deserialization, Key, KeyType}; use apiserver::model; // FIXME Get from configuration in the future @@ -35,9 +35,7 @@ mod error { use http::StatusCode; use snafu::Snafu; - use apiserver::datastore; - use apiserver::datastore::deserialization; - use apiserver::datastore::serialization; + use apiserver::datastore::{self, deserialization, serialization, KeyType}; /// Potential errors during dynamic settings retrieval 
#[derive(Debug, Snafu)] @@ -142,6 +140,13 @@ mod error { source: datastore::ScalarError, }, + #[snafu(display("Unable to create {:?} key '{}': {}", key_type, key, source))] + InvalidKey { + key_type: KeyType, + key: String, + source: datastore::Error, + }, + #[snafu(display("Logger setup error: {}", source))] Logger { source: simplelog::TermLogError }, } @@ -180,7 +185,7 @@ where /// Given a list of settings, query the API for any that are currently /// set or are in pending state. -fn get_populated_settings
<P>(socket_path: P, to_query: Vec<&str>) -> Result<HashSet<String>> +fn get_populated_settings<P>(socket_path: P, to_query: Vec<&str>) -> Result<HashSet<Key>> where P: AsRef<Path>, { @@ -213,8 +218,8 @@ where // Serialize the Settings struct into key/value pairs. This builds the dotted // string representation of the setting - let settings_keypairs = to_pairs_with_prefix("settings".to_string(), &settings) - .context(error::SerializeSettings)?; + let settings_keypairs = + to_pairs_with_prefix("settings", &settings).context(error::SerializeSettings)?; // Put the setting into our hashset of populated keys for (k, _) in settings_keypairs { @@ -243,7 +248,11 @@ where let populated_settings = get_populated_settings(&socket_path, settings_to_query)?; // For each generator, run it and capture the output - for (setting, generator) in generators { + for (setting_str, generator) in generators { + let setting = Key::new(KeyType::Data, &setting_str).context(error::InvalidKey { + key_type: KeyType::Data, + key: &setting_str, + })?; // Don't clobber settings that are already populated if populated_settings.contains(&setting) { debug!("Setting '{}' is already populated, skipping", setting);