Skip to content

Commit

Permalink
chore(bors): merge pull request #710
Browse files Browse the repository at this point in the history
710: feat: capacity limit for volumes r=chriswldenyer a=chriswldenyer

Add mechanism for limiting the total volume size on the system by rejecting gRPC volume creation requests.

Co-authored-by: chriswldenyer <christopher.denyer@mayadata.io>
  • Loading branch information
mayastor-bors and chriswldenyer committed Jan 19, 2024
2 parents 6060b09 + 05e54a2 commit fa61700
Show file tree
Hide file tree
Showing 15 changed files with 152 additions and 8 deletions.
87 changes: 87 additions & 0 deletions control-plane/agents/src/bin/core/tests/volume/capacity_limit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#![cfg(test)]

use deployer_cluster::ClusterBuilder;
use grpc::operations::volume::traits::VolumeOperations;
use std::time::Duration;
use stor_port::{
transport_api::ReplyErrorKind,
types::v0::transport::{CreateVolume, DestroyVolume},
};
use uuid::Uuid;

const SIZE: u64 = 5242880;
const EXCESS: u64 = 1000000;

#[tokio::test]
async fn volume_create_with_capacity_limit() {
let cache_period = Duration::from_millis(250);
let reconcile_period = Duration::from_millis(3000);
let cluster = ClusterBuilder::builder()
.with_rest(false)
.with_io_engines(1)
.with_pool(0, "malloc:///p1?size_mb=300")
.with_cache_period(&humantime::Duration::from(cache_period).to_string())
.with_reconcile_period(reconcile_period, reconcile_period)
.build()
.await
.unwrap();

let volume_client = cluster.grpc_client().volume();

// test with no capacity limit
grpc_create_volume_with_limit(&volume_client, SIZE, None, None).await;

// test exceeding the capacity limit
grpc_create_volume_with_limit(
&volume_client,
SIZE,
Some(SIZE - EXCESS), // capacity smaller than volume
Some(ReplyErrorKind::CapacityLimitExceeded {}),
)
.await;

// test at the capacity limit
grpc_create_volume_with_limit(&volume_client, SIZE, Some(SIZE), None).await;

// test below the capacity limit
grpc_create_volume_with_limit(&volume_client, SIZE, Some(SIZE + EXCESS), None).await;
}

async fn grpc_create_volume_with_limit(
volume_client: &dyn VolumeOperations,
size: u64,
capacity: Option<u64>,
expected_error: Option<ReplyErrorKind>,
) {
let vol_uuid = Uuid::new_v4();

let volume = volume_client
.create(
&CreateVolume {
uuid: vol_uuid.try_into().unwrap(),
size,
replicas: 1,
cluster_capacity_limit: capacity,
..Default::default()
},
None,
)
.await;
match volume {
Ok(_) => {
volume_client
.destroy(
&DestroyVolume {
uuid: vol_uuid.try_into().unwrap(),
},
None,
)
.await
.unwrap();
assert!(expected_error.is_none());
}
Err(e) => {
assert_eq!(expected_error, Some(e.kind)); // wrong error
}
}
}
1 change: 1 addition & 0 deletions control-plane/agents/src/bin/core/tests/volume/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

mod affinity_group;
mod capacity;
mod capacity_limit;
mod garbage_collection;
mod helpers;
mod hotspare;
Expand Down
4 changes: 2 additions & 2 deletions control-plane/agents/src/bin/core/volume/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@ impl ResourceLifecycleExt<CreateVolume> for OperationGuardArc<VolumeSpec> {
) -> Result<Self::CreateOutput, SvcError> {
let specs = registry.specs();
let mut volume = specs
.get_or_create_volume(&CreateVolumeSource::None(request))
.get_or_create_volume(&CreateVolumeSource::None(request))?
.operation_guard_wait()
.await?;
let volume_clone = volume.start_create(registry, request).await?;
Expand Down Expand Up @@ -815,7 +815,7 @@ impl ResourceLifecycleExt<CreateVolumeSource<'_>> for OperationGuardArc<VolumeSp

let specs = registry.specs();
let mut volume = specs
.get_or_create_volume(request_src)
.get_or_create_volume(request_src)?
.operation_guard_wait()
.await?;
let volume_clone = volume.start_create_update(registry, request).await?;
Expand Down
23 changes: 19 additions & 4 deletions control-plane/agents/src/bin/core/volume/specs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,12 +795,27 @@ impl ResourceSpecsLocked {
pub(crate) fn get_or_create_volume(
&self,
request: &CreateVolumeSource,
) -> ResourceMutex<VolumeSpec> {
) -> Result<ResourceMutex<VolumeSpec>, SvcError> {
let mut specs = self.write();
if let Some(volume) = specs.volumes.get(&request.source().uuid) {
volume.clone()
Ok(volume.clone())
} else {
match request {
// if request has a capacity limit, add up the volumes and reject
// if the capacity limit would be exceeded
match request.source().cluster_capacity_limit {
None => {} // no limit, no check needed
Some(limit) => {
let mut total: u64 = specs.volumes.values().map(|v| v.lock().size).sum();
total += request.source().size;
if total > limit {
return Err(SvcError::CapacityLimitExceeded {
cluster_capacity_limit: limit,
excess: total - limit,
});
}
}
}
Ok(match request {
CreateVolumeSource::None(_) => {
specs.volumes.insert(VolumeSpec::from(request.source()))
}
Expand All @@ -809,7 +824,7 @@ impl ResourceSpecsLocked {
spec.set_content_source(Some(create_from_snap.to_snapshot_source()));
specs.volumes.insert(spec)
}
}
})
}
}

Expand Down
15 changes: 15 additions & 0 deletions control-plane/agents/src/common/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,15 @@ pub enum SvcError {
DrainNotAllowedWhenHAisDisabled {},
#[snafu(display("Target switchover is not allowed without HA"))]
SwitchoverNotAllowedWhenHAisDisabled {},
#[snafu(display(
"The volume would exceed the cluster capacity limit of {}B by {}B",
cluster_capacity_limit,
excess
))]
CapacityLimitExceeded {
cluster_capacity_limit: u64,
excess: u64,
},
}

impl SvcError {
Expand Down Expand Up @@ -966,6 +975,12 @@ impl From<SvcError> for ReplyError {
source,
extra,
},
SvcError::CapacityLimitExceeded { .. } => ReplyError {
kind: ReplyErrorKind::CapacityLimitExceeded,
resource: ResourceKind::Volume,
source,
extra,
},
}
}
}
Expand Down
1 change: 1 addition & 0 deletions control-plane/grpc/proto/v1/misc/common.proto
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ enum ReplyErrorKind {
ReplicaCreateNumber = 27;
VolumeNoReplicas = 28;
InUse = 29;
CapacityLimitExceeded = 30;
}

// ResourceKind for the resource which has undergone this error
Expand Down
2 changes: 2 additions & 0 deletions control-plane/grpc/proto/v1/volume/volume.proto
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ message CreateVolumeRequest {
bool thin = 8;
// Affinity Group related information.
optional AffinityGroup affinity_group = 9;
// maximum total volume size
optional uint64 cluster_capacity_limit = 10;
}

// Publish a volume on a node
Expand Down
2 changes: 2 additions & 0 deletions control-plane/grpc/src/misc/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ impl From<ReplyErrorKind> for common::ReplyErrorKind {
ReplyErrorKind::ReplicaCreateNumber => Self::ReplicaCreateNumber,
ReplyErrorKind::VolumeNoReplicas => Self::VolumeNoReplicas,
ReplyErrorKind::InUse => Self::InUse,
ReplyErrorKind::CapacityLimitExceeded => Self::CapacityLimitExceeded,
}
}
}
Expand Down Expand Up @@ -135,6 +136,7 @@ impl From<common::ReplyErrorKind> for ReplyErrorKind {
common::ReplyErrorKind::ReplicaCreateNumber => Self::ReplicaCreateNumber,
common::ReplyErrorKind::VolumeNoReplicas => Self::VolumeNoReplicas,
common::ReplyErrorKind::InUse => Self::InUse,
common::ReplyErrorKind::CapacityLimitExceeded => Self::CapacityLimitExceeded,
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions control-plane/grpc/src/operations/volume/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,8 @@ pub trait CreateVolumeInfo: Send + Sync + std::fmt::Debug {
fn thin(&self) -> bool;
/// Affinity Group related information.
fn affinity_group(&self) -> Option<AffinityGroup>;
/// Capacity Limit.
fn cluster_capacity_limit(&self) -> Option<u64>;
}

impl CreateVolumeInfo for CreateVolume {
Expand Down Expand Up @@ -924,6 +926,10 @@ impl CreateVolumeInfo for CreateVolume {
fn affinity_group(&self) -> Option<AffinityGroup> {
self.affinity_group.clone()
}

fn cluster_capacity_limit(&self) -> Option<u64> {
self.cluster_capacity_limit
}
}

/// Intermediate structure that validates the conversion to CreateVolumeRequest type.
Expand Down Expand Up @@ -972,6 +978,10 @@ impl CreateVolumeInfo for ValidatedCreateVolumeRequest {
fn affinity_group(&self) -> Option<AffinityGroup> {
self.inner.affinity_group.clone().map(|ag| ag.into())
}

fn cluster_capacity_limit(&self) -> Option<u64> {
self.inner.cluster_capacity_limit
}
}

impl ValidateRequestTypes for CreateVolumeRequest {
Expand Down Expand Up @@ -1008,6 +1018,7 @@ impl From<&dyn CreateVolumeInfo> for CreateVolume {
labels: data.labels(),
thin: data.thin(),
affinity_group: data.affinity_group(),
cluster_capacity_limit: data.cluster_capacity_limit(),
}
}
}
Expand All @@ -1025,6 +1036,7 @@ impl From<&dyn CreateVolumeInfo> for CreateVolumeRequest {
.map(|labels| crate::common::StringMapValue { value: labels }),
thin: data.thin(),
affinity_group: data.affinity_group().map(|ag| ag.into()),
cluster_capacity_limit: data.cluster_capacity_limit(),
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion control-plane/rest/openapi-specs/v0_api_spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2911,6 +2911,7 @@ components:
- FailedPersist
- Deleting
- InUse
- CapacityLimitExceeded
required:
- details
- kind
Expand Down Expand Up @@ -3827,4 +3828,4 @@ components:
content:
application/json:
schema:
$ref: '#/components/schemas/RestJsonError'
$ref: '#/components/schemas/RestJsonError'
1 change: 1 addition & 0 deletions control-plane/rest/src/versions/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ impl CreateVolumeBody {
labels: self.labels.clone(),
thin: self.thin,
affinity_group: self.affinity_group.clone(),
cluster_capacity_limit: None,
}
}
/// Convert into rpc request type.
Expand Down
1 change: 1 addition & 0 deletions control-plane/stor-port/src/transport_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ pub enum ReplyErrorKind {
ReplicaCreateNumber,
VolumeNoReplicas,
InUse,
CapacityLimitExceeded,
}

impl From<tonic::Code> for ReplyErrorKind {
Expand Down
4 changes: 4 additions & 0 deletions control-plane/stor-port/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ impl From<ReplyError> for RestError<RestJsonError> {
let error = RestJsonError::new(details, message, Kind::FailedPrecondition);
(StatusCode::PRECONDITION_FAILED, error)
}
ReplyErrorKind::CapacityLimitExceeded => {
let error = RestJsonError::new(details, message, Kind::CapacityLimitExceeded);
(StatusCode::INSUFFICIENT_STORAGE, error)
}
};

RestError::new(status, error)
Expand Down
2 changes: 2 additions & 0 deletions control-plane/stor-port/src/types/v0/transport/volume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,8 @@ pub struct CreateVolume {
pub thin: bool,
/// Affinity Group related information.
pub affinity_group: Option<AffinityGroup>,
/// Maximum total system volume size.
pub cluster_capacity_limit: Option<u64>,
}

/// Resize volume request.
Expand Down
2 changes: 1 addition & 1 deletion utils/dependencies

0 comments on commit fa61700

Please sign in to comment.