diff --git a/proxy/src/console/messages.rs b/proxy/src/console/messages.rs index 3b7d681a41d1..d28d13ba692b 100644 --- a/proxy/src/console/messages.rs +++ b/proxy/src/console/messages.rs @@ -5,7 +5,7 @@ use std::fmt::{self, Display}; use crate::auth::IpPattern; use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; -use crate::proxy::retry::ShouldRetry; +use crate::proxy::retry::CouldRetry; /// Generic error response with human-readable description. /// Note that we can't always present it to user as is. @@ -64,45 +64,47 @@ impl Display for ConsoleError { } } -impl ShouldRetry for ConsoleError { +impl CouldRetry for ConsoleError { fn could_retry(&self) -> bool { - if self.status.is_none() || self.status.as_ref().unwrap().details.retry_info.is_none() { - // retry some temporary failures because the compute was in a bad state - // (bad request can be returned when the endpoint was in transition) - return match &self { - ConsoleError { - http_status_code: http::StatusCode::BAD_REQUEST, - .. - } => true, - // don't retry when quotas are exceeded - ConsoleError { - http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY, - ref error, - .. - } => !error.contains("compute time quota of non-primary branches is exceeded"), - // locked can be returned when the endpoint was in transition - // or when quotas are exceeded. don't retry when quotas are exceeded - ConsoleError { - http_status_code: http::StatusCode::LOCKED, - ref error, - .. - } => { - !error.contains("quota exceeded") - && !error.contains("the limit for current plan reached") - } - _ => false, - }; + // If the error message does not have a status, + // the error is unknown and probably should not retry automatically + let Some(status) = &self.status else { + return false; + }; + + // retry if the retry info is set. 
+ if status.details.retry_info.is_some() { + return true; } - // retry if the response has a retry delay - if let Some(retry_info) = self - .status - .as_ref() - .and_then(|s| s.details.retry_info.as_ref()) - { - retry_info.retry_delay_ms > 0 - } else { - false + // if no retry info set, attempt to use the error code to guess the retry state. + let reason = status + .details + .error_info + .map_or(Reason::Unknown, |e| e.reason); + match reason { + // not a transient error + Reason::RoleProtected => false, + // on retry, it will still not be found + Reason::ResourceNotFound + | Reason::ProjectNotFound + | Reason::EndpointNotFound + | Reason::BranchNotFound => false, + // we were asked to go away + Reason::RateLimitExceeded + | Reason::NonDefaultBranchComputeTimeExceeded + | Reason::ActiveTimeQuotaExceeded + | Reason::ComputeTimeQuotaExceeded + | Reason::WrittenDataQuotaExceeded + | Reason::DataTransferQuotaExceeded + | Reason::LogicalSizeQuotaExceeded => false, + // transient error. control plane is currently busy + // but might be ready soon + Reason::RunningOperations => true, + Reason::ConcurrencyLimitReached => true, + Reason::LockAlreadyTaken => true, + // unknown error. better not retry it. + Reason::Unknown => false, } } } @@ -121,7 +123,7 @@ pub struct Details { pub user_facing_message: Option, } -#[derive(Debug, Deserialize)] +#[derive(Copy, Clone, Debug, Deserialize)] pub struct ErrorInfo { pub reason: Reason, // Schema could also have `metadata` field, but it's not structured. Skip it for now. @@ -129,30 +131,59 @@ pub struct ErrorInfo { #[derive(Clone, Copy, Debug, Deserialize, Default)] pub enum Reason { + /// RoleProtected indicates that the role is protected and the attempted operation is not permitted on protected roles. #[serde(rename = "ROLE_PROTECTED")] RoleProtected, + /// ResourceNotFound indicates that a resource (project, endpoint, branch, etc.) 
wasn't found, + /// usually due to the provided ID not being correct or because the subject doesn't have enough permissions to + /// access the requested resource. + /// Prefer a more specific reason if possible, e.g., ProjectNotFound, EndpointNotFound, etc. #[serde(rename = "RESOURCE_NOT_FOUND")] ResourceNotFound, + /// ProjectNotFound indicates that the project wasn't found, usually due to the provided ID not being correct, + /// or that the subject doesn't have enough permissions to access the requested project. #[serde(rename = "PROJECT_NOT_FOUND")] ProjectNotFound, + /// EndpointNotFound indicates that the endpoint wasn't found, usually due to the provided ID not being correct, + /// or that the subject doesn't have enough permissions to access the requested endpoint. #[serde(rename = "ENDPOINT_NOT_FOUND")] EndpointNotFound, + /// BranchNotFound indicates that the branch wasn't found, usually due to the provided ID not being correct, + /// or that the subject doesn't have enough permissions to access the requested branch. #[serde(rename = "BRANCH_NOT_FOUND")] BranchNotFound, + /// RateLimitExceeded indicates that the rate limit for the operation has been exceeded. #[serde(rename = "RATE_LIMIT_EXCEEDED")] RateLimitExceeded, + /// NonDefaultBranchComputeTimeExceeded indicates that the compute time quota of non-default branches has been + /// exceeded. #[serde(rename = "NON_PRIMARY_BRANCH_COMPUTE_TIME_EXCEEDED")] - NonPrimaryBranchComputeTimeExceeded, + NonDefaultBranchComputeTimeExceeded, + /// ActiveTimeQuotaExceeded indicates that the active time quota was exceeded. #[serde(rename = "ACTIVE_TIME_QUOTA_EXCEEDED")] ActiveTimeQuotaExceeded, + /// ComputeTimeQuotaExceeded indicates that the compute time quota was exceeded. #[serde(rename = "COMPUTE_TIME_QUOTA_EXCEEDED")] ComputeTimeQuotaExceeded, + /// WrittenDataQuotaExceeded indicates that the written data quota was exceeded. 
#[serde(rename = "WRITTEN_DATA_QUOTA_EXCEEDED")] WrittenDataQuotaExceeded, + /// DataTransferQuotaExceeded indicates that the data transfer quota was exceeded. #[serde(rename = "DATA_TRANSFER_QUOTA_EXCEEDED")] DataTransferQuotaExceeded, + /// LogicalSizeQuotaExceeded indicates that the logical size quota was exceeded. #[serde(rename = "LOGICAL_SIZE_QUOTA_EXCEEDED")] LogicalSizeQuotaExceeded, + /// RunningOperations indicates that the project already has some running operations + /// and scheduling of new ones is prohibited. + #[serde(rename = "RUNNING_OPERATIONS")] + RunningOperations, + /// ConcurrencyLimitReached indicates that the concurrency limit for an action was reached. + #[serde(rename = "CONCURRENCY_LIMIT_REACHED")] + ConcurrencyLimitReached, + /// LockAlreadyTaken indicates that we attempted to take a lock that was already taken. + #[serde(rename = "LOCK_ALREADY_TAKEN")] + LockAlreadyTaken, #[default] #[serde(other)] Unknown, @@ -170,7 +201,7 @@ impl Reason { } } -#[derive(Debug, Deserialize)] +#[derive(Copy, Clone, Debug, Deserialize)] pub struct RetryInfo { pub retry_delay_ms: u64, } diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 915c2ee7a64e..bec55a83435f 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -25,9 +25,9 @@ use tracing::info; pub mod errors { use crate::{ - console::messages::{self, ConsoleError}, + console::messages::{self, ConsoleError, Reason}, error::{io_error, ReportableError, UserFacingError}, - proxy::retry::ShouldRetry, + proxy::retry::CouldRetry, }; use thiserror::Error; @@ -76,21 +76,22 @@ pub mod errors { ApiError::Console(e) => { use crate::error::ErrorKind::*; match e.get_reason() { - crate::console::messages::Reason::RoleProtected => User, - crate::console::messages::Reason::ResourceNotFound => User, - crate::console::messages::Reason::ProjectNotFound => User, - crate::console::messages::Reason::EndpointNotFound => User, - 
crate::console::messages::Reason::BranchNotFound => User, - crate::console::messages::Reason::RateLimitExceeded => ServiceRateLimit, - crate::console::messages::Reason::NonPrimaryBranchComputeTimeExceeded => { - User - } - crate::console::messages::Reason::ActiveTimeQuotaExceeded => User, - crate::console::messages::Reason::ComputeTimeQuotaExceeded => User, - crate::console::messages::Reason::WrittenDataQuotaExceeded => User, - crate::console::messages::Reason::DataTransferQuotaExceeded => User, - crate::console::messages::Reason::LogicalSizeQuotaExceeded => User, - crate::console::messages::Reason::Unknown => match &e { + Reason::RoleProtected => User, + Reason::ResourceNotFound => User, + Reason::ProjectNotFound => User, + Reason::EndpointNotFound => User, + Reason::BranchNotFound => User, + Reason::RateLimitExceeded => ServiceRateLimit, + Reason::NonDefaultBranchComputeTimeExceeded => User, + Reason::ActiveTimeQuotaExceeded => User, + Reason::ComputeTimeQuotaExceeded => User, + Reason::WrittenDataQuotaExceeded => User, + Reason::DataTransferQuotaExceeded => User, + Reason::LogicalSizeQuotaExceeded => User, + Reason::ConcurrencyLimitReached => ControlPlane, + Reason::LockAlreadyTaken => ControlPlane, + Reason::RunningOperations => ControlPlane, + Reason::Unknown => match &e { ConsoleError { http_status_code: http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE, @@ -128,7 +129,7 @@ pub mod errors { } } - impl ShouldRetry for ApiError { + impl CouldRetry for ApiError { fn could_retry(&self) -> bool { match self { // retry some transport errors @@ -239,6 +240,17 @@ pub mod errors { } } } + + impl CouldRetry for WakeComputeError { + fn could_retry(&self) -> bool { + match self { + WakeComputeError::BadComputeAddress(_) => false, + WakeComputeError::ApiError(e) => e.could_retry(), + WakeComputeError::TooManyConnections => false, + WakeComputeError::TooManyConnectionAttempts(_) => false, + } + } + } } /// Auth secret which is managed by the cloud. 
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 409d45b39a34..82180aaee3cf 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -7,7 +7,7 @@ use crate::{ error::ReportableError, metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType}, proxy::{ - retry::{retry_after, ShouldRetry}, + retry::{retry_after, should_retry, CouldRetry}, wake_compute::wake_compute, }, Host, @@ -17,6 +17,8 @@ use pq_proto::StartupMessageParams; use tokio::time; use tracing::{error, info, warn}; +use super::retry::ShouldRetryWakeCompute; + const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2); /// If we couldn't connect, a cached connection info might be to blame @@ -104,7 +106,7 @@ pub async fn connect_to_compute( connect_to_compute_retry_config: RetryConfig, ) -> Result where - M::ConnectError: ShouldRetry + std::fmt::Debug, + M::ConnectError: CouldRetry + ShouldRetryWakeCompute + std::fmt::Debug, M::Error: From, { let mut num_retries = 0; @@ -139,10 +141,10 @@ where error!(error = ?err, "could not connect to compute node"); - let node_info = if !node_info.cached() || !err.should_retry_database_address() { + let node_info = if !node_info.cached() || !err.should_retry_wake_compute() { // If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry. // Do not need to retrieve a new node_info, just return the old one. 
- if !err.should_retry(num_retries, connect_to_compute_retry_config) { + if should_retry(&err, num_retries, connect_to_compute_retry_config) { Metrics::get().proxy.retries_metric.observe( RetriesMetricGroup { outcome: ConnectOutcome::Failed, @@ -188,9 +190,8 @@ where return Ok(res); } Err(e) => { - let retriable = e.should_retry(num_retries, connect_to_compute_retry_config); - if !retriable { - error!(error = ?e, num_retries, retriable, "couldn't connect to compute node"); + if !should_retry(&e, num_retries, connect_to_compute_retry_config) { + error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node"); Metrics::get().proxy.retries_metric.observe( RetriesMetricGroup { outcome: ConnectOutcome::Failed, @@ -200,9 +201,10 @@ where ); return Err(e.into()); } - warn!(error = ?e, num_retries, retriable, "couldn't connect to compute node"); + + warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node"); } - } + }; let wait_duration = retry_after(num_retries, connect_to_compute_retry_config); num_retries += 1; diff --git a/proxy/src/proxy/retry.rs b/proxy/src/proxy/retry.rs index 8dec1f1137a2..644b183a9183 100644 --- a/proxy/src/proxy/retry.rs +++ b/proxy/src/proxy/retry.rs @@ -2,20 +2,22 @@ use crate::{compute, config::RetryConfig}; use std::{error::Error, io}; use tokio::time; -pub trait ShouldRetry { +pub trait CouldRetry { + /// Returns true if the error could be retried fn could_retry(&self) -> bool; - fn should_retry(&self, num_retries: u32, config: RetryConfig) -> bool { - match self { - _ if num_retries >= config.max_retries => false, - err => err.could_retry(), - } - } - fn should_retry_database_address(&self) -> bool { - true - } } -impl ShouldRetry for io::Error { +pub trait ShouldRetryWakeCompute { + /// Returns true if we need to invalidate the cache for this node. + /// If false, we can continue retrying with the current node cache. 
+ fn should_retry_wake_compute(&self) -> bool; +} + +pub fn should_retry(err: &impl CouldRetry, num_retries: u32, config: RetryConfig) -> bool { + num_retries < config.max_retries && err.could_retry() +} + +impl CouldRetry for io::Error { fn could_retry(&self) -> bool { use std::io::ErrorKind; matches!( @@ -25,7 +27,7 @@ impl ShouldRetry for io::Error { } } -impl ShouldRetry for tokio_postgres::error::DbError { +impl CouldRetry for tokio_postgres::error::DbError { fn could_retry(&self) -> bool { use tokio_postgres::error::SqlState; matches!( @@ -36,7 +38,9 @@ impl ShouldRetry for tokio_postgres::error::DbError { | &SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION, ) } - fn should_retry_database_address(&self) -> bool { +} +impl ShouldRetryWakeCompute for tokio_postgres::error::DbError { + fn should_retry_wake_compute(&self) -> bool { use tokio_postgres::error::SqlState; // Here are errors that happens after the user successfully authenticated to the database. // TODO: there are pgbouncer errors that should be retried, but they are not listed here. 
@@ -53,7 +57,7 @@ impl ShouldRetry for tokio_postgres::error::DbError { } } -impl ShouldRetry for tokio_postgres::Error { +impl CouldRetry for tokio_postgres::Error { fn could_retry(&self) -> bool { if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) { io::Error::could_retry(io_err) @@ -63,29 +67,33 @@ impl ShouldRetry for tokio_postgres::Error { false } } - fn should_retry_database_address(&self) -> bool { - if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) { - io::Error::should_retry_database_address(io_err) - } else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) { - tokio_postgres::error::DbError::should_retry_database_address(db_err) +} +impl ShouldRetryWakeCompute for tokio_postgres::Error { + fn should_retry_wake_compute(&self) -> bool { + if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) { + tokio_postgres::error::DbError::should_retry_wake_compute(db_err) } else { + // likely an IO error. Possible the compute has shutdown and the + // cache is stale. 
true } } } -impl ShouldRetry for compute::ConnectionError { +impl CouldRetry for compute::ConnectionError { fn could_retry(&self) -> bool { match self { compute::ConnectionError::Postgres(err) => err.could_retry(), compute::ConnectionError::CouldNotConnect(err) => err.could_retry(), + compute::ConnectionError::WakeComputeError(err) => err.could_retry(), _ => false, } } - fn should_retry_database_address(&self) -> bool { +} +impl ShouldRetryWakeCompute for compute::ConnectionError { + fn should_retry_wake_compute(&self) -> bool { match self { - compute::ConnectionError::Postgres(err) => err.should_retry_database_address(), - compute::ConnectionError::CouldNotConnect(err) => err.should_retry_database_address(), + compute::ConnectionError::Postgres(err) => err.should_retry_wake_compute(), // the cache entry was not checked for validity compute::ConnectionError::TooManyConnectionAttempts(_) => false, _ => true, diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs index 96683511fec5..8119f39fae6b 100644 --- a/proxy/src/proxy/tests.rs +++ b/proxy/src/proxy/tests.rs @@ -5,21 +5,21 @@ mod mitm; use std::time::Duration; use super::connect_compute::ConnectMechanism; -use super::retry::ShouldRetry; +use super::retry::CouldRetry; use super::*; use crate::auth::backend::{ ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend, }; use crate::config::{CertResolver, RetryConfig}; use crate::console::caches::NodeInfoCache; -use crate::console::messages::{ConsoleError, MetricsAuxInfo}; +use crate::console::messages::{ConsoleError, Details, MetricsAuxInfo, Status}; use crate::console::provider::{CachedAllowedIps, CachedRoleSecret, ConsoleBackend}; use crate::console::{self, CachedNodeInfo, NodeInfo}; use crate::error::ErrorKind; -use crate::proxy::retry::retry_after; use crate::{http, sasl, scram, BranchId, EndpointId, ProjectId}; use anyhow::{bail, Context}; use async_trait::async_trait; +use retry::{retry_after, ShouldRetryWakeCompute}; use 
rstest::rstest; use rustls::pki_types; use tokio_postgres::config::SslMode; @@ -438,11 +438,16 @@ impl std::fmt::Display for TestConnectError { impl std::error::Error for TestConnectError {} -impl ShouldRetry for TestConnectError { +impl CouldRetry for TestConnectError { fn could_retry(&self) -> bool { self.retryable } } +impl ShouldRetryWakeCompute for TestConnectError { + fn should_retry_wake_compute(&self) -> bool { + true + } +} #[async_trait] impl ConnectMechanism for TestConnectMechanism { @@ -485,7 +490,7 @@ impl TestBackend for TestConnectMechanism { ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)), ConnectAction::WakeFail => { let err = console::errors::ApiError::Console(ConsoleError { - http_status_code: http::StatusCode::FORBIDDEN, + http_status_code: http::StatusCode::BAD_REQUEST, error: "TEST".into(), status: None, }); @@ -496,7 +501,15 @@ impl TestBackend for TestConnectMechanism { let err = console::errors::ApiError::Console(ConsoleError { http_status_code: http::StatusCode::BAD_REQUEST, error: "TEST".into(), - status: None, + status: Some(Status { + code: "error".into(), + message: "error".into(), + details: Details { + error_info: None, + retry_info: Some(console::messages::RetryInfo { retry_delay_ms: 1 }), + user_facing_message: None, + }, + }), }); assert!(err.could_retry()); Err(console::errors::WakeComputeError::ApiError(err)) diff --git a/proxy/src/proxy/wake_compute.rs b/proxy/src/proxy/wake_compute.rs index c166cf4389dc..fef349aac04e 100644 --- a/proxy/src/proxy/wake_compute.rs +++ b/proxy/src/proxy/wake_compute.rs @@ -1,18 +1,16 @@ use crate::config::RetryConfig; -use crate::console::messages::ConsoleError; +use crate::console::messages::{ConsoleError, Reason}; use crate::console::{errors::WakeComputeError, provider::CachedNodeInfo}; use crate::context::RequestMonitoring; use crate::metrics::{ ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType, WakeupFailureKind, }; -use 
crate::proxy::retry::retry_after; +use crate::proxy::retry::{retry_after, should_retry}; use hyper1::StatusCode; -use std::ops::ControlFlow; use tracing::{error, info, warn}; use super::connect_compute::ComputeConnectBackend; -use super::retry::ShouldRetry; pub async fn wake_compute( num_retries: &mut u32, @@ -22,9 +20,8 @@ pub async fn wake_compute( ) -> Result { let retry_type = RetryType::WakeCompute; loop { - let wake_res = api.wake_compute(ctx).await; - match handle_try_wake(wake_res, *num_retries, config) { - Err(e) => { + match api.wake_compute(ctx).await { + Err(e) if !should_retry(&e, *num_retries, config) => { error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node"); report_error(&e, false); Metrics::get().proxy.retries_metric.observe( @@ -36,11 +33,11 @@ pub async fn wake_compute( ); return Err(e); } - Ok(ControlFlow::Continue(e)) => { + Err(e) => { warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node"); report_error(&e, true); } - Ok(ControlFlow::Break(n)) => { + Ok(n) => { Metrics::get().proxy.retries_metric.observe( RetriesMetricGroup { outcome: ConnectOutcome::Success, @@ -63,70 +60,28 @@ pub async fn wake_compute( } } -/// Attempts to wake up the compute node. -/// * Returns Ok(Continue(e)) if there was an error waking but retries are acceptable -/// * Returns Ok(Break(node)) if the wakeup succeeded -/// * Returns Err(e) if there was an error -pub fn handle_try_wake( - result: Result, - num_retries: u32, - config: RetryConfig, -) -> Result, WakeComputeError> { - match result { - Err(err) => match &err { - WakeComputeError::ApiError(api) if api.should_retry(num_retries, config) => { - Ok(ControlFlow::Continue(err)) - } - _ => Err(err), - }, - // Ready to try again. 
- Ok(new) => Ok(ControlFlow::Break(new)), - } -} - fn report_error(e: &WakeComputeError, retry: bool) { use crate::console::errors::ApiError; let kind = match e { WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress, WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError, WakeComputeError::ApiError(ApiError::Console(e)) => match e.get_reason() { - crate::console::messages::Reason::RoleProtected => { - WakeupFailureKind::ApiConsoleBadRequest - } - crate::console::messages::Reason::ResourceNotFound => { - WakeupFailureKind::ApiConsoleBadRequest - } - crate::console::messages::Reason::ProjectNotFound => { - WakeupFailureKind::ApiConsoleBadRequest - } - crate::console::messages::Reason::EndpointNotFound => { - WakeupFailureKind::ApiConsoleBadRequest - } - crate::console::messages::Reason::BranchNotFound => { - WakeupFailureKind::ApiConsoleBadRequest - } - crate::console::messages::Reason::RateLimitExceeded => { - WakeupFailureKind::ApiConsoleLocked - } - crate::console::messages::Reason::NonPrimaryBranchComputeTimeExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::ActiveTimeQuotaExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::ComputeTimeQuotaExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::WrittenDataQuotaExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::DataTransferQuotaExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::LogicalSizeQuotaExceeded => { - WakeupFailureKind::QuotaExceeded - } - crate::console::messages::Reason::Unknown => match e { + Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest, + Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest, + Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest, + Reason::EndpointNotFound => 
WakeupFailureKind::ApiConsoleBadRequest, + Reason::BranchNotFound => WakeupFailureKind::ApiConsoleBadRequest, + Reason::RateLimitExceeded => WakeupFailureKind::ApiConsoleLocked, + Reason::NonDefaultBranchComputeTimeExceeded => WakeupFailureKind::QuotaExceeded, + Reason::ActiveTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded, + Reason::ComputeTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded, + Reason::WrittenDataQuotaExceeded => WakeupFailureKind::QuotaExceeded, + Reason::DataTransferQuotaExceeded => WakeupFailureKind::QuotaExceeded, + Reason::LogicalSizeQuotaExceeded => WakeupFailureKind::QuotaExceeded, + Reason::ConcurrencyLimitReached => WakeupFailureKind::ApiConsoleLocked, + Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked, + Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked, + Reason::Unknown => match e { ConsoleError { http_status_code: StatusCode::LOCKED, ref error, diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 05d60612385c..6c34d48338b3 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -16,7 +16,10 @@ use crate::{ context::RequestMonitoring, error::{ErrorKind, ReportableError, UserFacingError}, intern::EndpointIdInt, - proxy::{connect_compute::ConnectMechanism, retry::ShouldRetry}, + proxy::{ + connect_compute::ConnectMechanism, + retry::{CouldRetry, ShouldRetryWakeCompute}, + }, rate_limiter::EndpointRateLimiter, Host, }; @@ -179,7 +182,7 @@ impl UserFacingError for HttpConnError { } } -impl ShouldRetry for HttpConnError { +impl CouldRetry for HttpConnError { fn could_retry(&self) -> bool { match self { HttpConnError::ConnectionError(e) => e.could_retry(), @@ -190,9 +193,11 @@ impl ShouldRetry for HttpConnError { HttpConnError::TooManyConnectionAttempts(_) => false, } } - fn should_retry_database_address(&self) -> bool { +} +impl ShouldRetryWakeCompute for HttpConnError { + fn should_retry_wake_compute(&self) -> bool { match self { - 
HttpConnError::ConnectionError(e) => e.should_retry_database_address(), + HttpConnError::ConnectionError(e) => e.should_retry_wake_compute(), // we never checked cache validity HttpConnError::TooManyConnectionAttempts(_) => false, _ => true,