Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

PVF: add landlock sandboxing #7303

Merged
merged 19 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,12 @@ sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "
sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

[target.'cfg(target_os = "linux")'.dependencies]
landlock = "0.2.0"

[dev-dependencies]
assert_matches = "1.4.0"
tempfile = "3.3.0"

[build-dependencies]
substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" }
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

//! Functionality common to both prepare and execute workers.

pub mod security;

use crate::LOG_TARGET;
use cpu_time::ProcessTime;
use futures::never::Never;
Expand Down Expand Up @@ -203,7 +205,7 @@ pub mod thread {
};

/// Contains the outcome of waiting on threads, or `Pending` if none are ready.
#[derive(Clone, Copy)]
#[derive(Debug, Clone, Copy)]
pub enum WaitOutcome {
Finished,
TimedOut,
Expand All @@ -224,8 +226,14 @@ pub mod thread {
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
}

/// Runs a thread, afterwards notifying the threads waiting on the condvar. Catches panics and
/// resumes them after triggering the condvar, so that the waiting thread is notified on panics.
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads waiting on the
/// condvar. Catches panics during execution and resumes the panics after triggering the condvar, so that the
/// waiting thread is notified on panics.
///
/// # Returns
///
/// Returns the thread's join handle. Calling `.join()` on it returns the result of executing
/// `f()`, as well as whether we were able to enable sandboxing.
pub fn spawn_worker_thread<F, R>(
name: &str,
f: F,
Expand Down
188 changes: 188 additions & 0 deletions node/core/pvf/common/src/worker/security.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! Functionality for securing workers.
//!
//! This is needed because workers are used to compile and execute untrusted code (PVFs).

/// To what degree landlock is enabled. It's a separate struct from `RulesetStatus` because that is
/// only available on Linux, plus this has a nicer name.
//
// Deriving `Debug` keeps this consistent with `WaitOutcome` (also `Debug`) and lets the status be
// logged directly; `Clone`/`Copy`/`PartialEq`/`Eq` are free for a field-less enum and make the
// status easy to pass around and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LandlockStatus {
	/// All requested landlock restrictions are active.
	FullyEnforced,
	/// Only a subset of the requested restrictions could be applied (e.g. older kernel ABI).
	PartiallyEnforced,
	/// Landlock is not enforced at all (unsupported kernel or non-Linux platform).
	NotEnforced,
	/// Thread panicked, we don't know what the status is.
	Unavailable,
}

impl LandlockStatus {
	/// Converts the `landlock` crate's [`::landlock::RulesetStatus`] into our platform-independent
	/// status type. Only compiled on Linux, since the `landlock` dependency is Linux-only.
	///
	/// Note: this is a lossless one-to-one mapping; [`LandlockStatus::Unavailable`] has no
	/// counterpart in `RulesetStatus` and is only produced elsewhere (when a worker thread
	/// panics and the real status is unknown).
	#[cfg(target_os = "linux")]
	pub fn from_ruleset_status(ruleset_status: ::landlock::RulesetStatus) -> Self {
		use ::landlock::RulesetStatus::*;
		match ruleset_status {
			FullyEnforced => LandlockStatus::FullyEnforced,
			PartiallyEnforced => LandlockStatus::PartiallyEnforced,
			NotEnforced => LandlockStatus::NotEnforced,
		}
	}
}

/// The [landlock] docs say it best:
///
/// > "Landlock is a security feature available since Linux 5.13. The goal is to enable to restrict
/// ambient rights (e.g., global filesystem access) for a set of processes by creating safe security
/// sandboxes as new security layers in addition to the existing system-wide access-controls. This
/// kind of sandbox is expected to help mitigate the security impact of bugs, unexpected or
/// malicious behaviors in applications. Landlock empowers any process, including unprivileged ones,
/// to securely restrict themselves."
///
/// [landlock]: https://docs.rs/landlock/latest/landlock/index.html
#[cfg(target_os = "linux")]
pub mod landlock {
use landlock::{Access, AccessFs, Ruleset, RulesetAttr, RulesetError, RulesetStatus, ABI};

/// Landlock ABI version. We use ABI V1 because:
///
/// 1. It is supported by our reference kernel version.
/// 2. Later versions do not (yet) provide additional security.
///
/// # Versions (June 2023)
///
/// - Polkadot reference kernel version: 5.16+
/// - ABI V1: 5.13 - introduces landlock, including full restrictions on file reads
/// - ABI V2: 5.19 - adds ability to configure file renaming (not used by us)
///
/// # Determinism
///
/// You may wonder whether we could always use the latest ABI instead of only the ABI supported
/// by the reference kernel version. It seems plausible, since landlock provides a best-effort
/// approach to enabling sandboxing. For example, if the reference version only supported V1 and
/// we were on V2, then landlock would use V2 if it was supported on the current machine, and
/// just fall back to V1 if not.
///
/// The issue with this is indeterminacy. If half of validators were on V2 and half were on V1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic doesn't parse well for me. If V1 is less restrictive and makes validators vulnerable, then I would guess that having just a part of them vulnerable is better than all of them, isn't it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question and I wondered the same. The issue is that it opens up a possibility of different behavior on different validators, and this itself can be exploited to attack consensus. Say that in the future we enable ABI V3 and executing some PVF tries to truncate a file (which will be banned on ABI V3)1 - some validators will error and some won't. If the split in voting among validators is less than 33%, there will be a dispute and all the losing validators will get slashed. If the split is more than 33%, it violates our assumptions about consensus and finality will stall.

So indeed there is a very interesting trade-off between security for the validator and security for the chain, and I think we have to prioritize the latter while providing as much validator security as possible. If a small amount of validators are behind in security and vote wrongly then some slashing is okay, and it can be reverted with governance, but I think we really don't want finality stalls.

Although, I guess it would also be really bad if a bunch of validator keys got stolen and an attacker started impersonating them... And anyway there are other sources of indeterminacy to attack the chain with... Fortunately ABI V1 already fully protects against reading unauthorized data, so in this case it is enough to protect validators' keys and it is still the correct decision. (The only other thing I would want to feel safe is to block all network access. Maybe it's possible to set up a firewall per-thread?)

There are similar considerations that made the seccomp sandboxing harder than anticipated. Maybe @eskimor can double-check my analysis.

Footnotes

  1. Using V3 in this example because V2 doesn't actually provide additional restrictions on top of V1.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the explanation, makes sense now! I guess V1 is the best we can do.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be the default for validators to have these security measures in place, ideally we would have none without them. Anyhow, the risk of disputes should be very low as this is already a second level defense mechanism. I would rather have a dispute than some PVF being able to read the validator's disk. We should make damn sure that there are no legitimate disk accesses of course, but checking that should be independent of PVF or candidate, so also rather easy. At least at the moment, I can't think of a legitimate disk access that would only happen on some PVFs ..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @eskimor. Determinism is still a goal, and given that ABI v2 and v3 don't add to the security I would stick to v1 here.1 I will update the docs as the determinacy is still relevant but not the only factor. And if in the future a version is released with meaningful new features like network blocking (which is an eventual goal of landlock) we can enable it immediately. We should keep in mind that attackers can exploit any difference in operation to attack the chain, but the risk is low and there are other indeterminacy vectors anyway.

Footnotes

  1. v2 adds a new config option which we don't use, and v3 additionally blocks file truncation - which might be annoying, but not really critical to security, and validators should have backups, right?

/// they may have different semantics on some PVFs. So a malicious PVF now has a new attack
/// vector: they can exploit this indeterminism between landlock ABIs!
///
/// On the other hand we do want validators to be as secure as possible and protect their keys
/// from attackers. And, the risk with indeterminacy is low and there are other indeterminacy
/// vectors anyway. So we will only upgrade to a new ABI if either the reference kernel version
/// supports it or if it introduces some new feature that is beneficial to security.
pub const LANDLOCK_ABI: ABI = ABI::V1;

// TODO: <https://github.com/landlock-lsm/rust-landlock/issues/36>
/// Returns to what degree landlock is enabled with the given ABI on the current Linux
/// environment.
pub fn get_status() -> Result<RulesetStatus, Box<dyn std::error::Error>> {
match std::thread::spawn(|| try_restrict_thread()).join() {
Ok(Ok(status)) => Ok(status),
Ok(Err(ruleset_err)) => Err(ruleset_err.into()),
Err(_err) => Err("a panic occurred in try_restrict_thread".into()),
}
}

/// Based on the given `status`, returns a single bool indicating whether the given landlock
/// ABI is fully enabled on the current Linux environment.
///
/// Any error, and any status other than `FullyEnforced` (i.e. partial or no enforcement),
/// counts as "not fully enabled".
pub fn status_is_fully_enabled(
	status: &Result<RulesetStatus, Box<dyn std::error::Error>>,
) -> bool {
	matches!(status, Ok(RulesetStatus::FullyEnforced))
}

/// Runs a check for landlock and returns a single bool indicating whether the given landlock
/// ABI is fully enabled on the current Linux environment.
pub fn check_is_fully_enabled() -> bool {
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
status_is_fully_enabled(&get_status())
}

/// Tries to restrict the current thread with the following landlock access controls:
///
/// 1. all global filesystem access
/// 2. ... more may be supported in the future.
///
/// If landlock is not supported in the current environment this is simply a noop.
///
/// # Returns
///
/// The status of the restriction (whether it was fully, partially, or not-at-all enforced).
pub fn try_restrict_thread() -> Result<RulesetStatus, RulesetError> {
	// Declare which access rights the ruleset handles: every filesystem access known to
	// our pinned ABI version. Anything handled but not explicitly allowed is denied.
	let ruleset = Ruleset::new().handle_access(AccessFs::from_all(LANDLOCK_ABI))?;
	// No `allow` rules are added, so all handled (filesystem) access is denied. Applying
	// the ruleset only affects the current thread and cannot be undone.
	let restricted = ruleset.create()?.restrict_self()?;
	Ok(restricted.ruleset)
}

#[cfg(test)]
mod tests {
	use super::*;
	use std::{fs, io::ErrorKind, thread};

	#[test]
	fn restricted_thread_cannot_access_fs() {
		// TODO: This would be nice: <https://github.com/rust-lang/rust/issues/68007>.
		// Skip the test when landlock cannot be fully enforced on this machine (e.g. old
		// kernel or CI container); the assertions below would spuriously fail otherwise.
		if !check_is_fully_enabled() {
			return
		}

		// Restricted thread cannot read from FS.
		// Landlock restriction is per-thread and irreversible, so each scenario runs on its
		// own spawned thread to avoid poisoning the test harness thread.
		let handle = thread::spawn(|| {
			// Write to a tmp file, this should succeed before landlock is applied.
			let text = "foo";
			let tmpfile = tempfile::NamedTempFile::new().unwrap();
			let path = tmpfile.path();
			fs::write(path, text).unwrap();
			let s = fs::read_to_string(path).unwrap();
			assert_eq!(s, text);

			let status = try_restrict_thread().unwrap();
			if !matches!(status, RulesetStatus::FullyEnforced) {
				panic!("Ruleset should be enforced since we checked if landlock is enabled");
			}

			// Try to read from the tmp file after landlock.
			let result = fs::read_to_string(path);
			assert!(matches!(
				result,
				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
			));
		});

		// A panic inside the spawned thread (i.e. a failed assertion) surfaces here as a
		// join error, failing the test.
		assert!(handle.join().is_ok());

		// Restricted thread cannot write to FS.
		let handle = thread::spawn(|| {
			let text = "foo";
			let tmpfile = tempfile::NamedTempFile::new().unwrap();
			let path = tmpfile.path();

			let status = try_restrict_thread().unwrap();
			if !matches!(status, RulesetStatus::FullyEnforced) {
				panic!("Ruleset should be enforced since we checked if landlock is enabled");
			}

			// Try to write to the tmp file after landlock.
			let result = fs::write(path, text);
			assert!(matches!(
				result,
				Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied)
			));
		});

		assert!(handle.join().is_ok());
	}
}
}
46 changes: 37 additions & 9 deletions node/core/pvf/execute-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ use polkadot_node_core_pvf_common::{
executor_intf::NATIVE_STACK_MAX,
framed_recv, framed_send,
worker::{
bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload,
bytes_to_path, cpu_time_monitor_loop,
security::LandlockStatus,
stringify_panic_payload,
thread::{self, WaitOutcome},
worker_event_loop,
},
Expand Down Expand Up @@ -170,11 +172,22 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
let execute_thread = thread::spawn_worker_thread_with_stack_size(
"execute thread",
move || {
validate_using_artifact(
&compiled_artifact_blob,
&params,
executor_2,
cpu_time_start,
// Try to enable landlock.
#[cfg(target_os = "linux")]
let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread()
alexggh marked this conversation as resolved.
Show resolved Hide resolved
.map(LandlockStatus::from_ruleset_status)
.map_err(|e| e.to_string());
#[cfg(not(target_os = "linux"))]
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);

(
validate_using_artifact(
&compiled_artifact_blob,
&params,
executor_2,
cpu_time_start,
),
landlock_status,
)
},
Arc::clone(&condvar),
Expand All @@ -187,9 +200,24 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
let response = match outcome {
WaitOutcome::Finished => {
let _ = cpu_time_monitor_tx.send(());
execute_thread
.join()
.unwrap_or_else(|e| Response::Panic(stringify_panic_payload(e)))
let (result, landlock_status) = execute_thread.join().unwrap_or_else(|e| {
(
Response::Panic(stringify_panic_payload(e)),
Ok(LandlockStatus::Unavailable),
)
});

// Log if landlock threw an error.
if let Err(err) = landlock_status {
gum::warn!(
target: LOG_TARGET,
%worker_pid,
"error enabling landlock: {}",
err
);
}

result
},
// If the CPU thread is not selected, we signal it to end, the join handle is
// dropped and the thread will finish in the background.
Expand Down
Loading