diff --git a/Dockerfile b/Dockerfile index ae32a72cf..a4b37b440 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,8 @@ COPY src src/ RUN cargo build --release FROM registry.fedoraproject.org/fedora:32 -RUN dnf install -y /usr/bin/gpg /usr/bin/lsblk /usr/sbin/udevadm && \ +RUN dnf install -y /usr/bin/gpg /usr/sbin/kpartx /usr/bin/lsblk \ + /usr/sbin/udevadm && \ dnf clean all COPY --from=builder /build/target/release/coreos-installer /usr/sbin ENTRYPOINT ["/usr/sbin/coreos-installer"] diff --git a/src/blockdev.rs b/src/blockdev.rs index 646d9d34f..a7314aba3 100644 --- a/src/blockdev.rs +++ b/src/blockdev.rs @@ -18,7 +18,7 @@ use nix::{errno::Errno, mount}; use regex::Regex; use std::collections::HashMap; use std::convert::TryInto; -use std::fs::{metadata, read_dir, read_to_string, remove_dir, File, OpenOptions}; +use std::fs::{metadata, read_dir, read_link, read_to_string, remove_dir, File, OpenOptions}; use std::num::{NonZeroU32, NonZeroU64}; use std::os::linux::fs::MetadataExt; use std::os::raw::c_int; @@ -76,16 +76,6 @@ impl Disk { } } - pub fn get_busy_partitions(&self) -> Result> { - let mut ret: Vec = Vec::new(); - for d in self.get_partitions()? { - if d.mountpoint.is_some() || d.swap || !d.get_holders()?.is_empty() { - ret.push(d) - } - } - Ok(ret) - } - fn get_partitions(&self) -> Result> { // have lsblk enumerate partitions on the device // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but -p option @@ -132,6 +122,184 @@ impl Disk { } Ok(result) } + + /// Return an empty list if we have exclusive access to the device, or + /// a list of partitions preventing us from gaining exclusive access. + pub fn get_busy_partitions(self) -> Result> { + // Try rereading the partition table. This is the most complete + // check, but it only works on partitionable devices. + let rereadpt_result = { + let mut f = OpenOptions::new() + .write(true) + .open(&self.path) + .chain_err(|| format!("opening {}", &self.path))?; + reread_partition_table(&mut f).map(|_| Vec::new()) + }; + if rereadpt_result.is_ok() { + return rereadpt_result; + } + + // Walk partitions, record the ones that are reported in use, + // and return the list if any + let mut busy: Vec = Vec::new(); + for d in self.get_partitions()? { + if d.mountpoint.is_some() || d.swap || !d.get_holders()?.is_empty() { + busy.push(d) + } + } + if !busy.is_empty() { + return Ok(busy); + } + + // Our investigation found nothing. If the device is expected to be + // partitionable but reread failed, we evidently missed something, + // so error out for safety + if !self.is_dm_device() { + return rereadpt_result; + } + + Ok(Vec::new()) + } + + /// Get a handle to the set of device nodes for individual partitions + /// of the device. + pub fn get_partition_table(&self) -> Result> { + if self.is_dm_device() { + Ok(Box::new(PartTableKpartx::new(&self.path)?)) + } else { + Ok(Box::new(PartTableKernel::new(&self.path)?)) + } + } + + fn is_dm_device(&self) -> bool { + self.path.starts_with("/dev/mapper/") || self.path.starts_with("/dev/dm-") + } +} + +/// A handle to the set of device nodes for individual partitions of a +/// device. Must be held as long as the device nodes are needed; they might +/// be removed upon drop. +pub trait PartTable { + /// Update device nodes for the current state of the partition table + fn reread(&mut self) -> Result<()>; +} + +/// Device nodes for partitionable kernel devices, managed by the kernel. +#[derive(Debug)] +pub struct PartTableKernel { + path: String, + file: File, +} + +impl PartTableKernel { + fn new(path: &str) -> Result { + let file = OpenOptions::new() + .write(true) + .open(path) + .chain_err(|| format!("opening {}", path))?; + Ok(Self { + path: path.to_string(), + file, + }) + } +} + +impl PartTable for PartTableKernel { + fn reread(&mut self) -> Result<()> { + reread_partition_table(&mut self.file)?; + udev_settle() + } +} + +/// Device nodes for non-partitionable kernel devices, managed by running +/// kpartx to parse the partition table and create device-mapper devices for +/// each partition. +#[derive(Debug)] +pub struct PartTableKpartx { + path: String, + need_teardown: bool, +} + +impl PartTableKpartx { + fn new(path: &str) -> Result { + let mut table = Self { + path: path.to_string(), + need_teardown: !Self::already_set_up(path)?, + }; + // create/sync partition devices if missing + table.reread()?; + Ok(table) + } + + // We only want to kpartx -d on drop if we're the one initially + // creating the partition devices. There's no good way to detect + // this. + fn already_set_up(path: &str) -> Result { + let re = Regex::new(r"^p[0-9]+$").expect("compiling RE"); + let expected = Path::new(path) + .file_name() + .chain_err(|| format!("getting filename of {}", path))? + .to_os_string() + .into_string() + .map_err(|_| format!("converting filename of {}", path))?; + for ent in read_dir("/dev/mapper").chain_err(|| "listing /dev/mapper")? { + let ent = ent.chain_err(|| "reading /dev/mapper entry")?; + let found = ent.file_name().into_string().map_err(|_| { + format!( + "converting filename of {}", + Path::new(&ent.file_name()).display() + ) + })?; + if found.starts_with(&expected) && re.is_match(&found[expected.len()..]) { + return Ok(true); + } + } + Ok(false) + } + + fn run_kpartx(&self, flag: &str) -> Result<()> { + // Swallow stderr on success. Avoids spurious warnings: + // GPT:Primary header thinks Alt. header is not at the end of the disk. + // GPT:Alternate GPT header not at the end of the disk. + // GPT: Use GNU Parted to correct GPT errors. + // + // By default, kpartx waits for udev to settle before returning, + // but this blocks indefinitely inside a container. See e.g. + // https://github.com/moby/moby/issues/22025 + // Use -n to skip blocking on udev, and then manually settle. + let result = Command::new("kpartx") + .arg(flag) + .arg("-n") + .arg(&self.path) + .output() + .chain_err(|| format!("running kpartx {} {}", flag, self.path))?; + if !result.status.success() { + // copy out its stderr + eprint!("{}", String::from_utf8_lossy(&*result.stderr)); + bail!("kpartx {} {} failed: {}", flag, self.path, result.status); + } + udev_settle()?; + Ok(()) + } +} + +impl PartTable for PartTableKpartx { + fn reread(&mut self) -> Result<()> { + self.run_kpartx("-u") + } +} + +impl Drop for PartTableKpartx { + /// If we created the partition devices (rather than finding them + /// already existing), delete them afterward so we don't leave DM + /// devices attached to the specified disk. + fn drop(&mut self) { + if self.need_teardown { + if let Err(e) = self.run_kpartx("-d") { + eprintln!("{}", e) + } + } + } } #[derive(Debug)] @@ -173,7 +341,22 @@ impl Partition { } pub fn get_holders(&self) -> Result> { - let holders = Path::new("/sys/block") + let holders = self.get_sysfs_dir()?.join("holders"); + let mut ret: Vec = Vec::new(); + for ent in read_dir(&holders).chain_err(|| format!("reading {}", &holders.display()))? { + let ent = ent.chain_err(|| format!("reading {} entry", &holders.display()))?; + ret.push(format!("/dev/{}", ent.file_name().to_string_lossy())); + } + Ok(ret) + } + + // Try to locate the device directory in sysfs. + fn get_sysfs_dir(&self) -> Result { + let basedir = Path::new("/sys/block"); + + // First assume we have a regular partition. + // /sys/block/sda/sda1 + let devdir = basedir .join( Path::new(&self.parent) .file_name() @@ -183,14 +366,36 @@ impl Partition { Path::new(&self.path) .file_name() .chain_err(|| format!("path {} has no filename", self.path))?, - ) - .join("holders"); - let mut ret: Vec = Vec::new(); - for ent in read_dir(&holders).chain_err(|| format!("reading {}", &holders.display()))? { - let ent = ent.chain_err(|| format!("reading {}", &holders.display()))?; - ret.push(format!("/dev/{}", ent.file_name().to_string_lossy())); + ); + if devdir.exists() { + return Ok(devdir); } - Ok(ret) + + // Now assume a kpartx "partition", where the path is a symlink to + // an unpartitioned DM device node. + // /sys/block/dm-1 + match read_link(Path::new(&self.path)) { + Ok(target) => { + let devdir = basedir.join( + Path::new(&target) + .file_name() + .chain_err(|| format!("target {} has no filename", self.path))?, + ); + if devdir.exists() { + return Ok(devdir); + } + } + // ignore if not symlink + Err(e) if e.kind() == std::io::ErrorKind::InvalidInput => (), + Err(e) => return Err(e).chain_err(|| format!("reading link {}", self.path)), + }; + + // Give up + bail!( + "couldn't find /sys/block directory for partition {} of {}", + &self.path, + &self.parent + ); } } @@ -283,7 +488,7 @@ impl Drop for Mount { } } -pub fn reread_partition_table(file: &mut File) -> Result<()> { +fn reread_partition_table(file: &mut File) -> Result<()> { let fd = file.as_raw_fd(); // Reread sometimes fails inexplicably. Retry several times before // giving up. @@ -367,7 +572,7 @@ mod ioctl { ioctl_read!(blkgetsize64, 0x12, 114, libc::size_t); } -pub fn udev_settle() -> Result<()> { +fn udev_settle() -> Result<()> { // "udevadm settle" silently no-ops if the udev socket is missing, and // then lsblk can't find partition labels. Catch this early. if !Path::new("/run/udev/control").exists() { diff --git a/src/install.rs b/src/install.rs index fe51f8334..06891f2be 100644 --- a/src/install.rs +++ b/src/install.rs @@ -111,15 +111,20 @@ pub fn install(config: &InstallConfig) -> Result<()> { { bail!("{} is not a block device", &config.device); } - if let Err(e) = reread_partition_table(&mut dest) { - report_busy_partitions(&config.device)?; - Err(e).chain_err(|| format!("checking for exclusive access to {}", &config.device))?; - } + ensure_exclusive_access(&config.device) + .chain_err(|| format!("checking for exclusive access to {}", &config.device))?; + + // get reference to partition table + // For kpartx partitioning, this will conditionally call kpartx -d + // when dropped + let mut table = Disk::new(&config.device) + .get_partition_table() + .chain_err(|| format!("getting partition table for {}", &config.device))?; // copy and postprocess disk image // On failure, clear and reread the partition table to prevent the disk // from accidentally being used. - if let Err(err) = write_disk(&config, &mut source, &mut dest) { + if let Err(err) = write_disk(&config, &mut source, &mut dest, &mut *table) { // log the error so the details aren't dropped if we encounter // another error during cleanup eprint!("{}", ChainedError::display_chain(&err)); @@ -128,7 +133,7 @@ pub fn install(config: &InstallConfig) -> Result<()> { if config.preserve_on_error { eprintln!("Preserving partition table as requested"); } else { - clear_partition_table(&mut dest)?; + clear_partition_table(&mut dest, &mut *table)?; } // return a generic error so our exit status is right @@ -139,12 +144,12 @@ pub fn install(config: &InstallConfig) -> Result<()> { Ok(()) } -fn report_busy_partitions(device: &str) -> Result<()> { +fn ensure_exclusive_access(device: &str) -> Result<()> { let mut parts = Disk::new(device).get_busy_partitions()?; - parts.sort_unstable_by_key(|p| p.path.to_string()); if parts.is_empty() { return Ok(()); } + parts.sort_unstable_by_key(|p| p.path.to_string()); eprintln!("Partitions in use on {}:", device); for part in parts { if let Some(mountpoint) = part.mountpoint.as_ref() { @@ -157,20 +162,24 @@ fn report_busy_partitions(device: &str) -> Result<()> { eprintln!(" {} in use by {}", part.path, holder); } } - Ok(()) + bail!("found busy partitions"); } /// Copy the image source to the target disk and do all post-processing. /// If this function fails, the caller should wipe the partition table /// to ensure the user doesn't boot from a partially-written disk. -fn write_disk(config: &InstallConfig, source: &mut ImageSource, dest: &mut File) -> Result<()> { +fn write_disk( + config: &InstallConfig, + source: &mut ImageSource, + dest: &mut File, + table: &mut dyn PartTable, +) -> Result<()> { // Get sector size of destination, for comparing with image let sector_size = get_sector_size(dest)?; // copy the image write_image(source, dest, true, Some(sector_size))?; - reread_partition_table(dest)?; - udev_settle()?; + table.reread()?; // postprocess if config.ignition.is_some() @@ -431,7 +440,7 @@ fn copy_network_config(mountpoint: &Path, net_config_src: &str) -> Result<()> { } /// Clear the partition table. For use after a failure. -fn clear_partition_table(dest: &mut File) -> Result<()> { +fn clear_partition_table(dest: &mut File, table: &mut dyn PartTable) -> Result<()> { eprintln!("Clearing partition table"); dest.seek(SeekFrom::Start(0)) .chain_err(|| "seeking to start of disk")?; @@ -442,8 +451,7 @@ fn clear_partition_table(dest: &mut File) -> Result<()> { .chain_err(|| "flushing partition table to disk")?; dest.sync_all() .chain_err(|| "syncing partition table to disk")?; - reread_partition_table(dest)?; - udev_settle()?; + table.reread()?; Ok(()) }