neondatabase · arpad-m · May 9, 2024 · May 8, 2024 · May 8, 2024 · May 8, 2024
@@ -24,7 +24,9 @@ use tracing::{debug, info};
 use std::collections::{HashSet, VecDeque};
 use std::ops::Range;
 
-use crate::helpers::{accum_key_values, keyspace_total_size, merge_delta_keys, overlaps_with};
+use crate::helpers::{
+    accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with,
+};
 use crate::interface::*;
 use utils::lsn::Lsn;
 
@@ -535,7 +537,10 @@ where
             }
         }
         // Open stream
-        let key_value_stream = std::pin::pin!(merge_delta_keys::<E>(deltas.as_slice(), ctx));
+        let key_value_stream =
+            std::pin::pin!(merge_delta_keys_buffered::<E>(deltas.as_slice(), ctx)
+                .await?
+                .map(|k| Result::<_, anyhow::Error>::Ok(k)));
         let mut new_jobs = Vec::new();
 
         // Slide a window through the keyspace

@@ -11,9 +11,10 @@ use std::collections::BinaryHeap;
 use std::collections::VecDeque;
 use std::fmt::Display;
 use std::future::Future;
-use std::ops::{DerefMut, Range};
+use std::ops::Range;
 use std::pin::Pin;
 use std::task::{ready, Poll};
+use utils::lsn::Lsn;
 
 pub fn keyspace_total_size<K>(
     keyspace: &CompactionKeySpace<K>,
@@ -109,6 +110,23 @@ pub fn merge_delta_keys<'a, E: CompactionJobExecutor>(
     }
 }
 
+/// Eagerly loads the keys of all `layers` and returns them as one stream
+/// ordered by `(key, lsn)`.
+///
+/// The layers are loaded one after another, and every entry is collected into
+/// a single in-memory `Vec` that is sorted before the stream is created, so
+/// memory use grows with the total number of entries across all layers.
+///
+/// # Errors
+///
+/// Returns the first error produced while loading a layer's keys; layers
+/// after the failing one are not loaded.
+pub async fn merge_delta_keys_buffered<'a, E: CompactionJobExecutor + 'a>(
+    layers: &'a [E::DeltaLayer],
+    ctx: &'a E::RequestContext,
+) -> anyhow::Result<impl Stream<Item = <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>
+{
+    let mut keys = Vec::new();
+    for l in layers {
+        // Boxing and casting to LoadFuture is required to obtain the right Sync bound.
+        // If we do l.load_keys(ctx).await? directly, there is a compilation error.
+        let load_future: LoadFuture<'a, _> = Box::pin(l.load_keys(ctx));
+        // `extend` accepts any `IntoIterator`, so no explicit `.into_iter()` is needed.
+        keys.extend(load_future.await?);
+    }
+    keys.sort_by_key(|k| (k.key(), k.lsn()));
+    // `stream::iter` likewise takes the `Vec` directly.
+    Ok(futures::stream::iter(keys))
+}
+
 enum LazyLoadLayer<'a, E: CompactionJobExecutor> {
     Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),
     Unloaded(&'a E::DeltaLayer),
@@ -120,6 +138,12 @@ impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
             Self::Unloaded(dl) => dl.key_range().start,
         }
     }
+    /// LSN half of this layer's `(key, lsn)` ordering: the LSN of the front
+    /// entry once the layer is loaded, otherwise the start of the layer's
+    /// LSN range.
+    ///
+    /// Panics if a loaded layer holds no entries.
+    fn lsn(&self) -> Lsn {
+        match self {
+            Self::Unloaded(layer) => layer.lsn_range().start,
+            Self::Loaded(queue) => queue.front().unwrap().lsn(),
+        }
+    }
 }
 impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
@@ -129,12 +153,12 @@ impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
 impl<'a, E: CompactionJobExecutor> Ord for LazyLoadLayer<'a, E> {
+    /// Compares layers by `(key, lsn)` with the operands swapped, so the layer
+    /// with the smallest `(key, lsn)` compares as the greatest.
     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
         // reverse order so that we get a min-heap
-        other.key().cmp(&self.key())
+        (other.key(), other.lsn()).cmp(&(self.key(), self.lsn()))
     }
 }
 impl<'a, E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'a, E> {
+    /// Two layers are equal exactly when `Ord::cmp` reports `Equal`, which
+    /// keeps equality consistent with the ordering used for comparison.
     fn eq(&self, other: &Self) -> bool {
-        self.key().eq(&other.key())
+        self.cmp(other) == std::cmp::Ordering::Equal
     }
 }
 impl<'a, E: CompactionJobExecutor> Eq for LazyLoadLayer<'a, E> {}
@@ -171,8 +195,8 @@ where
                 match ready!(load_future.as_mut().poll(cx)) {
                     Ok(entries) => {
                         this.load_future.set(None);
-                        *this.heap.peek_mut().unwrap() =
-                            LazyLoadLayer::Loaded(VecDeque::from(entries));
+                        this.heap
+                            .push(LazyLoadLayer::Loaded(VecDeque::from(entries)));
                     }
                     Err(e) => {
                         return Poll::Ready(Some(Err(e)));
@@ -184,23 +208,25 @@ where
             // loading it. Otherwise return the next entry from it and update
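-            // the layer's position in the heap (this decreaseKey operation is
-            // performed implicitly when `top` is dropped).
+            // the layer's position in the heap (the layer is popped below and
+            // pushed back only if it still has entries).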
-            if let Some(mut top) = this.heap.peek_mut() {
-                match top.deref_mut() {
-                    LazyLoadLayer::Unloaded(ref mut l) => {
-                        let fut = l.load_keys(this.ctx);
-                        this.load_future.set(Some(Box::pin(fut)));
-                        continue;
-                    }
-                    LazyLoadLayer::Loaded(ref mut entries) => {
-                        let result = entries.pop_front().unwrap();
-                        if entries.is_empty() {
-                            std::collections::binary_heap::PeekMut::pop(top);
-                        }
-                        return Poll::Ready(Some(Ok(result)));
+            // We have to remove the layer from the heap and then re-add it,
+            // because loading it (or just removing a key from it) can change
+            // its ordering relative to the other layers in the heap.
+            let Some(mut top) = this.heap.pop() else {
+                return Poll::Ready(None);
+            };
+            match top {
+                LazyLoadLayer::Unloaded(ref mut l) => {
+                    let fut = l.load_keys(this.ctx);
+                    this.load_future.set(Some(Box::pin(fut)));
+                    continue;
+                }
+                LazyLoadLayer::Loaded(ref mut entries) => {
+                    let result = entries.pop_front().unwrap();
+                    if !entries.is_empty() {
+                        this.heap.push(top);
                     }
+                    return Poll::Ready(Some(Ok(result)));
                 }
-            } else {
-                return Poll::Ready(None);
             }
         }
     }