From 60085774f2554568396932fbd25c7cdafdf400e2 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald <fitzgen@gmail.com>
Date: Wed, 14 Aug 2024 10:45:40 -0700
Subject: [PATCH] Pulley: Add memory access instructions with 64-bit offsets
 (#9085)

* Pulley: Add memory access instructions with 64-bit offsets

I had trimmed these instructions from the original upstreaming of the Pulley
interpreter because I had mistakenly believed that they were unused. Turns out
they are needed for Cranelift's Pulley backend to allow for lowering certain
address modes to a single instruction. The alternative, lowering the address
modes to a sequence of instructions, would be a bit annoying and these
instructions seem generally useful.

* rebase on top of indexing changes for `MachineState`
---
 pulley/fuzz/src/interp.rs  |   5 +
 pulley/src/interp.rs       |  55 +++++++++++
 pulley/src/lib.rs          |  12 +++
 pulley/tests/all/disas.rs  |   4 +-
 pulley/tests/all/interp.rs | 192 +++++++++++++++++++++++++++++++++++++
 5 files changed, 266 insertions(+), 2 deletions(-)
diff --git a/pulley/fuzz/src/interp.rs b/pulley/fuzz/src/interp.rs
index abc15033e686..079a4c3a1e1e 100644
--- a/pulley/fuzz/src/interp.rs
+++ b/pulley/fuzz/src/interp.rs
@@ -77,11 +77,16 @@ fn op_is_safe_for_fuzzing(op: &Op) -> bool {
         Op::Load64(_) => false,
         Op::Load32UOffset8(_) => false,
         Op::Load32SOffset8(_) => false,
+        Op::Load32UOffset64(_) => false,
+        Op::Load32SOffset64(_) => false,
         Op::Load64Offset8(_) => false,
+        Op::Load64Offset64(_) => false,
         Op::Store32(_) => false,
         Op::Store64(_) => false,
         Op::Store32SOffset8(_) => false,
+        Op::Store32SOffset64(_) => false,
         Op::Store64Offset8(_) => false,
+        Op::Store64Offset64(_) => false,
         Op::BitcastIntFromFloat32(op::BitcastIntFromFloat32 { dst, .. }) => !dst.is_special(),
         Op::BitcastIntFromFloat64(op::BitcastIntFromFloat64 { dst, .. }) => !dst.is_special(),
         Op::BitcastFloatFromInt32(_) => true,
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index 032c9e44ef46..9b221f56c315 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -883,6 +883,28 @@ impl OpVisitor for InterpreterVisitor<'_> {
         Continuation::Continue
     }
 
+    fn load32_u_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> Self::Return {
+        let val = unsafe {
+            self.state[ptr]
+                .get_ptr::<u32>()
+                .byte_offset(offset as isize)
+                .read_unaligned()
+        };
+        self.state[dst].set_u64(u64::from(val));
+        Continuation::Continue
+    }
+
+    fn load32_s_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> Self::Return {
+        let val = unsafe {
+            self.state[ptr]
+                .get_ptr::<i32>()
+                .byte_offset(offset as isize)
+                .read_unaligned()
+        };
+        self.state[dst].set_i64(i64::from(val));
+        Continuation::Continue
+    }
+
     fn load64_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> Self::Return {
         let val = unsafe {
             self.state[ptr]
@@ -894,6 +916,17 @@ impl OpVisitor for InterpreterVisitor<'_> {
         Continuation::Continue
     }
 
+    fn load64_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> Self::Return {
+        let val = unsafe {
+            self.state[ptr]
+                .get_ptr::<u64>()
+                .byte_offset(offset as isize)
+                .read_unaligned()
+        };
+        self.state[dst].set_u64(val);
+        Continuation::Continue
+    }
+
     fn store32(&mut self, ptr: XReg, src: XReg) -> Self::Return {
         let ptr = self.state[ptr].get_ptr::<u32>();
         let val = self.state[src].get_u32();
@@ -934,6 +967,28 @@ impl OpVisitor for InterpreterVisitor<'_> {
         Continuation::Continue
     }
 
+    fn store32_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> Self::Return {
+        let val = self.state[src].get_u32();
+        unsafe {
+            self.state[ptr]
+                .get_ptr::<u32>()
+                .byte_offset(offset as isize)
+                .write_unaligned(val);
+        }
+        Continuation::Continue
+    }
+
+    fn store64_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> Self::Return {
+        let val = self.state[src].get_u64();
+        unsafe {
+            self.state[ptr]
+                .get_ptr::<u64>()
+                .byte_offset(offset as isize)
+                .write_unaligned(val);
+        }
+        Continuation::Continue
+    }
+
     fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> Self::Return {
         let val = self.state[src].get_f32();
         self.state[dst].set_u64(u32::from_ne_bytes(val.to_ne_bytes()).into());
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index 30485f643d94..644c295efd1b 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -113,6 +113,13 @@ macro_rules! for_each_op {
             /// `dst = load64(ptr + offset8)`
             load64_offset8 = Load64Offset8 { dst: XReg, ptr: XReg, offset: i8 };
 
+            /// `dst = zero_extend(load32(ptr + offset64))`
+            load32_u_offset64 = Load32UOffset64 { dst: XReg, ptr: XReg, offset: i64 };
+            /// `dst = sign_extend(load32(ptr + offset64))`
+            load32_s_offset64 = Load32SOffset64 { dst: XReg, ptr: XReg, offset: i64 };
+            /// `dst = load64(ptr + offset64)`
+            load64_offset64 = Load64Offset64 { dst: XReg, ptr: XReg, offset: i64 };
+
             /// `*ptr = low32(src)`
             store32 = Store32 { ptr: XReg, src: XReg };
             /// `*ptr = src`
@@ -123,6 +130,11 @@ macro_rules! for_each_op {
             /// `*(ptr + sign_extend(offset8)) = src`
             store64_offset8 = Store64Offset8 { ptr: XReg, offset: i8, src: XReg };
 
+            /// `*(ptr + offset64) = low32(src)`
+            store32_offset64 = Store32SOffset64 { ptr: XReg, offset: i64, src: XReg };
+            /// `*(ptr + offset64) = src`
+            store64_offset64 = Store64Offset64 { ptr: XReg, offset: i64, src: XReg };
+
             /// `low32(dst) = bitcast low32(src) as i32`
             bitcast_int_from_float_32 = BitcastIntFromFloat32 { dst: XReg, src: FReg };
             /// `dst = bitcast src as i64`
diff --git a/pulley/tests/all/disas.rs b/pulley/tests/all/disas.rs
index ca0f1e5bb9ad..e643f273d220 100644
--- a/pulley/tests/all/disas.rs
+++ b/pulley/tests/all/disas.rs
@@ -86,8 +86,8 @@ fn simple() {
         r#"
        0: 0e 1f f0                        xconst8 x31, -16
        3: 12 20 20 1f                     xadd32 sp, sp, x31
-       7: 29 20 08 21                     store64_offset8 sp, 8, lr
-       b: 27 20 22                        store64 sp, fp
+       7: 2c 20 08 21                     store64_offset8 sp, 8, lr
+       b: 2a 20 22                        store64 sp, fp
        e: 0b 22 20                        xmov fp, sp
       11: 12 00 00 01                     xadd32 x0, x0, x1
       15: 0b 20 22                        xmov sp, fp
diff --git a/pulley/tests/all/interp.rs b/pulley/tests/all/interp.rs
index 26c867ece5b5..a5342619c7a6 100644
--- a/pulley/tests/all/interp.rs
+++ b/pulley/tests/all/interp.rs
@@ -686,6 +686,112 @@ fn load64_offset8() {
     }
 }
 
+#[test]
+fn load32_u_offset64() {
+    let a = UnsafeCell::new([11u32, 22]);
+    let b = UnsafeCell::new([33u32, 44]);
+    let c = UnsafeCell::new([55u32, 66]);
+    let d = UnsafeCell::new([i32::MIN as u32, i32::MAX as u32]);
+
+    for (expected, addr, offset) in [
+        (11, a.get(), 0),
+        (22, a.get(), 4),
+        (33, b.get(), 0),
+        (44, b.get(), 4),
+        (55, c.get(), 0),
+        (66, c.get(), 4),
+        (i32::MIN as u32 as u64, d.get(), 0),
+        (i32::MAX as u32 as u64, d.get(), 4),
+    ] {
+        unsafe {
+            assert_one(
+                [
+                    (x(0), Val::from(0x1234567812345678u64)),
+                    (x(1), Val::from(addr.cast::<u8>())),
+                ],
+                Load32UOffset64 {
+                    dst: x(0),
+                    ptr: x(1),
+                    offset,
+                },
+                x(0),
+                expected,
+            );
+        }
+    }
+}
+
+#[test]
+fn load32_s_offset64() {
+    let a = UnsafeCell::new([11u32, 22]);
+    let b = UnsafeCell::new([33u32, 44]);
+    let c = UnsafeCell::new([55u32, 66]);
+    let d = UnsafeCell::new([-1i32 as u32, i32::MAX as u32]);
+
+    for (expected, addr, offset) in [
+        (11, a.get(), 0),
+        (22, a.get(), 4),
+        (33, b.get(), 0),
+        (44, b.get(), 4),
+        (55, c.get(), 0),
+        (55, unsafe { c.get().byte_add(4) }, -4),
+        (66, c.get(), 4),
+        (-1i64 as u64, d.get(), 0),
+        (i32::MAX as u32 as u64, d.get(), 4),
+    ] {
+        unsafe {
+            assert_one(
+                [
+                    (x(0), Val::from(0x1234567812345678u64)),
+                    (x(1), Val::from(addr.cast::<u8>())),
+                ],
+                Load32SOffset64 {
+                    dst: x(0),
+                    ptr: x(1),
+                    offset,
+                },
+                x(0),
+                expected,
+            );
+        }
+    }
+}
+
+#[test]
+fn load64_offset64() {
+    let a = UnsafeCell::new([11u64, 22]);
+    let b = UnsafeCell::new([33u64, 44]);
+    let c = UnsafeCell::new([55u64, 66]);
+    let d = UnsafeCell::new([-1i64 as u64, i64::MAX as u64]);
+
+    for (expected, addr, offset) in [
+        (11, a.get(), 0),
+        (22, a.get(), 8),
+        (33, b.get(), 0),
+        (44, b.get(), 8),
+        (55, c.get(), 0),
+        (66, c.get(), 8),
+        (-1i64 as u64, d.get(), 0),
+        (i64::MAX as u64, d.get(), 8),
+    ] {
+        unsafe {
+            assert_one(
+                [
+                    (x(0), Val::from(0x1234567812345678u64)),
+                    (x(1), Val::from(addr)),
+                ],
+                Load64Offset64 {
+                    dst: x(0),
+                    ptr: x(1),
+                    offset,
+                },
+                x(0),
+                expected,
+            );
+        }
+    }
+}
+
 #[test]
 fn store32() {
     let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]);
@@ -859,6 +965,92 @@ fn store64_offset8() {
     assert_eq!(c, expected);
 }
 
+#[test]
+fn store32_offset64() {
+    let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]);
+    let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]);
+    let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]);
+
+    unsafe {
+        for (val, addr, offset) in [
+            (0x11111111u32, a.get(), 0),
+            (0x22222222, b.get(), 4),
+            (0x33333333, c.get(), 2),
+        ] {
+            let val = val as u64;
+            assert_one(
+                [(x(0), Val::from(addr)), (x(1), Val::from(val))],
+                Store32SOffset64 {
+                    ptr: x(0),
+                    src: x(1),
+                    offset,
+                },
+                x(1),
+                val,
+            );
+        }
+    }
+
+    let a = u64::from_be_bytes(a.into_inner());
+    let expected = 0x1111111112345678u64;
+    eprintln!("expected(a) = {expected:#018x}");
+    eprintln!("actual(a)   = {a:#018x}");
+    assert_eq!(a, expected);
+
+    let b = u64::from_be_bytes(b.into_inner());
+    let expected = 0x1234567822222222u64;
+    eprintln!("expected(b) = {expected:#018x}");
+    eprintln!("actual(b)   = {b:#018x}");
+    assert_eq!(b, expected);
+
+    let c = u64::from_be_bytes(c.into_inner());
+    let expected = 0x1234333333335678u64;
+    eprintln!("expected(c) = {expected:#018x}");
+    eprintln!("actual(c)   = {c:#018x}");
+    assert_eq!(c, expected);
+}
+
+#[test]
+fn store64_offset64() {
+    let a = UnsafeCell::new([0x1234567812345678, 0x1234567812345678, 0x1234567812345678]);
+
+    unsafe {
+        for (val, addr, offset) in [
+            (0x1111111111111111u64, a.get(), 0),
+            (0x2222222222222222, a.get(), 8),
+            (0x3333333333333333, a.get(), 16),
+        ] {
+            assert_one(
+                [(x(0), Val::from(addr)), (x(1), Val::from(val))],
+                Store64Offset64 {
+                    ptr: x(0),
+                    src: x(1),
+                    offset,
+                },
+                x(1),
+                val,
+            );
+        }
+    }
+
+    let [a, b, c] = a.into_inner();
+
+    let expected = 0x1111111111111111u64;
+    eprintln!("expected(a) = {expected:#018x}");
+    eprintln!("actual(a)   = {a:#018x}");
+    assert_eq!(a, expected);
+
+    let expected = 0x2222222222222222u64;
+    eprintln!("expected(b) = {expected:#018x}");
+    eprintln!("actual(b)   = {b:#018x}");
+    assert_eq!(b, expected);
+
+    let expected = 0x3333333333333333u64;
+    eprintln!("expected(c) = {expected:#018x}");
+    eprintln!("actual(c)   = {c:#018x}");
+    assert_eq!(c, expected);
+}
+
 #[test]
 fn bitcast_int_from_float_32() {
     for val in [