diff --git a/src/bn256/assembly.rs b/src/bn256/assembly.rs
index 495b6b8..da23ae6 100644
--- a/src/bn256/assembly.rs
+++ b/src/bn256/assembly.rs
@@ -1,18 +1,6 @@
 macro_rules! assembly_field {
     ($field:ident, $modulus:ident, $inv:ident) => {
         impl $field {
-            /// Returns zero, the additive identity.
-            #[inline]
-            pub const fn zero() -> $field {
-                $field([0, 0, 0, 0])
-            }
-
-            /// Returns one, the multiplicative identity.
-            #[inline]
-            pub const fn one() -> $field {
-                R
-            }
-
             /// Doubles this field element.
             #[inline]
             pub fn double(&self) -> $field {
@@ -68,92 +56,6 @@ macro_rules! assembly_field {
                 $field([r0, r1, r2, r3])
             }
 
-            fn from_u512(limbs: [u64; 8]) -> $field {
-                // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
-                // with the higher bits multiplied by 2^256. Thus, we perform two reductions
-                //
-                // 1. the lower bits are multiplied by R^2, as normal
-                // 2. the upper bits are multiplied by R^2 * 2^256 = R^3
-                //
-                // and computing their sum in the field. It remains to see that arbitrary 256-bit
-                // numbers can be placed into Montgomery form safely using the reduction. The
-                // reduction works so long as the product is less than R=2^256 multiplied by
-                // the modulus. This holds because for any `c` smaller than the modulus, we have
-                // that (2^256 - 1)*c is an acceptable product for the reduction. Therefore, the
-                // reduction always works so long as `c` is in the field; in this case it is either the
-                // constant `R2` or `R3`.
-                let d0 = $field([limbs[0], limbs[1], limbs[2], limbs[3]]);
-                let d1 = $field([limbs[4], limbs[5], limbs[6], limbs[7]]);
-                // Convert to Montgomery form
-                d0 * R2 + d1 * R3
-            }
-
-            /// Converts from an integer represented in little endian
-            /// into its (congruent) `Fr` representation.
-            pub const fn from_raw(val: [u64; 4]) -> $field {
-                let (r0, carry) = mac(0, val[0], R2.0[0], 0);
-                let (r1, carry) = mac(0, val[0], R2.0[1], carry);
-                let (r2, carry) = mac(0, val[0], R2.0[2], carry);
-                let (r3, r4) = mac(0, val[0], R2.0[3], carry);
-
-                let (r1, carry) = mac(r1, val[1], R2.0[0], 0);
-                let (r2, carry) = mac(r2, val[1], R2.0[1], carry);
-                let (r3, carry) = mac(r3, val[1], R2.0[2], carry);
-                let (r4, r5) = mac(r4, val[1], R2.0[3], carry);
-
-                let (r2, carry) = mac(r2, val[2], R2.0[0], 0);
-                let (r3, carry) = mac(r3, val[2], R2.0[1], carry);
-                let (r4, carry) = mac(r4, val[2], R2.0[2], carry);
-                let (r5, r6) = mac(r5, val[2], R2.0[3], carry);
-
-                let (r3, carry) = mac(r3, val[3], R2.0[0], 0);
-                let (r4, carry) = mac(r4, val[3], R2.0[1], carry);
-                let (r5, carry) = mac(r5, val[3], R2.0[2], carry);
-                let (r6, r7) = mac(r6, val[3], R2.0[3], carry);
-
-                let k = r0.wrapping_mul(INV);
-                let (_, carry) = mac(r0, k, $modulus.0[0], 0);
-                let (r1, carry) = mac(r1, k, $modulus.0[1], carry);
-                let (r2, carry) = mac(r2, k, $modulus.0[2], carry);
-                let (r3, carry) = mac(r3, k, $modulus.0[3], carry);
-                let (r4, carry2) = adc(r4, 0, carry);
-
-                let k = r1.wrapping_mul(INV);
-                let (_, carry) = mac(r1, k, $modulus.0[0], 0);
-                let (r2, carry) = mac(r2, k, $modulus.0[1], carry);
-                let (r3, carry) = mac(r3, k, $modulus.0[2], carry);
-                let (r4, carry) = mac(r4, k, $modulus.0[3], carry);
-                let (r5, carry2) = adc(r5, carry2, carry);
-
-                let k = r2.wrapping_mul(INV);
-                let (_, carry) = mac(r2, k, $modulus.0[0], 0);
-                let (r3, carry) = mac(r3, k, $modulus.0[1], carry);
-                let (r4, carry) = mac(r4, k, $modulus.0[2], carry);
-                let (r5, carry) = mac(r5, k, $modulus.0[3], carry);
-                let (r6, carry2) = adc(r6, carry2, carry);
-
-                let k = r3.wrapping_mul(INV);
-                let (_, carry) = mac(r3, k, $modulus.0[0], 0);
-                let (r4, carry) = mac(r4, k, $modulus.0[1], carry);
-                let (r5, carry) = mac(r5, k, $modulus.0[2], carry);
-                let (r6, carry) = mac(r6, k, $modulus.0[3], carry);
-                let (r7, _) = adc(r7, carry2, carry);
-
-                let (d0, borrow) = sbb(r4, $modulus.0[0], 0);
-                let (d1, borrow) = sbb(r5, $modulus.0[1], borrow);
-                let (d2, borrow) = sbb(r6, $modulus.0[2], borrow);
-                let (d3, borrow) = sbb(r7, $modulus.0[3], borrow);
-
-                // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise
-                // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus.
-                let (d0, carry) = adc(d0, $modulus.0[0] & borrow, 0);
-                let (d1, carry) = adc(d1, $modulus.0[1] & borrow, carry);
-                let (d2, carry) = adc(d2, $modulus.0[2] & borrow, carry);
-                let (d3, _) = adc(d3, $modulus.0[3] & borrow, carry);
-
-                $field([d0, d1, d2, d3])
-            }
-
             /// Squares this element.
             #[inline]
             pub fn square(&self) -> $field {
diff --git a/src/bn256/fq.rs b/src/bn256/fq.rs
index 4fd2ecf..4da19d4 100644
--- a/src/bn256/fq.rs
+++ b/src/bn256/fq.rs
@@ -7,7 +7,7 @@ use rand::RngCore;
 use std::io::{self, Read, Write};
 use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
 
-#[cfg(feature = "asm")]
+#[cfg(all(feature = "asm", target_arch = "x86_64"))]
 use super::assembly::assembly_field;
 
 #[derive(Clone, Copy, Eq)]
@@ -214,9 +214,6 @@ impl<'a, 'b> Mul<&'b Fq> for &'a Fq {
 impl_binops_additive!(Fq, Fq);
 impl_binops_multiplicative!(Fq, Fq);
 
-#[cfg(feature = "asm")]
-assembly_field!(Fq, MODULUS, INV);
-
 impl Fq {
     pub const fn size() -> usize {
         32
@@ -254,10 +251,10 @@ impl Fq {
     pub fn to_bytes(&self) -> [u8; 32] {
         // Turn into canonical form by computing
         // (a.R) / R = a
-        #[cfg(feature = "asm")]
+        #[cfg(all(feature = "asm", target_arch = "x86_64"))]
         let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);
 
-        #[cfg(not(feature = "asm"))]
+        #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
         let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
 
         let mut res = [0; 32];
@@ -287,10 +284,7 @@ impl Fq {
             LegendreSymbol::QuadraticNonResidue
         }
     }
-}
 
-#[cfg(not(feature = "asm"))]
-impl Fq {
     /// Returns zero, the additive identity.
     #[inline]
     pub const fn zero() -> Fq {
@@ -303,13 +297,6 @@ impl Fq {
         R
     }
 
-    /// Doubles this field element.
-    #[inline]
-    pub const fn double(&self) -> Fq {
-        // TODO: This can be achieved more efficiently with a bitshift.
-        self.add(self)
-    }
-
     fn from_u512(limbs: [u64; 8]) -> Fq {
         // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
         // with the higher bits multiplied by 2^256. Thus, we perform two reductions
@@ -335,6 +322,19 @@ impl Fq {
     pub const fn from_raw(val: [u64; 4]) -> Self {
         (&Fq(val)).mul(&R2)
     }
+}
+
+#[cfg(all(feature = "asm", target_arch = "x86_64"))]
+assembly_field!(Fq, MODULUS, INV);
+
+#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
+impl Fq {
+    /// Doubles this element (`self + self`); fallback when the `asm` x86_64 path is disabled.
+    #[inline]
+    pub const fn double(&self) -> Fq {
+        // TODO: This can be achieved more efficiently with a bitshift.
+        self.add(self)
+    }
 
     /// Squares this element.
     #[inline]
@@ -598,11 +598,11 @@ impl ff::PrimeField for Fq {
     fn to_repr(&self) -> Self::Repr {
         // Turn into canonical form by computing
         // (a.R) / R = a
-        #[cfg(feature = "asm")]
+        #[cfg(all(feature = "asm", target_arch = "x86_64"))]
         let tmp =
             Self::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);
 
-        #[cfg(not(feature = "asm"))]
+        #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
         let tmp = Self::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
 
         let mut res = [0; 32];
@@ -690,10 +690,10 @@ impl FieldExt for Fq {
     /// Gets the lower 128 bits of this field element when expressed
     /// canonically.
     fn get_lower_128(&self) -> u128 {
-        #[cfg(feature = "asm")]
+        #[cfg(all(feature = "asm", target_arch = "x86_64"))]
         let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);
 
-        #[cfg(not(feature = "asm"))]
+        #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
         let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
 
         u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64)
diff --git a/src/bn256/fr.rs b/src/bn256/fr.rs
index 9d10662..c67a42b 100644
--- a/src/bn256/fr.rs
+++ b/src/bn256/fr.rs
@@ -8,7 +8,7 @@ use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
 
 use crate::arithmetic::{adc, mac, sbb, BaseExt, FieldExt, Group};
 
-#[cfg(feature = "asm")]
+#[cfg(all(feature = "asm", target_arch = "x86_64"))]
 use super::assembly::assembly_field;
 
 #[derive(Clone, Copy, Eq, Hash)]
@@ -222,17 +222,11 @@ impl<'a, 'b> Mul<&'b Fr> for &'a Fr {
 impl_binops_additive!(Fr, Fr);
 impl_binops_multiplicative!(Fr, Fr);
 
-#[cfg(feature = "asm")]
-assembly_field!(Fr, MODULUS, INV);
-
 impl Fr {
     pub fn legendre(&self) -> LegendreSymbol {
         unimplemented!()
     }
-}
 
-#[cfg(not(feature = "asm"))]
-impl Fr {
     /// Returns zero, the additive identity.
     #[inline]
     pub const fn zero() -> Fr {
@@ -245,13 +239,6 @@ impl Fr {
         R
     }
 
-    /// Doubles this field element.
-    #[inline]
-    pub const fn double(&self) -> Fr {
-        // TODO: This can be achieved more efficiently with a bitshift.
-        self.add(self)
-    }
-
     fn from_u512(limbs: [u64; 8]) -> Fr {
         // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
         // with the higher bits multiplied by 2^256. Thus, we perform two reductions
@@ -277,6 +264,19 @@ impl Fr {
     pub const fn from_raw(val: [u64; 4]) -> Self {
         (&Fr(val)).mul(&R2)
     }
+}
+
+#[cfg(all(feature = "asm", target_arch = "x86_64"))]
+assembly_field!(Fr, MODULUS, INV);
+
+#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
+impl Fr {
+    /// Doubles this element (`self + self`); fallback when the `asm` x86_64 path is disabled.
+    #[inline]
+    pub const fn double(&self) -> Fr {
+        // TODO: This can be achieved more efficiently with a bitshift.
+        self.add(self)
+    }
 
     /// Squares this element.
     #[inline]
@@ -538,10 +538,10 @@ impl ff::PrimeField for Fr {
     fn to_repr(&self) -> Self::Repr {
         // Turn into canonical form by computing
         // (a.R) / R = a
-        #[cfg(feature = "asm")]
+        #[cfg(all(feature = "asm", target_arch = "x86_64"))]
         let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);
 
-        #[cfg(not(feature = "asm"))]
+        #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
         let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
 
         let mut res = [0; 32];
@@ -672,10 +672,10 @@ impl FieldExt for Fr {
     /// Gets the lower 128 bits of this field element when expressed
     /// canonically.
     fn get_lower_128(&self) -> u128 {
-        #[cfg(feature = "asm")]
+        #[cfg(all(feature = "asm", target_arch = "x86_64"))]
         let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);
 
-        #[cfg(not(feature = "asm"))]
+        #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
         let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
 
         u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64)
diff --git a/src/bn256/mod.rs b/src/bn256/mod.rs
index b31e18b..c6c9467 100644
--- a/src/bn256/mod.rs
+++ b/src/bn256/mod.rs
@@ -6,7 +6,7 @@ mod fq6;
 mod fr;
 mod g;
 
-#[cfg(feature = "asm")]
+#[cfg(all(feature = "asm", target_arch = "x86_64"))]
 mod assembly;
 
 pub use engine::*;