diff --git a/Cargo.lock b/Cargo.lock index 1599cd127..955e555ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5338,7 +5338,7 @@ dependencies = [ [[package]] name = "tract-core" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "anyhow", "bit-set", @@ -5362,7 +5362,7 @@ dependencies = [ [[package]] name = "tract-data" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "anyhow", "half 2.2.1", @@ -5381,7 +5381,7 @@ dependencies = [ [[package]] name = "tract-hir" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "derive-new", "log", @@ -5391,7 +5391,7 @@ dependencies = [ [[package]] name = "tract-linalg" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "cc", "derive-new", @@ -5415,7 +5415,7 @@ dependencies = [ [[package]] name = "tract-nnef" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "byteorder", "flate2", @@ -5429,7 +5429,7 @@ dependencies = [ [[package]] name = "tract-onnx" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "bytes", "derive-new", @@ -5446,7 +5446,7 @@ dependencies = [ [[package]] name = "tract-onnx-opl" version = "0.20.23-pre" -source = "git+https://github.com/sonos/tract/?rev=4ee813d#4ee813dd327d3df3e5c958cab9903fb332c9ceca" +source = "git+https://github.com/sonos/tract/?rev=7b1aa33b2f7d1f19b80e270c83320f0f94daff69#7b1aa33b2f7d1f19b80e270c83320f0f94daff69" dependencies = [ "getrandom", "log", diff --git a/Cargo.toml b/Cargo.toml index 88d5d8e73..940bb355b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ tokio-util = { version = "0.7.9", features = ["codec"] } pyo3 = { version = "0.18.3", features = ["extension-module", "abi3-py37", "macros"], default_features = false, optional = true } pyo3-asyncio = { version = "0.18.0", features = ["attributes", "tokio-runtime"], default_features = false, optional = true } pyo3-log = { version = "0.8.1", default_features = false, optional = true } -tract-onnx = { git = "https://github.com/sonos/tract/", rev= "4ee813d", default_features = false, optional = true } +tract-onnx = { git = "https://github.com/sonos/tract/", rev= "7b1aa33b2f7d1f19b80e270c83320f0f94daff69", default_features = false, optional = true } tabled = { version = 
"0.12.0", optional = true } diff --git a/benches/accum_conv.rs b/benches/accum_conv.rs index a2f62a11e..79787d33f 100644 --- a/benches/accum_conv.rs +++ b/benches/accum_conv.rs @@ -28,8 +28,8 @@ const K: usize = 17; #[derive(Clone, Debug)] struct MyCircuit { image: ValTensor, - kernel: Tensor, - bias: Tensor, + kernel: ValTensor, + bias: ValTensor, } impl Circuit for MyCircuit { @@ -65,10 +65,8 @@ impl Circuit for MyCircuit { config .layout( &mut region, - &[self.image.clone()], + &[self.image.clone(), self.kernel.clone(), self.bias.clone()], Box::new(PolyOp::Conv { - kernel: self.kernel.clone(), - bias: Some(self.bias.clone()), padding: [(0, 0); 2], stride: (1, 1), }), @@ -116,8 +114,8 @@ fn runcnvrl(c: &mut Criterion) { let circuit = MyCircuit { image: ValTensor::from(image), - kernel, - bias, + kernel: ValTensor::try_from(kernel).unwrap(), + bias: ValTensor::try_from(bias).unwrap(), }; group.throughput(Throughput::Elements(*size as u64)); diff --git a/examples/conv2d_mnist/main.rs b/examples/conv2d_mnist/main.rs index f13d683de..59e60514d 100644 --- a/examples/conv2d_mnist/main.rs +++ b/examples/conv2d_mnist/main.rs @@ -82,8 +82,8 @@ struct MyCircuit< // Given the stateless ConvConfig type information, a DNN trace is determined by its input and the parameters of its layers. // Computing the trace still requires a forward pass. The intermediate activations are stored only by the layouter. input: ValTensor, - l0_params: [Tensor; 2], - l2_params: [Tensor; 2], + l0_params: [ValTensor; 2], + l2_params: [ValTensor; 2], } impl< @@ -202,14 +202,20 @@ where let mut region = RegionCtx::new(region, 0, NUM_INNER_COLS); let op = PolyOp::Conv { - kernel: self.l0_params[0].clone(), - bias: Some(self.l0_params[1].clone()), padding: [(PADDING, PADDING); 2], stride: (STRIDE, STRIDE), }; let x = config .layer_config - .layout(&mut region, &[self.input.clone()], Box::new(op)) + .layout( + &mut region, + &[ + self.input.clone(), + self.l0_params[0].clone(), + self.l0_params[1].clone(), + ], + Box::new(op), + ) .unwrap(); let x = config @@ -233,7 +239,7 @@ where .layer_config .layout( &mut region, - &[self.l2_params[0].clone().try_into().unwrap(), x], + &[self.l2_params[0].clone(), x], Box::new(PolyOp::Einsum { equation: "ij,j->ik".to_string(), }), @@ -245,7 +251,7 @@ where .layer_config .layout( &mut region, - &[x, self.l2_params[1].clone().try_into().unwrap()], + &[x, self.l2_params[1].clone()], Box::new(PolyOp::Add), ) .unwrap() @@ -345,9 +351,13 @@ pub fn runconv() { .unwrap(); l0_kernels.set_visibility(&ezkl::graph::Visibility::Private); + let l0_kernels = l0_kernels.try_into().unwrap(); + let mut l0_bias = Tensor::::from((0..OUT_CHANNELS).map(|_| fieldutils::i32_to_felt(0))); l0_bias.set_visibility(&ezkl::graph::Visibility::Private); + let l0_bias = l0_bias.try_into().unwrap(); + let mut l2_biases = Tensor::::from(myparams.biases.into_iter().map(|fl| { let dx = fl * 32_f32; let rounded = dx.round(); @@ -357,6 +367,8 @@ pub fn runconv() { l2_biases.set_visibility(&ezkl::graph::Visibility::Private); l2_biases.reshape(&[l2_biases.len(), 1]).unwrap(); + let l2_biases = l2_biases.try_into().unwrap(); + let mut l2_weights = Tensor::::from(myparams.weights.into_iter().flatten().map(|fl| { let dx = fl * 32_f32; let rounded = dx.round(); @@ -366,6 +378,8 @@ pub fn runconv() { l2_weights.set_visibility(&ezkl::graph::Visibility::Private); l2_weights.reshape(&[CLASSES, LEN]).unwrap(); + let l2_weights = l2_weights.try_into().unwrap(); + let circuit = MyCircuit::< LEN, 10, diff --git a/examples/onnx/bitshift/gen.py 
b/examples/onnx/bitshift/gen.py new file mode 100644 index 000000000..ac2462f72 --- /dev/null +++ b/examples/onnx/bitshift/gen.py @@ -0,0 +1,51 @@ +from torch import nn +import torch +import json +import numpy as np + + +class MyModel(nn.Module): + def __init__(self): + super(MyModel, self).__init__() + + def forward(self, w, x, y, z): + + return x << 2, y >> 3, z << 1, w >> 4 + + +circuit = MyModel() + +# random integers between 0 and 100 +x = torch.empty(1, 3).uniform_(0, 100).to(torch.int32) +y = torch.empty(1, 3).uniform_(0, 100).to(torch.int32) +z = torch.empty(1, 3).uniform_(0, 100).to(torch.int32) +w = torch.empty(1, 3).uniform_(0, 100).to(torch.int32) + +torch.onnx.export(circuit, (w, x, y, z), "network.onnx", + export_params=True, # store the trained parameter weights inside the model file + opset_version=16, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names=['input', 'input1', 'input2', + 'input3'], # the model's input names + output_names=['output'], # the model's output names + dynamic_axes={'input': {0: 'batch_size'}, # variable length axes + 'input1': {0: 'batch_size'}, + 'input2': {0: 'batch_size'}, + 'input3': {0: 'batch_size'}, + 'output': {0: 'batch_size'}, + 'output1': {0: 'batch_size'}, + 'output2': {0: 'batch_size'}, + 'output3': {0: 'batch_size'}}) + + +d = ((w).detach().numpy()).reshape([-1]).tolist() +d1 = ((x).detach().numpy()).reshape([-1]).tolist() +d2 = ((y).detach().numpy()).reshape([-1]).tolist() +d3 = ((z).detach().numpy()).reshape([-1]).tolist() + +data = dict( + input_data=[d, d1, d2, d3], +) + +# Serialize data into file: +json.dump(data, open("input.json", 'w')) diff --git a/examples/onnx/bitshift/input.json b/examples/onnx/bitshift/input.json new file mode 100644 index 000000000..fbf472a20 --- /dev/null +++ b/examples/onnx/bitshift/input.json @@ -0,0 +1 @@ +{"input_data": [[41, 39, 49], [13, 55, 66], [85, 60, 48], [25, 15, 15]]} \ No newline at end of file diff --git a/examples/onnx/bitshift/network.onnx b/examples/onnx/bitshift/network.onnx new file mode 100644 index 000000000..c434da7f2 Binary files /dev/null and b/examples/onnx/bitshift/network.onnx differ diff --git a/examples/onnx/bitwise_ops/input.json b/examples/onnx/bitwise_ops/input.json index 6d235a478..066f99340 100644 --- a/examples/onnx/bitwise_ops/input.json +++ b/examples/onnx/bitwise_ops/input.json @@ -1 +1 @@ -{"input_data": [[true, true, false], [false, true, true], [true, true, true], [false, true, false]]} \ No newline at end of file +{"input_data": [[false, true, false], [false, true, true], [false, false, false], [false, true, true]]} \ No newline at end of file diff --git a/examples/onnx/remainder/gen.py b/examples/onnx/remainder/gen.py new file mode 100644 index 000000000..4c4ae06f4 --- /dev/null +++ b/examples/onnx/remainder/gen.py @@ -0,0 +1,40 @@ +from torch import nn +import torch +import json +import numpy as np + + +class MyModel(nn.Module): + def __init__(self): + super(MyModel, self).__init__() + + def forward(self, x): + return x % 0.5 + + +circuit = MyModel() + +x = torch.empty(1, 8).uniform_(0, 1) + +out = circuit(x) + +print(out) + +torch.onnx.export(circuit, x, "network.onnx", + export_params=True, # store the trained parameter weights inside the model file + opset_version=17, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names=['input'], # the model's input names + output_names=['output'], # 
the model's output names + dynamic_axes={'input': {0: 'batch_size'}, # variable length axes + 'output': {0: 'batch_size'}}) + + +d1 = ((x).detach().numpy()).reshape([-1]).tolist() + +data = dict( + input_data=[d1], +) + +# Serialize data into file: +json.dump(data, open("input.json", 'w')) diff --git a/examples/onnx/remainder/input.json b/examples/onnx/remainder/input.json new file mode 100644 index 000000000..cf57f8d6c --- /dev/null +++ b/examples/onnx/remainder/input.json @@ -0,0 +1 @@ +{"input_data": [[0.24276268482208252, 0.7709522247314453, 0.3388288617134094, 0.04099464416503906, 0.5914043188095093, 0.6746469736099243, 0.32862555980682373, 0.6761162877082825]]} \ No newline at end of file diff --git a/examples/onnx/remainder/network.onnx b/examples/onnx/remainder/network.onnx new file mode 100644 index 000000000..d0591b4f4 Binary files /dev/null and b/examples/onnx/remainder/network.onnx differ diff --git a/src/circuit/ops/layouts.rs b/src/circuit/ops/layouts.rs index d349c9e78..31e0b969e 100644 --- a/src/circuit/ops/layouts.rs +++ b/src/circuit/ops/layouts.rs @@ -2060,7 +2060,13 @@ pub fn conv")?; if has_bias { - let bias = values[2].get_single_elem(start_kernel_index)?; + let bias_index = if values[2].len() > 1 { + start_kernel_index + } else { + 0 + }; + + let bias = values[2].get_single_elem(bias_index)?; res = pairwise(config, region, &[res, bias], BaseOp::Add)?; } region.flush()?; diff --git a/src/circuit/ops/lookup.rs b/src/circuit/ops/lookup.rs index c111f8b95..eca6772c3 100644 --- a/src/circuit/ops/lookup.rs +++ b/src/circuit/ops/lookup.rs @@ -231,11 +231,7 @@ impl Op for LookupOp { | LookupOp::LessThan { .. } | LookupOp::GreaterThanEqual { .. } | LookupOp::LessThanEqual { .. } - | LookupOp::KroneckerDelta - | LookupOp::Round { .. } - | LookupOp::RoundHalfToEven { .. } - | LookupOp::Ceil { .. } - | LookupOp::Floor { .. } => 0, + | LookupOp::KroneckerDelta => 0, _ => inputs_scale[0], }; Ok(scale) diff --git a/src/circuit/ops/poly.rs b/src/circuit/ops/poly.rs index 69b2c7aee..93b762669 100644 --- a/src/circuit/ops/poly.rs +++ b/src/circuit/ops/poly.rs @@ -8,7 +8,7 @@ use super::{base::BaseOp, *}; #[allow(missing_docs)] /// An enum representing the operations that can be expressed as arithmetic (non lookup) operations. #[derive(Clone, Debug, Serialize, Deserialize)] -pub enum PolyOp { +pub enum PolyOp { MultiBroadcastTo { shape: Vec, }, @@ -16,8 +16,6 @@ pub enum PolyOp { equation: String, }, Conv { - kernel: Tensor, - bias: Option>, padding: [(usize, usize); 2], stride: (usize, usize), }, @@ -27,8 +25,6 @@ pub enum PolyOp { modulo: usize, }, DeConv { - kernel: Tensor, - bias: Option>, padding: [(usize, usize); 2], output_padding: (usize, usize), stride: (usize, usize), @@ -73,10 +69,8 @@ pub enum PolyOp { Xor, } -impl PolyOp {} - impl Deserialize<'de>> Op - for PolyOp + for PolyOp { /// Returns a reference to the Any trait. 
fn as_any(&self) -> &dyn Any { @@ -166,31 +160,12 @@ impl Deserialize< PolyOp::Neg => tensor::ops::neg(&inputs[0]), PolyOp::Sub => tensor::ops::sub(&inputs), PolyOp::Mult => tensor::ops::mult(&inputs), - PolyOp::Conv { - kernel: a, - bias, - padding, - stride, - } => { - inputs.push(a.clone()); - if let Some(b) = bias { - inputs.push(b.clone()); - } - tensor::ops::conv(&inputs, *padding, *stride) - } + PolyOp::Conv { padding, stride } => tensor::ops::conv(&inputs, *padding, *stride), PolyOp::DeConv { - kernel: a, - bias, padding, output_padding, stride, - } => { - inputs.push(a.clone()); - if let Some(b) = bias { - inputs.push(b.clone()); - } - tensor::ops::deconv(&inputs, *padding, *output_padding, *stride) - } + } => tensor::ops::deconv(&inputs, *padding, *output_padding, *stride), PolyOp::Pack(base, scale) => { if 1 != inputs.len() { return Err(TensorError::DimMismatch("pack inputs".to_string())); @@ -240,8 +215,6 @@ impl Deserialize< region: &mut RegionCtx, values: &[ValTensor], ) -> Result>, Box> { - let mut values = values.to_vec(); - Ok(Some(match self { PolyOp::MultiBroadcastTo { shape } => { layouts::expand(config, region, values[..].try_into()?, shape)? @@ -271,38 +244,21 @@ impl Deserialize< PolyOp::Prod { axes, .. } => { layouts::prod_axes(config, region, values[..].try_into()?, axes)? } - PolyOp::Conv { - kernel, - bias, - padding, - stride, - } => { - values.push(kernel.clone().try_into()?); - if let Some(bias) = bias { - values.push(bias.clone().try_into()?); - } + PolyOp::Conv { padding, stride } => { layouts::conv(config, region, values[..].try_into()?, *padding, *stride)? } PolyOp::DeConv { - kernel, - bias, padding, output_padding, stride, - } => { - values.push(kernel.clone().try_into()?); - if let Some(bias) = bias { - values.push(bias.clone().try_into()?); - } - layouts::deconv( - config, - region, - values[..].try_into()?, - *padding, - *output_padding, - *stride, - )? - } + } => layouts::deconv( + config, + region, + values[..].try_into()?, + *padding, + *output_padding, + *stride, + )?, PolyOp::Add => layouts::pairwise(config, region, values[..].try_into()?, BaseOp::Add)?, PolyOp::Sub => layouts::pairwise(config, region, values[..].try_into()?, BaseOp::Sub)?, PolyOp::Mult => { @@ -350,32 +306,22 @@ impl Deserialize< } PolyOp::Prod { len_prod, .. } => in_scales[0] * (*len_prod as crate::Scale), PolyOp::Sum { .. } => in_scales[0], - PolyOp::Conv { kernel, bias, .. } => { - let kernel_scale = match kernel.scale() { - Some(s) => s, - None => return Err("scale must be set for conv kernel".into()), - }; - let output_scale = in_scales[0] + kernel_scale; - if let Some(b) = bias { - let bias_scale = match b.scale() { - Some(s) => s, - None => return Err("scale must be set for conv bias".into()), - }; + PolyOp::Conv { .. } => { + let input_scale = in_scales[0]; + let kernel_scale = in_scales[1]; + let output_scale = input_scale + kernel_scale; + if in_scales.len() == 3 { + let bias_scale = in_scales[2]; assert_eq!(output_scale, bias_scale); } output_scale } - PolyOp::DeConv { kernel, bias, .. } => { - let kernel_scale = match kernel.scale() { - Some(s) => s, - None => return Err("scale must be set for deconv kernel".into()), - }; - let output_scale = in_scales[0] + kernel_scale; - if let Some(b) = bias { - let bias_scale = match b.scale() { - Some(s) => s, - None => return Err("scale must be set for deconv bias".into()), - }; + PolyOp::DeConv { .. 
} => { + let input_scale = in_scales[0]; + let kernel_scale = in_scales[1]; + let output_scale = input_scale + kernel_scale; + if in_scales.len() == 3 { + let bias_scale = in_scales[2]; assert_eq!(output_scale, bias_scale); } output_scale diff --git a/src/circuit/tests.rs b/src/circuit/tests.rs index b3c114acb..f1faeda56 100644 --- a/src/circuit/tests.rs +++ b/src/circuit/tests.rs @@ -1008,7 +1008,7 @@ mod conv { #[derive(Clone)] struct ConvCircuit { - inputs: Vec>, + inputs: Vec>, _marker: PhantomData, } @@ -1041,10 +1041,8 @@ mod conv { config .layout( &mut region, - &[self.inputs[0].clone().try_into().unwrap()], + &self.inputs, Box::new(PolyOp::Conv { - kernel: self.inputs[1].clone(), - bias: None, padding: [(1, 1); 2], stride: (2, 2), }), @@ -1074,6 +1072,8 @@ mod conv { .unwrap(); image.set_visibility(&crate::graph::Visibility::Private); + let image = ValTensor::try_from(image).unwrap(); + let mut kernels = Tensor::from( (0..{ out_channels * in_channels * kernel_height * kernel_width }) .map(|_| F::random(OsRng)), @@ -1083,9 +1083,12 @@ mod conv { .unwrap(); kernels.set_visibility(&crate::graph::Visibility::Private); + let kernels = ValTensor::try_from(kernels).unwrap(); let mut bias = Tensor::from((0..{ out_channels }).map(|_| F::random(OsRng))); bias.set_visibility(&crate::graph::Visibility::Private); + let bias = ValTensor::try_from(bias).unwrap(); + let circuit = ConvCircuit:: { inputs: [image, kernels, bias].to_vec(), _marker: PhantomData, @@ -1121,6 +1124,9 @@ mod conv { .unwrap(); kernels.set_visibility(&crate::graph::Visibility::Private); + let image = ValTensor::try_from(image).unwrap(); + let kernels = ValTensor::try_from(kernels).unwrap(); + let circuit = ConvCircuit:: { inputs: [image, kernels].to_vec(), _marker: PhantomData, @@ -1144,7 +1150,7 @@ mod conv_col_ultra_overflow { #[derive(Clone)] struct ConvCircuit { image: ValTensor, - kernel: Tensor, + kernel: ValTensor, _marker: PhantomData, } @@ -1177,10 +1183,8 @@ mod conv_col_ultra_overflow { config .layout( &mut region, - &[self.image.clone()], + &[self.image.clone(), self.kernel.clone()], Box::new(PolyOp::Conv { - kernel: self.kernel.clone(), - bias: None, padding: [(1, 1); 2], stride: (2, 2), }), @@ -1224,7 +1228,7 @@ mod conv_col_ultra_overflow { let circuit = ConvCircuit:: { image: ValTensor::try_from(image).unwrap(), - kernel: kernels, + kernel: ValTensor::try_from(kernels).unwrap(), _marker: PhantomData, }; @@ -1281,7 +1285,7 @@ mod conv_relu_col_ultra_overflow { #[derive(Clone)] struct ConvCircuit { image: ValTensor, - kernel: Tensor, + kernel: ValTensor, _marker: PhantomData, } @@ -1321,10 +1325,8 @@ mod conv_relu_col_ultra_overflow { let output = config .layout( &mut region, - &[self.image.clone()], + &[self.image.clone(), self.kernel.clone()], Box::new(PolyOp::Conv { - kernel: self.kernel.clone(), - bias: None, padding: [(1, 1); 2], stride: (2, 2), }), @@ -1376,7 +1378,7 @@ mod conv_relu_col_ultra_overflow { let circuit = ConvCircuit:: { image: ValTensor::try_from(image).unwrap(), - kernel: kernels, + kernel: ValTensor::try_from(kernels).unwrap(), _marker: PhantomData, }; diff --git a/src/graph/model.rs b/src/graph/model.rs index c7d008e87..e667c3737 100644 --- a/src/graph/model.rs +++ b/src/graph/model.rs @@ -784,7 +784,6 @@ impl Model { let symbol = model.symbol_table.sym(symbol); symbol_values = symbol_values.with(&symbol, *value as i64); info!("set {} to {}", symbol, value); - println!("set {} to {}", symbol, value); } // Note: do not optimize the model, as the layout will depend on underlying hardware 
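
Reviewer note on the PolyOp refactor above: `Conv` and `DeConv` no longer carry `kernel`/`bias` tensors inside the op; the layouter reads them from the input `ValTensor`s (callers now pass `&[image, kernel, bias]` to `layout`, as the benches and tests do), and `out_scale` is derived from `in_scales` alone, with the graph loader rebasing a constant bias to `input_scale + kernel_scale` beforehand. The sketch below is a minimal, standalone illustration of that fixed-point bookkeeping; `conv_out_scale` is a hypothetical helper using plain `i32` scales, not the ezkl API.

```rust
// Hedged sketch: the scale rule used by PolyOp::Conv/DeConv after this change;
// `conv_out_scale` is a made-up helper, not ezkl code.
fn conv_out_scale(in_scales: &[i32]) -> Result<i32, String> {
    // in_scales is [input, kernel] or [input, kernel, bias]
    if in_scales.len() < 2 {
        return Err("conv needs an input scale and a kernel scale".into());
    }
    let output_scale = in_scales[0] + in_scales[1];
    if let Some(&bias_scale) = in_scales.get(2) {
        // Mirrors the assert_eq! in out_scale: the loader is expected to have
        // already rescaled a constant bias to input_scale + kernel_scale.
        if bias_scale != output_scale {
            return Err(format!(
                "bias scale {bias_scale} != input + kernel scale {output_scale}"
            ));
        }
    }
    Ok(output_scale)
}

fn main() {
    assert_eq!(conv_out_scale(&[7, 7, 14]).unwrap(), 14);
    assert!(conv_out_scale(&[7, 7, 7]).is_err());
}
```
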
diff --git a/src/graph/node.rs b/src/graph/node.rs index 42c46bb4a..af7e3b6d5 100644 --- a/src/graph/node.rs +++ b/src/graph/node.rs @@ -262,7 +262,7 @@ impl Op for RebaseScale { #[derive(Clone, Debug, Serialize, Deserialize)] pub enum SupportedOp { /// A linear operation. - Linear(PolyOp), + Linear(PolyOp), /// A nonlinear operation. Nonlinear(LookupOp), /// A hybrid operation. @@ -356,7 +356,7 @@ impl SupportedOp { impl From>> for SupportedOp { fn from(value: Box>) -> Self { - if let Some(op) = value.as_any().downcast_ref::>() { + if let Some(op) = value.as_any().downcast_ref::() { return SupportedOp::Linear(op.clone()); }; diff --git a/src/graph/utilities.rs b/src/graph/utilities.rs index 7ca931dd2..0ccfbfb18 100644 --- a/src/graph/utilities.rs +++ b/src/graph/utilities.rs @@ -7,6 +7,7 @@ use super::{Rescaled, SupportedOp, Visibility}; use crate::circuit::hybrid::HybridOp; #[cfg(not(target_arch = "wasm32"))] use crate::circuit::lookup::LookupOp; +#[cfg(not(target_arch = "wasm32"))] use crate::circuit::poly::PolyOp; use crate::circuit::Op; use crate::tensor::{Tensor, TensorError, TensorType}; @@ -24,7 +25,7 @@ use tract_onnx::prelude::{DatumType, Node as OnnxNode, TypedFact, TypedOp}; use tract_onnx::tract_core::ops::{ array::{Gather, GatherElements, MultiBroadcastTo, OneHot, ScatterElements, Slice, Topk}, change_axes::AxisOp, - cnn::DeconvUnary, + cnn::{Conv, Deconv}, einsum::EinSum, element_wise::ElementWiseOp, nn::{LeakyRelu, Reduce, Softmax}, @@ -34,7 +35,7 @@ use tract_onnx::tract_core::ops::{ use tract_onnx::tract_hir::{ internal::DimLike, ops::array::{Pad, PadMode, TypedConcat}, - ops::cnn::{ConvUnary, PoolSpec}, + ops::cnn::PoolSpec, ops::konst::Const, ops::nn::DataFormat, tract_core::ops::cast::Cast, @@ -246,9 +247,74 @@ pub fn new_op_from_onnx( ) -> Result<(SupportedOp, Vec), Box> { use crate::circuit::InputType; + let input_scales = inputs + .iter() + .flat_map(|x| x.out_scales()) + .collect::>(); + + let mut replace_const = |scale: crate::Scale, + index: usize, + default_op: SupportedOp| + -> Result> { + let mut constant = inputs[index].opkind(); + let constant = constant.get_mutable_constant(); + if let Some(c) = constant { + inputs[index].bump_scale(scale); + c.rebase_scale(scale)?; + inputs[index].replace_opkind(SupportedOp::Constant(c.clone())); + Ok(SupportedOp::Linear(PolyOp::Identity)) + } else { + Ok(default_op) + } + }; + debug!("Loading node: {:?}", node); let mut deleted_indices = vec![]; let node = match node.op().name().as_ref() { + "ShiftLeft" => { + // load shift amount + if let Some(c) = inputs[1].opkind().get_mutable_constant() { + inputs[1].decrement_use(); + deleted_indices.push(1); + let raw_values = &c.raw_values; + if raw_values.len() != 1 { + return Err(Box::new(GraphError::InvalidDims( + idx, + "shift left".to_string(), + ))); + } + SupportedOp::Nonlinear(LookupOp::Div { + denom: crate::circuit::utils::F32(1.0 / 2.0f32.powf(raw_values[0])), + }) + } else { + return Err(Box::new(GraphError::OpMismatch( + idx, + "ShiftLeft".to_string(), + ))); + } + } + "ShiftRight" => { + // load shift amount + if let Some(c) = inputs[1].opkind().get_mutable_constant() { + inputs[1].decrement_use(); + deleted_indices.push(1); + let raw_values = &c.raw_values; + if raw_values.len() != 1 { + return Err(Box::new(GraphError::InvalidDims( + idx, + "shift right".to_string(), + ))); + } + SupportedOp::Nonlinear(LookupOp::Div { + denom: crate::circuit::utils::F32(2.0f32.powf(raw_values[0])), + }) + } else { + return Err(Box::new(GraphError::OpMismatch( + idx, + 
"ShiftRight".to_string(), + ))); + } + } "MultiBroadcastTo" => { let op = load_op::(node.op(), idx, node.op().name().to_string())?; let shape = op.shape.clone(); @@ -765,27 +831,8 @@ pub fn new_op_from_onnx( "Cast" => { let op = load_op::(node.op(), idx, node.op().name().to_string())?; let dt = op.to; - let input_scales = inputs - .iter() - .flat_map(|x| x.out_scales()) - .collect::>(); - assert_eq!(input_scales.len(), 1); - let mut constant = inputs[0].opkind(); - let constant = constant.get_mutable_constant(); - - let replace_const = |scale: crate::Scale, - default_op: SupportedOp| - -> Result> { - if let Some(c) = constant { - inputs[0].bump_scale(scale); - c.rebase_scale(scale)?; - inputs[0].replace_opkind(SupportedOp::Constant(c.clone())); - Ok(SupportedOp::Linear(PolyOp::Identity)) - } else { - Ok(default_op) - } - }; + assert_eq!(input_scales.len(), 1); match dt { DatumType::Bool @@ -800,6 +847,7 @@ pub fn new_op_from_onnx( | DatumType::U64 => { if input_scales[0] != 0 { replace_const( + 0, 0, SupportedOp::Nonlinear(LookupOp::Cast { scale: crate::circuit::utils::F32(scale_to_multiplier( @@ -1015,8 +1063,8 @@ pub fn new_op_from_onnx( } "Cube" => SupportedOp::Linear(PolyOp::Pow(3)), "Square" => SupportedOp::Linear(PolyOp::Pow(2)), - "ConvUnary" => { - let conv_node: &ConvUnary = match node.op().downcast_ref::() { + "Conv" => { + let conv_node: &Conv = match node.op().downcast_ref::() { Some(b) => b, None => { return Err(Box::new(GraphError::OpMismatch(idx, "conv".to_string()))); @@ -1074,37 +1122,31 @@ pub fn new_op_from_onnx( } }; - let kernel = extract_tensor_value(conv_node.kernel.clone())?; - let kernel = quantize_tensor(kernel, scales.params, param_visibility)?; - - let bias = match conv_node.bias.clone() { - Some(b) => { - let const_value = extract_tensor_value(b)?; - - let val = quantize_tensor( - const_value, - scales.params + inputs[0].out_scales()[0], - param_visibility, + // if bias exists then rescale it to the input + kernel scale + if input_scales.len() == 3 { + let bias_scale = input_scales[2]; + let input_scale = input_scales[0]; + let kernel_scale = input_scales[1]; + + let output_scale = input_scale + kernel_scale; + if bias_scale != output_scale { + replace_const( + output_scale, + 2, + SupportedOp::Unknown(crate::circuit::Unknown), )?; - Some(val) } - None => None, - }; + } - SupportedOp::Linear(PolyOp::Conv { - kernel, - bias, - padding, - stride, - }) + SupportedOp::Linear(PolyOp::Conv { padding, stride }) } "Not" => SupportedOp::Linear(PolyOp::Not), "And" => SupportedOp::Linear(PolyOp::And), "Or" => SupportedOp::Linear(PolyOp::Or), "Xor" => SupportedOp::Linear(PolyOp::Xor), "Equals" => SupportedOp::Hybrid(HybridOp::Equals), - "DeconvUnary" => { - let deconv_node: &DeconvUnary = match node.op().downcast_ref::() { + "Deconv" => { + let deconv_node: &Deconv = match node.op().downcast_ref::() { Some(b) => b, None => { return Err(Box::new(GraphError::OpMismatch(idx, "deconv".to_string()))); @@ -1152,29 +1194,26 @@ pub fn new_op_from_onnx( } }; - let kernel = extract_tensor_value(deconv_node.kernel.clone())?; - let kernel = quantize_tensor(kernel, scales.params, param_visibility)?; - - let bias = match deconv_node.bias.clone() { - Some(b) => { - let const_value = extract_tensor_value(b)?; + let output_padding: (usize, usize) = + (deconv_node.adjustments[0], deconv_node.adjustments[1]); - let val = quantize_tensor( - const_value, - scales.params + inputs[0].out_scales()[0], - param_visibility, + // if bias exists then rescale it to the input + kernel scale + if 
input_scales.len() == 3 { + let bias_scale = input_scales[2]; + let input_scale = input_scales[0]; + let kernel_scale = input_scales[1]; + + let output_scale = input_scale + kernel_scale; + if bias_scale != output_scale { + replace_const( + output_scale, + 2, + SupportedOp::Unknown(crate::circuit::Unknown), )?; - Some(val) } - None => None, - }; - - let output_padding: (usize, usize) = - (deconv_node.adjustments[0], deconv_node.adjustments[1]); + } SupportedOp::Linear(PolyOp::DeConv { - kernel, - bias, padding, output_padding, stride, @@ -1408,18 +1447,6 @@ pub fn extract_const_quantized_values(op: SupportedOp) -> Option> { } } -/// Extract the quantized values from a conv op -pub fn extract_conv_values(boxed_op: Box>) -> [Option>; 2] { - let op = boxed_op - .as_any() - .downcast_ref::>(); - - if let Some(PolyOp::Conv { kernel, bias, .. }) = op { - return [Some(kernel.clone()), bias.clone()]; - } - [None, None] -} - /// Converts a tensor to a [ValTensor] with a given scale. pub fn quantize_tensor( const_value: Tensor, diff --git a/src/tensor/ops.rs b/src/tensor/ops.rs index 47d9ebfb7..e37ac8077 100644 --- a/src/tensor/ops.rs +++ b/src/tensor/ops.rs @@ -1834,8 +1834,13 @@ pub fn conv< let kernel_dims = kernel.dims(); if has_bias { - let bias = &inputs[2]; - if (bias.dims().len() != 1) || (bias.dims()[0] != kernel.dims()[0]) { + let bias = &mut inputs[2].clone(); + + if bias.dims().is_empty() { + bias.reshape(&[1])?; + } + + if (bias.dims().len() != 1) && (bias.dims()[0] != kernel.dims()[0]) { return Err(TensorError::DimMismatch("conv bias".to_string())); } } @@ -1915,7 +1920,12 @@ pub fn conv< let res = dot(&[local_image, local_kernel]).unwrap()[0].clone(); if has_bias { - *o = res + inputs[2][start_kernel_index].clone(); + let bias_index = if inputs[2].len() > 1 { + start_kernel_index + } else { + 0 + }; + *o = res + inputs[2][bias_index].clone(); } else { *o = res; } @@ -2189,13 +2199,18 @@ pub fn deconv< if stride.0 == 0 || stride.1 == 0 { return Err(TensorError::DimMismatch( - "non-positive stride is not supported for deconv".to_string(), + "nil stride is not supported for deconv".to_string(), )); } if has_bias { - let bias = &inputs[2]; - if (bias.dims().len() != 1) || (bias.dims()[0] != kernel.dims()[0]) { + let bias = &mut inputs[2].clone(); + + if bias.dims().is_empty() { + bias.reshape(&[1])?; + } + + if (bias.dims().len() != 1) && (bias.dims()[0] != kernel.dims()[0]) { return Err(TensorError::DimMismatch("deconv bias".to_string())); } } @@ -2749,13 +2764,13 @@ pub mod nonlinearities { /// &[3, 2], /// ).unwrap(); /// let result = ceil(&x, 2.0); - /// let expected = Tensor::::new(Some(&[1, 1, 2, 2, 3, 3]), &[3, 2]).unwrap(); + /// let expected = Tensor::::new(Some(&[2, 2, 4, 4, 6, 6]), &[3, 2]).unwrap(); /// assert_eq!(result, expected); /// ``` pub fn ceil(a: &Tensor, scale: f64) -> Tensor { a.par_enum_map(|_, a_i| { let kix = (a_i as f64) / scale; - let rounded = kix.ceil(); + let rounded = kix.ceil() * scale; Ok::<_, TensorError>(rounded as i128) }) .unwrap() @@ -2774,13 +2789,13 @@ pub mod nonlinearities { /// &[3, 2], /// ).unwrap(); /// let result = floor(&x, 2.0); - /// let expected = Tensor::::new(Some(&[0, 1, 1, 2, 2, 3]), &[3, 2]).unwrap(); + /// let expected = Tensor::::new(Some(&[0, 2, 2, 4, 4, 6]), &[3, 2]).unwrap(); /// assert_eq!(result, expected); /// ``` pub fn floor(a: &Tensor, scale: f64) -> Tensor { a.par_enum_map(|_, a_i| { let kix = (a_i as f64) / scale; - let rounded = kix.floor(); + let rounded = kix.floor() * scale; Ok::<_, TensorError>(rounded as i128) }) 
.unwrap() @@ -2799,13 +2814,13 @@ pub mod nonlinearities { /// &[3, 2], /// ).unwrap(); /// let result = round(&x, 2.0); - /// let expected = Tensor::::new(Some(&[1, 1, 2, 2, 3, 3]), &[3, 2]).unwrap(); + /// let expected = Tensor::::new(Some(&[2, 2, 4, 4, 6, 6]), &[3, 2]).unwrap(); /// assert_eq!(result, expected); /// ``` pub fn round(a: &Tensor, scale: f64) -> Tensor { a.par_enum_map(|_, a_i| { let kix = (a_i as f64) / scale; - let rounded = kix.round(); + let rounded = kix.round() * scale; Ok::<_, TensorError>(rounded as i128) }) .unwrap() @@ -2824,13 +2839,13 @@ pub mod nonlinearities { /// &[3, 2], /// ).unwrap(); /// let result = round_half_to_even(&x, 2.0); - /// let expected = Tensor::::new(Some(&[0, 1, 2, 2, 2, 3]), &[3, 2]).unwrap(); + /// let expected = Tensor::::new(Some(&[0, 2, 4, 4, 4, 6]), &[3, 2]).unwrap(); /// assert_eq!(result, expected); /// ``` pub fn round_half_to_even(a: &Tensor, scale: f64) -> Tensor { a.par_enum_map(|_, a_i| { let kix = (a_i as f64) / scale; - let rounded = kix.round_ties_even(); + let rounded = kix.round_ties_even() * scale; Ok::<_, TensorError>(rounded as i128) }) .unwrap() diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 0507618a0..e16163585 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -190,7 +190,7 @@ mod native_tests { "1l_prelu", ]; - const TESTS: [&str; 75] = [ + const TESTS: [&str; 77] = [ "1l_mlp", "1l_slice", "1l_concat", @@ -270,6 +270,8 @@ mod native_tests { "log_softmax", "eye", "ltsf", + "remainder", + "bitshift", ]; const WASM_TESTS: [&str; 48] = [ @@ -496,7 +498,7 @@ mod native_tests { } }); - seq!(N in 0..=74 { + seq!(N in 0..=76 { #(#[test_case(TESTS[N])])* #[ignore] diff --git a/tests/output_comparison.py b/tests/output_comparison.py index 54967a8ae..aba211840 100644 --- a/tests/output_comparison.py +++ b/tests/output_comparison.py @@ -32,12 +32,16 @@ def get_onnx_output(model_file, input_file): input_node = onnx_model.graph.input[i] dims = [] elem_type = input_node.type.tensor_type.elem_type + print("elem_type: ", elem_type) for dim in input_node.type.tensor_type.shape.dim: if dim.dim_value == 0: dims.append(1) else: dims.append(dim.dim_value) - if elem_type == 7: + if elem_type == 6: + inputs_onnx = np.array(inputs['input_data'][i]).astype( + np.int32).reshape(dims) + elif elem_type == 7: inputs_onnx = np.array(inputs['input_data'][i]).astype( np.int64).reshape(dims) elif elem_type == 9: @@ -51,8 +55,7 @@ def get_onnx_output(model_file, input_file): onnx_session = onnxruntime.InferenceSession(model_file) onnx_output = onnx_session.run(None, onnx_input) except Exception as e: - print("Error in ONNX runtime: ", e) - print("using inputs[output_data]") + print("error: ", e) onnx_output = inputs['output_data'] print("onnx ", onnx_output) return onnx_output[0]
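
Note on the `tensor::ops` rounding changes above: `ceil`, `floor`, `round`, and `round_half_to_even` now multiply the rounded real value back by `scale`, so their outputs stay at the input's fixed-point scale instead of dropping to scale 0 (which is also why `lookup.rs` no longer pins those ops' `out_scale` to 0). A minimal sketch of that behaviour, using a hypothetical `round_to_scale` helper rather than the ezkl functions:

```rust
// Hedged sketch of the rescaled rounding: divide by scale to get back to the
// real-number domain, round, then re-quantize by multiplying by scale again.
fn round_to_scale(a: &[i128], scale: f64) -> Vec<i128> {
    a.iter()
        .map(|&a_i| {
            let kix = a_i as f64 / scale; // quantized value -> real value
            (kix.round() * scale) as i128 // round, then back to the same scale
        })
        .collect()
}

fn main() {
    // At scale 2.0 the values [1, 2, 3, 4, 5, 6] represent [0.5, 1.0, 1.5, 2.0, 2.5, 3.0];
    // rounding and re-quantizing yields [2, 2, 4, 4, 6, 6], consistent with the
    // updated doc-test expectations above.
    assert_eq!(round_to_scale(&[1, 2, 3, 4, 5, 6], 2.0), vec![2, 2, 4, 4, 6, 6]);
}
```
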