diff --git a/src/encoder.rs b/src/encoder.rs
index 94d1edf6e9..29171df6ba 100644
--- a/src/encoder.rs
+++ b/src/encoder.rs
@@ -508,6 +508,7 @@ pub struct FrameInvariants<T: Pixel> {
   pub ac_delta_q: [i8; 3],
   pub lambda: f64,
   pub me_lambda: f64,
+  pub dist_scale: [f64; 3],
   pub me_range_scale: u8,
   pub use_tx_domain_distortion: bool,
   pub use_tx_domain_rate: bool,
@@ -684,6 +685,7 @@ impl<T: Pixel> FrameInvariants<T> {
       dc_delta_q: [0; 3],
       ac_delta_q: [0; 3],
       lambda: 0.0,
+      dist_scale: [1.0; 3],
       me_lambda: 0.0,
       me_range_scale: 1,
       use_tx_domain_distortion,
@@ -866,6 +868,7 @@ impl<T: Pixel> FrameInvariants<T> {
     self.lambda =
       qps.lambda * ((1 << (2 * (self.sequence.bit_depth - 8))) as f64);
     self.me_lambda = self.lambda.sqrt();
+    self.dist_scale = qps.dist_scale;
 
     let q = bexp64(qps.log_target_q as i64 + q57(QSCALE)) as f32;
     /* These coefficients were trained on libaom. */
@@ -1083,7 +1086,7 @@ pub fn encode_tx_block<T: Pixel>(
   alpha: i16,
   rdo_type: RDOType,
   need_recon_pixel: bool,
-) -> (bool, i64) {
+) -> (bool, ScaledDistortion) {
   let qidx = get_qidx(fi, ts, cw, tile_bo);
   assert_ne!(qidx, 0); // lossless is not yet supported
   let PlaneConfig { xdec, ydec, .. } = ts.input.planes[p].cfg;
@@ -1126,7 +1129,7 @@ pub fn encode_tx_block<T: Pixel>(
   }
 
   if skip {
-    return (false, -1);
+    return (false, ScaledDistortion::zero());
   }
 
   let mut residual_storage: AlignedArray<[i16; 64 * 64]> =
@@ -1192,7 +1195,7 @@ pub fn encode_tx_block<T: Pixel>(
     fi.ac_delta_q[p],
   );
 
-  let mut tx_dist: i64 = -1;
+  let mut tx_dist: u64 = 0;
 
   if !fi.use_tx_domain_distortion || need_recon_pixel {
     inverse_transform_add(
@@ -1212,27 +1215,24 @@ pub fn encode_tx_block<T: Pixel>(
         let c = *a as i32 - *b as i32;
         (c * c) as u64
       })
-      .sum::<u64>() as i64;
+      .sum::<u64>();
 
     let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size));
     let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1);
     tx_dist = (tx_dist + tx_dist_scale_rounding_offset) >> tx_dist_scale_bits;
   }
   if fi.config.train_rdo {
-    ts.rdo.add_rate(
-      fi.base_q_idx,
-      tx_size,
-      tx_dist as u64,
-      cost_coeffs as u64,
-    );
+    ts.rdo.add_rate(fi.base_q_idx, tx_size, tx_dist, cost_coeffs as u64);
   }
 
   if rdo_type == RDOType::TxDistEstRate {
     // look up rate and distortion in table
-    let estimated_rate = estimate_rate(fi.base_q_idx, tx_size, tx_dist as u64);
+    let estimated_rate = estimate_rate(fi.base_q_idx, tx_size, tx_dist);
     w.add_bits_frac(estimated_rate as u32);
   }
-  (has_coeff, tx_dist)
+  let bias =
+    compute_distortion_bias(fi, ts.to_frame_block_offset(tile_bo), bsize);
+  (has_coeff, RawDistortion::new(tx_dist) * bias * fi.dist_scale[p])
 }
 
 pub fn motion_compensate<T: Pixel>(
@@ -1486,7 +1486,7 @@ pub fn encode_block_post_cdef<T: Pixel>(
   skip: bool, cfl: CFLParams, tx_size: TxSize, tx_type: TxType,
   mode_context: usize, mv_stack: &[CandidateMV], rdo_type: RDOType,
   need_recon_pixel: bool, record_stats: bool,
-) -> (bool, i64) {
+) -> (bool, ScaledDistortion) {
   let is_inter = !luma_mode.is_intra();
   if is_inter {
     assert!(luma_mode == chroma_mode);
@@ -1759,7 +1759,7 @@ pub fn write_tx_blocks<T: Pixel>(
   chroma_mode: PredictionMode, tile_bo: TileBlockOffset, bsize: BlockSize,
   tx_size: TxSize, tx_type: TxType, skip: bool, cfl: CFLParams,
   luma_only: bool, rdo_type: RDOType, need_recon_pixel: bool,
-) -> (bool, i64) {
+) -> (bool, ScaledDistortion) {
   let bw = bsize.width_mi() / tx_size.width_mi();
   let bh = bsize.height_mi() / tx_size.height_mi();
   let qidx = get_qidx(fi, ts, cw, tile_bo);
@@ -1767,7 +1767,7 @@ pub fn write_tx_blocks<T: Pixel>(
   let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
   let mut ac: AlignedArray<[i16; 32 * 32]> = AlignedArray::uninitialized();
   let mut partition_has_coeff: bool = false;
-  let mut tx_dist: i64 = 0;
+  let mut tx_dist = ScaledDistortion::zero();
   let do_chroma = has_chroma(tile_bo, bsize, xdec, ydec);
 
   ts.qc.update(
@@ -1809,9 +1809,6 @@ pub fn write_tx_blocks<T: Pixel>(
         need_recon_pixel,
       );
       partition_has_coeff |= has_coeff;
-      assert!(
-        !fi.use_tx_domain_distortion || need_recon_pixel || skip || dist >= 0
-      );
       tx_dist += dist;
     }
   }
@@ -1895,12 +1892,6 @@ pub fn write_tx_blocks<T: Pixel>(
             need_recon_pixel,
           );
           partition_has_coeff |= has_coeff;
-          assert!(
-            !fi.use_tx_domain_distortion
-              || need_recon_pixel
-              || skip
-              || dist >= 0
-          );
           tx_dist += dist;
         }
       }
@@ -1918,9 +1909,9 @@ pub fn write_tx_tree<T: Pixel>(
   tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize,
   tx_type: TxType, skip: bool, luma_only: bool, rdo_type: RDOType,
   need_recon_pixel: bool,
-) -> (bool, i64) {
+) -> (bool, ScaledDistortion) {
   if skip {
-    return (false, -1);
+    return (false, ScaledDistortion::zero());
   }
   let bw = bsize.width_mi() / tx_size.width_mi();
   let bh = bsize.height_mi() / tx_size.height_mi();
@@ -1928,7 +1919,6 @@ pub fn write_tx_tree<T: Pixel>(
 
   let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
   let ac = &[0i16; 0];
-  let mut tx_dist: i64 = 0;
   let mut partition_has_coeff: bool = false;
 
   ts.qc.update(
@@ -1963,10 +1953,7 @@ pub fn write_tx_tree<T: Pixel>(
     need_recon_pixel,
   );
   partition_has_coeff |= has_coeff;
-  assert!(
-    !fi.use_tx_domain_distortion || need_recon_pixel || skip || dist >= 0
-  );
-  tx_dist += dist;
+  let mut tx_dist = dist;
 
   if luma_only {
     return (partition_has_coeff, tx_dist);
@@ -2039,12 +2026,6 @@ pub fn write_tx_tree<T: Pixel>(
             need_recon_pixel,
           );
           partition_has_coeff |= has_coeff;
-          assert!(
-            !fi.use_tx_domain_distortion
-              || need_recon_pixel
-              || skip
-              || dist >= 0
-          );
           tx_dist += dist;
         }
       }
@@ -2171,7 +2152,7 @@ fn encode_partition_bottomup<T: Pixel, W: Writer>(
       let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
       let tell = w.tell_frac();
       cw.write_partition(w, tile_bo, PartitionType::PARTITION_NONE, bsize);
-      compute_rd_cost(fi, w.tell_frac() - tell, 0)
+      compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
     } else {
       0.0
     };
@@ -2271,7 +2252,8 @@ fn encode_partition_bottomup<T: Pixel, W: Writer>(
           if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
         let tell = w.tell_frac();
         cw.write_partition(w, tile_bo, partition, bsize);
-        rd_cost = compute_rd_cost(fi, w.tell_frac() - tell, 0);
+        rd_cost =
+          compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero());
       }
 
       let four_partitions = [
diff --git a/src/rate.rs b/src/rate.rs
index dd8a9c6b1f..6c2fec5ad9 100644
--- a/src/rate.rs
+++ b/src/rate.rs
@@ -552,6 +552,7 @@ pub struct QuantizerParameters {
   pub dc_qi: [u8; 3],
   pub ac_qi: [u8; 3],
   pub lambda: f64,
+  pub dist_scale: [f64; 3],
 }
 
 const Q57_SQUARE_EXP_SCALE: f64 =
@@ -573,8 +574,16 @@ impl QuantizerParameters {
     let scale = q57(QSCALE + bit_depth as i32 - 8);
     let quantizer = bexp64(log_target_q + scale);
     let (offset_u, offset_v) = chroma_offset(log_target_q);
-    let quantizer_u = bexp64(log_target_q + offset_u + scale);
-    let quantizer_v = bexp64(log_target_q + offset_v + scale);
+    let log_target_q_u = log_target_q + offset_u;
+    let log_target_q_v = log_target_q + offset_v;
+    let quantizer_u = bexp64(log_target_q_u + scale);
+    let quantizer_v = bexp64(log_target_q_v + scale);
+    let lambda = (::std::f64::consts::LN_2 / 6.0)
+      * ((log_target_q as f64) * Q57_SQUARE_EXP_SCALE).exp();
+    let lambda_u = (::std::f64::consts::LN_2 / 6.0)
+      * ((log_target_q_u as f64) * Q57_SQUARE_EXP_SCALE).exp();
+    let lambda_v = (::std::f64::consts::LN_2 / 6.0)
+      * ((log_target_q_v as f64) * Q57_SQUARE_EXP_SCALE).exp();
     QuantizerParameters {
       log_base_q,
       log_target_q,
@@ -589,8 +598,8 @@ impl QuantizerParameters {
         select_ac_qi(quantizer_u, bit_depth).max(1),
         select_ac_qi(quantizer_v, bit_depth).max(1),
       ],
-      lambda: (::std::f64::consts::LN_2 / 6.0)
-        * ((log_target_q as f64) * Q57_SQUARE_EXP_SCALE).exp(),
+      lambda,
+      dist_scale: [1.0, lambda / lambda_u, lambda / lambda_v],
     }
   }
 }
diff --git a/src/rdo.rs b/src/rdo.rs
index 566531b509..2a30824226 100644
--- a/src/rdo.rs
+++ b/src/rdo.rs
@@ -197,7 +197,7 @@ pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
 #[allow(unused)]
 fn cdef_dist_wxh_8x8<T: Pixel>(
   src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, bit_depth: usize,
-) -> u64 {
+) -> RawDistortion {
   debug_assert!(src1.plane_cfg.xdec == 0);
   debug_assert!(src1.plane_cfg.ydec == 0);
   debug_assert!(src2.plane_cfg.xdec == 0);
@@ -228,14 +228,14 @@ fn cdef_dist_wxh_8x8<T: Pixel>(
   let ssim_boost = (4033_f64 / 16_384_f64)
     * (svar + dvar + (16_384 << (2 * coeff_shift)) as f64)
     / f64::sqrt((16_265_089u64 << (4 * coeff_shift)) as f64 + svar * dvar);
-  (sse * ssim_boost + 0.5_f64) as u64
+  RawDistortion::new((sse * ssim_boost + 0.5_f64) as u64)
 }
 
 #[allow(unused)]
 fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
   src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
   bit_depth: usize, compute_bias: F,
-) -> u64 {
+) -> Distortion {
   assert!(w & 0x7 == 0);
   assert!(h & 0x7 == 0);
   debug_assert!(src1.plane_cfg.xdec == 0);
@@ -243,7 +243,7 @@ fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
   debug_assert!(src2.plane_cfg.xdec == 0);
   debug_assert!(src2.plane_cfg.ydec == 0);
 
-  let mut sum: u64 = 0;
+  let mut sum = Distortion::zero();
   for j in 0isize..h as isize / 8 {
     for i in 0isize..w as isize / 8 {
       let area = Area::StartingAt { x: i * 8, y: j * 8 };
@@ -255,8 +255,7 @@ fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
 
       // cdef is always called on non-subsampled planes, so BLOCK_8X8 is
       // correct here.
-      let bias = compute_bias(area, BlockSize::BLOCK_8X8);
-      sum += (value as f64 * bias) as u64;
+      sum += value * compute_bias(area, BlockSize::BLOCK_8X8);
     }
   }
   sum
@@ -266,7 +265,7 @@ fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
 pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
   src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
   compute_bias: F,
-) -> u64 {
+) -> Distortion {
   assert!(w & (MI_SIZE - 1) == 0);
   assert!(h & (MI_SIZE - 1) == 0);
 
@@ -278,7 +277,7 @@ pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
   let block_w = imp_block_w >> src1.plane_cfg.xdec;
   let block_h = imp_block_h >> src1.plane_cfg.ydec;
 
-  let mut sse: u64 = 0;
+  let mut sse = Distortion::zero();
   for block_y in 0..h / block_h {
     for block_x in 0..w / block_w {
       let mut value = 0;
@@ -308,7 +307,7 @@ pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
         },
         imp_bsize,
       );
-      sse += (value as f64 * bias) as u64;
+      sse += RawDistortion::new(value) * bias;
     }
   }
   sse
@@ -318,7 +317,7 @@ pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> f64>(
 fn compute_distortion<T: Pixel>(
   fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
   is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool,
-) -> u64 {
+) -> ScaledDistortion {
   let area = Area::BlockStartingAt { bo: tile_bo.0 };
   let input_region = ts.input_tile.planes[0].subregion(area);
   let rec_region = ts.rec.planes[0].subregion(area);
@@ -352,7 +351,7 @@ fn compute_distortion<T: Pixel>(
         )
       },
     ),
-  };
+  } * fi.dist_scale[0];
 
   if !luma_only {
     let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
@@ -383,7 +382,7 @@ fn compute_distortion<T: Pixel>(
               bsize,
             )
           },
-        );
+        ) * fi.dist_scale[p];
       }
     };
   }
@@ -393,9 +392,9 @@ fn compute_distortion<T: Pixel>(
 // Compute the transform-domain distortion for an encode
 fn compute_tx_distortion<T: Pixel>(
   fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
-  is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: i64, skip: bool,
-  luma_only: bool,
-) -> u64 {
+  is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion,
+  skip: bool, luma_only: bool,
+) -> ScaledDistortion {
   assert!(fi.config.tune == Tune::Psnr);
   let area = Area::BlockStartingAt { bo: tile_bo.0 };
   let input_region = ts.input_tile.planes[0].subregion(area);
@@ -413,12 +412,9 @@ fn compute_tx_distortion<T: Pixel>(
           bsize,
         )
       },
-    )
+    ) * fi.dist_scale[0]
   } else {
-    assert!(tx_dist >= 0);
-    let bias =
-      compute_distortion_bias(fi, ts.to_frame_block_offset(tile_bo), bsize);
-    (tx_dist as f64 * bias) as u64
+    tx_dist
   };
 
   if !luma_only && skip {
@@ -450,7 +446,7 @@ fn compute_tx_distortion<T: Pixel>(
               bsize,
             )
           },
-        );
+        ) * fi.dist_scale[p];
       }
     }
   }
@@ -475,7 +471,7 @@ fn compute_mean_importance<T: Pixel>(
   total_importance / (bsize.width_mi() * bsize.height_mi()) as f32
 }
 
-fn compute_distortion_bias<T: Pixel>(
+pub fn compute_distortion_bias<T: Pixel>(
   fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
 ) -> f64 {
   let mean_importance = compute_mean_importance(fi, frame_bo, bsize);
@@ -490,11 +486,64 @@ fn compute_distortion_bias<T: Pixel>(
   bias
 }
 
+#[repr(transparent)]
+pub struct RawDistortion(u64);
+
+#[repr(transparent)]
+pub struct Distortion(u64);
+
+#[repr(transparent)]
+pub struct ScaledDistortion(u64);
+
+impl RawDistortion {
+  pub fn new(dist: u64) -> Self {
+    Self(dist)
+  }
+}
+
+impl std::ops::Mul<f64> for RawDistortion {
+  type Output = Distortion;
+  fn mul(self, rhs: f64) -> Distortion {
+    Distortion((self.0 as f64 * rhs) as u64)
+  }
+}
+
+impl Distortion {
+  pub fn zero() -> Self {
+    Self(0)
+  }
+}
+
+impl std::ops::Mul<f64> for Distortion {
+  type Output = ScaledDistortion;
+  fn mul(self, rhs: f64) -> ScaledDistortion {
+    ScaledDistortion((self.0 as f64 * rhs) as u64)
+  }
+}
+
+impl std::ops::AddAssign for Distortion {
+  fn add_assign(&mut self, other: Self) {
+    self.0 += other.0;
+  }
+}
+
+impl ScaledDistortion {
+  pub fn zero() -> Self {
+    Self(0)
+  }
+}
+
+impl std::ops::AddAssign for ScaledDistortion {
+  fn add_assign(&mut self, other: Self) {
+    self.0 += other.0;
+  }
+}
+
 pub fn compute_rd_cost<T: Pixel>(
-  fi: &FrameInvariants<T>, rate: u32, distortion: u64,
+  fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion,
 ) -> f64 {
   let rate_in_bits = (rate as f64) / ((1 << OD_BITRES) as f64);
-  distortion as f64 + fi.lambda * rate_in_bits
+  distortion.0 as f64 + fi.lambda * rate_in_bits
 }
 
 pub fn rdo_tx_size_type<T: Pixel>(
@@ -709,6 +758,7 @@ fn luma_chroma_mode_rdo<T: Pixel>(
         } else {
           compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false)
         };
+        let is_zero_dist = distortion.0 == 0;
         let rd = compute_rd_cost(fi, rate, distortion);
         if rd < best.rd {
           //if rd < best.rd || luma_mode == PredictionMode::NEW_NEWMV {
@@ -722,7 +772,7 @@ fn luma_chroma_mode_rdo<T: Pixel>(
           best.tx_size = tx_size;
           best.tx_type = tx_type;
           best.sidx = sidx;
-          zero_distortion = distortion == 0;
+          zero_distortion = is_zero_dist;
         }
 
         cw.rollback(cw_checkpoint);
@@ -1260,6 +1310,7 @@ pub fn rdo_cfl_alpha<T: Pixel>(
           uv_tx_size.height(),
           |_, _| 1., // We're not doing RDO here.
         )
+        .0
       };
       let mut best = (alpha_cost(0), 0);
       let mut count = 2;
@@ -1545,7 +1596,11 @@ pub fn rdo_partition_decision<T: Pixel, W: Writer>(
             if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
           let tell = w.tell_frac();
           cw.write_partition(w, tile_bo, partition, bsize);
-          cost = compute_rd_cost(fi, w.tell_frac() - tell, 0);
+          cost = compute_rd_cost(
+            fi,
+            w.tell_frac() - tell,
+            ScaledDistortion::zero(),
+          );
         }
         let mut rd_cost_sum = 0.0;
 
@@ -1624,14 +1679,14 @@ fn rdo_loop_plane_error<T: Pixel>(
   sbo: TileSuperBlockOffset, tile_sbo: TileSuperBlockOffset, sb_w: usize,
   sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
   blocks: &TileBlocks<'_>, test: &Frame<T>, pli: usize,
-) -> u64 {
+) -> ScaledDistortion {
   let sb_w_blocks =
     if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w;
   let sb_h_blocks =
     if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h;
   // Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
   // accumulating in-frame and unpadded
-  let mut err: u64 = 0;
+  let mut err = Distortion::zero();
   for by in 0..sb_h_blocks {
     for bx in 0..sb_w_blocks {
       let bo = tile_sbo.block_offset(bx << 1, by << 1);
@@ -1650,24 +1705,21 @@ fn rdo_loop_plane_error<T: Pixel>(
         let test_region =
           test_plane.region(Area::BlockStartingAt { bo: test_bo.0 });
 
-        let value = if pli == 0 {
-          cdef_dist_wxh_8x8(&in_region, &test_region, fi.sequence.bit_depth)
-        } else {
-          // The closure returns 1. because we bias the distortion right
-          // below.
-          sse_wxh(&in_region, &test_region, 8 >> xdec, 8 >> ydec, |_, _| 1.)
-        };
-
         let bias = compute_distortion_bias(
           fi,
           ts.to_frame_block_offset(bo),
           BlockSize::BLOCK_8X8,
         );
-        err += (value as f64 * bias) as u64;
+        err += if pli == 0 {
+          cdef_dist_wxh_8x8(&in_region, &test_region, fi.sequence.bit_depth)
+            * bias
+        } else {
+          sse_wxh(&in_region, &test_region, 8 >> xdec, 8 >> ydec, |_, _| bias)
+        };
       }
     }
   }
-  err
+  err * fi.dist_scale[pli]
 }
 
 // Passed in a superblock offset representing the upper left corner of
@@ -1790,7 +1842,7 @@ pub fn rdo_loop_decision<T: Pixel>(
           let mut best_new_index = -1i8;
 
           for cdef_index in 0..(1 << fi.cdef_bits) {
-            let mut err = 0;
+            let mut err = ScaledDistortion::zero();
             let mut rate = 0;
             cdef_filter_superblock(
               fi,