From 03386693e0928bd5f924da83aba98f0dc2cc9d11 Mon Sep 17 00:00:00 2001
From: Mike Oliphant <oliphant@nostatic.org>
Date: Mon, 2 Oct 2023 09:04:12 -0700
Subject: [PATCH 1/5] Switched to mono inputs/outputs. Removed input/output
 gain. Removed input buffer copy.

---
 NAM/convnet.cpp |  5 ++---
 NAM/dsp.cpp     | 52 ++++++++++++++++++-------------------------------
 NAM/dsp.h       | 24 ++++++++++-------------
 NAM/lstm.cpp    |  4 ++--
 NAM/wavenet.cpp | 13 ++++++-------
 5 files changed, 39 insertions(+), 59 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index 2f092cf..9e89cae 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -124,8 +124,7 @@ void convnet::ConvNet::_process_core_()
   this->_update_buffers_();
   // Main computation!
   const long i_start = this->_input_buffer_offset;
-  const long num_frames = this->_input_post_gain.size();
-  const long i_end = i_start + num_frames;
+  const long i_end = i_start + _num_frames;
   // TODO one unnecessary copy :/ #speed
   for (auto i = i_start; i < i_end; i++)
     this->_block_vals[0](0, i) = this->_input_buffer[i];
@@ -134,7 +133,7 @@ void convnet::ConvNet::_process_core_()
   // TODO clean up this allocation
   this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
   // Copy to required output array (TODO tighten this up)
-  for (int s = 0; s < num_frames; s++)
+  for (int s = 0; s < _num_frames; s++)
     this->_core_dsp_output[s] = this->_head_output(s);
   // Apply anti-pop
   this->_anti_pop_();
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 5bc076d..8ba1d44 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -32,15 +32,15 @@ DSP::DSP(const double loudness, const double expected_sample_rate)
 {
 }
 
-void DSP::process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames,
-                  const double input_gain, const double output_gain,
-                  const std::unordered_map<std::string, double>& params)
+void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames, const std::unordered_map<std::string, double>& params)
 {
+  this->_input_samples = input;
+  this->_num_frames = num_frames;
+
   this->_get_params_(params);
-  this->_apply_input_level_(inputs, num_channels, num_frames, input_gain);
   this->_ensure_core_dsp_output_ready_();
   this->_process_core_();
-  this->_apply_output_level_(outputs, num_channels, num_frames, output_gain);
+  this->_apply_output_level_(output, _num_frames);
 }
 
 void DSP::finalize_(const int num_frames) {}
@@ -60,38 +60,25 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
   }
 }
 
-void DSP::_apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain)
-{
-  // Must match exactly; we're going to use the size of _input_post_gain later
-  // for num_frames.
-  if ((int)this->_input_post_gain.size() != num_frames)
-    this->_input_post_gain.resize(num_frames);
-  // MONO ONLY
-  const int channel = 0;
-  for (int i = 0; i < num_frames; i++)
-    this->_input_post_gain[i] = float(gain * inputs[channel][i]);
-}
-
 void DSP::_ensure_core_dsp_output_ready_()
 {
-  if (this->_core_dsp_output.size() < this->_input_post_gain.size())
-    this->_core_dsp_output.resize(this->_input_post_gain.size());
+  if (this->_core_dsp_output.size() < _num_frames)
+    this->_core_dsp_output.resize(_num_frames);
 }
 
 void DSP::_process_core_()
 {
   // Default implementation is the null operation
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
-    this->_core_dsp_output[i] = this->_input_post_gain[i];
+  for (size_t i = 0; i < _num_frames; i++)
+    this->_core_dsp_output[i] = _input_samples[i];
 }
 
-void DSP::_apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain)
+void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
 {
   const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
-  const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain;
-  for (int c = 0; c < num_channels; c++)
-    for (int s = 0; s < num_frames; s++)
-      outputs[c][s] = (NAM_SAMPLE)(finalGain * this->_core_dsp_output[s]);
+  const double finalGain = this->mNormalizeOutputLoudness ? loudnessGain : 1.0;
+  for (int s = 0; s < num_frames; s++)
+    output[s] = (NAM_SAMPLE)(finalGain * this->_core_dsp_output[s]);
 }
 
 // Buffer =====================================================================
@@ -122,11 +109,10 @@ void Buffer::_set_receptive_field(const int new_receptive_field, const int input
 
 void Buffer::_update_buffers_()
 {
-  const long int num_frames = this->_input_post_gain.size();
   // Make sure that the buffer is big enough for the receptive field and the
   // frames needed!
   {
-    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
+    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_frames;
     if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
@@ -139,13 +125,13 @@ void Buffer::_update_buffers_()
 
   // If we'd run off the end of the input buffer, then we need to move the data
   // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
+  if (this->_input_buffer_offset + _num_frames > (long)this->_input_buffer.size())
     this->_rewind_buffers_();
   // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
-    this->_input_buffer[i] = this->_input_post_gain[j];
+  for (long i = this->_input_buffer_offset, j = 0; j < _num_frames; i++, j++)
+    this->_input_buffer[i] = _input_samples[j];
   // And resize the output buffer:
-  this->_output_buffer.resize(num_frames);
+  this->_output_buffer.resize(_num_frames);
   std::fill (this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
 }
 
@@ -203,7 +189,7 @@ void Linear::_process_core_()
   this->Buffer::_update_buffers_();
 
   // Main computation!
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
+  for (size_t i = 0; i < _num_frames; i++)
   {
     const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
     auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
diff --git a/NAM/dsp.h b/NAM/dsp.h
index 776ce6a..46f016e 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -48,17 +48,15 @@ class DSP
   DSP(const double expected_sample_rate = -1.0);
   DSP(const double loudness, const double expected_sample_rate = -1.0);
   virtual ~DSP() = default;
-  // process() does all of the processing requried to take `inputs` array and
-  // fill in the required values on `outputs`.
+  // process() does all of the processing requried to take `input` array and
+  // fill in the required values on `output`.
   // To do this:
   // 1. The parameters from the plugin (I/O levels and any other parametric
   //    inputs) are gotten.
-  // 2. The input level is applied
-  // 3. The core DSP algorithm is run (This is what should probably be
+  // 2. The core DSP algorithm is run (This is what should probably be
   //    overridden in subclasses).
-  // 4. The output level is applied and the result stored to `output`.
-  virtual void process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames,
-                       const double input_gain, const double output_gain,
+  // 3. The output level is applied and the result stored to `output`.
+  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames,
                        const std::unordered_map<std::string, double>& params);
   // Anything to take care of before next buffer comes in.
   // For example:
@@ -82,8 +80,10 @@ class DSP
   std::unordered_map<std::string, double> _params;
   // If the params have changed since the last buffer was processed:
   bool _stale_params;
-  // Where to store the samples after applying input gain
-  std::vector<float> _input_post_gain;
+  // Input sample buffer
+  float* _input_samples;
+  // Number of samples in the input buffer
+  int _num_frames;
   // Location for the output of the core DSP algorithm.
   std::vector<float> _core_dsp_output;
 
@@ -94,10 +94,6 @@ class DSP
   // (TODO use "listener" approach)
   void _get_params_(const std::unordered_map<std::string, double>& input_params);
 
-  // Apply the input gain
-  // Result populates this->_input_post_gain
-  void _apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain);
-
   // i.e. ensure the size is correct.
   void _ensure_core_dsp_output_ready_();
 
@@ -107,7 +103,7 @@ class DSP
   virtual void _process_core_();
 
   // Copy this->_core_dsp_output to output and apply the output volume
-  void _apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain);
+  void _apply_output_level_(NAM_SAMPLE* output, const int num_frames);
 };
 
 // Class where an input buffer is kept so that long-time effects can be
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 2327367..52367b7 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -111,8 +111,8 @@ void lstm::LSTM::_process_core_()
     this->_stale_params = false;
   }
   // Process samples, placing results in the required output location
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
-    this->_core_dsp_output[i] = this->_process_sample(this->_input_post_gain[i]);
+  for (size_t i = 0; i < _num_frames; i++)
+    this->_core_dsp_output[i] = this->_process_sample(_input_samples[i]);
 }
 
 float lstm::LSTM::_process_sample(const float x)
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index be7f352..a2603d9 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -282,7 +282,7 @@ wavenet::WaveNet::WaveNet(const double loudness, const std::vector<wavenet::Laye
   // pre-warm the model over the size of the receptive field
   for (long i = 0; i < receptive_field; i++)
   {
-    this->process(&sample_ptr, &sample_ptr, 1, 1, 1.0, 1.0, param_dict);
+    this->process(sample_ptr, sample_ptr, 1, param_dict);
     this->finalize_(1);
     sample = 0;
   }
@@ -337,15 +337,14 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
 
 void wavenet::WaveNet::_process_core_()
 {
-  const long num_frames = this->_input_post_gain.size();
-  this->_set_num_frames_(num_frames);
-  this->_prepare_for_frames_(num_frames);
+  this->_set_num_frames_(_num_frames);
+  this->_prepare_for_frames_(_num_frames);
 
   // Fill into condition array:
   // Clumsy...
-  for (int j = 0; j < num_frames; j++)
+  for (int j = 0; j < _num_frames; j++)
   {
-    this->_condition(0, j) = this->_input_post_gain[j];
+    this->_condition(0, j) = _input_samples[j];
     if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
                              // compiler optimize this.
       for (size_t i = 0; i < this->_param_names.size(); i++)
@@ -369,7 +368,7 @@ void wavenet::WaveNet::_process_core_()
 
   const long final_head_array = this->_head_arrays.size() - 1;
   assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < num_frames; s++)
+  for (int s = 0; s < _num_frames; s++)
   {
     float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
     this->_core_dsp_output[s] = out;

From 7c1c4e9013dab910b32de9db3469776293ff906b Mon Sep 17 00:00:00 2001
From: Mike Oliphant <oliphant@nostatic.org>
Date: Mon, 2 Oct 2023 09:30:13 -0700
Subject: [PATCH 2/5] Renamed _num_frames to _num_input_samples to avoid a name
 collision in WaveNet

---
 NAM/convnet.cpp |  4 ++--
 NAM/dsp.cpp     | 21 +++++++++++----------
 NAM/dsp.h       |  2 +-
 NAM/lstm.cpp    |  2 +-
 NAM/wavenet.cpp |  8 ++++----
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index 9e89cae..f78e07f 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -124,7 +124,7 @@ void convnet::ConvNet::_process_core_()
   this->_update_buffers_();
   // Main computation!
   const long i_start = this->_input_buffer_offset;
-  const long i_end = i_start + _num_frames;
+  const long i_end = i_start + _num_input_samples;
   // TODO one unnecessary copy :/ #speed
   for (auto i = i_start; i < i_end; i++)
     this->_block_vals[0](0, i) = this->_input_buffer[i];
@@ -133,7 +133,7 @@ void convnet::ConvNet::_process_core_()
   // TODO clean up this allocation
   this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
   // Copy to required output array (TODO tighten this up)
-  for (int s = 0; s < _num_frames; s++)
+  for (int s = 0; s < _num_input_samples; s++)
     this->_core_dsp_output[s] = this->_head_output(s);
   // Apply anti-pop
   this->_anti_pop_();
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 8ba1d44..0b19d41 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -35,12 +35,12 @@ DSP::DSP(const double loudness, const double expected_sample_rate)
 void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames, const std::unordered_map<std::string, double>& params)
 {
   this->_input_samples = input;
-  this->_num_frames = num_frames;
+  this->_num_input_samples = num_frames;
 
   this->_get_params_(params);
   this->_ensure_core_dsp_output_ready_();
   this->_process_core_();
-  this->_apply_output_level_(output, _num_frames);
+  this->_apply_output_level_(output, _num_input_samples);
 }
 
 void DSP::finalize_(const int num_frames) {}
@@ -62,14 +62,14 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
 
 void DSP::_ensure_core_dsp_output_ready_()
 {
-  if (this->_core_dsp_output.size() < _num_frames)
-    this->_core_dsp_output.resize(_num_frames);
+  if (this->_core_dsp_output.size() < _num_input_samples)
+    this->_core_dsp_output.resize(_num_input_samples);
 }
 
 void DSP::_process_core_()
 {
   // Default implementation is the null operation
-  for (size_t i = 0; i < _num_frames; i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
     this->_core_dsp_output[i] = _input_samples[i];
 }
 
@@ -112,7 +112,8 @@ void Buffer::_update_buffers_()
   // Make sure that the buffer is big enough for the receptive field and the
   // frames needed!
   {
-    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_frames;
+    const long minimum_input_buffer_size =
+      (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_input_samples;
     if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
@@ -125,13 +126,13 @@ void Buffer::_update_buffers_()
 
   // If we'd run off the end of the input buffer, then we need to move the data
   // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + _num_frames > (long)this->_input_buffer.size())
+  if (this->_input_buffer_offset + _num_input_samples > (long)this->_input_buffer.size())
     this->_rewind_buffers_();
   // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < _num_frames; i++, j++)
+  for (long i = this->_input_buffer_offset, j = 0; j < _num_input_samples; i++, j++)
     this->_input_buffer[i] = _input_samples[j];
   // And resize the output buffer:
-  this->_output_buffer.resize(_num_frames);
+  this->_output_buffer.resize(_num_input_samples);
   std::fill (this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
 }
 
@@ -189,7 +190,7 @@ void Linear::_process_core_()
   this->Buffer::_update_buffers_();
 
   // Main computation!
-  for (size_t i = 0; i < _num_frames; i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
   {
     const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
     auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
diff --git a/NAM/dsp.h b/NAM/dsp.h
index 46f016e..e0c7e6c 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -83,7 +83,7 @@ class DSP
   // Input sample buffer
   float* _input_samples;
   // Number of samples in the input buffer
-  int _num_frames;
+  int _num_input_samples;
   // Location for the output of the core DSP algorithm.
   std::vector<float> _core_dsp_output;
 
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 52367b7..83481b0 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -111,7 +111,7 @@ void lstm::LSTM::_process_core_()
     this->_stale_params = false;
   }
   // Process samples, placing results in the required output location
-  for (size_t i = 0; i < _num_frames; i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
     this->_core_dsp_output[i] = this->_process_sample(_input_samples[i]);
 }
 
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index a2603d9..477f1cc 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -337,12 +337,12 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
 
 void wavenet::WaveNet::_process_core_()
 {
-  this->_set_num_frames_(_num_frames);
-  this->_prepare_for_frames_(_num_frames);
+  this->_set_num_frames_(_num_input_samples);
+  this->_prepare_for_frames_(_num_input_samples);
 
   // Fill into condition array:
   // Clumsy...
-  for (int j = 0; j < _num_frames; j++)
+  for (int j = 0; j < _num_input_samples; j++)
   {
     this->_condition(0, j) = _input_samples[j];
     if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
@@ -368,7 +368,7 @@ void wavenet::WaveNet::_process_core_()
 
   const long final_head_array = this->_head_arrays.size() - 1;
   assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < _num_frames; s++)
+  for (int s = 0; s < _num_input_samples; s++)
   {
     float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
     this->_core_dsp_output[s] = out;

From 5ce9463bb2904e200d2fd06bf15b348972771315 Mon Sep 17 00:00:00 2001
From: Mike Oliphant <oliphant@nostatic.org>
Date: Mon, 2 Oct 2023 09:34:44 -0700
Subject: [PATCH 3/5] Removed params argument from DSP::process()

---
 NAM/dsp.cpp     | 3 +--
 NAM/dsp.h       | 9 +++------
 NAM/wavenet.cpp | 4 +---
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 0b19d41..17d8b7c 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -32,12 +32,11 @@ DSP::DSP(const double loudness, const double expected_sample_rate)
 {
 }
 
-void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames, const std::unordered_map<std::string, double>& params)
+void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
   this->_input_samples = input;
   this->_num_input_samples = num_frames;
 
-  this->_get_params_(params);
   this->_ensure_core_dsp_output_ready_();
   this->_process_core_();
   this->_apply_output_level_(output, _num_input_samples);
diff --git a/NAM/dsp.h b/NAM/dsp.h
index e0c7e6c..a7e24b5 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -51,13 +51,10 @@ class DSP
   // process() does all of the processing requried to take `input` array and
   // fill in the required values on `output`.
   // To do this:
-  // 1. The parameters from the plugin (I/O levels and any other parametric
-  //    inputs) are gotten.
-  // 2. The core DSP algorithm is run (This is what should probably be
+  // 1. The core DSP algorithm is run (This is what should probably be
   //    overridden in subclasses).
-  // 3. The output level is applied and the result stored to `output`.
-  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames,
-                       const std::unordered_map<std::string, double>& params);
+  // 2. The output level is applied and the result stored to `output`.
+  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames);
   // Anything to take care of before next buffer comes in.
   // For example:
   // * Move the buffer index forward
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 477f1cc..3c66d3d 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -277,12 +277,10 @@ wavenet::WaveNet::WaveNet(const double loudness, const std::vector<wavenet::Laye
   NAM_SAMPLE sample = 0;
   NAM_SAMPLE* sample_ptr = &sample;
 
-  std::unordered_map<std::string, double> param_dict = {};
-
   // pre-warm the model over the size of the receptive field
   for (long i = 0; i < receptive_field; i++)
   {
-    this->process(sample_ptr, sample_ptr, 1, param_dict);
+    this->process(sample_ptr, sample_ptr, 1);
     this->finalize_(1);
     sample = 0;
   }

From fc236d47b76866954b02b7dd7a0ba5ea075bca03 Mon Sep 17 00:00:00 2001
From: Mike Oliphant <oliphant@nostatic.org>
Date: Mon, 2 Oct 2023 09:41:32 -0700
Subject: [PATCH 4/5] Removed extra output buffer (_core_dsp_output); write
 directly to the caller's output buffer

---
 NAM/convnet.cpp | 6 +++---
 NAM/dsp.cpp     | 9 ++++-----
 NAM/dsp.h       | 4 ++--
 NAM/lstm.cpp    | 2 +-
 NAM/wavenet.cpp | 2 +-
 5 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index f78e07f..b9824d8 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -134,7 +134,7 @@ void convnet::ConvNet::_process_core_()
   this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
   // Copy to required output array (TODO tighten this up)
   for (int s = 0; s < _num_input_samples; s++)
-    this->_core_dsp_output[s] = this->_head_output(s);
+    this->_output_samples[s] = this->_head_output(s);
   // Apply anti-pop
   this->_anti_pop_();
 }
@@ -190,12 +190,12 @@ void convnet::ConvNet::_anti_pop_()
   if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
     return;
   const float slope = 1.0f / float(this->_anti_pop_ramp);
-  for (size_t i = 0; i < this->_core_dsp_output.size(); i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
   {
     if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
       break;
     const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0));
-    this->_core_dsp_output[i] *= gain;
+    this->_output_samples[i] *= gain;
     this->_anti_pop_countdown++;
   }
 }
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 17d8b7c..fef5714 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -35,6 +35,7 @@ DSP::DSP(const double loudness, const double expected_sample_rate)
 void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
   this->_input_samples = input;
+  this->_output_samples = output;
   this->_num_input_samples = num_frames;
 
   this->_ensure_core_dsp_output_ready_();
@@ -61,15 +62,13 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
 
 void DSP::_ensure_core_dsp_output_ready_()
 {
-  if (this->_core_dsp_output.size() < _num_input_samples)
-    this->_core_dsp_output.resize(_num_input_samples);
 }
 
 void DSP::_process_core_()
 {
   // Default implementation is the null operation
   for (size_t i = 0; i < _num_input_samples; i++)
-    this->_core_dsp_output[i] = _input_samples[i];
+    this->_output_samples[i] = _input_samples[i];
 }
 
 void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
@@ -77,7 +76,7 @@ void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
   const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
   const double finalGain = this->mNormalizeOutputLoudness ? loudnessGain : 1.0;
   for (int s = 0; s < num_frames; s++)
-    output[s] = (NAM_SAMPLE)(finalGain * this->_core_dsp_output[s]);
+    output[s] = (NAM_SAMPLE)(finalGain * _output_samples[s]);
 }
 
 // Buffer =====================================================================
@@ -193,7 +192,7 @@ void Linear::_process_core_()
   {
     const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
     auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
-    this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input);
+    this->_output_samples[i] = this->_bias + this->_weight.dot(input);
   }
 }
 
diff --git a/NAM/dsp.h b/NAM/dsp.h
index a7e24b5..4570cdb 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -79,10 +79,10 @@ class DSP
   bool _stale_params;
   // Input sample buffer
   float* _input_samples;
+  // Output sample buffer
+  float* _output_samples;
   // Number of samples in the input buffer
   int _num_input_samples;
-  // Location for the output of the core DSP algorithm.
-  std::vector<float> _core_dsp_output;
 
   // Methods
 
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 83481b0..b8f8994 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -112,7 +112,7 @@ void lstm::LSTM::_process_core_()
   }
   // Process samples, placing results in the required output location
   for (size_t i = 0; i < _num_input_samples; i++)
-    this->_core_dsp_output[i] = this->_process_sample(_input_samples[i]);
+    this->_output_samples[i] = this->_process_sample(_input_samples[i]);
 }
 
 float lstm::LSTM::_process_sample(const float x)
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 3c66d3d..b94c359 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -369,7 +369,7 @@ void wavenet::WaveNet::_process_core_()
   for (int s = 0; s < _num_input_samples; s++)
   {
     float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
-    this->_core_dsp_output[s] = out;
+    this->_output_samples[s] = out;
   }
 }
 

From e8e8af658352950b8a00b5836ede97cebcca92c7 Mon Sep 17 00:00:00 2001
From: Mike Oliphant <oliphant@nostatic.org>
Date: Mon, 2 Oct 2023 10:05:36 -0700
Subject: [PATCH 5/5] NAM_SAMPLE instead of float for input/output buffers.
 Updated process call for "benchmodel" tool.

---
 NAM/dsp.h            | 4 ++--
 tools/benchmodel.cpp | 7 +------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/NAM/dsp.h b/NAM/dsp.h
index 4570cdb..a007e7b 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -78,9 +78,9 @@ class DSP
   // If the params have changed since the last buffer was processed:
   bool _stale_params;
   // Input sample buffer
-  float* _input_samples;
+  NAM_SAMPLE* _input_samples;
   // Output sample buffer
-  float* _output_samples;
+  NAM_SAMPLE* _output_samples;
   // Number of samples in the input buffer
   int _num_input_samples;
 
diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
index 6d8698d..c66a4f0 100644
--- a/tools/benchmodel.cpp
+++ b/tools/benchmodel.cpp
@@ -12,14 +12,9 @@ using std::chrono::milliseconds;
 #define AUDIO_BUFFER_SIZE 64
 
 double buffer[AUDIO_BUFFER_SIZE];
-double* buffers[1];
 
 int main(int argc, char* argv[])
 {
-  std::unordered_map<std::string, double> mNAMParams = {};
-
-  buffers[0] = buffer;
-
   if (argc > 1)
   {
     const char* modelPath = argv[1];
@@ -50,7 +45,7 @@ int main(int argc, char* argv[])
 
     for (size_t i = 0; i < numBuffers; i++)
     {
-      model->process(buffers, buffers, 1, AUDIO_BUFFER_SIZE, 1.0, 1.0, mNAMParams);
+      model->process(buffer, buffer, AUDIO_BUFFER_SIZE);
       model->finalize_(AUDIO_BUFFER_SIZE);
     }