Remove _process_core(). Pass input/output/num_frames where needed (#80)

Input, output, and num_frames are now passed as arguments instead of being cached in member variables. Removed output normalization. Added functions for getting model normalization factor in dB and linear gain.
sdatkinson · Oct 15, 2023 · 855bf50 · 855bf50
1 parent 8904227
commit 855bf50
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 118 deletions.
diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
@@ -116,15 +116,15 @@ convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::
   this->_head = _Head(channels, it);
   if (it != params.end())
     throw std::runtime_error("Didn't touch all the params when initializing wavenet");
-  this->_reset_anti_pop_();
 }
 
-void convnet::ConvNet::_process_core_()
+void convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+
 {
-  this->_update_buffers_();
+  this->_update_buffers_(input, num_frames);
   // Main computation!
   const long i_start = this->_input_buffer_offset;
-  const long i_end = i_start + _num_input_samples;
+  const long i_end = i_start + num_frames;
   // TODO one unnecessary copy :/ #speed
   for (auto i = i_start; i < i_end; i++)
     this->_block_vals[0](0, i) = this->_input_buffer[i];
@@ -133,10 +133,8 @@ void convnet::ConvNet::_process_core_()
   // TODO clean up this allocation
   this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
   // Copy to required output array (TODO tighten this up)
-  for (int s = 0; s < _num_input_samples; s++)
-    this->_output_samples[s] = this->_head_output(s);
-  // Apply anti-pop
-  this->_anti_pop_();
+  for (int s = 0; s < num_frames; s++)
+    output[s] = this->_head_output(s);
 }
 
 void convnet::ConvNet::_verify_params(const int channels, const std::vector<int>& dilations, const bool batchnorm,
@@ -145,9 +143,10 @@ void convnet::ConvNet::_verify_params(const int channels, const std::vector<int>
   // TODO
 }
 
-void convnet::ConvNet::_update_buffers_()
+void convnet::ConvNet::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
 {
-  this->Buffer::_update_buffers_();
+  this->Buffer::_update_buffers_(input, num_frames);
+
   const size_t buffer_size = this->_input_buffer.size();
 
   if (this->_block_vals[0].rows() != 1 || this->_block_vals[0].cols() != buffer_size)
@@ -184,27 +183,3 @@ void convnet::ConvNet::_rewind_buffers_()
   // Now we can do the rest of the rewind
   this->Buffer::_rewind_buffers_();
 }
-
-void convnet::ConvNet::_anti_pop_()
-{
-  if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
-    return;
-  const float slope = 1.0f / float(this->_anti_pop_ramp);
-  for (size_t i = 0; i < _num_input_samples; i++)
-  {
-    if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
-      break;
-    const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0));
-    this->_output_samples[i] *= gain;
-    this->_anti_pop_countdown++;
-  }
-}
-
-void convnet::ConvNet::_reset_anti_pop_()
-{
-  // You need the "real" receptive field, not the buffers.
-  long receptive_field = 1;
-  for (size_t i = 0; i < this->_blocks.size(); i++)
-    receptive_field += this->_blocks[i].conv.get_dilation();
-  this->_anti_pop_countdown = -receptive_field;
-}
diff --git a/NAM/convnet.h b/NAM/convnet.h
@@ -77,18 +77,9 @@ class ConvNet : public Buffer
   _Head _head;
   void _verify_params(const int channels, const std::vector<int>& dilations, const bool batchnorm,
                       const size_t actual_params);
-  void _update_buffers_() override;
+  void _update_buffers_(NAM_SAMPLE* input, const int num_frames) override;
   void _rewind_buffers_() override;
 
-  void _process_core_() override;
-
-  // The net starts with random parameters inside; we need to wait for a full
-  // receptive field to pass through before we can count on the output being
-  // ok. This implements a gentle "ramp-up" so that there's no "pop" at the
-  // start.
-  long _anti_pop_countdown;
-  const long _anti_pop_ramp = 100;
-  void _anti_pop_();
-  void _reset_anti_pop_();
+  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
 };
 }; // namespace convnet
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
@@ -19,28 +19,22 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;
 DSP::DSP(const double expected_sample_rate)
 : mLoudness(TARGET_DSP_LOUDNESS)
 , mExpectedSampleRate(expected_sample_rate)
-, mNormalizeOutputLoudness(false)
 , _stale_params(true)
 {
 }
 
 DSP::DSP(const double loudness, const double expected_sample_rate)
 : mLoudness(loudness)
 , mExpectedSampleRate(expected_sample_rate)
-, mNormalizeOutputLoudness(false)
 , _stale_params(true)
 {
 }
 
 void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
-  this->_input_samples = input;
-  this->_output_samples = output;
-  this->_num_input_samples = num_frames;
-
-  this->_ensure_core_dsp_output_ready_();
-  this->_process_core_();
-  this->_apply_output_level_(output, _num_input_samples);
+  // Default implementation is the null operation
+  for (size_t i = 0; i < num_frames; i++)
+    output[i] = input[i];
 }
 
 void DSP::finalize_(const int num_frames) {}
@@ -60,25 +54,6 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
   }
 }
 
-void DSP::_ensure_core_dsp_output_ready_()
-{
-}
-
-void DSP::_process_core_()
-{
-  // Default implementation is the null operation
-  for (size_t i = 0; i < _num_input_samples; i++)
-    this->_output_samples[i] = _input_samples[i];
-}
-
-void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
-{
-  const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
-  const double finalGain = this->mNormalizeOutputLoudness ? loudnessGain : 1.0;
-  for (int s = 0; s < num_frames; s++)
-    output[s] = (NAM_SAMPLE)(finalGain * _output_samples[s]);
-}
-
 // Buffer =====================================================================
 
 Buffer::Buffer(const int receptive_field, const double expected_sample_rate)
@@ -105,13 +80,13 @@ void Buffer::_set_receptive_field(const int new_receptive_field, const int input
   this->_reset_input_buffer();
 }
 
-void Buffer::_update_buffers_()
+void Buffer::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
 {
   // Make sure that the buffer is big enough for the receptive field and the
   // frames needed!
   {
     const long minimum_input_buffer_size =
-      (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_input_samples;
+      (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
     if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
@@ -124,13 +99,13 @@ void Buffer::_update_buffers_()
 
   // If we'd run off the end of the input buffer, then we need to move the data
   // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + _num_input_samples > (long)this->_input_buffer.size())
+  if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
     this->_rewind_buffers_();
   // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < _num_input_samples; i++, j++)
-    this->_input_buffer[i] = _input_samples[j];
+  for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
+    this->_input_buffer[i] = input[j];
   // And resize the output buffer:
-  this->_output_buffer.resize(_num_input_samples);
+  this->_output_buffer.resize(num_frames);
   std::fill (this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
 }
 
@@ -183,16 +158,16 @@ Linear::Linear(const double loudness, const int receptive_field, const bool _bia
   this->_bias = _bias ? params[receptive_field] : (float)0.0;
 }
 
-void Linear::_process_core_()
+void Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
-  this->Buffer::_update_buffers_();
+  this->Buffer::_update_buffers_(input, num_frames);
 
   // Main computation!
-  for (size_t i = 0; i < _num_input_samples; i++)
+  for (size_t i = 0; i < num_frames; i++)
   {
     const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
     auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
-    this->_output_samples[i] = this->_bias + this->_weight.dot(input);
+    output[i] = this->_bias + this->_weight.dot(input);
   }
 }
 

diff --git a/NAM/dsp.h b/NAM/dsp.h
@@ -63,44 +63,26 @@ class DSP
   //   DSP subclass implementation.
   virtual void finalize_(const int num_frames);
   double GetExpectedSampleRate() const { return mExpectedSampleRate; };
-  bool HasLoudness() { return mLoudness != TARGET_DSP_LOUDNESS; };
-  void SetNormalize(const bool normalize) { this->mNormalizeOutputLoudness = normalize; };
+  bool HasNormalization() { return mLoudness != TARGET_DSP_LOUDNESS; };
+  double GetNormalizationFactordB() { return -(this->mLoudness - TARGET_DSP_LOUDNESS); };
+  double GetNormalizationFactorLinear() { return pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); };
 
 protected:
   // How loud is the model?
   double mLoudness;
   // What sample rate does the model expect?
   double mExpectedSampleRate;
-  // Should we normalize according to this loudness?
-  bool mNormalizeOutputLoudness;
   // Parameters (aka "knobs")
   std::unordered_map<std::string, double> _params;
   // If the params have changed since the last buffer was processed:
   bool _stale_params;
-  // Input sample buffer
-  NAM_SAMPLE* _input_samples;
-  // Output sample buffer
-  NAM_SAMPLE* _output_samples;
-  // Number of samples in the input buffer
-  int _num_input_samples;
 
   // Methods
 
   // Copy the parameters to the DSP module.
   // If anything has changed, then set this->_stale_params to true.
   // (TODO use "listener" approach)
   void _get_params_(const std::unordered_map<std::string, double>& input_params);
-
-  // i.e. ensure the size is correct.
-  void _ensure_core_dsp_output_ready_();
-
-  // The core of your DSP algorithm.
-  // Access the inputs in this->_input_post_gain
-  // Place the outputs in this->_core_dsp_output
-  virtual void _process_core_();
-
-  // Copy this->_core_dsp_output to output and apply the output volume
-  void _apply_output_level_(NAM_SAMPLE* output, const int num_frames);
 };
 
 // Class where an input buffer is kept so that long-time effects can be
@@ -126,7 +108,7 @@ class Buffer : public DSP
   void _set_receptive_field(const int new_receptive_field);
   void _reset_input_buffer();
   // Use this->_input_post_gain
-  virtual void _update_buffers_();
+  virtual void _update_buffers_(NAM_SAMPLE* input, int num_frames);
   virtual void _rewind_buffers_();
 };
 
@@ -138,7 +120,7 @@ class Linear : public Buffer
          const double expected_sample_rate = -1.0);
   Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector<float>& params,
          const double expected_sample_rate = -1.0);
-  void _process_core_() override;
+  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
 
 protected:
   Eigen::VectorXf _weight;

diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
@@ -101,7 +101,7 @@ void lstm::LSTM::_init_parametric(nlohmann::json& parametric)
   this->_input_and_params.resize(1 + parametric.size()); // TODO amp parameters
 }
 
-void lstm::LSTM::_process_core_()
+void lstm::LSTM::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
   // Get params into the input vector before starting
   if (this->_stale_params)
@@ -111,8 +111,8 @@ void lstm::LSTM::_process_core_()
     this->_stale_params = false;
   }
   // Process samples, placing results in the required output location
-  for (size_t i = 0; i < _num_input_samples; i++)
-    this->_output_samples[i] = this->_process_sample(_input_samples[i]);
+  for (size_t i = 0; i < num_frames; i++)
+    output[i] = this->_process_sample(input[i]);
 }
 
 float lstm::LSTM::_process_sample(const float x)

diff --git a/NAM/lstm.h b/NAM/lstm.h
@@ -58,7 +58,7 @@ class LSTM : public DSP
 protected:
   Eigen::VectorXf _head_weight;
   float _head_bias;
-  void _process_core_() override;
+  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
   std::vector<LSTMCell> _layers;
 
   float _process_sample(const float x);

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
@@ -333,16 +333,16 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
     this->_layer_arrays[i].prepare_for_frames_(num_frames);
 }
 
-void wavenet::WaveNet::_process_core_()
+void wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
-  this->_set_num_frames_(_num_input_samples);
-  this->_prepare_for_frames_(_num_input_samples);
+  this->_set_num_frames_(num_frames);
+  this->_prepare_for_frames_(num_frames);
 
   // Fill into condition array:
   // Clumsy...
-  for (int j = 0; j < _num_input_samples; j++)
+  for (int j = 0; j < num_frames; j++)
   {
-    this->_condition(0, j) = _input_samples[j];
+    this->_condition(0, j) = input[j];
     if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
                              // compiler optimize this.
       for (size_t i = 0; i < this->_param_names.size(); i++)
@@ -366,10 +366,10 @@ void wavenet::WaveNet::_process_core_()
 
   const long final_head_array = this->_head_arrays.size() - 1;
   assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < _num_input_samples; s++)
+  for (int s = 0; s < num_frames; s++)
   {
     float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
-    this->_output_samples[s] = out;
+    output[s] = out;
   }
 }
 

diff --git a/NAM/wavenet.h b/NAM/wavenet.h
@@ -203,7 +203,7 @@ class WaveNet : public DSP
   void _init_parametric_(nlohmann::json& parametric);
   void _prepare_for_frames_(const long num_frames);
   // Reminder: From ._input_post_gain to ._core_dsp_output
-  void _process_core_() override;
+  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
 
   // Ensure that all buffer arrays are the right size for this num_frames
   void _set_num_frames_(const long num_frames);