Skip to content

Commit

Permalink
Remove _process_core(). Pass input/output/num_frames where needed (#80)
Browse files Browse the repository at this point in the history
instead of caching in local variables. Removed output normalization.
Added functions for getting model normalization factor in dB and linear gain.
  • Loading branch information
mikeoliphant authored Oct 15, 2023
1 parent 8904227 commit 855bf50
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 118 deletions.
43 changes: 9 additions & 34 deletions NAM/convnet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::
this->_head = _Head(channels, it);
if (it != params.end())
throw std::runtime_error("Didn't touch all the params when initializing wavenet");
this->_reset_anti_pop_();
}

void convnet::ConvNet::_process_core_()
void convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)

{
this->_update_buffers_();
this->_update_buffers_(input, num_frames);
// Main computation!
const long i_start = this->_input_buffer_offset;
const long i_end = i_start + _num_input_samples;
const long i_end = i_start + num_frames;
// TODO one unnecessary copy :/ #speed
for (auto i = i_start; i < i_end; i++)
this->_block_vals[0](0, i) = this->_input_buffer[i];
Expand All @@ -133,10 +133,8 @@ void convnet::ConvNet::_process_core_()
// TODO clean up this allocation
this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
// Copy to required output array (TODO tighten this up)
for (int s = 0; s < _num_input_samples; s++)
this->_output_samples[s] = this->_head_output(s);
// Apply anti-pop
this->_anti_pop_();
for (int s = 0; s < num_frames; s++)
output[s] = this->_head_output(s);
}

void convnet::ConvNet::_verify_params(const int channels, const std::vector<int>& dilations, const bool batchnorm,
Expand All @@ -145,9 +143,10 @@ void convnet::ConvNet::_verify_params(const int channels, const std::vector<int>
// TODO
}

void convnet::ConvNet::_update_buffers_()
void convnet::ConvNet::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
{
this->Buffer::_update_buffers_();
this->Buffer::_update_buffers_(input, num_frames);

const size_t buffer_size = this->_input_buffer.size();

if (this->_block_vals[0].rows() != 1 || this->_block_vals[0].cols() != buffer_size)
Expand Down Expand Up @@ -184,27 +183,3 @@ void convnet::ConvNet::_rewind_buffers_()
// Now we can do the rest of the rewind
this->Buffer::_rewind_buffers_();
}

void convnet::ConvNet::_anti_pop_()
{
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
return;
const float slope = 1.0f / float(this->_anti_pop_ramp);
for (size_t i = 0; i < _num_input_samples; i++)
{
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
break;
const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0));
this->_output_samples[i] *= gain;
this->_anti_pop_countdown++;
}
}

void convnet::ConvNet::_reset_anti_pop_()
{
// You need the "real" receptive field, not the buffers.
long receptive_field = 1;
for (size_t i = 0; i < this->_blocks.size(); i++)
receptive_field += this->_blocks[i].conv.get_dilation();
this->_anti_pop_countdown = -receptive_field;
}
13 changes: 2 additions & 11 deletions NAM/convnet.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,9 @@ class ConvNet : public Buffer
_Head _head;
void _verify_params(const int channels, const std::vector<int>& dilations, const bool batchnorm,
const size_t actual_params);
void _update_buffers_() override;
void _update_buffers_(NAM_SAMPLE* input, const int num_frames) override;
void _rewind_buffers_() override;

void _process_core_() override;

// The net starts with random parameters inside; we need to wait for a full
// receptive field to pass through before we can count on the output being
// ok. This implements a gentle "ramp-up" so that there's no "pop" at the
// start.
long _anti_pop_countdown;
const long _anti_pop_ramp = 100;
void _anti_pop_();
void _reset_anti_pop_();
void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
};
}; // namespace convnet
51 changes: 13 additions & 38 deletions NAM/dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,22 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;
DSP::DSP(const double expected_sample_rate)
: mLoudness(TARGET_DSP_LOUDNESS)
, mExpectedSampleRate(expected_sample_rate)
, mNormalizeOutputLoudness(false)
, _stale_params(true)
{
}

DSP::DSP(const double loudness, const double expected_sample_rate)
: mLoudness(loudness)
, mExpectedSampleRate(expected_sample_rate)
, mNormalizeOutputLoudness(false)
, _stale_params(true)
{
}

void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
this->_input_samples = input;
this->_output_samples = output;
this->_num_input_samples = num_frames;

this->_ensure_core_dsp_output_ready_();
this->_process_core_();
this->_apply_output_level_(output, _num_input_samples);
// Default implementation is the null operation
for (size_t i = 0; i < num_frames; i++)
output[i] = input[i];
}

void DSP::finalize_(const int num_frames) {}
Expand All @@ -60,25 +54,6 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
}
}

void DSP::_ensure_core_dsp_output_ready_()
{
}

void DSP::_process_core_()
{
// Default implementation is the null operation
for (size_t i = 0; i < _num_input_samples; i++)
this->_output_samples[i] = _input_samples[i];
}

void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
{
const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
const double finalGain = this->mNormalizeOutputLoudness ? loudnessGain : 1.0;
for (int s = 0; s < num_frames; s++)
output[s] = (NAM_SAMPLE)(finalGain * _output_samples[s]);
}

// Buffer =====================================================================

Buffer::Buffer(const int receptive_field, const double expected_sample_rate)
Expand All @@ -105,13 +80,13 @@ void Buffer::_set_receptive_field(const int new_receptive_field, const int input
this->_reset_input_buffer();
}

void Buffer::_update_buffers_()
void Buffer::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
{
// Make sure that the buffer is big enough for the receptive field and the
// frames needed!
{
const long minimum_input_buffer_size =
(long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_input_samples;
(long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
{
long new_buffer_size = 2;
Expand All @@ -124,13 +99,13 @@ void Buffer::_update_buffers_()

// If we'd run off the end of the input buffer, then we need to move the data
// back to the start of the buffer and start again.
if (this->_input_buffer_offset + _num_input_samples > (long)this->_input_buffer.size())
if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
this->_rewind_buffers_();
// Put the new samples into the input buffer
for (long i = this->_input_buffer_offset, j = 0; j < _num_input_samples; i++, j++)
this->_input_buffer[i] = _input_samples[j];
for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
this->_input_buffer[i] = input[j];
// And resize the output buffer:
this->_output_buffer.resize(_num_input_samples);
this->_output_buffer.resize(num_frames);
std::fill (this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
}

Expand Down Expand Up @@ -183,16 +158,16 @@ Linear::Linear(const double loudness, const int receptive_field, const bool _bia
this->_bias = _bias ? params[receptive_field] : (float)0.0;
}

void Linear::_process_core_()
void Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
this->Buffer::_update_buffers_();
this->Buffer::_update_buffers_(input, num_frames);

// Main computation!
for (size_t i = 0; i < _num_input_samples; i++)
for (size_t i = 0; i < num_frames; i++)
{
const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
this->_output_samples[i] = this->_bias + this->_weight.dot(input);
output[i] = this->_bias + this->_weight.dot(input);
}
}

Expand Down
28 changes: 5 additions & 23 deletions NAM/dsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,44 +63,26 @@ class DSP
// DSP subclass implementation.
virtual void finalize_(const int num_frames);
double GetExpectedSampleRate() const { return mExpectedSampleRate; };
bool HasLoudness() { return mLoudness != TARGET_DSP_LOUDNESS; };
void SetNormalize(const bool normalize) { this->mNormalizeOutputLoudness = normalize; };
bool HasNormalization() { return mLoudness != TARGET_DSP_LOUDNESS; };
double GetNormalizationFactordB() { return -(this->mLoudness - TARGET_DSP_LOUDNESS); };
double GetNormalizationFactorLinear() { return pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); };

protected:
// How loud is the model?
double mLoudness;
// What sample rate does the model expect?
double mExpectedSampleRate;
// Should we normalize according to this loudness?
bool mNormalizeOutputLoudness;
// Parameters (aka "knobs")
std::unordered_map<std::string, double> _params;
// If the params have changed since the last buffer was processed:
bool _stale_params;
// Input sample buffer
NAM_SAMPLE* _input_samples;
// Output sample buffer
NAM_SAMPLE* _output_samples;
// Number of samples in the input buffer
int _num_input_samples;

// Methods

// Copy the parameters to the DSP module.
// If anything has changed, then set this->_stale_params to true.
// (TODO use "listener" approach)
void _get_params_(const std::unordered_map<std::string, double>& input_params);

// i.e. ensure the size is correct.
void _ensure_core_dsp_output_ready_();

// The core of your DSP algorithm.
// Access the inputs in this->_input_post_gain
// Place the outputs in this->_core_dsp_output
virtual void _process_core_();

// Copy this->_core_dsp_output to output and apply the output volume
void _apply_output_level_(NAM_SAMPLE* output, const int num_frames);
};

// Class where an input buffer is kept so that long-time effects can be
Expand All @@ -126,7 +108,7 @@ class Buffer : public DSP
void _set_receptive_field(const int new_receptive_field);
void _reset_input_buffer();
// Use this->_input_post_gain
virtual void _update_buffers_();
virtual void _update_buffers_(NAM_SAMPLE* input, int num_frames);
virtual void _rewind_buffers_();
};

Expand All @@ -138,7 +120,7 @@ class Linear : public Buffer
const double expected_sample_rate = -1.0);
Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector<float>& params,
const double expected_sample_rate = -1.0);
void _process_core_() override;
void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;

protected:
Eigen::VectorXf _weight;
Expand Down
6 changes: 3 additions & 3 deletions NAM/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void lstm::LSTM::_init_parametric(nlohmann::json& parametric)
this->_input_and_params.resize(1 + parametric.size()); // TODO amp parameters
}

void lstm::LSTM::_process_core_()
void lstm::LSTM::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
// Get params into the input vector before starting
if (this->_stale_params)
Expand All @@ -111,8 +111,8 @@ void lstm::LSTM::_process_core_()
this->_stale_params = false;
}
// Process samples, placing results in the required output location
for (size_t i = 0; i < _num_input_samples; i++)
this->_output_samples[i] = this->_process_sample(_input_samples[i]);
// Read each frame from `input` and write the model's result to `output`;
// the caller's input buffer must not be modified.
for (size_t i = 0; i < num_frames; i++)
  output[i] = this->_process_sample(input[i]);
}

float lstm::LSTM::_process_sample(const float x)
Expand Down
2 changes: 1 addition & 1 deletion NAM/lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class LSTM : public DSP
protected:
Eigen::VectorXf _head_weight;
float _head_bias;
void _process_core_() override;
void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
std::vector<LSTMCell> _layers;

float _process_sample(const float x);
Expand Down
14 changes: 7 additions & 7 deletions NAM/wavenet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,16 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
this->_layer_arrays[i].prepare_for_frames_(num_frames);
}

void wavenet::WaveNet::_process_core_()
void wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
this->_set_num_frames_(_num_input_samples);
this->_prepare_for_frames_(_num_input_samples);
this->_set_num_frames_(num_frames);
this->_prepare_for_frames_(num_frames);

// Fill into condition array:
// Clumsy...
for (int j = 0; j < _num_input_samples; j++)
for (int j = 0; j < num_frames; j++)
{
this->_condition(0, j) = _input_samples[j];
this->_condition(0, j) = input[j];
if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
// compiler optimize this.
for (size_t i = 0; i < this->_param_names.size(); i++)
Expand All @@ -366,10 +366,10 @@ void wavenet::WaveNet::_process_core_()

const long final_head_array = this->_head_arrays.size() - 1;
assert(this->_head_arrays[final_head_array].rows() == 1);
for (int s = 0; s < _num_input_samples; s++)
for (int s = 0; s < num_frames; s++)
{
float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
this->_output_samples[s] = out;
output[s] = out;
}
}

Expand Down
2 changes: 1 addition & 1 deletion NAM/wavenet.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ class WaveNet : public DSP
void _init_parametric_(nlohmann::json& parametric);
void _prepare_for_frames_(const long num_frames);
// Reminder: From ._input_post_gain to ._core_dsp_output
void _process_core_() override;
void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;

// Ensure that all buffer arrays are the right size for this num_frames
void _set_num_frames_(const long num_frames);
Expand Down

0 comments on commit 855bf50

Please sign in to comment.