diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..8eaea87 --- /dev/null +++ b/.clang-format @@ -0,0 +1,79 @@ +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakStringLiterals: true +ColumnLimit: 120 +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 0 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +FixNamespaceComments: true +IndentCaseLabels: true +IndentPPDirectives: BeforeHash +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 0 +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Left +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +UseTab: Never diff --git a/NAM/activations.h b/NAM/activations.h index 1a154f3..52b1334 100644 --- a/NAM/activations.h +++ b/NAM/activations.h @@ -2,7 +2,14 @@ #include // expf -namespace activations { -float relu(float x) { return x > 0.0f ? x : 0.0f; }; -float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }; +namespace activations +{ +float relu(float x) +{ + return x > 0.0f ? x : 0.0f; +}; +float sigmoid(float x) +{ + return 1.0f / (1.0f + expf(-x)); +}; }; // namespace activations \ No newline at end of file diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp index 5a28330..cabf4fd 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -16,14 +16,24 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32; -DSP::DSP() : mLoudness(TARGET_DSP_LOUDNESS), mNormalizeOutputLoudness(false), _stale_params(true) {} +DSP::DSP() +: mLoudness(TARGET_DSP_LOUDNESS) +, mNormalizeOutputLoudness(false) +, _stale_params(true) +{ +} -DSP::DSP(const double loudness) : mLoudness(loudness), mNormalizeOutputLoudness(false), _stale_params(true) {} +DSP::DSP(const double loudness) +: mLoudness(loudness) +, mNormalizeOutputLoudness(false) +, _stale_params(true) +{ +} -void DSP::process(double **inputs, double **outputs, - const int num_channels, const int num_frames, +void DSP::process(double** inputs, double** outputs, const int num_channels, const int num_frames, const double input_gain, const double output_gain, - const std::unordered_map ¶ms) { + const std::unordered_map& params) +{ this->_get_params_(params); this->_apply_input_level_(inputs, num_channels, num_frames, input_gain); this->_ensure_core_dsp_output_ready_(); @@ -33,10 +43,11 @@ void DSP::process(double **inputs, double **outputs, void DSP::finalize_(const int num_frames) {} -void DSP::_get_params_( - const std::unordered_map &input_params) { +void DSP::_get_params_(const std::unordered_map& input_params) +{ this->_stale_params = false; - for (auto it = input_params.begin(); it != input_params.end(); ++it) { + for (auto it = input_params.begin(); it != input_params.end(); ++it) + { const std::string key = util::lowercase(it->first); const double value = it->second; if (this->_params.find(key) == this->_params.end()) // Not contained @@ -47,8 +58,8 @@ void DSP::_get_params_( } } -void DSP::_apply_input_level_(double **inputs, const int num_channels, - const int num_frames, const double gain) { +void DSP::_apply_input_level_(double** inputs, const int num_channels, const int num_frames, const double gain) +{ // Must match exactly; we're going to use the size of _input_post_gain later // for num_frames. if (this->_input_post_gain.size() != num_frames) @@ -59,54 +70,62 @@ void DSP::_apply_input_level_(double **inputs, const int num_channels, this->_input_post_gain[i] = float(gain * inputs[channel][i]); } -void DSP::_ensure_core_dsp_output_ready_() { +void DSP::_ensure_core_dsp_output_ready_() +{ if (this->_core_dsp_output.size() < this->_input_post_gain.size()) this->_core_dsp_output.resize(this->_input_post_gain.size()); } -void DSP::_process_core_() { +void DSP::_process_core_() +{ // Default implementation is the null operation for (int i = 0; i < this->_input_post_gain.size(); i++) this->_core_dsp_output[i] = this->_input_post_gain[i]; } -void DSP::_apply_output_level_(double **outputs, const int num_channels, - const int num_frames, const double gain) { - const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); - const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain; - for (int c = 0; c < num_channels; c++) +void DSP::_apply_output_level_(double** outputs, const int num_channels, const int num_frames, const double gain) +{ + const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); + const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain; + for (int c = 0; c < num_channels; c++) for (int s = 0; s < num_frames; s++) outputs[c][s] = double(finalGain * this->_core_dsp_output[s]); } // Buffer ===================================================================== -Buffer::Buffer(const int receptive_field) : Buffer(TARGET_DSP_LOUDNESS, receptive_field) {} +Buffer::Buffer(const int receptive_field) +: Buffer(TARGET_DSP_LOUDNESS, receptive_field) +{ +} -Buffer::Buffer(const double loudness, const int receptive_field) : DSP(loudness) { +Buffer::Buffer(const double loudness, const int receptive_field) +: DSP(loudness) +{ this->_set_receptive_field(receptive_field); } -void Buffer::_set_receptive_field(const int new_receptive_field) { - this->_set_receptive_field(new_receptive_field, - _INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field); +void Buffer::_set_receptive_field(const int new_receptive_field) +{ + this->_set_receptive_field(new_receptive_field, _INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field); }; -void Buffer::_set_receptive_field(const int new_receptive_field, - const int input_buffer_size) { +void Buffer::_set_receptive_field(const int new_receptive_field, const int input_buffer_size) +{ this->_receptive_field = new_receptive_field; this->_input_buffer.resize(input_buffer_size); this->_reset_input_buffer(); } -void Buffer::_update_buffers_() { +void Buffer::_update_buffers_() +{ const long int num_frames = this->_input_post_gain.size(); // Make sure that the buffer is big enough for the receptive field and the // frames needed! { - const long minimum_input_buffer_size = - (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames; - if (this->_input_buffer.size() < minimum_input_buffer_size) { + const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames; + if (this->_input_buffer.size() < minimum_input_buffer_size) + { long new_buffer_size = 2; while (new_buffer_size < minimum_input_buffer_size) new_buffer_size *= 2; @@ -125,11 +144,11 @@ void Buffer::_update_buffers_() { this->_output_buffer.resize(num_frames); } -void Buffer::_rewind_buffers_() { +void Buffer::_rewind_buffers_() +{ // Copy the input buffer back // RF-1 samples because we've got at least one new one inbound. - for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; - i < this->_receptive_field; i++, j++) + for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++) this->_input_buffer[i] = this->_input_buffer[j]; // And reset the offset. // Even though we could be stingy about that one sample that we won't be using @@ -139,28 +158,31 @@ void Buffer::_rewind_buffers_() { this->_input_buffer_offset = this->_receptive_field; } -void Buffer::_reset_input_buffer() { +void Buffer::_reset_input_buffer() +{ this->_input_buffer_offset = this->_receptive_field; } -void Buffer::finalize_(const int num_frames) { +void Buffer::finalize_(const int num_frames) +{ this->DSP::finalize_(num_frames); this->_input_buffer_offset += num_frames; } // Linear ===================================================================== -Linear::Linear(const int receptive_field, const bool _bias, - const std::vector& params) : Linear(TARGET_DSP_LOUDNESS, receptive_field, _bias, params) -{} +Linear::Linear(const int receptive_field, const bool _bias, const std::vector& params) +: Linear(TARGET_DSP_LOUDNESS, receptive_field, _bias, params) +{ +} -Linear::Linear(const double loudness, const int receptive_field, const bool _bias, - const std::vector& params) - : Buffer(loudness, receptive_field) +Linear::Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector& params) +: Buffer(loudness, receptive_field) { if (params.size() != (receptive_field + (_bias ? 1 : 0))) - throw std::runtime_error("Params vector does not match expected size based " - "on architecture parameters"); + throw std::runtime_error( + "Params vector does not match expected size based " + "on architecture parameters"); this->_weight.resize(this->_receptive_field); // Pass in in reverse order so that dot products work out of the box. @@ -169,102 +191,113 @@ Linear::Linear(const double loudness, const int receptive_field, const bool _bia this->_bias = _bias ? params[receptive_field] : (float)0.0; } -void Linear::_process_core_() { +void Linear::_process_core_() +{ this->Buffer::_update_buffers_(); // Main computation! - for (long i = 0; i < this->_input_post_gain.size(); i++) { - const long offset = - this->_input_buffer_offset - this->_weight.size() + i + 1; - auto input = Eigen::Map(&this->_input_buffer[offset], - this->_receptive_field); + for (long i = 0; i < this->_input_post_gain.size(); i++) + { + const long offset = this->_input_buffer_offset - this->_weight.size() + i + 1; + auto input = Eigen::Map(&this->_input_buffer[offset], this->_receptive_field); this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input); } } // NN modules ================================================================= -void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end) { +void relu_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end) +{ for (long j = j_start; j < j_end; j++) for (long i = 0; i < x.rows(); i++) x(i, j) = x(i, j) < (float)0.0 ? (float)0.0 : x(i, j); } -void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end) { +void relu_(Eigen::MatrixXf& x, const long j_start, const long j_end) +{ relu_(x, 0, x.rows(), j_start, j_end); } -void relu_(Eigen::MatrixXf &x) { relu_(x, 0, x.rows(), 0, x.cols()); } +void relu_(Eigen::MatrixXf& x) +{ + relu_(x, 0, x.rows(), 0, x.cols()); +} -void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end) { +void sigmoid_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end) +{ for (long j = j_start; j < j_end; j++) for (long i = i_start; i < i_end; i++) x(i, j) = 1.0 / (1.0 + expf(-x(i, j))); } -inline float fast_tanh_(const float x) { +inline float fast_tanh_(const float x) +{ const float ax = fabsf(x); const float x2 = x * x; - return (x * - (2.45550750702956f + 2.45550750702956f * ax + - (0.893229853513558f + 0.821226666969744f * ax) * x2) / - (2.44506634652299f + - (2.44506634652299f + x2) * fabsf(x + 0.814642734961073f * x * ax))); + return (x * (2.45550750702956f + 2.45550750702956f * ax + (0.893229853513558f + 0.821226666969744f * ax) * x2) + / (2.44506634652299f + (2.44506634652299f + x2) * fabsf(x + 0.814642734961073f * x * ax))); } -inline float hard_tanh_(const float x) { - const float t = x < -1 ? -1 : x; - return t > 1 ? 1 : t; +inline float hard_tanh_(const float x) +{ + const float t = x < -1 ? -1 : x; + return t > 1 ? 1 : t; } -void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end) { +void tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end) +{ for (long j = j_start; j < j_end; j++) for (long i = i_start; i < i_end; i++) x(i, j) = tanh_impl_(x(i, j)); } -void tanh_(Eigen::MatrixXf &x, const long j_start, const long j_end) { +void tanh_(Eigen::MatrixXf& x, const long j_start, const long j_end) +{ tanh_(x, 0, x.rows(), j_start, j_end); } -void tanh_(Eigen::MatrixXf &x) { +void tanh_(Eigen::MatrixXf& x) +{ - float *ptr = x.data(); + float* ptr = x.data(); long size = x.rows() * x.cols(); - for (long pos = 0; pos < size; pos++) { + for (long pos = 0; pos < size; pos++) + { ptr[pos] = tanh_impl_(ptr[pos]); } } -void hard_tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, - const long j_start, const long j_end) { - for (long j = j_start; j < j_end; j++) - for (long i = i_start; i < i_end; i++) - x(i, j) = hard_tanh_(x(i, j)); +void hard_tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end) +{ + for (long j = j_start; j < j_end; j++) + for (long i = i_start; i < i_end; i++) + x(i, j) = hard_tanh_(x(i, j)); } -void hard_tanh_(Eigen::MatrixXf& x, const long j_start, const long j_end) { - hard_tanh_(x, 0, x.rows(), j_start, j_end); +void hard_tanh_(Eigen::MatrixXf& x, const long j_start, const long j_end) +{ + hard_tanh_(x, 0, x.rows(), j_start, j_end); } -void hard_tanh_(Eigen::MatrixXf& x) { - float* ptr = x.data(); +void hard_tanh_(Eigen::MatrixXf& x) +{ + float* ptr = x.data(); - long size = x.rows() * x.cols(); + long size = x.rows() * x.cols(); - for (long pos = 0; pos < size; pos++) { - ptr[pos] = hard_tanh_(ptr[pos]); - } + for (long pos = 0; pos < size; pos++) + { + ptr[pos] = hard_tanh_(ptr[pos]); + } } -void Conv1D::set_params_(std::vector::iterator ¶ms) { - if (this->_weight.size() > 0) { +void Conv1D::set_params_(std::vector::iterator& params) +{ + if (this->_weight.size() > 0) + { const long out_channels = this->_weight[0].rows(); const long in_channels = this->_weight[0].cols(); // Crazy ordering because that's how it gets flattened. @@ -277,9 +310,9 @@ void Conv1D::set_params_(std::vector::iterator ¶ms) { this->_bias(i) = *(params++); } -void Conv1D::set_size_(const int in_channels, const int out_channels, - const int kernel_size, const bool do_bias, - const int _dilation) { +void Conv1D::set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, + const int _dilation) +{ this->_weight.resize(kernel_size); for (int i = 0; i < this->_weight.size(); i++) this->_weight[i].resize(out_channels, @@ -291,47 +324,47 @@ void Conv1D::set_size_(const int in_channels, const int out_channels, this->_dilation = _dilation; } -void Conv1D::set_size_and_params_(const int in_channels, const int out_channels, - const int kernel_size, const int _dilation, - const bool do_bias, - std::vector::iterator ¶ms) { +void Conv1D::set_size_and_params_(const int in_channels, const int out_channels, const int kernel_size, + const int _dilation, const bool do_bias, std::vector::iterator& params) +{ this->set_size_(in_channels, out_channels, kernel_size, do_bias, _dilation); this->set_params_(params); } -void Conv1D::process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output, - const long i_start, const long ncols, - const long j_start) const { +void Conv1D::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long ncols, + const long j_start) const +{ // This is the clever part ;) - for (long k = 0; k < this->_weight.size(); k++) { + for (long k = 0; k < this->_weight.size(); k++) + { const long offset = this->_dilation * (k + 1 - this->_weight.size()); if (k == 0) - output.middleCols(j_start, ncols) = - this->_weight[k] * input.middleCols(i_start + offset, ncols); + output.middleCols(j_start, ncols) = this->_weight[k] * input.middleCols(i_start + offset, ncols); else - output.middleCols(j_start, ncols) += - this->_weight[k] * input.middleCols(i_start + offset, ncols); + output.middleCols(j_start, ncols) += this->_weight[k] * input.middleCols(i_start + offset, ncols); } if (this->_bias.size() > 0) output.middleCols(j_start, ncols).colwise() += this->_bias; } -long Conv1D::get_num_params() const { +long Conv1D::get_num_params() const +{ long num_params = this->_bias.size(); for (long i = 0; i < this->_weight.size(); i++) num_params += this->_weight[i].size(); return num_params; } -Conv1x1::Conv1x1(const int in_channels, const int out_channels, - const bool _bias) { +Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias) +{ this->_weight.resize(out_channels, in_channels); this->_do_bias = _bias; if (_bias) this->_bias.resize(out_channels); } -void Conv1x1::set_params_(std::vector::iterator ¶ms) { +void Conv1x1::set_params_(std::vector::iterator& params) +{ for (int i = 0; i < this->_weight.rows(); i++) for (int j = 0; j < this->_weight.cols(); j++) this->_weight(i, j) = *(params++); @@ -340,7 +373,8 @@ void Conv1x1::set_params_(std::vector::iterator ¶ms) { this->_bias(i) = *(params++); } -Eigen::MatrixXf Conv1x1::process(const Eigen::MatrixXf &input) const { +Eigen::MatrixXf Conv1x1::process(const Eigen::MatrixXf& input) const +{ if (this->_do_bias) return (this->_weight * input).colwise() + this->_bias; else @@ -349,8 +383,8 @@ Eigen::MatrixXf Conv1x1::process(const Eigen::MatrixXf &input) const { // ConvNet ==================================================================== -convnet::BatchNorm::BatchNorm(const int dim, - std::vector::iterator ¶ms) { +convnet::BatchNorm::BatchNorm(const int dim, std::vector::iterator& params) +{ // Extract from param buffer Eigen::VectorXf running_mean(dim); Eigen::VectorXf running_var(dim); @@ -374,35 +408,32 @@ convnet::BatchNorm::BatchNorm(const int dim, this->loc = _bias - this->scale.cwiseProduct(running_mean); } -void convnet::BatchNorm::process_(Eigen::MatrixXf &x, const long i_start, - const long i_end) const { +void convnet::BatchNorm::process_(Eigen::MatrixXf& x, const long i_start, const long i_end) const +{ // todo using colwise? // #speed but conv probably dominates - for (auto i = i_start; i < i_end; i++) { + for (auto i = i_start; i < i_end; i++) + { x.col(i) = x.col(i).cwiseProduct(this->scale); x.col(i) += this->loc; } } -void convnet::ConvNetBlock::set_params_(const int in_channels, - const int out_channels, - const int _dilation, - const bool batchnorm, - const std::string activation, - std::vector::iterator ¶ms) { +void convnet::ConvNetBlock::set_params_(const int in_channels, const int out_channels, const int _dilation, + const bool batchnorm, const std::string activation, + std::vector::iterator& params) +{ this->_batchnorm = batchnorm; // HACK 2 kernel - this->conv.set_size_and_params_(in_channels, out_channels, 2, _dilation, - !batchnorm, params); + this->conv.set_size_and_params_(in_channels, out_channels, 2, _dilation, !batchnorm, params); if (this->_batchnorm) this->batchnorm = BatchNorm(out_channels, params); this->activation = activation; } -void convnet::ConvNetBlock::process_(const Eigen::MatrixXf &input, - Eigen::MatrixXf &output, - const long i_start, - const long i_end) const { +void convnet::ConvNetBlock::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, + const long i_end) const +{ const long ncols = i_end - i_start; this->conv.process_(input, output, i_start, ncols, i_start); if (this->_batchnorm) @@ -415,50 +446,52 @@ void convnet::ConvNetBlock::process_(const Eigen::MatrixXf &input, throw std::runtime_error("Unrecognized activation"); } -long convnet::ConvNetBlock::get_out_channels() const { +long convnet::ConvNetBlock::get_out_channels() const +{ return this->conv.get_out_channels(); } -convnet::_Head::_Head(const int channels, - std::vector::iterator ¶ms) { +convnet::_Head::_Head(const int channels, std::vector::iterator& params) +{ this->_weight.resize(channels); for (int i = 0; i < channels; i++) this->_weight[i] = *(params++); this->_bias = *(params++); } -void convnet::_Head::process_(const Eigen::MatrixXf &input, - Eigen::VectorXf &output, const long i_start, - const long i_end) const { +void convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, + const long i_end) const +{ const long length = i_end - i_start; output.resize(length); for (long i = 0, j = i_start; i < length; i++, j++) output(i) = this->_bias + input.col(j).dot(this->_weight); } -convnet::ConvNet::ConvNet(const int channels, const std::vector& dilations, - const bool batchnorm, const std::string activation, - std::vector& params) : ConvNet(TARGET_DSP_LOUDNESS, channels, dilations, batchnorm, activation, params) {} +convnet::ConvNet::ConvNet(const int channels, const std::vector& dilations, const bool batchnorm, + const std::string activation, std::vector& params) +: ConvNet(TARGET_DSP_LOUDNESS, channels, dilations, batchnorm, activation, params) +{ +} -convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::vector &dilations, - const bool batchnorm, const std::string activation, - std::vector ¶ms) - : Buffer(loudness, *std::max_element(dilations.begin(), dilations.end())) { +convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::vector& dilations, + const bool batchnorm, const std::string activation, std::vector& params) +: Buffer(loudness, *std::max_element(dilations.begin(), dilations.end())) +{ this->_verify_params(channels, dilations, batchnorm, params.size()); this->_blocks.resize(dilations.size()); std::vector::iterator it = params.begin(); for (int i = 0; i < dilations.size(); i++) - this->_blocks[i].set_params_(i == 0 ? 1 : channels, channels, dilations[i], - batchnorm, activation, it); + this->_blocks[i].set_params_(i == 0 ? 1 : channels, channels, dilations[i], batchnorm, activation, it); this->_block_vals.resize(this->_blocks.size() + 1); this->_head = _Head(channels, it); if (it != params.end()) - throw std::runtime_error( - "Didn't touch all the params when initializing wavenet"); + throw std::runtime_error("Didn't touch all the params when initializing wavenet"); this->_reset_anti_pop_(); } -void convnet::ConvNet::_process_core_() { +void convnet::ConvNet::_process_core_() +{ this->_update_buffers_(); // Main computation! const long i_start = this->_input_buffer_offset; @@ -468,11 +501,9 @@ void convnet::ConvNet::_process_core_() { for (auto i = i_start; i < i_end; i++) this->_block_vals[0](0, i) = this->_input_buffer[i]; for (auto i = 0; i < this->_blocks.size(); i++) - this->_blocks[i].process_(this->_block_vals[i], this->_block_vals[i + 1], - i_start, i_end); + this->_blocks[i].process_(this->_block_vals[i], this->_block_vals[i + 1], i_start, i_end); // TODO clean up this allocation - this->_head.process_(this->_block_vals[this->_blocks.size()], - this->_head_output, i_start, i_end); + this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end); // Copy to required output array (TODO tighten this up) for (int s = 0; s < num_frames; s++) this->_core_dsp_output[s] = this->_head_output(s); @@ -480,33 +511,33 @@ void convnet::ConvNet::_process_core_() { this->_anti_pop_(); } -void convnet::ConvNet::_verify_params(const int channels, - const std::vector &dilations, - const bool batchnorm, - const size_t actual_params) { +void convnet::ConvNet::_verify_params(const int channels, const std::vector& dilations, const bool batchnorm, + const size_t actual_params) +{ // TODO } -void convnet::ConvNet::_update_buffers_() { +void convnet::ConvNet::_update_buffers_() +{ this->Buffer::_update_buffers_(); const long buffer_size = this->_input_buffer.size(); this->_block_vals[0].resize(1, buffer_size); for (long i = 1; i < this->_block_vals.size(); i++) - this->_block_vals[i].resize(this->_blocks[i - 1].get_out_channels(), - buffer_size); + this->_block_vals[i].resize(this->_blocks[i - 1].get_out_channels(), buffer_size); } -void convnet::ConvNet::_rewind_buffers_() { +void convnet::ConvNet::_rewind_buffers_() +{ // Need to rewind the block vals first because Buffer::rewind_buffers() // resets the offset index // The last _block_vals is the output of the last block and doesn't need to be // rewound. - for (long k = 0; k < this->_block_vals.size() - 1; k++) { + for (long k = 0; k < this->_block_vals.size() - 1; k++) + { // We actually don't need to pull back a lot...just as far as the first // input sample would grab from dilation const long _dilation = this->_blocks[k].conv.get_dilation(); - for (long i = this->_receptive_field - _dilation, - j = this->_input_buffer_offset - _dilation; + for (long i = this->_receptive_field - _dilation, j = this->_input_buffer_offset - _dilation; j < this->_input_buffer_offset; i++, j++) for (long r = 0; r < this->_block_vals[k].rows(); r++) this->_block_vals[k](r, i) = this->_block_vals[k](r, j); @@ -515,21 +546,23 @@ void convnet::ConvNet::_rewind_buffers_() { this->Buffer::_rewind_buffers_(); } -void convnet::ConvNet::_anti_pop_() { +void convnet::ConvNet::_anti_pop_() +{ if (this->_anti_pop_countdown >= this->_anti_pop_ramp) return; const float slope = 1.0f / float(this->_anti_pop_ramp); - for (int i = 0; i < this->_core_dsp_output.size(); i++) { + for (int i = 0; i < this->_core_dsp_output.size(); i++) + { if (this->_anti_pop_countdown >= this->_anti_pop_ramp) break; - const float gain = - std::max(slope * float(this->_anti_pop_countdown), float(0.0)); + const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0)); this->_core_dsp_output[i] *= gain; this->_anti_pop_countdown++; } } -void convnet::ConvNet::_reset_anti_pop_() { +void convnet::ConvNet::_reset_anti_pop_() +{ // You need the "real" receptive field, not the buffers. long receptive_field = 1; for (int i = 0; i < this->_blocks.size(); i++) diff --git a/NAM/dsp.h b/NAM/dsp.h index a317caf..f9f1e8c 100644 --- a/NAM/dsp.h +++ b/NAM/dsp.h @@ -9,7 +9,8 @@ #include -enum EArchitectures { +enum EArchitectures +{ kLinear = 0, kConvNet, kLSTM, @@ -21,9 +22,10 @@ enum EArchitectures { // Class for providing params from the plugin to the DSP module // For now, we'll work with doubles. Later, we'll add other types. -class DSPParam { +class DSPParam +{ public: - const char *name; + const char* name; const double val; }; // And the params shall be provided as a std::vector. @@ -31,10 +33,11 @@ class DSPParam { // How loud do we want the models to be? in dB #define TARGET_DSP_LOUDNESS -18.0 -class DSP { +class DSP +{ public: - DSP(); - DSP(const double loudness); + DSP(); + DSP(const double loudness); // process() does all of the processing requried to take `inputs` array and // fill in the required values on `outputs`. // To do this: @@ -44,10 +47,9 @@ class DSP { // 3. The core DSP algorithm is run (This is what should probably be // overridden in subclasses). // 4. The output level is applied and the result stored to `output`. - virtual void process(double **inputs, double **outputs, - const int num_channels, const int num_frames, + virtual void process(double** inputs, double** outputs, const int num_channels, const int num_frames, const double input_gain, const double output_gain, - const std::unordered_map ¶ms); + const std::unordered_map& params); // Anything to take care of before next buffer comes in. // For example: // * Move the buffer index forward @@ -76,13 +78,11 @@ class DSP { // Copy the parameters to the DSP module. // If anything has changed, then set this->_stale_params to true. // (TODO use "listener" approach) - void - _get_params_(const std::unordered_map &input_params); + void _get_params_(const std::unordered_map& input_params); // Apply the input gain // Result populates this->_input_post_gain - void _apply_input_level_(double **inputs, const int num_channels, - const int num_frames, const double gain); + void _apply_input_level_(double** inputs, const int num_channels, const int num_frames, const double gain); // i.e. ensure the size is correct. void _ensure_core_dsp_output_ready_(); @@ -93,14 +93,14 @@ class DSP { virtual void _process_core_(); // Copy this->_core_dsp_output to output and apply the output volume - void _apply_output_level_(double **outputs, const int num_channels, - const int num_frames, const double gain); + void _apply_output_level_(double** outputs, const int num_channels, const int num_frames, const double gain); }; // Class where an input buffer is kept so that long-time effects can be // captured. (e.g. conv nets or impulse responses, where we need history that's // longer than the sample buffer that's coming in.) -class Buffer : public DSP { +class Buffer : public DSP +{ public: Buffer(const int receptive_field); Buffer(const double loudness, const int receptive_field); @@ -115,8 +115,7 @@ class Buffer : public DSP { std::vector _input_buffer; std::vector _output_buffer; - void _set_receptive_field(const int new_receptive_field, - const int input_buffer_size); + void _set_receptive_field(const int new_receptive_field, const int input_buffer_size); void _set_receptive_field(const int new_receptive_field); void _reset_input_buffer(); // Use this->_input_post_gain @@ -125,12 +124,11 @@ class Buffer : public DSP { }; // Basic linear model (an IR!) -class Linear : public Buffer { +class Linear : public Buffer +{ public: - Linear(const int receptive_field, const bool _bias, - const std::vector ¶ms); - Linear(const double loudness, const int receptive_field, const bool _bias, - const std::vector& params); + Linear(const int receptive_field, const bool _bias, const std::vector& params); + Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector& params); void _process_core_() override; protected: @@ -143,57 +141,47 @@ class Linear : public Buffer { // Activations // In-place ReLU on (N,M) array -void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end); +void relu_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end); // Subset of the columns -void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end); -void relu_(Eigen::MatrixXf &x); +void relu_(Eigen::MatrixXf& x, const long j_start, const long j_end); +void relu_(Eigen::MatrixXf& x); // In-place sigmoid -void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end); -void sigmoid_(Eigen::MatrixXf &x); +void sigmoid_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end); +void sigmoid_(Eigen::MatrixXf& x); // In-place Tanh on (N,M) array -void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end, - const long j_start, const long j_end); +void tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end); // Subset of the columns -void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end); +void tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end); -void tanh_(Eigen::MatrixXf &x); +void tanh_(Eigen::MatrixXf& x); // In-place Hardtanh on (N,M) array -void hard_tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, - const long j_start, const long j_end); +void hard_tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end, const long j_start, const long j_end); // Subset of the columns void hard_tanh_(Eigen::MatrixXf& x, const long i_start, const long i_end); void hard_tanh_(Eigen::MatrixXf& x); -class Conv1D { +class Conv1D +{ public: Conv1D() { this->_dilation = 1; }; - void set_params_(std::vector::iterator ¶ms); - void set_size_(const int in_channels, const int out_channels, - const int kernel_size, const bool do_bias, + void set_params_(std::vector::iterator& params); + void set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, const int _dilation); - void set_size_and_params_(const int in_channels, const int out_channels, - const int kernel_size, const int _dilation, - const bool do_bias, - std::vector::iterator ¶ms); + void set_size_and_params_(const int in_channels, const int out_channels, const int kernel_size, const int _dilation, + const bool do_bias, std::vector::iterator& params); // Process from input to output // Rightmost indices of input go from i_start to i_end, // Indices on output for from j_start (to j_start + i_end - i_start) - void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output, - const long i_start, const long i_end, const long j_start) const; - long get_in_channels() const { - return this->_weight.size() > 0 ? this->_weight[0].cols() : 0; - }; + void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end, + const long j_start) const; + long get_in_channels() const { return this->_weight.size() > 0 ? this->_weight[0].cols() : 0; }; long get_kernel_size() const { return this->_weight.size(); }; long get_num_params() const; - long get_out_channels() const { - return this->_weight.size() > 0 ? this->_weight[0].rows() : 0; - }; + long get_out_channels() const { return this->_weight.size() > 0 ? this->_weight[0].rows() : 0; }; int get_dilation() const { return this->_dilation; }; private: @@ -205,13 +193,14 @@ class Conv1D { }; // Really just a linear layer -class Conv1x1 { +class Conv1x1 +{ public: Conv1x1(const int in_channels, const int out_channels, const bool _bias); - void set_params_(std::vector::iterator ¶ms); + void set_params_(std::vector::iterator& params); // :param input: (N,Cin) or (Cin,) // :return: (N,Cout) or (Cout,), respectively - Eigen::MatrixXf process(const Eigen::MatrixXf &input) const; + Eigen::MatrixXf process(const Eigen::MatrixXf& input) const; long get_out_channels() const { return this->_weight.rows(); }; @@ -223,19 +212,20 @@ class Conv1x1 { // ConvNet ==================================================================== -namespace convnet { +namespace convnet +{ // Custom Conv that avoids re-computing on pieces of the input and trusts // that the corresponding outputs are where they need to be. // Beware: this is clever! // Batch normalization // In prod mode, so really just an elementwise affine layer. -class BatchNorm { +class BatchNorm +{ public: BatchNorm(){}; - BatchNorm(const int dim, std::vector::iterator ¶ms); - void process_(Eigen::MatrixXf &input, const long i_start, - const long i_end) const; + BatchNorm(const int dim, std::vector::iterator& params); + void process_(Eigen::MatrixXf& input, const long i_start, const long i_end) const; private: // TODO simplify to just ax+b @@ -247,15 +237,13 @@ class BatchNorm { Eigen::VectorXf loc; }; -class ConvNetBlock { +class ConvNetBlock +{ public: ConvNetBlock() { this->_batchnorm = false; }; - void set_params_(const int in_channels, const int out_channels, - const int _dilation, const bool batchnorm, - const std::string activation, - std::vector::iterator ¶ms); - void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output, - const long i_start, const long i_end) const; + void set_params_(const int in_channels, const int out_channels, const int _dilation, const bool batchnorm, + const std::string activation, std::vector::iterator& params); + void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const; long get_out_channels() const; Conv1D conv; @@ -265,34 +253,33 @@ class ConvNetBlock { std::string activation; }; -class _Head { +class _Head +{ public: _Head() { this->_bias = (float)0.0; }; - _Head(const int channels, std::vector::iterator ¶ms); - void process_(const Eigen::MatrixXf &input, Eigen::VectorXf &output, - const long i_start, const long i_end) const; + _Head(const int channels, std::vector::iterator& params); + void process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, const long i_end) const; private: Eigen::VectorXf _weight; float _bias; }; -class ConvNet : public Buffer { +class ConvNet : public Buffer +{ public: - ConvNet(const int channels, const std::vector &dilations, - const bool batchnorm, const std::string activation, - std::vector ¶ms); - ConvNet(const double loudness, const int channels, const std::vector& dilations, - const bool batchnorm, const std::string activation, - std::vector& params); + ConvNet(const int channels, const std::vector& dilations, const bool batchnorm, const std::string activation, + std::vector& params); + ConvNet(const double loudness, const int channels, const std::vector& dilations, const bool batchnorm, + const std::string activation, std::vector& params); protected: std::vector _blocks; std::vector _block_vals; Eigen::VectorXf _head_output; _Head _head; - void _verify_params(const int channels, const std::vector &dilations, - const bool batchnorm, const size_t actual_params); + void _verify_params(const int channels, const std::vector& dilations, const bool batchnorm, + const size_t actual_params); void _update_buffers_() override; void _rewind_buffers_() override; diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp index 8de64b7..b77d2c3 100644 --- a/NAM/get_dsp.cpp +++ b/NAM/get_dsp.cpp @@ -6,9 +6,11 @@ #include "lstm.h" #include "wavenet.h" -void verify_config_version(const std::string version) { +void verify_config_version(const std::string version) +{ const std::unordered_set supported_versions({"0.5.0", "0.5.1"}); - if (supported_versions.find(version) == supported_versions.end()) { + if (supported_versions.find(version) == supported_versions.end()) + { std::stringstream ss; ss << "Model config is an unsupported version " << version << ". Try either converting the model to a more recent version, or " @@ -17,24 +19,28 @@ void verify_config_version(const std::string version) { } } -std::vector _get_weights(nlohmann::json const &j, - const std::filesystem::path config_path) { - if (j.find("weights") != j.end()) { +std::vector _get_weights(nlohmann::json const& j, const std::filesystem::path config_path) +{ + if (j.find("weights") != j.end()) + { auto weight_list = j["weights"]; std::vector weights; for (auto it = weight_list.begin(); it != weight_list.end(); ++it) weights.push_back(*it); return weights; - } else + } + else throw std::runtime_error("Corrupted model file is missing weights."); } -std::unique_ptr get_dsp_legacy(const std::filesystem::path model_dir) { +std::unique_ptr get_dsp_legacy(const std::filesystem::path model_dir) +{ auto config_filename = model_dir / std::filesystem::path("config.json"); return get_dsp(config_filename); } -std::unique_ptr get_dsp(const std::filesystem::path config_filename) { +std::unique_ptr get_dsp(const std::filesystem::path config_filename) +{ if (!std::filesystem::exists(config_filename)) throw std::runtime_error("Config JSON doesn't exist!\n"); std::ifstream i(config_filename); @@ -47,63 +53,72 @@ std::unique_ptr get_dsp(const std::filesystem::path config_filename) { std::vector params = _get_weights(j, config_filename); bool haveLoudness = false; double loudness = TARGET_DSP_LOUDNESS; - if (j.find("metadata") != j.end()) { - if (j["metadata"].find("loudness") != j["metadata"].end()) { - loudness = j["metadata"]["loudness"]; - haveLoudness = true; - } + if (j.find("metadata") != j.end()) + { + if (j["metadata"].find("loudness") != j["metadata"].end()) + { + loudness = j["metadata"]["loudness"]; + haveLoudness = true; + } } - if (architecture == "Linear") { + if (architecture == "Linear") + { const int receptive_field = config["receptive_field"]; const bool _bias = config["bias"]; return std::make_unique(loudness, receptive_field, _bias, params); - } else if (architecture == "ConvNet") { + } + else if (architecture == "ConvNet") + { const int channels = config["channels"]; const bool batchnorm = config["batchnorm"]; std::vector dilations; for (int i = 0; i < config["dilations"].size(); i++) dilations.push_back(config["dilations"][i]); const std::string activation = config["activation"]; - return std::make_unique(loudness, channels, dilations, batchnorm, - activation, params); - } else if (architecture == "LSTM") { + return std::make_unique(loudness, channels, dilations, batchnorm, activation, params); + } + else if (architecture == "LSTM") + { const int num_layers = config["num_layers"]; const int input_size = config["input_size"]; const int hidden_size = config["hidden_size"]; auto json = nlohmann::json{}; - return std::make_unique(loudness, num_layers, input_size, hidden_size, - params, json); - } else if (architecture == "CatLSTM") { + return std::make_unique(loudness, num_layers, input_size, hidden_size, params, json); + } + else if (architecture == "CatLSTM") + { const int num_layers = config["num_layers"]; const int input_size = config["input_size"]; const int hidden_size = config["hidden_size"]; - return std::make_unique(loudness, num_layers, input_size, hidden_size, - params, config["parametric"]); - } else if (architecture == "WaveNet" || architecture == "CatWaveNet") { + return std::make_unique(loudness, num_layers, input_size, hidden_size, params, config["parametric"]); + } + else if (architecture == "WaveNet" || architecture == "CatWaveNet") + { std::vector layer_array_params; - for (int i = 0; i < config["layers"].size(); i++) { + for (int i = 0; i < config["layers"].size(); i++) + { nlohmann::json layer_config = config["layers"][i]; std::vector dilations; for (int j = 0; j < layer_config["dilations"].size(); j++) dilations.push_back(layer_config["dilations"][j]); - layer_array_params.push_back(wavenet::LayerArrayParams( - layer_config["input_size"], layer_config["condition_size"], - layer_config["head_size"], layer_config["channels"], - layer_config["kernel_size"], dilations, layer_config["activation"], - layer_config["gated"], layer_config["head_bias"])); + layer_array_params.push_back( + wavenet::LayerArrayParams(layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], + layer_config["channels"], layer_config["kernel_size"], dilations, + layer_config["activation"], layer_config["gated"], layer_config["head_bias"])); } const bool with_head = config["head"] == NULL; const float head_scale = config["head_scale"]; // Solves compilation issue on macOS Error: No matching constructor for // initialization of 'wavenet::WaveNet' Solution from // https://stackoverflow.com/a/73956681/3768284 - auto parametric_json = - architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{}; + auto parametric_json = architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{}; return std::make_unique( - loudness, layer_array_params, head_scale, with_head, parametric_json, params); - } else { + loudness, layer_array_params, head_scale, with_head, parametric_json, params); + } + else + { throw std::runtime_error("Unrecognized architecture"); } } diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 03e6503..bffd18d 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -5,8 +5,8 @@ #include "activations.h" #include "lstm.h" -lstm::LSTMCell::LSTMCell(const int input_size, const int hidden_size, - std::vector::iterator ¶ms) { +lstm::LSTMCell::LSTMCell(const int input_size, const int hidden_size, std::vector::iterator& params) +{ // Resize arrays this->_w.resize(4 * hidden_size, input_size + hidden_size); this->_b.resize(4 * hidden_size); @@ -27,7 +27,8 @@ lstm::LSTMCell::LSTMCell(const int input_size, const int hidden_size, this->_c[i] = *(params++); } -void lstm::LSTMCell::process_(const Eigen::VectorXf &x) { +void lstm::LSTMCell::process_(const Eigen::VectorXf& x) +{ const long hidden_size = this->_get_hidden_size(); const long input_size = this->_get_input_size(); // Assign inputs @@ -40,28 +41,27 @@ void lstm::LSTMCell::process_(const Eigen::VectorXf &x) { const long g_offset = 2 * hidden_size; const long o_offset = 3 * hidden_size; for (auto i = 0; i < hidden_size; i++) - this->_c[i] = - activations::sigmoid(this->_ifgo[i + f_offset]) * this->_c[i] + - activations::sigmoid(this->_ifgo[i + i_offset]) * - tanhf(this->_ifgo[i + g_offset]); + this->_c[i] = activations::sigmoid(this->_ifgo[i + f_offset]) * this->_c[i] + + activations::sigmoid(this->_ifgo[i + i_offset]) * tanhf(this->_ifgo[i + g_offset]); const long h_offset = input_size; for (int i = 0; i < hidden_size; i++) - this->_xh[i + h_offset] = - activations::sigmoid(this->_ifgo[i + o_offset]) * tanhf(this->_c[i]); + this->_xh[i + h_offset] = activations::sigmoid(this->_ifgo[i + o_offset]) * tanhf(this->_c[i]); } -lstm::LSTM::LSTM(const int num_layers, const int input_size, - const int hidden_size, std::vector& params, - nlohmann::json& parametric) : LSTM(TARGET_DSP_LOUDNESS, num_layers, input_size, hidden_size, params, parametric) {} +lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector& params, + nlohmann::json& parametric) +: LSTM(TARGET_DSP_LOUDNESS, num_layers, input_size, hidden_size, params, parametric) +{ +} -lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_size, - const int hidden_size, std::vector ¶ms, - nlohmann::json ¶metric) : DSP(loudness) { +lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size, + std::vector& params, nlohmann::json& parametric) +: DSP(loudness) +{ this->_init_parametric(parametric); std::vector::iterator it = params.begin(); for (int i = 0; i < num_layers; i++) - this->_layers.push_back( - LSTMCell(i == 0 ? input_size : hidden_size, hidden_size, it)); + this->_layers.push_back(LSTMCell(i == 0 ? input_size : hidden_size, hidden_size, it)); this->_head_weight.resize(hidden_size); for (int i = 0; i < hidden_size; i++) this->_head_weight[i] = *(it++); @@ -69,46 +69,44 @@ lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_si assert(it == params.end()); } -void lstm::LSTM::_init_parametric(nlohmann::json ¶metric) { +void lstm::LSTM::_init_parametric(nlohmann::json& parametric) +{ std::vector parametric_names; - for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end(); - ++it) { + for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end(); ++it) + { parametric_names.push_back(it.key()); } std::sort(parametric_names.begin(), parametric_names.end()); { int i = 1; - for (std::vector::iterator it = parametric_names.begin(); - it != parametric_names.end(); ++it, i++) + for (std::vector::iterator it = parametric_names.begin(); it != parametric_names.end(); ++it, i++) this->_parametric_map[*it] = i; } this->_input_and_params.resize(1 + parametric.size()); // TODO amp parameters } -void lstm::LSTM::_process_core_() { +void lstm::LSTM::_process_core_() +{ // Get params into the input vector before starting - if (this->_stale_params) { - for (std::unordered_map::iterator it = - this->_params.begin(); - it != this->_params.end(); ++it) + if (this->_stale_params) + { + for (std::unordered_map::iterator it = this->_params.begin(); it != this->_params.end(); ++it) this->_input_and_params[this->_parametric_map[it->first]] = it->second; this->_stale_params = false; } // Process samples, placing results in the required output location for (int i = 0; i < this->_input_post_gain.size(); i++) - this->_core_dsp_output[i] = - this->_process_sample(this->_input_post_gain[i]); + this->_core_dsp_output[i] = this->_process_sample(this->_input_post_gain[i]); } -float lstm::LSTM::_process_sample(const float x) { +float lstm::LSTM::_process_sample(const float x) +{ if (this->_layers.size() == 0) return x; this->_input_and_params(0) = x; this->_layers[0].process_(this->_input_and_params); for (int i = 1; i < this->_layers.size(); i++) this->_layers[i].process_(this->_layers[i - 1].get_hidden_state()); - return this->_head_weight.dot( - this->_layers[this->_layers.size() - 1].get_hidden_state()) + - this->_head_bias; + return this->_head_weight.dot(this->_layers[this->_layers.size() - 1].get_hidden_state()) + this->_head_bias; } diff --git a/NAM/lstm.h b/NAM/lstm.h index 8aadbcf..7dab325 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -9,7 +9,8 @@ #include "dsp.h" #include "json.hpp" -namespace lstm { +namespace lstm +{ // A Single LSTM cell // i input // f forget @@ -17,14 +18,12 @@ namespace lstm { // o output // c cell state // h hidden state -class LSTMCell { +class LSTMCell +{ public: - LSTMCell(const int input_size, const int hidden_size, - std::vector::iterator ¶ms); - Eigen::VectorXf get_hidden_state() const { - return this->_xh(Eigen::placeholders::lastN(this->_get_hidden_size())); - }; - void process_(const Eigen::VectorXf &x); + LSTMCell(const int input_size, const int hidden_size, std::vector::iterator& params); + Eigen::VectorXf get_hidden_state() const { return this->_xh(Eigen::placeholders::lastN(this->_get_hidden_size())); }; + void process_(const Eigen::VectorXf& x); private: // Parameters @@ -43,18 +42,17 @@ class LSTMCell { Eigen::VectorXf _c; long _get_hidden_size() const { return this->_b.size() / 4; }; - long _get_input_size() const { - return this->_xh.size() - this->_get_hidden_size(); - }; + long _get_input_size() const { return this->_xh.size() - this->_get_hidden_size(); }; }; // The multi-layer LSTM model -class LSTM : public DSP { +class LSTM : public DSP +{ public: - LSTM(const int num_layers, const int input_size, const int hidden_size, - std::vector ¶ms, nlohmann::json ¶metric); + LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector& params, + nlohmann::json& parametric); LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size, - std::vector& params, nlohmann::json& parametric); + std::vector& params, nlohmann::json& parametric); protected: Eigen::VectorXf _head_weight; @@ -65,7 +63,7 @@ class LSTM : public DSP { float _process_sample(const float x); // Initialize the parametric map - void _init_parametric(nlohmann::json ¶metric); + void _init_parametric(nlohmann::json& parametric); // Mapping from param name to index in _input_and_params: std::map _parametric_map; diff --git a/NAM/util.cpp b/NAM/util.cpp index b5451cb..92ca7a1 100644 --- a/NAM/util.cpp +++ b/NAM/util.cpp @@ -3,9 +3,9 @@ #include "util.h" -std::string util::lowercase(const std::string &s) { +std::string util::lowercase(const std::string& s) +{ std::string out(s); - std::transform(s.begin(), s.end(), out.begin(), - [](unsigned char c) { return std::tolower(c); }); + std::transform(s.begin(), s.end(), out.begin(), [](unsigned char c) { return std::tolower(c); }); return out; } \ No newline at end of file diff --git a/NAM/util.h b/NAM/util.h index 722ea5e..8949247 100644 --- a/NAM/util.h +++ b/NAM/util.h @@ -4,6 +4,7 @@ #include -namespace util { -std::string lowercase(const std::string &s); +namespace util +{ +std::string lowercase(const std::string& s); }; // namespace util diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index ea5dcd8..2dbf491 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -6,24 +6,23 @@ #include "wavenet.h" -wavenet::_DilatedConv::_DilatedConv(const int in_channels, - const int out_channels, - const int kernel_size, const int bias, - const int dilation) { +wavenet::_DilatedConv::_DilatedConv(const int in_channels, const int out_channels, const int kernel_size, + const int bias, const int dilation) +{ this->set_size_(in_channels, out_channels, kernel_size, bias, dilation); } -void wavenet::_Layer::set_params_(std::vector::iterator ¶ms) { +void wavenet::_Layer::set_params_(std::vector::iterator& params) +{ this->_conv.set_params_(params); this->_input_mixin.set_params_(params); this->_1x1.set_params_(params); } -void wavenet::_Layer::process_(const Eigen::MatrixXf &input, - const Eigen::MatrixXf &condition, - Eigen::MatrixXf &head_input, - Eigen::MatrixXf &output, const long i_start, - const long j_start) { +void wavenet::_Layer::process_(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, + Eigen::MatrixXf& head_input, Eigen::MatrixXf& output, const long i_start, + const long j_start) +{ const long ncols = condition.cols(); const long channels = this->get_channels(); // Input dilated conv @@ -38,7 +37,8 @@ void wavenet::_Layer::process_(const Eigen::MatrixXf &input, relu_(this->_z, 0, channels, 0, this->_z.cols()); else throw std::runtime_error("Unrecognized activation."); - if (this->_gated) { + if (this->_gated) + { sigmoid_(this->_z, channels, 2 * channels, 0, this->_z.cols()); this->_z.topRows(channels).array() *= this->_z.bottomRows(channels).array(); // this->_z.topRows(channels) = this->_z.topRows(channels).cwiseProduct( @@ -47,12 +47,11 @@ void wavenet::_Layer::process_(const Eigen::MatrixXf &input, } head_input += this->_z.topRows(channels); - output.middleCols(j_start, ncols) = - input.middleCols(i_start, ncols) + - this->_1x1.process(this->_z.topRows(channels)); + output.middleCols(j_start, ncols) = input.middleCols(i_start, ncols) + this->_1x1.process(this->_z.topRows(channels)); } -void wavenet::_Layer::set_num_frames_(const long num_frames) { +void wavenet::_Layer::set_num_frames_(const long num_frames) +{ this->_z.resize(this->_conv.get_out_channels(), num_frames); } @@ -60,39 +59,38 @@ void wavenet::_Layer::set_num_frames_(const long num_frames) { #define LAYER_ARRAY_BUFFER_SIZE 65536 -wavenet::_LayerArray::_LayerArray(const int input_size, - const int condition_size, const int head_size, - const int channels, const int kernel_size, - const std::vector &dilations, - const std::string activation, - const bool gated, const bool head_bias) - : _rechannel(input_size, channels, false), - _head_rechannel(channels, head_size, head_bias) { +wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size, + const int channels, const int kernel_size, const std::vector& dilations, + const std::string activation, const bool gated, const bool head_bias) +: _rechannel(input_size, channels, false) +, _head_rechannel(channels, head_size, head_bias) +{ for (int i = 0; i < dilations.size(); i++) - this->_layers.push_back(_Layer(condition_size, channels, kernel_size, - dilations[i], activation, gated)); + this->_layers.push_back(_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated)); const long receptive_field = this->_get_receptive_field(); - for (int i = 0; i < dilations.size(); i++) { - this->_layer_buffers.push_back(Eigen::MatrixXf( - channels, LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1)); + for (int i = 0; i < dilations.size(); i++) + { + this->_layer_buffers.push_back(Eigen::MatrixXf(channels, LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1)); this->_layer_buffers[i].setZero(); } this->_buffer_start = this->_get_receptive_field() - 1; } -void wavenet::_LayerArray::advance_buffers_(const int num_frames) { +void wavenet::_LayerArray::advance_buffers_(const int num_frames) +{ this->_buffer_start += num_frames; } -long wavenet::_LayerArray::get_receptive_field() const { +long wavenet::_LayerArray::get_receptive_field() const +{ long result = 0; for (int i = 0; i < this->_layers.size(); i++) - result += this->_layers[i].get_dilation() * - (this->_layers[i].get_kernel_size() - 1); + result += this->_layers[i].get_dilation() * (this->_layers[i].get_kernel_size() - 1); return result; } -void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) { +void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) +{ // Example: // _buffer_start = 0 // num_frames = 64 @@ -104,31 +102,30 @@ void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) { this->_rewind_buffers_(); } -void wavenet::_LayerArray::process_(const Eigen::MatrixXf &layer_inputs, - const Eigen::MatrixXf &condition, - Eigen::MatrixXf &head_inputs, - Eigen::MatrixXf &layer_outputs, - Eigen::MatrixXf &head_outputs) { - this->_layer_buffers[0].middleCols(this->_buffer_start, layer_inputs.cols()) = - this->_rechannel.process(layer_inputs); +void wavenet::_LayerArray::process_(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, + Eigen::MatrixXf& head_inputs, Eigen::MatrixXf& layer_outputs, + Eigen::MatrixXf& head_outputs) +{ + this->_layer_buffers[0].middleCols(this->_buffer_start, layer_inputs.cols()) = this->_rechannel.process(layer_inputs); const long last_layer = this->_layers.size() - 1; - for (auto i = 0; i < this->_layers.size(); i++) { - this->_layers[i].process_( - this->_layer_buffers[i], condition, head_inputs, - i == last_layer ? layer_outputs : this->_layer_buffers[i + 1], - this->_buffer_start, i == last_layer ? 0 : this->_buffer_start); + for (auto i = 0; i < this->_layers.size(); i++) + { + this->_layers[i].process_(this->_layer_buffers[i], condition, head_inputs, + i == last_layer ? layer_outputs : this->_layer_buffers[i + 1], this->_buffer_start, + i == last_layer ? 0 : this->_buffer_start); } head_outputs = this->_head_rechannel.process(head_inputs); } -void wavenet::_LayerArray::set_num_frames_(const long num_frames) { +void wavenet::_LayerArray::set_num_frames_(const long num_frames) +{ // Wavenet checks for unchanged num_frames; if we made it here, there's // something to do. - if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field()) { + if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field()) + { std::stringstream ss; - ss << "Asked to accept a buffer of " << num_frames - << " samples, but the buffer is too short (" << LAYER_ARRAY_BUFFER_SIZE - << ") to get out of the recptive field (" << this->_get_receptive_field() + ss << "Asked to accept a buffer of " << num_frames << " samples, but the buffer is too short (" + << LAYER_ARRAY_BUFFER_SIZE << ") to get out of the recptive field (" << this->_get_receptive_field() << "); copy errors could occur!\n"; throw std::runtime_error(ss.str().c_str()); } @@ -136,23 +133,25 @@ void wavenet::_LayerArray::set_num_frames_(const long num_frames) { this->_layers[i].set_num_frames_(num_frames); } -void wavenet::_LayerArray::set_params_(std::vector::iterator ¶ms) { +void wavenet::_LayerArray::set_params_(std::vector::iterator& params) +{ this->_rechannel.set_params_(params); for (int i = 0; i < this->_layers.size(); i++) this->_layers[i].set_params_(params); this->_head_rechannel.set_params_(params); } -long wavenet::_LayerArray::_get_channels() const { +long wavenet::_LayerArray::_get_channels() const +{ return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0; } -long wavenet::_LayerArray::_get_receptive_field() const { +long wavenet::_LayerArray::_get_receptive_field() const +{ // TODO remove this and use get_receptive_field() instead! long res = 1; for (int i = 0; i < this->_layers.size(); i++) - res += (this->_layers[i].get_kernel_size() - 1) * - this->_layers[i].get_dilation(); + res += (this->_layers[i].get_kernel_size() - 1) * this->_layers[i].get_dilation(); return res; } @@ -161,46 +160,49 @@ void wavenet::_LayerArray::_rewind_buffers_() // Can make this smaller--largest dilation, not receptive field! { const long start = this->_get_receptive_field() - 1; - for (int i = 0; i < this->_layer_buffers.size(); i++) { - const long d = (this->_layers[i].get_kernel_size() - 1) * - this->_layers[i].get_dilation(); - this->_layer_buffers[i].middleCols(start - d, d) = - this->_layer_buffers[i].middleCols(this->_buffer_start - d, d); + for (int i = 0; i < this->_layer_buffers.size(); i++) + { + const long d = (this->_layers[i].get_kernel_size() - 1) * this->_layers[i].get_dilation(); + this->_layer_buffers[i].middleCols(start - d, d) = this->_layer_buffers[i].middleCols(this->_buffer_start - d, d); } this->_buffer_start = start; } // Head ======================================================================= -wavenet::_Head::_Head(const int input_size, const int num_layers, - const int channels, const std::string activation) - : _channels(channels), _activation(activation), - _head(num_layers > 0 ? channels : input_size, 1, true) { +wavenet::_Head::_Head(const int input_size, const int num_layers, const int channels, const std::string activation) +: _channels(channels) +, _activation(activation) +, _head(num_layers > 0 ? channels : input_size, 1, true) +{ assert(num_layers > 0); int dx = input_size; - for (int i = 0; i < num_layers; i++) { - this->_layers.push_back( - Conv1x1(dx, i == num_layers - 1 ? 1 : channels, true)); + for (int i = 0; i < num_layers; i++) + { + this->_layers.push_back(Conv1x1(dx, i == num_layers - 1 ? 1 : channels, true)); dx = channels; if (i < num_layers - 1) this->_buffers.push_back(Eigen::MatrixXf()); } } -void wavenet::_Head::set_params_(std::vector::iterator ¶ms) { +void wavenet::_Head::set_params_(std::vector::iterator& params) +{ for (int i = 0; i < this->_layers.size(); i++) this->_layers[i].set_params_(params); } -void wavenet::_Head::process_(Eigen::MatrixXf &inputs, - Eigen::MatrixXf &outputs) { +void wavenet::_Head::process_(Eigen::MatrixXf& inputs, Eigen::MatrixXf& outputs) +{ const size_t num_layers = this->_layers.size(); this->_apply_activation_(inputs); if (num_layers == 1) outputs = this->_layers[0].process(inputs); - else { + else + { this->_buffers[0] = this->_layers[0].process(inputs); - for (int i = 1; i < num_layers; i++) { // Asserted > 0 layers + for (int i = 1; i < num_layers; i++) + { // Asserted > 0 layers this->_apply_activation_(this->_buffers[i - 1]); if (i < num_layers - 1) this->_buffers[i] = this->_layers[i].process(this->_buffers[i - 1]); @@ -210,12 +212,14 @@ void wavenet::_Head::process_(Eigen::MatrixXf &inputs, } } -void wavenet::_Head::set_num_frames_(const long num_frames) { +void wavenet::_Head::set_num_frames_(const long num_frames) +{ for (int i = 0; i < this->_buffers.size(); i++) this->_buffers[i].resize(this->_channels, num_frames); } -void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) { +void wavenet::_Head::_apply_activation_(Eigen::MatrixXf& x) +{ if (this->_activation == "Tanh") tanh_(x); else if (this->_activation == "ReLU") @@ -226,95 +230,95 @@ void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) { // WaveNet ==================================================================== -wavenet::WaveNet::WaveNet( - const std::vector& layer_array_params, - const float head_scale, const bool with_head, nlohmann::json parametric, - std::vector params) : WaveNet(TARGET_DSP_LOUDNESS, layer_array_params, head_scale, with_head, parametric, params) {} - -wavenet::WaveNet::WaveNet( - const double loudness, - const std::vector &layer_array_params, - const float head_scale, const bool with_head, nlohmann::json parametric, - std::vector params) - : DSP(loudness), - _num_frames(0), _head_scale(head_scale) { +wavenet::WaveNet::WaveNet(const std::vector& layer_array_params, const float head_scale, + const bool with_head, nlohmann::json parametric, std::vector params) +: WaveNet(TARGET_DSP_LOUDNESS, layer_array_params, head_scale, with_head, parametric, params) +{ +} + +wavenet::WaveNet::WaveNet(const double loudness, const std::vector& layer_array_params, + const float head_scale, const bool with_head, nlohmann::json parametric, + std::vector params) +: DSP(loudness) +, _num_frames(0) +, _head_scale(head_scale) +{ if (with_head) throw std::runtime_error("Head not implemented!"); this->_init_parametric_(parametric); - for (int i = 0; i < layer_array_params.size(); i++) { + for (int i = 0; i < layer_array_params.size(); i++) + { this->_layer_arrays.push_back(wavenet::_LayerArray( - layer_array_params[i].input_size, layer_array_params[i].condition_size, - layer_array_params[i].head_size, layer_array_params[i].channels, - layer_array_params[i].kernel_size, layer_array_params[i].dilations, - layer_array_params[i].activation, layer_array_params[i].gated, - layer_array_params[i].head_bias)); - this->_layer_array_outputs.push_back( - Eigen::MatrixXf(layer_array_params[i].channels, 0)); + layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size, + layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations, + layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias)); + this->_layer_array_outputs.push_back(Eigen::MatrixXf(layer_array_params[i].channels, 0)); if (i == 0) - this->_head_arrays.push_back( - Eigen::MatrixXf(layer_array_params[i].channels, 0)); + this->_head_arrays.push_back(Eigen::MatrixXf(layer_array_params[i].channels, 0)); if (i > 0) - if (layer_array_params[i].channels != - layer_array_params[i - 1].head_size) { + if (layer_array_params[i].channels != layer_array_params[i - 1].head_size) + { std::stringstream ss; - ss << "channels of layer " << i << " (" - << layer_array_params[i].channels - << ") doesn't match head_size of preceding layer (" - << layer_array_params[i - 1].head_size << "!\n"; + ss << "channels of layer " << i << " (" << layer_array_params[i].channels + << ") doesn't match head_size of preceding layer (" << layer_array_params[i - 1].head_size << "!\n"; throw std::runtime_error(ss.str().c_str()); } - this->_head_arrays.push_back( - Eigen::MatrixXf(layer_array_params[i].head_size, 0)); + this->_head_arrays.push_back(Eigen::MatrixXf(layer_array_params[i].head_size, 0)); } this->_head_output.resize(1, 0); // Mono output! this->set_params_(params); this->_reset_anti_pop_(); } -void wavenet::WaveNet::finalize_(const int num_frames) { +void wavenet::WaveNet::finalize_(const int num_frames) +{ this->DSP::finalize_(num_frames); this->_advance_buffers_(num_frames); } -void wavenet::WaveNet::set_params_(std::vector ¶ms) { +void wavenet::WaveNet::set_params_(std::vector& params) +{ std::vector::iterator it = params.begin(); for (int i = 0; i < this->_layer_arrays.size(); i++) this->_layer_arrays[i].set_params_(it); // this->_head.set_params_(it); this->_head_scale = *(it++); - if (it != params.end()) { + if (it != params.end()) + { std::stringstream ss; for (int i = 0; i < params.size(); i++) - if (params[i] == *it) { - ss << "Parameter mismatch: assigned " << i + 1 << " parameters, but " - << params.size() << " were provided."; + if (params[i] == *it) + { + ss << "Parameter mismatch: assigned " << i + 1 << " parameters, but " << params.size() << " were provided."; throw std::runtime_error(ss.str().c_str()); } - ss << "Parameter mismatch: provided " << params.size() - << " weights, but the model expects more."; + ss << "Parameter mismatch: provided " << params.size() << " weights, but the model expects more."; throw std::runtime_error(ss.str().c_str()); } } -void wavenet::WaveNet::_advance_buffers_(const int num_frames) { +void wavenet::WaveNet::_advance_buffers_(const int num_frames) +{ for (int i = 0; i < this->_layer_arrays.size(); i++) this->_layer_arrays[i].advance_buffers_(num_frames); } -void wavenet::WaveNet::_init_parametric_(nlohmann::json ¶metric) { - for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end(); - ++it) +void wavenet::WaveNet::_init_parametric_(nlohmann::json& parametric) +{ + for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end(); ++it) this->_param_names.push_back(it.key()); // TODO assert continuous 0 to 1 std::sort(this->_param_names.begin(), this->_param_names.end()); } -void wavenet::WaveNet::_prepare_for_frames_(const long num_frames) { +void wavenet::WaveNet::_prepare_for_frames_(const long num_frames) +{ for (auto i = 0; i < this->_layer_arrays.size(); i++) this->_layer_arrays[i].prepare_for_frames_(num_frames); } -void wavenet::WaveNet::_process_core_() { +void wavenet::WaveNet::_process_core_() +{ const long num_frames = this->_input_post_gain.size(); this->_set_num_frames_(num_frames); this->_prepare_for_frames_(num_frames); @@ -326,13 +330,13 @@ void wavenet::WaveNet::_process_core_() { // Fill into condition array: // Clumsy... - for (int j = 0; j < num_frames; j++) { + for (int j = 0; j < num_frames; j++) + { this->_condition(0, j) = this->_input_post_gain[j]; if (this->_stale_params) // Column-major assignment; good for Eigen. Let the // compiler optimize this. for (int i = 0; i < this->_param_names.size(); i++) - this->_condition(i + 1, j) = - (float)this->_params[this->_param_names[i]]; + this->_condition(i + 1, j) = (float)this->_params[this->_param_names[i]]; } // Main layer arrays: @@ -340,10 +344,8 @@ void wavenet::WaveNet::_process_core_() { // Sum on head output this->_head_arrays[0].setZero(); for (int i = 0; i < this->_layer_arrays.size(); i++) - this->_layer_arrays[i].process_( - i == 0 ? this->_condition : this->_layer_array_outputs[i - 1], - this->_condition, this->_head_arrays[i], this->_layer_array_outputs[i], - this->_head_arrays[i + 1]); + this->_layer_arrays[i].process_(i == 0 ? this->_condition : this->_layer_array_outputs[i - 1], this->_condition, + this->_head_arrays[i], this->_layer_array_outputs[i], this->_head_arrays[i + 1]); // this->_head.process_( // this->_head_input, // this->_head_output @@ -354,7 +356,8 @@ void wavenet::WaveNet::_process_core_() { const long final_head_array = this->_head_arrays.size() - 1; assert(this->_head_arrays[final_head_array].rows() == 1); - for (int s = 0; s < num_frames; s++) { + for (int s = 0; s < num_frames; s++) + { float out = this->_head_scale * this->_head_arrays[final_head_array](0, s); // This is the NaN check that we could fix with anti-popping the input if (isnan(out)) @@ -365,7 +368,8 @@ void wavenet::WaveNet::_process_core_() { this->_anti_pop_(); } -void wavenet::WaveNet::_set_num_frames_(const long num_frames) { +void wavenet::WaveNet::_set_num_frames_(const long num_frames) +{ if (num_frames == this->_num_frames) return; @@ -373,8 +377,7 @@ void wavenet::WaveNet::_set_num_frames_(const long num_frames) { for (int i = 0; i < this->_head_arrays.size(); i++) this->_head_arrays[i].resize(this->_head_arrays[i].rows(), num_frames); for (int i = 0; i < this->_layer_array_outputs.size(); i++) - this->_layer_array_outputs[i].resize(this->_layer_array_outputs[i].rows(), - num_frames); + this->_layer_array_outputs[i].resize(this->_layer_array_outputs[i].rows(), num_frames); this->_head_output.resize(this->_head_output.rows(), num_frames); for (int i = 0; i < this->_layer_arrays.size(); i++) @@ -383,11 +386,13 @@ void wavenet::WaveNet::_set_num_frames_(const long num_frames) { this->_num_frames = num_frames; } -void wavenet::WaveNet::_anti_pop_() { +void wavenet::WaveNet::_anti_pop_() +{ if (this->_anti_pop_countdown >= this->_anti_pop_ramp) return; const float slope = 1.0f / float(this->_anti_pop_ramp); - for (int i = 0; i < this->_core_dsp_output.size(); i++) { + for (int i = 0; i < this->_core_dsp_output.size(); i++) + { if (this->_anti_pop_countdown >= this->_anti_pop_ramp) break; const float gain = std::max(slope * float(this->_anti_pop_countdown), 0.0f); @@ -396,7 +401,8 @@ void wavenet::WaveNet::_anti_pop_() { } } -void wavenet::WaveNet::_reset_anti_pop_() { +void wavenet::WaveNet::_reset_anti_pop_() +{ // You need the "real" receptive field, not the buffers. long receptive_field = 1; for (int i = 0; i < this->_layer_arrays.size(); i++) diff --git a/NAM/wavenet.h b/NAM/wavenet.h index cb22991..fc1daab 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -8,29 +8,31 @@ #include "dsp.h" -namespace wavenet { +namespace wavenet +{ // Rework the initialization API slightly. Merge w/ dsp.h later. -class _DilatedConv : public Conv1D { +class _DilatedConv : public Conv1D +{ public: - _DilatedConv(const int in_channels, const int out_channels, - const int kernel_size, const int bias, const int dilation); + _DilatedConv(const int in_channels, const int out_channels, const int kernel_size, const int bias, + const int dilation); }; -class _Layer { +class _Layer +{ public: - _Layer(const int condition_size, const int channels, const int kernel_size, - const int dilation, const std::string activation, const bool gated) - : _activation(activation), _gated(gated), - _conv(channels, gated ? 2 * channels : channels, kernel_size, true, - dilation), - _input_mixin(condition_size, gated ? 2 * channels : channels, false), - _1x1(channels, channels, true){}; - void set_params_(std::vector::iterator ¶ms); + _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation, + const std::string activation, const bool gated) + : _activation(activation) + , _gated(gated) + , _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation) + , _input_mixin(condition_size, gated ? 2 * channels : channels, false) + , _1x1(channels, channels, true){}; + void set_params_(std::vector::iterator& params); // :param `input`: from previous layer // :param `output`: to next layer - void process_(const Eigen::MatrixXf &input, const Eigen::MatrixXf &condition, - Eigen::MatrixXf &head_input, Eigen::MatrixXf &output, - const long i_start, const long j_start); + void process_(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, Eigen::MatrixXf& head_input, + Eigen::MatrixXf& output, const long i_start, const long j_start); void set_num_frames_(const long num_frames); long get_channels() const { return this->_conv.get_in_channels(); }; int get_dilation() const { return this->_conv.get_dilation(); }; @@ -50,16 +52,21 @@ class _Layer { const bool _gated; }; -class LayerArrayParams { +class LayerArrayParams +{ public: - LayerArrayParams(const int input_size_, const int condition_size_, - const int head_size_, const int channels_, - const int kernel_size_, const std::vector &dilations_, - const std::string activation_, const bool gated_, - const bool head_bias_) - : input_size(input_size_), condition_size(condition_size_), - head_size(head_size_), channels(channels_), kernel_size(kernel_size_), - activation(activation_), gated(gated_), head_bias(head_bias_) { + LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_, + const int kernel_size_, const std::vector& dilations_, const std::string activation_, + const bool gated_, const bool head_bias_) + : input_size(input_size_) + , condition_size(condition_size_) + , head_size(head_size_) + , channels(channels_) + , kernel_size(kernel_size_) + , activation(activation_) + , gated(gated_) + , head_bias(head_bias_) + { for (int i = 0; i < dilations_.size(); i++) this->dilations.push_back(dilations_[i]); }; @@ -76,12 +83,12 @@ class LayerArrayParams { }; // An array of layers with the same channels, kernel sizes, activations. -class _LayerArray { +class _LayerArray +{ public: - _LayerArray(const int input_size, const int condition_size, - const int head_size, const int channels, const int kernel_size, - const std::vector &dilations, const std::string activation, - const bool gated, const bool head_bias); + _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels, + const int kernel_size, const std::vector& dilations, const std::string activation, const bool gated, + const bool head_bias); void advance_buffers_(const int num_frames); @@ -92,14 +99,14 @@ class _LayerArray { void prepare_for_frames_(const long num_frames); // All arrays are "short". - void process_(const Eigen::MatrixXf &layer_inputs, // Short - const Eigen::MatrixXf &condition, // Short - Eigen::MatrixXf &layer_outputs, // Short - Eigen::MatrixXf &head_inputs, // Sum up on this. - Eigen::MatrixXf &head_outputs // post head-rechannel + void process_(const Eigen::MatrixXf& layer_inputs, // Short + const Eigen::MatrixXf& condition, // Short + Eigen::MatrixXf& layer_outputs, // Short + Eigen::MatrixXf& head_inputs, // Sum up on this. + Eigen::MatrixXf& head_outputs // post head-rechannel ); void set_num_frames_(const long num_frames); - void set_params_(std::vector::iterator &it); + void set_params_(std::vector::iterator& it); // "Zero-indexed" receptive field. // E.g. a 1x1 convolution has a z.i.r.f. of zero. @@ -120,9 +127,7 @@ class _LayerArray { // Rechannel for the head Conv1x1 _head_rechannel; - long _get_buffer_size() const { - return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols() : 0; - }; + long _get_buffer_size() const { return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols() : 0; }; long _get_channels() const; // "One-indexed" receptive field // TODO remove! @@ -133,14 +138,14 @@ class _LayerArray { // The head module // [Act->Conv] x L -class _Head { +class _Head +{ public: - _Head(const int input_size, const int num_layers, const int channels, - const std::string activation); - void set_params_(std::vector::iterator ¶ms); + _Head(const int input_size, const int num_layers, const int channels, const std::string activation); + void set_params_(std::vector::iterator& params); // NOTE: the head transforms the provided input by applying a nonlinearity // to it in-place! - void process_(Eigen::MatrixXf &inputs, Eigen::MatrixXf &outputs); + void process_(Eigen::MatrixXf& inputs, Eigen::MatrixXf& outputs); void set_num_frames_(const long num_frames); private: @@ -154,26 +159,25 @@ class _Head { std::vector _buffers; // Apply the activation to the provided array, in-place - void _apply_activation_(Eigen::MatrixXf &x); + void _apply_activation_(Eigen::MatrixXf& x); }; // The main WaveNet model // Both parametric and not; difference is handled at param read-in. -class WaveNet : public DSP { +class WaveNet : public DSP +{ public: - WaveNet(const std::vector &layer_array_params, - const float head_scale, const bool with_head, + WaveNet(const std::vector& layer_array_params, const float head_scale, const bool with_head, nlohmann::json parametric, std::vector params); - WaveNet(const double loudness, const std::vector& layer_array_params, - const float head_scale, const bool with_head, - nlohmann::json parametric, std::vector params); + WaveNet(const double loudness, const std::vector& layer_array_params, const float head_scale, + const bool with_head, nlohmann::json parametric, std::vector params); // WaveNet(WaveNet&&) = default; // WaveNet& operator=(WaveNet&&) = default; // ~WaveNet() = default; void finalize_(const int num_frames) override; - void set_params_(std::vector ¶ms); + void set_params_(std::vector& params); private: long _num_frames; @@ -195,7 +199,7 @@ class WaveNet : public DSP { void _advance_buffers_(const int num_frames); // Get the info from the parametric config - void _init_parametric_(nlohmann::json ¶metric); + void _init_parametric_(nlohmann::json& parametric); void _prepare_for_frames_(const long num_frames); // Reminder: From ._input_post_gain to ._core_dsp_output void _process_core_() override; diff --git a/dsp/ImpulseResponse.cpp b/dsp/ImpulseResponse.cpp index 34fe31c..9756aa3 100644 --- a/dsp/ImpulseResponse.cpp +++ b/dsp/ImpulseResponse.cpp @@ -10,29 +10,29 @@ #include "ImpulseResponse.h" -dsp::ImpulseResponse::ImpulseResponse(const char* fileName, - const double sampleRate) - : mWavState(dsp::wav::LoadReturnCode::ERROR_OTHER) { +dsp::ImpulseResponse::ImpulseResponse(const char* fileName, const double sampleRate) +: mWavState(dsp::wav::LoadReturnCode::ERROR_OTHER) +{ // Try to load the WAV - this->mWavState = dsp::wav::Load(fileName, this->mRawAudio, - this->mRawAudioSampleRate); - if (this->mWavState != dsp::wav::LoadReturnCode::SUCCESS) { + this->mWavState = dsp::wav::Load(fileName, this->mRawAudio, this->mRawAudioSampleRate); + if (this->mWavState != dsp::wav::LoadReturnCode::SUCCESS) + { std::stringstream ss; ss << "Failed to load IR at " << fileName << std::endl; - } else + } + else // Set the weights based on the raw audio. this->_SetWeights(sampleRate); } -double **dsp::ImpulseResponse::Process(double **inputs, const size_t numChannels, - const size_t numFrames) { +double** dsp::ImpulseResponse::Process(double** inputs, const size_t numChannels, const size_t numFrames) +{ this->_PrepareBuffers(numChannels, numFrames); this->_UpdateHistory(inputs, numChannels, numFrames); - for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired; - i < numFrames; i++, j++) { - auto input = Eigen::Map(&this->mHistory[j], - this->mHistoryRequired + 1); + for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired; i < numFrames; i++, j++) + { + auto input = Eigen::Map(&this->mHistory[j], this->mHistoryRequired + 1); this->mOutputs[0][i] = (double)this->mWeight.dot(input); } // Copy out for more-than-mono. @@ -44,20 +44,22 @@ double **dsp::ImpulseResponse::Process(double **inputs, const size_t numChannels return this->_GetPointers(); } -void dsp::ImpulseResponse::_SetWeights(const double sampleRate) { - if (this->mRawAudioSampleRate == sampleRate) { +void dsp::ImpulseResponse::_SetWeights(const double sampleRate) +{ + if (this->mRawAudioSampleRate == sampleRate) + { this->mResampled.resize(this->mRawAudio.size()); - memcpy(this->mResampled.data(), this->mRawAudio.data(), - this->mResampled.size()); - } else { + memcpy(this->mResampled.data(), this->mRawAudio.data(), this->mResampled.size()); + } + else + { // Cubic resampling std::vector padded; padded.resize(this->mRawAudio.size() + 2); padded[0] = 0.0f; padded[padded.size() - 1] = 0.0f; memcpy(padded.data() + 1, this->mRawAudio.data(), this->mRawAudio.size()); - dsp::ResampleCubic(padded, this->mRawAudioSampleRate, sampleRate, - 0.0, this->mResampled); + dsp::ResampleCubic(padded, this->mRawAudioSampleRate, sampleRate, 0.0, this->mResampled); } // Simple implementation w/ no resample... const size_t irLength = std::min(this->mResampled.size(), this->mMaxLength); diff --git a/dsp/ImpulseResponse.h b/dsp/ImpulseResponse.h index 7a14027..ce8bb21 100644 --- a/dsp/ImpulseResponse.h +++ b/dsp/ImpulseResponse.h @@ -15,12 +15,13 @@ #include "dsp.h" #include "wav.h" -namespace dsp { -class ImpulseResponse : public History { +namespace dsp +{ +class ImpulseResponse : public History +{ public: ImpulseResponse(const char* fileName, const double sampleRate); - double **Process(double **inputs, const size_t numChannels, - const size_t numFrames) override; + double** Process(double** inputs, const size_t numChannels, const size_t numFrames) override; // TODO states for the IR class dsp::wav::LoadReturnCode GetWavState() const { return this->mWavState; }; diff --git a/dsp/NoiseGate.cpp b/dsp/NoiseGate.cpp index 2ea6f4b..e16d056 100644 --- a/dsp/NoiseGate.cpp +++ b/dsp/NoiseGate.cpp @@ -9,69 +9,85 @@ #include "NoiseGate.h" -double _LevelToDB(const double db) { return 10.0 * log10(db); } +double _LevelToDB(const double db) +{ + return 10.0 * log10(db); +} -double _DBToLevel(const double level) { return pow(10.0, level / 10.0); } +double _DBToLevel(const double level) +{ + return pow(10.0, level / 10.0); +} dsp::noise_gate::Trigger::Trigger() - : mParams(0.05, -60.0, 1.5, 0.002, 0.050, 0.050), mSampleRate(0) {} +: mParams(0.05, -60.0, 1.5, 0.002, 0.050, 0.050) +, mSampleRate(0) +{ +} -double signum(const double val) { return (0.0 < val) - (val < 0.0); } +double signum(const double val) +{ + return (0.0 < val) - (val < 0.0); +} -double **dsp::noise_gate::Trigger::Process(double **inputs, - const size_t numChannels, - const size_t numFrames) { +double** dsp::noise_gate::Trigger::Process(double** inputs, const size_t numChannels, const size_t numFrames) +{ this->_PrepareBuffers(numChannels, numFrames); // A bunch of numbers we'll use a few times. - const double alpha = - pow(0.5, 1.0 / (this->mParams.GetTime() * this->mSampleRate)); + const double alpha = pow(0.5, 1.0 / (this->mParams.GetTime() * this->mSampleRate)); const double beta = 1.0 - alpha; const double threshold = this->mParams.GetThreshold(); const double dt = 1.0 / this->mSampleRate; const double maxHold = this->mParams.GetHoldTime(); const double maxGainReduction = this->_GetMaxGainReduction(); // Amount of open or close in a sample: rate times time - const double dOpen = - -this->_GetMaxGainReduction() / this->mParams.GetOpenTime() * dt; // >0 - const double dClose = - this->_GetMaxGainReduction() / this->mParams.GetCloseTime() * dt; // <0 + const double dOpen = -this->_GetMaxGainReduction() / this->mParams.GetOpenTime() * dt; // >0 + const double dClose = this->_GetMaxGainReduction() / this->mParams.GetCloseTime() * dt; // <0 // The main algorithm: compute the gain reduction - for (auto c = 0; c < numChannels; c++) { - for (auto s = 0; s < numFrames; s++) { - this->mLevel[c] = std::clamp(alpha * this->mLevel[c] + - beta * (inputs[c][s] * inputs[c][s]), - MINIMUM_LOUDNESS_POWER, 1000.0); + for (auto c = 0; c < numChannels; c++) + { + for (auto s = 0; s < numFrames; s++) + { + this->mLevel[c] = + std::clamp(alpha * this->mLevel[c] + beta * (inputs[c][s] * inputs[c][s]), MINIMUM_LOUDNESS_POWER, 1000.0); const double levelDB = _LevelToDB(this->mLevel[c]); - if (this->mState[c] == dsp::noise_gate::Trigger::State::HOLDING) { + if (this->mState[c] == dsp::noise_gate::Trigger::State::HOLDING) + { this->mGainReductionDB[c][s] = 0.0; this->mLastGainReductionDB[c] = 0.0; - if (levelDB < threshold) { + if (levelDB < threshold) + { this->mTimeHeld[c] += dt; if (this->mTimeHeld[c] >= maxHold) this->mState[c] = dsp::noise_gate::Trigger::State::MOVING; - } else { + } + else + { this->mTimeHeld[c] = 0.0; } - } else { // Moving + } + else + { // Moving const double targetGainReduction = this->_GetGainReduction(levelDB); - if (targetGainReduction > this->mLastGainReductionDB[c]) { - const double dGain = std::clamp( - 0.5 * (targetGainReduction - this->mLastGainReductionDB[c]), 0.0, - dOpen); + if (targetGainReduction > this->mLastGainReductionDB[c]) + { + const double dGain = std::clamp(0.5 * (targetGainReduction - this->mLastGainReductionDB[c]), 0.0, dOpen); this->mLastGainReductionDB[c] += dGain; - if (this->mLastGainReductionDB[c] >= 0.0) { + if (this->mLastGainReductionDB[c] >= 0.0) + { this->mLastGainReductionDB[c] = 0.0; this->mState[c] = dsp::noise_gate::Trigger::State::HOLDING; this->mTimeHeld[c] = 0.0; } - } else if (targetGainReduction < this->mLastGainReductionDB[c]) { - const double dGain = std::clamp( - 0.5 * (targetGainReduction - this->mLastGainReductionDB[c]), - dClose, 0.0); + } + else if (targetGainReduction < this->mLastGainReductionDB[c]) + { + const double dGain = std::clamp(0.5 * (targetGainReduction - this->mLastGainReductionDB[c]), dClose, 0.0); this->mLastGainReductionDB[c] += dGain; - if (this->mLastGainReductionDB[c] < maxGainReduction) { + if (this->mLastGainReductionDB[c] < maxGainReduction) + { this->mLastGainReductionDB[c] = maxGainReduction; } } @@ -81,8 +97,7 @@ double **dsp::noise_gate::Trigger::Process(double **inputs, } // Share the results with gain objects that are listening to this trigger: - for (auto gain = this->mGainListeners.begin(); - gain != this->mGainListeners.end(); ++gain) + for (auto gain = this->mGainListeners.begin(); gain != this->mGainListeners.end(); ++gain) (*gain)->SetGainReductionDB(this->mGainReductionDB); // Copy input to output @@ -91,8 +106,8 @@ double **dsp::noise_gate::Trigger::Process(double **inputs, return this->_GetPointers(); } -void dsp::noise_gate::Trigger::_PrepareBuffers(const size_t numChannels, - const size_t numFrames) { +void dsp::noise_gate::Trigger::_PrepareBuffers(const size_t numChannels, const size_t numFrames) +{ const size_t oldChannels = this->_GetNumChannels(); const size_t oldFrames = this->_GetNumFrames(); this->DSP::_PrepareBuffers(numChannels, numFrames); @@ -100,27 +115,27 @@ void dsp::noise_gate::Trigger::_PrepareBuffers(const size_t numChannels, const bool updateChannels = numChannels != oldChannels; const bool updateFrames = updateChannels || numFrames != oldFrames; - if (updateChannels || updateFrames) { + if (updateChannels || updateFrames) + { const double maxGainReduction = this->_GetMaxGainReduction(); - if (updateChannels) { + if (updateChannels) + { this->mGainReductionDB.resize(numChannels); this->mLastGainReductionDB.resize(numChannels); - std::fill(this->mLastGainReductionDB.begin(), - this->mLastGainReductionDB.end(), maxGainReduction); + std::fill(this->mLastGainReductionDB.begin(), this->mLastGainReductionDB.end(), maxGainReduction); this->mState.resize(numChannels); - std::fill(this->mState.begin(), this->mState.end(), - dsp::noise_gate::Trigger::State::MOVING); + std::fill(this->mState.begin(), this->mState.end(), dsp::noise_gate::Trigger::State::MOVING); this->mLevel.resize(numChannels); - std::fill(this->mLevel.begin(), this->mLevel.end(), - MINIMUM_LOUDNESS_POWER); + std::fill(this->mLevel.begin(), this->mLevel.end(), MINIMUM_LOUDNESS_POWER); this->mTimeHeld.resize(numChannels); std::fill(this->mTimeHeld.begin(), this->mTimeHeld.end(), 0.0); } - if (updateFrames) { - for (auto i = 0; i < this->mGainReductionDB.size(); i++) { + if (updateFrames) + { + for (auto i = 0; i < this->mGainReductionDB.size(); i++) + { this->mGainReductionDB[i].resize(numFrames); - std::fill(this->mGainReductionDB[i].begin(), - this->mGainReductionDB[i].end(), maxGainReduction); + std::fill(this->mGainReductionDB[i].begin(), this->mGainReductionDB[i].end(), maxGainReduction); } } } @@ -128,27 +143,29 @@ void dsp::noise_gate::Trigger::_PrepareBuffers(const size_t numChannels, // Gain======================================================================== -double **dsp::noise_gate::Gain::Process(double **inputs, const size_t numChannels, - const size_t numFrames) { +double** dsp::noise_gate::Gain::Process(double** inputs, const size_t numChannels, const size_t numFrames) +{ // Assume that SetGainReductionDB() was just called to get data from a // trigger. Could use listeners... this->_PrepareBuffers(numChannels, numFrames); - if (this->mGainReductionDB.size() != numChannels) { + if (this->mGainReductionDB.size() != numChannels) + { std::stringstream ss; - ss << "Gain module expected to operate on " << this->mGainReductionDB.size() - << "channels, but " << numChannels << " were provided."; + ss << "Gain module expected to operate on " << this->mGainReductionDB.size() << "channels, but " << numChannels + << " were provided."; throw std::runtime_error(ss.str()); } - if ((this->mGainReductionDB.size() == 0) && (numFrames > 0)) { + if ((this->mGainReductionDB.size() == 0) && (numFrames > 0)) + { std::stringstream ss; - ss << "No channels expected by gain module, yet " << numFrames - << " were provided?"; + ss << "No channels expected by gain module, yet " << numFrames << " were provided?"; throw std::runtime_error(ss.str()); - } else if (this->mGainReductionDB[0].size() != numFrames) { + } + else if (this->mGainReductionDB[0].size() != numFrames) + { std::stringstream ss; - ss << "Gain module expected to operate on " - << this->mGainReductionDB[0].size() << "frames, but " << numFrames + ss << "Gain module expected to operate on " << this->mGainReductionDB[0].size() << "frames, but " << numFrames << " were provided."; throw std::runtime_error(ss.str()); } @@ -156,8 +173,7 @@ double **dsp::noise_gate::Gain::Process(double **inputs, const size_t numChannel // Apply gain! for (auto c = 0; c < numChannels; c++) for (auto s = 0; s < numFrames; s++) - this->mOutputs[c][s] = - _DBToLevel(this->mGainReductionDB[c][s]) * inputs[c][s]; + this->mOutputs[c][s] = _DBToLevel(this->mGainReductionDB[c][s]) * inputs[c][s]; return this->_GetPointers(); } diff --git a/dsp/NoiseGate.h b/dsp/NoiseGate.h index b859778..d6effa7 100644 --- a/dsp/NoiseGate.h +++ b/dsp/NoiseGate.h @@ -13,8 +13,10 @@ #include "dsp.h" -namespace dsp { -namespace noise_gate { +namespace dsp +{ +namespace noise_gate +{ // Disclaimer: No one told me how noise gates work. I'm just going to try // and have fun with it and see if I like what I get! :D @@ -30,12 +32,13 @@ const double MINIMUM_LOUDNESS_POWER = pow(10.0, MINIMUM_LOUDNESS_DB / 10.0); // forward declaration. // The class that applies the gain reductions calculated by a trigger instance. -class Gain : public DSP { +class Gain : public DSP +{ public: - double **Process(double **inputs, const size_t numChannels, - const size_t numFrames) override; + double** Process(double** inputs, const size_t numChannels, const size_t numFrames) override; - void SetGainReductionDB(std::vector> &gainReductionDB) { + void SetGainReductionDB(std::vector>& gainReductionDB) + { this->mGainReductionDB = gainReductionDB; } @@ -47,13 +50,17 @@ class Gain : public DSP { // This listens to a stream of incoming audio and determines how much gain // to apply based on the loudness of the signal. -class TriggerParams { +class TriggerParams +{ public: - TriggerParams(const double time, const double threshold, const double ratio, - const double openTime, const double holdTime, - const double closeTime) - : mTime(time), mThreshold(threshold), mRatio(ratio), mOpenTime(openTime), - mHoldTime(holdTime), mCloseTime(closeTime){}; + TriggerParams(const double time, const double threshold, const double ratio, const double openTime, + const double holdTime, const double closeTime) + : mTime(time) + , mThreshold(threshold) + , mRatio(ratio) + , mOpenTime(openTime) + , mHoldTime(holdTime) + , mCloseTime(closeTime){}; double GetTime() const { return this->mTime; }; double GetThreshold() const { return this->mThreshold; }; @@ -77,45 +84,39 @@ class TriggerParams { double mCloseTime; }; -class Trigger : public DSP { +class Trigger : public DSP +{ public: Trigger(); - double **Process(double **inputs, const size_t numChannels, - const size_t numFrames) override; - std::vector> GetGainReduction() const { - return this->mGainReductionDB; - }; - void SetParams(const TriggerParams ¶ms) { this->mParams = params; }; - void SetSampleRate(const double sampleRate) { - this->mSampleRate = sampleRate; - } - std::vector> GetGainReductionDB() const { - return this->mGainReductionDB; - }; + double** Process(double** inputs, const size_t numChannels, const size_t numFrames) override; + std::vector> GetGainReduction() const { return this->mGainReductionDB; }; + void SetParams(const TriggerParams& params) { this->mParams = params; }; + void SetSampleRate(const double sampleRate) { this->mSampleRate = sampleRate; } + std::vector> GetGainReductionDB() const { return this->mGainReductionDB; }; - void AddListener(Gain *gain) { + void AddListener(Gain* gain) + { // This might be risky dropping a raw pointer, but I don't think that the // gain would be destructed, so probably ok. this->mGainListeners.insert(gain); } private: - enum class State { MOVING = 0, HOLDING }; + enum class State + { + MOVING = 0, + HOLDING + }; - double _GetGainReduction(const double levelDB) const { + double _GetGainReduction(const double levelDB) const + { const double threshold = this->mParams.GetThreshold(); // Quadratic gain reduction? :) - return levelDB < threshold - ? -(this->mParams.GetRatio()) * (levelDB - threshold) * - (levelDB - threshold) - : 0.0; - } - double _GetMaxGainReduction() const { - return this->_GetGainReduction(MINIMUM_LOUDNESS_DB); + return levelDB < threshold ? -(this->mParams.GetRatio()) * (levelDB - threshold) * (levelDB - threshold) : 0.0; } - virtual void _PrepareBuffers(const size_t numChannels, - const size_t numFrames) override; + double _GetMaxGainReduction() const { return this->_GetGainReduction(MINIMUM_LOUDNESS_DB); } + virtual void _PrepareBuffers(const size_t numChannels, const size_t numFrames) override; TriggerParams mParams; std::vector mState; // One per channel @@ -130,7 +131,7 @@ class Trigger : public DSP { // How long we've been holding std::vector mTimeHeld; - std::unordered_set mGainListeners; + std::unordered_set mGainListeners; }; }; // namespace noise_gate diff --git a/dsp/RecursiveLinearFilter.cpp b/dsp/RecursiveLinearFilter.cpp index 1fe6751..d370bab 100644 --- a/dsp/RecursiveLinearFilter.cpp +++ b/dsp/RecursiveLinearFilter.cpp @@ -7,22 +7,23 @@ // See: https://webaudio.github.io/Audio-EQ-Cookbook/audio-eq-cookbook.html #include // std::fill -#include // isnan +#include // isnan #include #include "RecursiveLinearFilter.h" -recursive_linear_filter::Base::Base(const size_t inputDegree, - const size_t outputDegree) - : dsp::DSP(), mInputStart(inputDegree), // 1 is subtracted before first use - mOutputStart(outputDegree) { +recursive_linear_filter::Base::Base(const size_t inputDegree, const size_t outputDegree) +: dsp::DSP() +, mInputStart(inputDegree) +, // 1 is subtracted before first use +mOutputStart(outputDegree) +{ this->mInputCoefficients.resize(inputDegree); this->mOutputCoefficients.resize(outputDegree); } -double **recursive_linear_filter::Base::Process(double **inputs, - const size_t numChannels, - const size_t numFrames) { +double** recursive_linear_filter::Base::Process(double** inputs, const size_t numChannels, const size_t numFrames) +{ this->_PrepareBuffers(numChannels, numFrames); long inputStart = 0; long outputStart = 0; @@ -33,10 +34,12 @@ double **recursive_linear_filter::Base::Process(double **inputs, // 0,2,3,... are fine. const size_t inputDegree = this->_GetInputDegree(); const size_t outputDegree = this->_GetOutputDegree(); - for (auto c = 0; c < numChannels; c++) { + for (auto c = 0; c < numChannels; c++) + { inputStart = this->mInputStart; // Should be plenty fine outputStart = this->mOutputStart; - for (auto s = 0; s < numFrames; s++) { + for (auto s = 0; s < numFrames; s++) + { double out = 0.0; // Compute input terms inputStart -= 1; @@ -44,16 +47,14 @@ double **recursive_linear_filter::Base::Process(double **inputs, inputStart = inputDegree - 1; this->mInputHistory[c][inputStart] = inputs[c][s]; // Store current input for (auto i = 0; i < inputDegree; i++) - out += this->mInputCoefficients[i] * - this->mInputHistory[c][(inputStart + i) % inputDegree]; + out += this->mInputCoefficients[i] * this->mInputHistory[c][(inputStart + i) % inputDegree]; // Output terms outputStart -= 1; if (outputStart < 0) outputStart = outputDegree - 1; for (auto i = 1; i < outputDegree; i++) - out += this->mOutputCoefficients[i] * - this->mOutputHistory[c][(outputStart + i) % outputDegree]; + out += this->mOutputCoefficients[i] * this->mOutputHistory[c][(outputStart + i) % outputDegree]; // Prevent a NaN from jamming the filter! if (isnan(out)) out = 0.0; @@ -68,31 +69,31 @@ double **recursive_linear_filter::Base::Process(double **inputs, return this->_GetPointers(); } -void recursive_linear_filter::Base::_PrepareBuffers(const size_t numChannels, - const size_t numFrames) { +void recursive_linear_filter::Base::_PrepareBuffers(const size_t numChannels, const size_t numFrames) +{ // Check for new channel count *before* parent class ensures they match! const bool newChannels = this->_GetNumChannels() != numChannels; // Parent implementation takes care of mOutputs and mOutputPointers this->dsp::DSP::_PrepareBuffers(numChannels, numFrames); - if (newChannels) { + if (newChannels) + { this->mInputHistory.resize(numChannels); this->mOutputHistory.resize(numChannels); const size_t inputDegree = this->_GetInputDegree(); const size_t outputDegree = this->_GetOutputDegree(); - for (auto c = 0; c < numChannels; c++) { + for (auto c = 0; c < numChannels; c++) + { this->mInputHistory[c].resize(inputDegree); this->mOutputHistory[c].resize(outputDegree); - std::fill(this->mInputHistory[c].begin(), this->mInputHistory[c].end(), - 0.0); - std::fill(this->mOutputHistory[c].begin(), this->mOutputHistory[c].end(), - 0.0); + std::fill(this->mInputHistory[c].begin(), this->mInputHistory[c].end(), 0.0); + std::fill(this->mOutputHistory[c].begin(), this->mOutputHistory[c].end(), 0.0); } } } -void recursive_linear_filter::Biquad::_AssignCoefficients( - const double a0, const double a1, const double a2, const double b0, - const double b1, const double b2) { +void recursive_linear_filter::Biquad::_AssignCoefficients(const double a0, const double a1, const double a2, + const double b0, const double b1, const double b2) +{ this->mInputCoefficients[0] = b0 / a0; this->mInputCoefficients[1] = b1 / a0; this->mInputCoefficients[2] = b2 / a0; @@ -102,8 +103,8 @@ void recursive_linear_filter::Biquad::_AssignCoefficients( this->mOutputCoefficients[2] = -a2 / a0; } -void recursive_linear_filter::LowShelf::SetParams( - const recursive_linear_filter::BiquadParams ¶ms) { +void recursive_linear_filter::LowShelf::SetParams(const recursive_linear_filter::BiquadParams& params) +{ const double a = params.GetA(); const double omega_0 = params.GetOmega0(); const double alpha = params.GetAlpha(omega_0); @@ -123,8 +124,8 @@ void recursive_linear_filter::LowShelf::SetParams( this->_AssignCoefficients(a0, a1, a2, b0, b1, b2); } -void recursive_linear_filter::Peaking::SetParams( - const recursive_linear_filter::BiquadParams ¶ms) { +void recursive_linear_filter::Peaking::SetParams(const recursive_linear_filter::BiquadParams& params) +{ const double a = params.GetA(); const double omega_0 = params.GetOmega0(); const double alpha = params.GetAlpha(omega_0); @@ -140,8 +141,8 @@ void recursive_linear_filter::Peaking::SetParams( this->_AssignCoefficients(a0, a1, a2, b0, b1, b2); } -void recursive_linear_filter::HighShelf::SetParams( - const recursive_linear_filter::BiquadParams ¶ms) { +void recursive_linear_filter::HighShelf::SetParams(const recursive_linear_filter::BiquadParams& params) +{ const double a = params.GetA(); const double omega_0 = params.GetOmega0(); const double alpha = params.GetAlpha(omega_0); diff --git a/dsp/RecursiveLinearFilter.h b/dsp/RecursiveLinearFilter.h index cb609be..e26a802 100644 --- a/dsp/RecursiveLinearFilter.h +++ b/dsp/RecursiveLinearFilter.h @@ -16,20 +16,20 @@ // TODO refactor base DSP into a common abstraction. -namespace recursive_linear_filter { -class Base : public dsp::DSP { +namespace recursive_linear_filter +{ +class Base : public dsp::DSP +{ public: Base(const size_t inputDegree, const size_t outputDegree); - double **Process(double **inputs, const size_t numChannels, - const size_t numFrames) override; + double** Process(double** inputs, const size_t numChannels, const size_t numFrames) override; protected: // Methods size_t _GetInputDegree() const { return this->mInputCoefficients.size(); }; size_t _GetOutputDegree() const { return this->mOutputCoefficients.size(); }; // Additionally prepares mInputHistory and mOutputHistory. - void _PrepareBuffers(const size_t numChannels, - const size_t numFrames) override; + void _PrepareBuffers(const size_t numChannels, const size_t numFrames) override; // Coefficients for the DSP filter std::vector mInputCoefficients; @@ -47,9 +47,12 @@ class Base : public dsp::DSP { long mOutputStart; }; -class LevelParams : public dsp::Params { +class LevelParams : public dsp::Params +{ public: - LevelParams(const double gain) : Params(), mGain(gain){}; + LevelParams(const double gain) + : Params() + , mGain(gain){}; double GetGain() const { return this->mGain; }; private: @@ -57,35 +60,34 @@ class LevelParams : public dsp::Params { double mGain; }; -class Level : public Base { +class Level : public Base +{ public: - Level() : Base(1, 0){}; + Level() + : Base(1, 0){}; // Invalid usage: require a pointer to recursive_linear_filter::Params so // that SetCoefficients() is defined. - void SetParams(const LevelParams ¶ms) { - this->mInputCoefficients[0] = params.GetGain(); - }; + void SetParams(const LevelParams& params) { this->mInputCoefficients[0] = params.GetGain(); }; ; }; // The same 3 params (frequency, quality, gain) describe a bunch of filters. // (Low shelf, high shelf, peaking) -class BiquadParams : public dsp::Params { +class BiquadParams : public dsp::Params +{ public: - BiquadParams(const double sampleRate, const double frequency, - const double quality, const double gainDB) - : dsp::Params(), mFrequency(frequency), mGainDB(gainDB), - mQuality(quality), mSampleRate(sampleRate){}; + BiquadParams(const double sampleRate, const double frequency, const double quality, const double gainDB) + : dsp::Params() + , mFrequency(frequency) + , mGainDB(gainDB) + , mQuality(quality) + , mSampleRate(sampleRate){}; // Parameters defined in // https://webaudio.github.io/Audio-EQ-Cookbook/audio-eq-cookbook.html double GetA() const { return pow(10.0, this->mGainDB / 40.0); }; - double GetOmega0() const { - return 2.0 * MATH_PI * this->mFrequency / this->mSampleRate; - }; - double GetAlpha(const double omega_0) const { - return sin(omega_0) / (2.0 * this->mQuality); - }; + double GetOmega0() const { return 2.0 * MATH_PI * this->mFrequency / this->mSampleRate; }; + double GetAlpha(const double omega_0) const { return sin(omega_0) / (2.0 * this->mQuality); }; double GetCosW(const double omega_0) const { return cos(omega_0); }; private: @@ -95,28 +97,33 @@ class BiquadParams : public dsp::Params { double mSampleRate; }; -class Biquad : public Base { +class Biquad : public Base +{ public: - Biquad() : Base(3, 3){}; - virtual void SetParams(const BiquadParams ¶ms) = 0; + Biquad() + : Base(3, 3){}; + virtual void SetParams(const BiquadParams& params) = 0; protected: - void _AssignCoefficients(const double a0, const double a1, const double a2, - const double b0, const double b1, const double b2); + void _AssignCoefficients(const double a0, const double a1, const double a2, const double b0, const double b1, + const double b2); }; -class LowShelf : public Biquad { +class LowShelf : public Biquad +{ public: - void SetParams(const BiquadParams ¶ms) override; + void SetParams(const BiquadParams& params) override; }; -class Peaking : public Biquad { +class Peaking : public Biquad +{ public: - void SetParams(const BiquadParams ¶ms) override; + void SetParams(const BiquadParams& params) override; }; -class HighShelf : public Biquad { +class HighShelf : public Biquad +{ public: - void SetParams(const BiquadParams ¶ms) override; + void SetParams(const BiquadParams& params) override; }; }; // namespace recursive_linear_filter \ No newline at end of file diff --git a/dsp/Resample.h b/dsp/Resample.h index 66100e7..493922e 100644 --- a/dsp/Resample.h +++ b/dsp/Resample.h @@ -11,7 +11,8 @@ #include #include -namespace dsp { +namespace dsp +{ // Resample a provided vector in inputs to outputs. // Creates an array of the required length to fill all points from the SECOND // input to the SECOND-TO-LAST input point, exclusive. @@ -20,24 +21,23 @@ namespace dsp { // tOutputStart: location of first output point relative to the second input // point (should be >=0.0) template -void ResampleCubic(const std::vector &inputs, - const double originalSampleRate, - const double desiredSampleRate, const double tOutputStart, - std::vector &outputs); +void ResampleCubic(const std::vector& inputs, const double originalSampleRate, const double desiredSampleRate, + const double tOutputStart, std::vector& outputs); // Interpolate the 4 provided equispaced points to x in [-1,2] -template T _CubicInterpolation(T p[4], T x) { - return p[1] + 0.5 * x * - (p[2] - p[0] + - x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + - x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); +template +T _CubicInterpolation(T p[4], T x) +{ + return p[1] + + 0.5 * x + * (p[2] - p[0] + + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); }; }; // namespace dsp template -void dsp::ResampleCubic(const std::vector &inputs, - const double originalSampleRate, - const double desiredSampleRate, - const double tOutputStart, std::vector &outputs) { +void dsp::ResampleCubic(const std::vector& inputs, const double originalSampleRate, const double desiredSampleRate, + const double tOutputStart, std::vector& outputs) +{ if (tOutputStart < 0.0) throw std::runtime_error("Starting time must be non-negative"); @@ -51,7 +51,8 @@ void dsp::ResampleCubic(const std::vector &inputs, double time = timeIncrement + tOutputStart; const double endTimeOriginal = (inputs.size() - 1) * timeIncrement; - while (time < endTimeOriginal) { + while (time < endTimeOriginal) + { // Find the index of the sample in the original audio file that is just // before the current time in the resampled audio file long index = (long)std::floor(time / timeIncrement); @@ -65,15 +66,12 @@ void dsp::ResampleCubic(const std::vector &inputs, double p[4]; p[0] = (index == 0) ? inputs[0] : inputs[index - 1]; p[1] = inputs[index]; - p[2] = (index == inputs.size() - 1) ? inputs[inputs.size() - 1] - : inputs[index + 1]; - p[3] = (index == inputs.size() - 2) ? inputs[inputs.size() - 1] - : inputs[index + 2]; + p[2] = (index == inputs.size() - 1) ? inputs[inputs.size() - 1] : inputs[index + 1]; + p[3] = (index == inputs.size() - 2) ? inputs[inputs.size() - 1] : inputs[index + 2]; // Use cubic interpolation to estimate the value of the audio signal at the // current time in the resampled audio file - T resampledValue = - dsp::_CubicInterpolation(p, timeDifference / timeIncrement); + T resampledValue = dsp::_CubicInterpolation(p, timeDifference / timeIncrement); // Add the estimated value to the resampled audio file outputs.push_back(resampledValue); diff --git a/dsp/dsp.cpp b/dsp/dsp.cpp index 6ca5a4d..8936e29 100644 --- a/dsp/dsp.cpp +++ b/dsp/dsp.cpp @@ -12,22 +12,31 @@ // ============================================================================ // Implementation of Version 2 interface -dsp::DSP::DSP() : mOutputPointers(nullptr), mOutputPointersSize(0) {} +dsp::DSP::DSP() +: mOutputPointers(nullptr) +, mOutputPointersSize(0) +{ +} -dsp::DSP::~DSP() { this->_DeallocateOutputPointers(); }; +dsp::DSP::~DSP() +{ + this->_DeallocateOutputPointers(); +}; -void dsp::DSP::_AllocateOutputPointers(const size_t numChannels) { +void dsp::DSP::_AllocateOutputPointers(const size_t numChannels) +{ if (this->mOutputPointers != nullptr) - throw std::runtime_error( - "Tried to re-allocate over non-null mOutputPointers"); - this->mOutputPointers = new double *[numChannels]; + throw std::runtime_error("Tried to re-allocate over non-null mOutputPointers"); + this->mOutputPointers = new double*[numChannels]; if (this->mOutputPointers == nullptr) throw std::runtime_error("Failed to allocate pointer to output buffer!\n"); this->mOutputPointersSize = numChannels; } -void dsp::DSP::_DeallocateOutputPointers() { - if (this->mOutputPointers != nullptr) { +void dsp::DSP::_DeallocateOutputPointers() +{ + if (this->mOutputPointers != nullptr) + { delete[] this->mOutputPointers; this->mOutputPointers = nullptr; } @@ -36,20 +45,22 @@ void dsp::DSP::_DeallocateOutputPointers() { this->mOutputPointersSize = 0; } -double **dsp::DSP::_GetPointers() { +double** dsp::DSP::_GetPointers() +{ for (auto c = 0; c < this->_GetNumChannels(); c++) this->mOutputPointers[c] = this->mOutputs[c].data(); return this->mOutputPointers; } -void dsp::DSP::_PrepareBuffers(const size_t numChannels, - const size_t numFrames) { +void dsp::DSP::_PrepareBuffers(const size_t numChannels, const size_t numFrames) +{ const size_t oldFrames = this->_GetNumFrames(); const size_t oldChannels = this->_GetNumChannels(); const bool resizeChannels = oldChannels != numChannels; const bool resizeFrames = resizeChannels || (oldFrames != numFrames); - if (resizeChannels) { + if (resizeChannels) + { this->mOutputs.resize(numChannels); this->_ResizePointers(numChannels); } @@ -58,24 +69,32 @@ void dsp::DSP::_PrepareBuffers(const size_t numChannels, this->mOutputs[c].resize(numFrames); } -void dsp::DSP::_ResizePointers(const size_t numChannels) { +void dsp::DSP::_ResizePointers(const size_t numChannels) +{ if (this->mOutputPointersSize == numChannels) return; this->_DeallocateOutputPointers(); this->_AllocateOutputPointers(numChannels); } -dsp::History::History() : DSP(), mHistoryRequired(0), mHistoryIndex(0) {} +dsp::History::History() +: DSP() +, mHistoryRequired(0) +, mHistoryIndex(0) +{ +} -void dsp::History::_AdvanceHistoryIndex(const size_t bufferSize) { +void dsp::History::_AdvanceHistoryIndex(const size_t bufferSize) +{ this->mHistoryIndex += bufferSize; } -void dsp::History::_EnsureHistorySize(const size_t bufferSize) { +void dsp::History::_EnsureHistorySize(const size_t bufferSize) +{ const size_t repeatSize = std::max(bufferSize, this->mHistoryRequired); - const size_t requiredHistoryArraySize = - 10 * repeatSize; // Just so we don't spend too much time copying back. - if (this->mHistory.size() < requiredHistoryArraySize) { + const size_t requiredHistoryArraySize = 10 * repeatSize; // Just so we don't spend too much time copying back. + if (this->mHistory.size() < requiredHistoryArraySize) + { this->mHistory.resize(requiredHistoryArraySize); std::fill(this->mHistory.begin(), this->mHistory.end(), 0.0f); this->mHistoryIndex = this->mHistoryRequired; // Guaranteed to be less than @@ -83,16 +102,16 @@ void dsp::History::_EnsureHistorySize(const size_t bufferSize) { } } -void dsp::History::_RewindHistory() { +void dsp::History::_RewindHistory() +{ // TODO memcpy? Should be fine w/ history array being >2x the history length. - for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired; - i < this->mHistoryRequired; i++, j++) + for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired; i < this->mHistoryRequired; i++, j++) this->mHistory[i] = this->mHistory[j]; this->mHistoryIndex = this->mHistoryRequired; } -void dsp::History::_UpdateHistory(double **inputs, const size_t numChannels, - const size_t numFrames) { +void dsp::History::_UpdateHistory(double** inputs, const size_t numChannels, const size_t numFrames) +{ this->_EnsureHistorySize(numFrames); if (numChannels < 1) throw std::runtime_error("Zero channels?"); diff --git a/dsp/dsp.h b/dsp/dsp.h index fdc2971..35ffdf7 100644 --- a/dsp/dsp.h +++ b/dsp/dsp.h @@ -9,10 +9,14 @@ // Version 2 DSP abstraction ================================================== -namespace dsp { -class Params {}; +namespace dsp +{ +class Params +{ +}; -class DSP { +class DSP +{ public: DSP(); ~DSP(); @@ -22,8 +26,7 @@ class DSP { // The output shall be a pointer-to-pointers of matching size. // This object instance will own the data referenced by the pointers and be // responsible for its allocation and deallocation. - virtual double **Process(double **inputs, const size_t numChannels, - const size_t numFrames) = 0; + virtual double** Process(double** inputs, const size_t numChannels, const size_t numFrames) = 0; // Update the parameters of the DSP object according to the provided params. // Not declaring a pure virtual bc there's no concrete definition that can // use Params. @@ -40,16 +43,13 @@ class DSP { void _DeallocateOutputPointers(); size_t _GetNumChannels() const { return this->mOutputs.size(); }; - size_t _GetNumFrames() const { - return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0; - } + size_t _GetNumFrames() const { return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0; } // Return a pointer-to-pointers for the DSP's output buffers (all channels) // Assumes that ._PrepareBuffers() was called recently enough. - double **_GetPointers(); + double** _GetPointers(); // Resize mOutputs to (numChannels, numFrames) and ensure that the raw // pointers are also keeping up. - virtual void _PrepareBuffers(const size_t numChannels, - const size_t numFrames); + virtual void _PrepareBuffers(const size_t numChannels, const size_t numFrames); // Resize the pointer-to-pointers for the vector-of-vectors. void _ResizePointers(const size_t numChannels); @@ -62,7 +62,7 @@ class DSP { // A pointer to pointers of which copies will be given out as the output of // .Process(). This object will ensure proper allocation and deallocation of // the first level; The second level points to .data() from mOutputs. - double **mOutputPointers; + double** mOutputPointers; size_t mOutputPointersSize; }; @@ -72,7 +72,8 @@ class DSP { // Hacky stuff: // * Mono // * Single-precision floats. -class History : public DSP { +class History : public DSP +{ public: History(); @@ -82,8 +83,7 @@ class History : public DSP { void _AdvanceHistoryIndex(const size_t bufferSize); // Drop the new samples into the history array. // Manages history array size - void _UpdateHistory(double **inputs, const size_t numChannels, - const size_t numFrames); + void _UpdateHistory(double** inputs, const size_t numChannels, const size_t numFrames); // The history array that's used for DSP calculations. std::vector mHistory; diff --git a/dsp/wav.cpp b/dsp/wav.cpp index d6905df..98b7279 100644 --- a/dsp/wav.cpp +++ b/dsp/wav.cpp @@ -14,17 +14,19 @@ #include "wav.h" -bool idIsJunk(char *id) { - return strncmp(id, "junk", 4) == 0 || strncmp(id, "JUNK", 4) == 0 || - strncmp(id, "smpl", 4) == 0 || strncmp(id, "LIST", 4) == 0 || - strncmp(id, "bext", 4) == 0 || strncmp(id, "PAD ", 4) == 0; +bool idIsJunk(char* id) +{ + return strncmp(id, "junk", 4) == 0 || strncmp(id, "JUNK", 4) == 0 || strncmp(id, "smpl", 4) == 0 + || strncmp(id, "LIST", 4) == 0 || strncmp(id, "bext", 4) == 0 || strncmp(id, "PAD ", 4) == 0; } -bool ReadChunkAndSkipJunk(std::ifstream &file, char *chunkID) { +bool ReadChunkAndSkipJunk(std::ifstream& file, char* chunkID) +{ file.read(chunkID, 4); - while (idIsJunk(chunkID) && file.good()) { + while (idIsJunk(chunkID) && file.good()) + { int junkSize; - file.read(reinterpret_cast(&junkSize), 4); + file.read(reinterpret_cast(&junkSize), 4); file.ignore(junkSize); // Unused byte if junkSize is odd if ((junkSize % 2) == 1) @@ -35,15 +37,15 @@ bool ReadChunkAndSkipJunk(std::ifstream &file, char *chunkID) { return file.good(); } -dsp::wav::LoadReturnCode dsp::wav::Load(const char *fileName, - std::vector &audio, - double &sampleRate) { +dsp::wav::LoadReturnCode dsp::wav::Load(const char* fileName, std::vector& audio, double& sampleRate) +{ // FYI: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html // Open the WAV file for reading std::ifstream wavFile(fileName, std::ios::binary); // Check if the file was opened successfully - if (!wavFile.is_open()) { + if (!wavFile.is_open()) + { std::cerr << "Error opening WAV file" << std::endl; return dsp::wav::LoadReturnCode::ERROR_OPENING; } @@ -51,43 +53,47 @@ dsp::wav::LoadReturnCode dsp::wav::Load(const char *fileName, // WAV file has 3 "chunks": RIFF ("RIFF"), format ("fmt ") and data ("data"). // Read the WAV file header char chunkId[4]; - if (!ReadChunkAndSkipJunk(wavFile, chunkId)) { + if (!ReadChunkAndSkipJunk(wavFile, chunkId)) + { std::cerr << "Error while reading for next chunk." << std::endl; return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; } - if (strncmp(chunkId, "RIFF", 4) != 0) { - std::cerr << "Error: File does not start with expected RIFF chunk. Got" - << chunkId << " instead." << std::endl; + if (strncmp(chunkId, "RIFF", 4) != 0) + { + std::cerr << "Error: File does not start with expected RIFF chunk. Got" << chunkId << " instead." << std::endl; return dsp::wav::LoadReturnCode::ERROR_NOT_RIFF; } int chunkSize; - wavFile.read(reinterpret_cast(&chunkSize), 4); + wavFile.read(reinterpret_cast(&chunkSize), 4); char format[4]; wavFile.read(format, 4); - if (strncmp(format, "WAVE", 4) != 0) { - std::cerr << "Error: Files' second chunk (format) is not expected WAV. Got" - << format << " instead." << std::endl; + if (strncmp(format, "WAVE", 4) != 0) + { + std::cerr << "Error: Files' second chunk (format) is not expected WAV. Got" << format << " instead." << std::endl; return dsp::wav::LoadReturnCode::ERROR_NOT_WAVE; } // Read the format chunk char subchunk1Id[4]; - if (!ReadChunkAndSkipJunk(wavFile, subchunk1Id)) { + if (!ReadChunkAndSkipJunk(wavFile, subchunk1Id)) + { std::cerr << "Error while reading for next chunk." << std::endl; return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; } - if (strncmp(subchunk1Id, "fmt ", 4) != 0) { - std::cerr << "Error: Invalid WAV file missing expected fmt section; got " - << subchunk1Id << " instead." << std::endl; + if (strncmp(subchunk1Id, "fmt ", 4) != 0) + { + std::cerr << "Error: Invalid WAV file missing expected fmt section; got " << subchunk1Id << " instead." + << std::endl; return dsp::wav::LoadReturnCode::ERROR_MISSING_FMT; } int subchunk1Size; - wavFile.read(reinterpret_cast(&subchunk1Size), 4); - if (subchunk1Size < 16) { + wavFile.read(reinterpret_cast(&subchunk1Size), 4); + if (subchunk1Size < 16) + { std::cerr << "WAV chunk 1 size is " << subchunk1Size << ", which is smaller than the requried 16 to fit the expected " "information." @@ -96,94 +102,100 @@ dsp::wav::LoadReturnCode dsp::wav::Load(const char *fileName, } unsigned short audioFormat; - wavFile.read(reinterpret_cast(&audioFormat), 2); + wavFile.read(reinterpret_cast(&audioFormat), 2); const short AUDIO_FORMAT_PCM = 1; const short AUDIO_FORMAT_IEEE = 3; - std::unordered_set supportedFormats{AUDIO_FORMAT_PCM, - AUDIO_FORMAT_IEEE}; - if (supportedFormats.find(audioFormat) == supportedFormats.end()) { + std::unordered_set supportedFormats{AUDIO_FORMAT_PCM, AUDIO_FORMAT_IEEE}; + if (supportedFormats.find(audioFormat) == supportedFormats.end()) + { std::cerr << "Error: Unsupported WAV format detected. "; - switch (audioFormat) { - case 6: - std::cerr << "(Got: A-law)" << std::endl; - return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_ALAW; - case 7: - std::cerr << "(Got: mu-law)" << std::endl; - return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_MULAW; - case 65534: - std::cerr << "(Got: Extensible)" << std::endl; - return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE; - default: - std::cerr << "(Got unknown format " << audioFormat << ")" << std::endl; - return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; + switch (audioFormat) + { + case 6: std::cerr << "(Got: A-law)" << std::endl; return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_ALAW; + case 7: + std::cerr << "(Got: mu-law)" << std::endl; + return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_MULAW; + case 65534: + std::cerr << "(Got: Extensible)" << std::endl; + return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE; + default: + std::cerr << "(Got unknown format " << audioFormat << ")" << std::endl; + return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; } } short numChannels; - wavFile.read(reinterpret_cast(&numChannels), 2); + wavFile.read(reinterpret_cast(&numChannels), 2); // HACK - if (numChannels != 1) { + if (numChannels != 1) + { std::cerr << "Require mono (using for IR loading)" << std::endl; return dsp::wav::LoadReturnCode::ERROR_NOT_MONO; } int iSampleRate; - wavFile.read(reinterpret_cast(&iSampleRate), 4); + wavFile.read(reinterpret_cast(&iSampleRate), 4); // Store in format we assume (SR is double) sampleRate = (double)iSampleRate; int byteRate; - wavFile.read(reinterpret_cast(&byteRate), 4); + wavFile.read(reinterpret_cast(&byteRate), 4); short blockAlign; - wavFile.read(reinterpret_cast(&blockAlign), 2); + wavFile.read(reinterpret_cast(&blockAlign), 2); short bitsPerSample; - wavFile.read(reinterpret_cast(&bitsPerSample), 2); + wavFile.read(reinterpret_cast(&bitsPerSample), 2); // The default is for there to be 16 bytes in the fmt chunk, but sometimes // it's different. - if (subchunk1Size > 16) { + if (subchunk1Size > 16) + { const int extraBytes = subchunk1Size - 16; const int skipChars = extraBytes / 4; wavFile.ignore(skipChars); const int remainder = extraBytes % 4; - wavFile.read(reinterpret_cast(&byteRate), remainder); + wavFile.read(reinterpret_cast(&byteRate), remainder); } // Read the data chunk char subchunk2Id[4]; - if (!ReadChunkAndSkipJunk(wavFile, subchunk2Id)) { + if (!ReadChunkAndSkipJunk(wavFile, subchunk2Id)) + { std::cerr << "Error while reading for next chunk." << std::endl; return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; } - if (strncmp(subchunk2Id, "data", 4) != 0) { + if (strncmp(subchunk2Id, "data", 4) != 0) + { std::cerr << "Error: Invalid WAV file" << std::endl; return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE; } // Size of the data chunk, in bits. int subchunk2Size; - wavFile.read(reinterpret_cast(&subchunk2Size), 4); + wavFile.read(reinterpret_cast(&subchunk2Size), 4); - if (audioFormat == AUDIO_FORMAT_IEEE) { + if (audioFormat == AUDIO_FORMAT_IEEE) + { if (bitsPerSample == 32) dsp::wav::_LoadSamples32(wavFile, subchunk2Size, audio); - else { - std::cerr << "Error: Unsupported bits per sample for IEEE files: " - << bitsPerSample << std::endl; + else + { + std::cerr << "Error: Unsupported bits per sample for IEEE files: " << bitsPerSample << std::endl; return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE; } - } else if (audioFormat == AUDIO_FORMAT_PCM) { + } + else if (audioFormat == AUDIO_FORMAT_PCM) + { if (bitsPerSample == 16) dsp::wav::_LoadSamples16(wavFile, subchunk2Size, audio); else if (bitsPerSample == 24) dsp::wav::_LoadSamples24(wavFile, subchunk2Size, audio); else if (bitsPerSample == 32) dsp::wav::_LoadSamples32(wavFile, subchunk2Size, audio); - else { - std::cerr << "Error: Unsupported bits per sample for PCM files: " - << bitsPerSample << std::endl; + else + { + std::cerr << "Error: Unsupported bits per sample for PCM files: " << bitsPerSample << std::endl; return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE; } } @@ -197,13 +209,13 @@ dsp::wav::LoadReturnCode dsp::wav::Load(const char *fileName, return dsp::wav::LoadReturnCode::SUCCESS; } -void dsp::wav::_LoadSamples16(std::ifstream &wavFile, const int chunkSize, - std::vector &samples) { +void dsp::wav::_LoadSamples16(std::ifstream& wavFile, const int chunkSize, std::vector& samples) +{ // Allocate an array to hold the samples std::vector tmp(chunkSize / 2); // 16 bits (2 bytes) per sample // Read the samples from the file into the array - wavFile.read(reinterpret_cast(tmp.data()), chunkSize); + wavFile.read(reinterpret_cast(tmp.data()), chunkSize); // Copy into the return array const float scale = 1.0 / ((double)(1 << 15)); @@ -212,12 +224,13 @@ void dsp::wav::_LoadSamples16(std::ifstream &wavFile, const int chunkSize, samples[i] = scale * ((float)tmp[i]); // 2^16 } -void dsp::wav::_LoadSamples24(std::ifstream &wavFile, const int chunkSize, - std::vector &samples) { +void dsp::wav::_LoadSamples24(std::ifstream& wavFile, const int chunkSize, std::vector& samples) +{ // Allocate an array to hold the samples std::vector tmp(chunkSize / 3); // 24 bits (3 bytes) per sample // Read in and convert the samples - for (int &x : tmp) { + for (int& x : tmp) + { x = dsp::wav::_ReadSigned24BitInt(wavFile); } @@ -228,10 +241,11 @@ void dsp::wav::_LoadSamples24(std::ifstream &wavFile, const int chunkSize, samples[i] = scale * ((float)tmp[i]); } -int dsp::wav::_ReadSigned24BitInt(std::ifstream &stream) { +int dsp::wav::_ReadSigned24BitInt(std::ifstream& stream) +{ // Read the three bytes of the 24-bit integer. std::uint8_t bytes[3]; - stream.read(reinterpret_cast(bytes), 3); + stream.read(reinterpret_cast(bytes), 3); // Combine the three bytes into a single integer using bit shifting and // masking. This works by isolating each byte using a bit mask (0xff) and then @@ -241,17 +255,18 @@ int dsp::wav::_ReadSigned24BitInt(std::ifstream &stream) { // The value is stored in two's complement format, so if the most significant // bit (the 24th bit) is set, then the value is negative. In this case, we // need to extend the sign bit to get the correct negative value. - if (value & (1 << 23)) { + if (value & (1 << 23)) + { value |= ~((1 << 24) - 1); } return value; } -void dsp::wav::_LoadSamples32(std::ifstream &wavFile, const int chunkSize, - std::vector &samples) { +void dsp::wav::_LoadSamples32(std::ifstream& wavFile, const int chunkSize, std::vector& samples) +{ // NOTE: 32-bit is float. samples.resize(chunkSize / 4); // 32 bits (4 bytes) per sample // Read the samples from the file into the array - wavFile.read(reinterpret_cast(samples.data()), chunkSize); + wavFile.read(reinterpret_cast(samples.data()), chunkSize); } diff --git a/dsp/wav.h b/dsp/wav.h index 01fce60..f8786cb 100644 --- a/dsp/wav.h +++ b/dsp/wav.h @@ -7,9 +7,12 @@ #pragma once -namespace dsp { -namespace wav { -enum class LoadReturnCode { +namespace dsp +{ +namespace wav +{ +enum class LoadReturnCode +{ SUCCESS = 0, ERROR_OPENING, ERROR_NOT_RIFF, @@ -27,20 +30,16 @@ enum class LoadReturnCode { // And note the sample rate. // // Returns: as per return cases above -LoadReturnCode Load(const char *fileName, std::vector &audio, - double &sampleRate); +LoadReturnCode Load(const char* fileName, std::vector& audio, double& sampleRate); // Load samples, 16-bit -void _LoadSamples16(std::ifstream &wavFile, const int chunkSize, - std::vector &samples); +void _LoadSamples16(std::ifstream& wavFile, const int chunkSize, std::vector& samples); // Load samples, 24-bit -void _LoadSamples24(std::ifstream &wavFile, const int chunkSize, - std::vector &samples); +void _LoadSamples24(std::ifstream& wavFile, const int chunkSize, std::vector& samples); // Load samples, 32-bit -void _LoadSamples32(std::ifstream &wavFile, const int chunkSize, - std::vector &samples); +void _LoadSamples32(std::ifstream& wavFile, const int chunkSize, std::vector& samples); // Read in a 24-bit sample and convert it to an int -int _ReadSigned24BitInt(std::ifstream &stream); +int _ReadSigned24BitInt(std::ifstream& stream); }; // namespace wav }; // namespace dsp diff --git a/format.sh b/format.sh index 1db8446..a34e153 100755 --- a/format.sh +++ b/format.sh @@ -6,7 +6,7 @@ echo "Formatting..." -git ls-files "*.h" "*.cpp" | xargs clang-format --style=llvm -i +git ls-files "*.h" "*.cpp" | xargs clang-format -i . echo "Formatting complete!" echo "You can stage all of the files using:"