From 80735f6c6842113c9c797a9da8697a572951339f Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 11 Jan 2022 17:33:58 +0100 Subject: [PATCH 1/4] fluentd: Remove non utf-8 characters from log lines When a log line in hash format contains non UTF-8 characters fluentd would drop the complete line because it failed to convert the line in key-value format. By forcing UTF-8 encoding and replacing non UTF-8 characters with empty strings the log line will not be dropped but only contain the valid UTF-8 characters. Fixes #5099 Signed-off-by: Christian Haudum --- Makefile | 9 +-- clients/cmd/fluentd/.gitignore | 12 +++- clients/cmd/fluentd/.rubocop.yml | 3 +- clients/cmd/fluentd/Dockerfile | 2 +- clients/cmd/fluentd/README.md | 11 ++-- clients/cmd/fluentd/bin/setup | 10 +-- clients/cmd/fluentd/bin/test | 5 ++ clients/cmd/fluentd/docker/docker-compose.yml | 20 +++++- .../cmd/fluentd/lib/fluent/plugin/out_loki.rb | 3 +- .../spec/gems/fluent/plugin/data/non_utf8.log | 1 + .../gems/fluent/plugin/loki_output_spec.rb | 63 ++++++++++++------- 11 files changed, 95 insertions(+), 44 deletions(-) create mode 100755 clients/cmd/fluentd/bin/test create mode 100644 clients/cmd/fluentd/spec/gems/fluent/plugin/data/non_utf8.log diff --git a/Makefile b/Makefile index 4a709d0ed004..7ce04c1e6fbe 100644 --- a/Makefile +++ b/Makefile @@ -425,9 +425,10 @@ fluent-bit-test: # fluentd plugin # ################## fluentd-plugin: - gem install bundler --version 1.16.2 + gem install bundler --version 2.3.4 bundle config silence_root_warning true - bundle install --gemfile=clients/cmd/fluentd/Gemfile --path=clients/cmd/fluentd/vendor/bundle + bundle config set --local path clients/cmd/fluentd/vendor/bundle + bundle install --gemfile=clients/cmd/fluentd/Gemfile fluentd-image: $(SUDO) docker build -t $(IMAGE_PREFIX)/fluent-plugin-loki:$(IMAGE_TAG) -f clients/cmd/fluentd/Dockerfile . 
@@ -435,9 +436,9 @@ fluentd-image: fluentd-push: $(SUDO) $(PUSH_OCI) $(IMAGE_PREFIX)/fluent-plugin-loki:$(IMAGE_TAG) -fluentd-test: LOKI_URL ?= http://localhost:3100/loki/api/ +fluentd-test: LOKI_URL ?= http://loki:3100 fluentd-test: - LOKI_URL="$(LOKI_URL)" docker-compose -f clients/cmd/fluentd/docker/docker-compose.yml up --build $(IMAGE_PREFIX)/fluent-plugin-loki:$(IMAGE_TAG) + LOKI_URL="$(LOKI_URL)" docker-compose -f clients/cmd/fluentd/docker/docker-compose.yml up --build ################## # logstash plugin # diff --git a/clients/cmd/fluentd/.gitignore b/clients/cmd/fluentd/.gitignore index d45f5732786b..42d037130f75 100644 --- a/clients/cmd/fluentd/.gitignore +++ b/clients/cmd/fluentd/.gitignore @@ -1,3 +1,9 @@ -/coverage/ -/.rspec_status -/Gemfile.lock +Gemfile.lock +.rspec_status +# rbenv +.ruby-version +# bundler +.bundle/ +vendor/ +# simplecov +coverage/ diff --git a/clients/cmd/fluentd/.rubocop.yml b/clients/cmd/fluentd/.rubocop.yml index c9fc1eb7f612..093d5002fce0 100644 --- a/clients/cmd/fluentd/.rubocop.yml +++ b/clients/cmd/fluentd/.rubocop.yml @@ -1,6 +1,7 @@ require: rubocop-rspec AllCops: + NewCops: disable Exclude: - 'bin/**' - 'test/**/*.rb' @@ -30,4 +31,4 @@ Style/HashEachMethods: Style/HashTransformKeys: Enabled: true Style/HashTransformValues: - Enabled: true \ No newline at end of file + Enabled: true diff --git a/clients/cmd/fluentd/Dockerfile b/clients/cmd/fluentd/Dockerfile index 960386493741..2e0f9e902367 100644 --- a/clients/cmd/fluentd/Dockerfile +++ b/clients/cmd/fluentd/Dockerfile @@ -1,4 +1,4 @@ -FROM ruby:2.6 as build +FROM ruby:2.7.5 as build ENV DEBIAN_FRONTEND=noninteractive diff --git a/clients/cmd/fluentd/README.md b/clients/cmd/fluentd/README.md index 278c2d6395c9..a95634dfa6c5 100644 --- a/clients/cmd/fluentd/README.md +++ b/clients/cmd/fluentd/README.md @@ -6,14 +6,17 @@ See [docs/client/fluentd/README.md](../../docs/sources/clients/fluentd/_index.md ## Development -After checking out the repo, run `bin/setup` to install 
dependencies. Then, run `bundle exec rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. +After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. -To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `fluent-plugin-grafana-loki.gemspec`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). +To install this gem onto your local machine, run `ruby -S bundle exec rake install`. To release a new version, update the version number in `fluent-plugin-grafana-loki.gemspec`, and then run `ruby -S bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 
-To create the gem: `gem build fluent-plugin-grafana-loki.gemspec` +To create the gem: `ruby -S gem build fluent-plugin-grafana-loki.gemspec` Useful additions: - `gem install rubocop` + +```bash +ruby -S gem install rubocop +``` ## Testing diff --git a/clients/cmd/fluentd/bin/setup b/clients/cmd/fluentd/bin/setup index 1ba28afdc7c1..66447235f3f8 100755 --- a/clients/cmd/fluentd/bin/setup +++ b/clients/cmd/fluentd/bin/setup @@ -1,7 +1,9 @@ #!/usr/bin/env bash + set -euo pipefail -IFS=$'\n\t' -set -vx -gem install bundler -bundle install +ruby --version +echo "" +ruby -S gem install bundler --version 2.3.4 +ruby -S bundle config set --local path $(pwd)/vendor/bundle +ruby -S bundle install diff --git a/clients/cmd/fluentd/bin/test b/clients/cmd/fluentd/bin/test new file mode 100755 index 000000000000..5f4f9dbf0ae3 --- /dev/null +++ b/clients/cmd/fluentd/bin/test @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ruby -S bundle exec rspec diff --git a/clients/cmd/fluentd/docker/docker-compose.yml b/clients/cmd/fluentd/docker/docker-compose.yml index 0a472bf356be..6b38a4c8e7f7 100644 --- a/clients/cmd/fluentd/docker/docker-compose.yml +++ b/clients/cmd/fluentd/docker/docker-compose.yml @@ -1,15 +1,28 @@ version: '3' services: + loki: + build: + context: ../../../../ + dockerfile: ./cmd/loki/Dockerfile + image: grafana/loki:main + ports: + - 3100 + volumes: + - ./fluentd.conf:/fluentd/etc/fluent.conf + # Receive forwarded logs and send to /fluentd/logs/data.log and loki fluentd: build: - context: ../../../.. 
- dockerfile: ../Dockerfile - image: fluentd:loki + context: ../../../../ + dockerfile: ./clients/cmd/fluentd/Dockerfile + image: grafana/fluent-plugin-loki:main volumes: - ./fluentd.conf:/fluentd/etc/fluent.conf environment: - LOKI_URL + depends_on: + - loki + # Read /var/log/syslog and send it to fluentd fluentbit: image: fluent/fluent-bit:1.0 @@ -20,3 +33,4 @@ services: - /var/log/syslog:/var/log/syslog:ro depends_on: - fluentd + - loki diff --git a/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb b/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb index e3ce5952c37b..29b363d15ffc 100644 --- a/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb +++ b/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb @@ -278,6 +278,8 @@ def record_to_line(record) when :key_value formatted_labels = [] record.each do |k, v| + # Remove non UTF-8 characters by force-encoding the string and replacing said chars with empty string + v = v.encode('utf-8', invalid: :replace, replace: '') # Escape double quotes and backslashes by prefixing them with a backslash v = v.to_s.gsub(%r{(["\\])}, '\\\\\1') if v.include?(' ') || v.include?('=') @@ -292,7 +294,6 @@ def record_to_line(record) line end - # # convert a line to loki line with labels def line_to_loki(record) chunk_labels = {} diff --git a/clients/cmd/fluentd/spec/gems/fluent/plugin/data/non_utf8.log b/clients/cmd/fluentd/spec/gems/fluent/plugin/data/non_utf8.log new file mode 100644 index 000000000000..cce30b739c90 --- /dev/null +++ b/clients/cmd/fluentd/spec/gems/fluent/plugin/data/non_utf8.log @@ -0,0 +1 @@ +Á rest of line \ No newline at end of file diff --git a/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb b/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb index 819ffd8ce6a3..0fc6922b0dc3 100644 --- a/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb +++ b/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb @@ -50,12 +50,12 @@ driver = 
Fluent::Test::Driver::Output.new(described_class) driver.configure(config) content = File.readlines('spec/gems/fluent/plugin/data/syslog2') - single_chunk = [Time.at(1_546_270_458), content] - payload = driver.instance.generic_to_loki([single_chunk]) - body = { 'streams': payload } - expect(body[:streams][0]['stream'].empty?).to eq true - expect(body[:streams][0]['values'].count).to eq 1 - expect(body[:streams][0]['values'][0][0]).to eq '1546270458000000000' + chunk = [Time.at(1_546_270_458), content[0]] + payload = driver.instance.generic_to_loki([chunk]) + expect(payload[0]['stream'].empty?).to eq true + expect(payload[0]['values'].count).to eq 1 + expect(payload[0]['values'][0][0]).to eq '1546270458000000000' + expect(payload[0]['values'][0][1]).to eq content[0] end it 'converts syslog output with extra labels to loki output' do @@ -66,12 +66,12 @@ driver = Fluent::Test::Driver::Output.new(described_class) driver.configure(config) content = File.readlines('spec/gems/fluent/plugin/data/syslog2') - single_chunk = [Time.at(1_546_270_458), content] - payload = driver.instance.generic_to_loki([single_chunk]) - body = { 'streams': payload } - expect(body[:streams][0]['stream']).to eq('env' => 'test') - expect(body[:streams][0]['values'].count).to eq 1 - expect(body[:streams][0]['values'][0][0]).to eq '1546270458000000000' + chunk = [Time.at(1_546_270_458), content[0]] + payload = driver.instance.generic_to_loki([chunk]) + expect(payload[0]['stream']).to eq('env' => 'test') + expect(payload[0]['values'].count).to eq 1 + expect(payload[0]['values'][0][0]).to eq '1546270458000000000' + expect(payload[0]['values'][0][1]).to eq content[0] end it 'converts multiple syslog output lines to loki output' do @@ -84,11 +84,12 @@ line1 = [Time.at(1_546_270_458), content[0]] line2 = [Time.at(1_546_270_460), content[1]] payload = driver.instance.generic_to_loki([line1, line2]) - body = { 'streams': payload } - expect(body[:streams][0]['stream'].empty?).to eq true - 
expect(body[:streams][0]['values'].count).to eq 2 - expect(body[:streams][0]['values'][0][0]).to eq '1546270458000000000' - expect(body[:streams][0]['values'][1][0]).to eq '1546270460000000000' + expect(payload[0]['stream'].empty?).to eq true + expect(payload[0]['values'].count).to eq 2 + expect(payload[0]['values'][0][0]).to eq '1546270458000000000' + expect(payload[0]['values'][0][1]).to eq content[0] + expect(payload[0]['values'][1][0]).to eq '1546270460000000000' + expect(payload[0]['values'][1][1]).to eq content[1] end it 'converts multiple syslog output lines with extra labels to loki output' do @@ -102,11 +103,27 @@ line1 = [Time.at(1_546_270_458), content[0]] line2 = [Time.at(1_546_270_460), content[1]] payload = driver.instance.generic_to_loki([line1, line2]) - body = { 'streams': payload } - expect(body[:streams][0]['stream']).to eq('env' => 'test') - expect(body[:streams][0]['values'].count).to eq 2 - expect(body[:streams][0]['values'][0][0]).to eq '1546270458000000000' - expect(body[:streams][0]['values'][1][0]).to eq '1546270460000000000' + expect(payload[0]['stream']).to eq('env' => 'test') + expect(payload[0]['values'].count).to eq 2 + expect(payload[0]['values'][0][0]).to eq '1546270458000000000' + expect(payload[0]['values'][0][1]).to eq content[0] + expect(payload[0]['values'][1][0]).to eq '1546270460000000000' + expect(payload[0]['values'][1][1]).to eq content[1] + end + + it 'removed non utf-8 characters from log lines' do + config = <<-CONF + url https://logs-us-west1.grafana.net + CONF + driver = Fluent::Test::Driver::Output.new(described_class) + driver.configure(config) + content = File.readlines('spec/gems/fluent/plugin/data/non_utf8.log') + chunk = [Time.at(1_546_270_458), {'message'=>content[0], 'stream'=>'stdout'}] + payload = driver.instance.generic_to_loki([chunk]) + expect(payload[0]['stream'].empty?).to eq true + expect(payload[0]['values'].count).to eq 1 + expect(payload[0]['values'][0][0]).to eq '1546270458000000000' + 
expect(payload[0]['values'][0][1]).to eq 'message=" rest of line" stream=stdout' end it 'formats record hash as key_value' do @@ -122,7 +139,7 @@ expect(body[:streams][0]['stream'].empty?).to eq true expect(body[:streams][0]['values'].count).to eq 1 expect(body[:streams][0]['values'][0][0]).to eq '1546270458000000000' - expect(body[:streams][0]['values'][0][1]).to eq 'message="' + content[0] + '" stream="stdout"' + expect(body[:streams][0]['values'][0][1]).to eq 'message="' + content[0] + '" stream=stdout' end it 'formats record hash as json' do From 7fd4c40df21ab6cf72942f599a0dbd9f8e63d087 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 12 Jan 2022 10:18:47 +0100 Subject: [PATCH 2/4] Bump fluent-plugin-grafana-loki to version 1.2.17 Signed-off-by: Christian Haudum --- clients/cmd/fluentd/fluent-plugin-grafana-loki.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/cmd/fluentd/fluent-plugin-grafana-loki.gemspec b/clients/cmd/fluentd/fluent-plugin-grafana-loki.gemspec index a3e2b8d9fe6f..531bc4290328 100644 --- a/clients/cmd/fluentd/fluent-plugin-grafana-loki.gemspec +++ b/clients/cmd/fluentd/fluent-plugin-grafana-loki.gemspec @@ -4,7 +4,7 @@ $LOAD_PATH.push File.expand_path('lib', __dir__) Gem::Specification.new do |spec| spec.name = 'fluent-plugin-grafana-loki' - spec.version = '1.2.16' + spec.version = '1.2.17' spec.authors = %w[woodsaj briangann cyriltovena] spec.email = ['awoods@grafana.com', 'brian@grafana.com', 'cyril.tovena@grafana.com'] From a830c8501ec3f595b5f7d50107e6c9e7e428e2b3 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 12 Jan 2022 10:25:49 +0100 Subject: [PATCH 3/4] fixup! 
fluentd: Remove non utf-8 characters from log lines Signed-off-by: Christian Haudum --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be126937772a..b2078830b008 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +* [5107](https://github.com/grafana/loki/pull/5107) **chaudum**: Fix bug in fluentd plugin that caused log lines containing non UTF-8 characters to be dropped. * [5091](https://github.com/grafana/loki/pull/5091) **owen-d**: Changes `ingester.concurrent-flushes` default to 32 * [4879](https://github.com/grafana/loki/pull/4879) **cyriltovena**: LogQL: add __line__ function to | line_format template. * [5081](https://github.com/grafana/loki/pull/5081) **SasSwart**: Add the option to configure memory ballast for Loki From badc04dcdfda24a0cf60b576b586864ce3353024 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 12 Jan 2022 11:24:04 +0100 Subject: [PATCH 4/4] fixup! fluentd: Remove non utf-8 characters from log lines Signed-off-by: Christian Haudum --- clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb | 4 ++-- .../cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb b/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb index 29b363d15ffc..338402798a80 100644 --- a/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb +++ b/clients/cmd/fluentd/lib/fluent/plugin/out_loki.rb @@ -278,8 +278,8 @@ def record_to_line(record) when :key_value formatted_labels = [] record.each do |k, v| - # Remove non UTF-8 characters by force-encoding the string and replacing said chars with empty string - v = v.encode('utf-8', invalid: :replace, replace: '') + # Remove non UTF-8 characters by force-encoding the string + v = v.encode('utf-8', invalid: :replace) # Escape double quotes and backslashes by prefixing them with a backslash v = v.to_s.gsub(%r{(["\\])}, '\\\\\1') if v.include?(' ') || 
v.include?('=') diff --git a/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb b/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb index 0fc6922b0dc3..b5bbdaa98f0c 100644 --- a/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb +++ b/clients/cmd/fluentd/spec/gems/fluent/plugin/loki_output_spec.rb @@ -123,7 +123,7 @@ expect(payload[0]['stream'].empty?).to eq true expect(payload[0]['values'].count).to eq 1 expect(payload[0]['values'][0][0]).to eq '1546270458000000000' - expect(payload[0]['values'][0][1]).to eq 'message=" rest of line" stream=stdout' + expect(payload[0]['values'][0][1]).to eq 'message="� rest of line" stream=stdout' end it 'formats record hash as key_value' do