-
Notifications
You must be signed in to change notification settings - Fork 1
/
config_sample.yml
327 lines (286 loc) · 19.7 KB
/
config_sample.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
########################################################################################################################
# Anchors is an ignored section. Define variables here to be referenced
#
anchors:
- &deleteBuiltinFields
type: delFields # delFields: Clear specified fields (set to empty string)
keys: [facility, pid, extradata]
########################################################################################################################
# Schema: Declare all fields
#
# Basic rules:
# - Fields are of string type
# - Fields must be declared in schema/fields even if they only exist temporarily during transformations
# - A field is empty by default. There is no difference between empty string or null/undefined
# - A field of empty value is always skipped from final output
# - New fields may be appended for config reload (SIGHUP), but never removed
#
# Required fields for syslog input:
# - facility: syslog facility name, e.g. local4, kern, ..
# - level: depends on input/../levelMapping
# - time: raw timestamp string
# - host: hostname
# - app: app-id in RFC5424
# - pid: proc-id in RFC5424
# - source: msgid in RFC5424
# - extradata: raw metadata value in RFC5424, unparsed
# - log: message in RFC5424
#
# The reason for declaration is because a log record of fields is internally a Go array/slice
# If runtime reconfiguration is to be supported in future, it will require new fields to be appended at the end
#
# maxFields sets the max count of fields, which may be changed during config reloading but the max must stay constant
schema:
fields: [facility, level, time, host, app, pid, source, extradata, log, class, task, vhost, pnum, ddsource, ddtags, hostname, service]
maxFields: 30
########################################################################################################################
# Reload restrictions on SIGHUP, as opposed to full restart in which the previous config is ignored completely:
#
# - No change in inputs including extractions
# - No change in orchestration type and keys (metric keys may be changed)
# - The maximum number of allowed fields for reload is defined by schema/maxFields, which cannot be changed itself
# - Any fields touched by input, extractions or orchestration keys MUST not be moved or renamed.
# - Any failure would result in an error logged while slog-agent continues to run with previous config
########################################################################################################################
# Inputs: list of inputs
#
inputs:
- type: syslog # syslog: RFC 5424 Protocol, TCP only
# multiline is supported, for example:
# <163>1 2019-10-15T15:50:46.866915+03:00 local my-app 123 fn - First line
# second line
# ...
# non-ASCII bytes at the end are stripped to prevent invalid UTF-8.
#
address: localhost:5140 # address: 0.0.0.0:514 or 0.0.0.0:0 (port 0 to be assigned by OS. Real number is logged)
levelMapping: [off, fatal, crit, error, warn, notice, info, debug]
#
# Extractions: List of transforms to compute fields required for orchestration and metrics
#
# Leave as few steps as possible, as they're performed on input goroutines
# Any transform can be used here
#
extractions:
- type: extractHead # extractHead: Extract and cut class from the start of "log" to a new field "class"
key: log # e.g. log=[MyClass1 ] - Initialized
pattern: '\[*\] - ' # pattern: note brackets and asterisks need to be escaped
maxLen: 100
destKey: class # result in e.g. log=Initialized class=MyClass1 (always trimmed)
- type: extractTail # extractTail: Extract and cut UUID from the end of "source" to a new field "task"
key: source # e.g. source=task.log:123e4567-e89b-12d3-a456-426614174000
pattern: :[0-9a-f-] # pattern: optional prefix + { [] or * to match target } + optional suffix
maxLen: 41 # maxLen: max search length in bytes
destKey: task # result in e.g. source=task.log task=123e4567-e89b-12d3-a456-426614174000
- type: extractTail
key: app
pattern: /*
maxLen: 100
destKey: vhost
- type: addFields # addFields: Add or update one or more fields
fields:
pnum: ${task[-1:]} # value may reference other variables by $var or ${var}
# may also use substring (no overflow),
# e.g. ${task[-3:-1]} result in "78" for task=56789
- type: if # Conditional block (optional execution - there is no "else")
match: # match: AND of all
class: !!str-any # field1: !!operator1 value1
task: !!str-any # field2: !!operator2 value2
# see "Match Operators" below for details
then:
- type: addFields # addFields: Add or update one or more fields
fields:
task: $task:$class # e.g. task=123e4567-e89b-12d3-a456-426614174000:MyClass1
- type: delFields # delFields: Clear specified fields (set to empty string)
keys: [facility, pid, extradata]
########################################################################################################################
# Orchestration creates log-processing pipeline(s) for key fields and distribute input logs among them
#
# Keys and tags are always overriden in integration agent tests to split outputs for comparison
#
orchestration:
type: byKeySet # "byKeySet": one queue and one set of pipelines for each of key field set.
# "singleton": single pipeline, no redistribution. static tag only (no field expansion)
keys: [app, level, pnum] # keys: e.g. [app, level] => "sshd,info", "sshd,warn", ..
# keys are also used as dir names for on-disk queues and any change would break autorecovery on next restart.
#
# Choose keys carefully as idle pipelines and queues are never destroyed without restart
#
# Logs routed to the same pipeline (of the same keyset) are guaranteed to be outputted in the original order
#
# The ordering guarantee doesn't mean all logs would be ordered by timestamps - it still depends on the source
tag: development.$app # tag: required by Fluentd in upstream. Key fields may be referenced here.
metricKeys: [host, vhost, source] # metricKeys define additional fields as metric labels (Prometheus metrics), in addition to key fields above
# They're applied to all processing-level metrics, e.g.
# - slogagent_process_passed_records_total{key_app="sshd", key_level="error", key_vhost="foo.com", ..} 100
# - slogagent_process_passed_record_bytes_total{key_...} ...
########################################################################################################################
# Transformations run on each of log-processing pipelines
#
# All custom metrics (transforms with "metricLabel") are lazy-initialized as they're likely to be tied to specific
# pipelines and most combinations would be unused. This blocks the detection of zero to non-zero transition in alert
# rules.
#
transformations:
- type: switch # switch: Switch-Case branches, no fallthrough
cases: # continue processing if not matched in any cases
- match:
app: appServ
then:
- type: drop # drop: Drop logs on conditions (AND only)
match:
source: auth.log
level: !!str-not fatal
percentage: 100
metricLabel: app-auth # metricLabel: a metric label value to track dropped logs, e.g.
# slogagent_process_labelled_record_bytes_total{key_app="appServ",key_vhost="foo.com",label="app-auth"} 175
- type: drop # drop can also be used with a percentage
match:
host: errors
source: main.log
level: !!str warn
percentage: 33 # percentage: ratio of logs to exclude, 0-100
metricLabel: downsampled # percentage < 100 would create an additional label with "!" in front, e.g.
# label="!downsampled" to count matched but retained logs
- type: switch
cases:
- match:
source: access.log
then:
- type: if
match:
log: !!glob P[OU][ST]** params=**
then:
- type: truncate # truncate: Truncate oversized fields by bytes.
# non-ASCII bytes at the end are stripped to prevent invalid UTF-8.
key: log # key: field to check & truncate
maxLen: 180 # maxLen: max length to preserve (before appending suffix)
suffix: ' ... (cut)' # suffix: appended to the end. REQUIRED
- match:
task: !!len-lt 1
then:
- type: redactEmail # redactEmail: search and redact emails in-place
key: log # key: field to check and redact
metricLabel: redacted # metricLabel: a metric label value to track changed logs (not numbers of emails redacted)
- match:
app: abandoned
then:
#
# Examples of generic/abandoned transforms which should be avoided in production mode
#
- type: addFields # addFields: could cause high CPU & mem when field values are huge
fields: #
log: task=$task $log # ex: 60K error dumps in "log" field, all of them rewritten / recreated
#
# May be replaced by serialization rewrites (see "inline" rewrite below)
- type: unescape # unescape: Unescape special chars in syslog messages: high CPU & mem when logs are huge
key: log # Not part of RFC 5424; including chars: "\b", "\f", "\n", "\r", "\t" (as in Java)
# Skipped if a log is marked by input as unescaped (e.g. multiline message via Syslog)
#
# Can be replaced by serialization rewrites with side-effects
- type: replace # replace: Regex search & replace: extreme CPU & mem usage, avoid completely
key: log
pattern: !!regex ^(P(OS|U)T ".*".*params=.{145}).{15,}$ # No generic replacement. Write new transforms if needed (e.g. redactEmail)
replacement: $1 ... (cut)
#
# Match Operators (Examples)
#
- match:
facility: !!str-eq kern # !!str-eq or !!str is the default, "equals to"
level: !!str-not notice # !!str-not means "not equals to"
time: !!str-start 2020/ # !!str-start matches the beginning of value
host: !!str-end .com # !!str-end matches the end of value
app: !!str-contain server # !!str-contain tests whether value contains something
vhost: !!glob api.*.{com,.co.uk} # !!glob uses https://github.com/gobwas/glob pattern, note there are ** and *
log: !!regex ^(P(OS|U)T)\s # !!regex uses Go's regular expression
pid: !!len-gt 5 # !!len-gt checks the length is greater than N
source: !!len-lt 2 # !!len-lt checks the length is smaller than N
task: !!str-any # !!str-any is the same as !!len-gt 0
then:
- type: addFields
fields:
host: ${host[:-4]}
#
# When arranging transformations, the key for performance is to drop as early as possible and to modify as little as possible.
#
# For example, if there are millions of debug logs/s and all of them are going to be dropped after updating metrics,
# there is no point to parse timestamps first - which is an expensive operation itself (compared to other steps)
#
- type: block
steps:
- type: parseTime # parseTime: Parse timestamp; Only RFC 3339 is supported now
key: time # e.g. 2019-08-15T15:50:46.866915+03:00 or 2020-09-17T16:51:47.867Z
errorLabel: timeError # update "slogagent_process_labelled_*" metrics with label=timeError on failures
- type: delFields
keys:
- time
# Add Datadog specific fields
#
# ddtags is always overriden in integration agent tests to split outputs for comparison
- type: addFields
fields:
ddsource: csharp
# ddtags: $app,test-$vhost # "ddtags" defaults to the orchestration tag if empty or undefined in schema
hostname: $host
service: $vhost
# Multi-output: executed in the exact sequence here
outputBufferPairs:
- name: customFluentd
########################################################################################################################
# Buffer offers storage / queues to store processed log chunks outside of memory when needed
#
# Buffering is applied on output chunks, AFTER logs are processed into the final output form (as defined in output section)
# The data persisted is determined solely by the output, while the buffer has no idea what's inside.
buffer:
type: hybridBuffer # hybridBuffer is the only buffer type for now
# All chunks are kept in per-pipeline go-channel of fixed size
# When the count reaches 500, newly processed chunks have their main bodies moved to individual files
# ... and only loaded back when ready to be forwarded to upstream
#
rootPath: /tmp/slog-buffer-fluentd # rootPath: storage root. There may be subdirs for each pipeline, depending on the orchestration type
maxBufSize: 10GB # maxBufSize: limit per queue / pipeline
# Compressed by gzip and the ratio is about 1:20 to 1:50
########################################################################################################################
# Output defines how logs are serialized, packed (compression) and forwarded to upstream
#
# Serialization and compression are done on the log-processing pipelines before buffering.
# The serialization stage also allows certain in-place transformations ("rewrites") without introducing additional heap allocations and memory copying
output:
type: fluentdForward # fluentdForward: Forward Protocol v1
# with a specific "environment" field, e.g.
# {log: "User A logged in", "environment": {"app": "sshd", "host": "google.com"}}
serialization:
environmentFields: [host, vhost, app, source] # List of fields to be nested under the "environment" field
hiddenFields: [task, class, pnum, ddsource, ddtags, hostname, service] # List of fields to be omitted from output
rewriteFields: # Fields to be rewritten during serialization, as simple and efficient alternatives to transformations
# field: [list of rewrite steps]
log:
- type: inline # inline: insert a field to the beginning if present (not empty)
field: class # e.g. "class=MyClass1 Original log message"
- type: unescape # unescape: Unescape special chars, same as the "unescape" transform
# Skipped if a log is marked by input as unescaped (e.g. multiline message via Syslog)
#
# Note unescaping at the this stage breaks certain cases, e.g. "log\nbob@gmail.com",
# where the wrong email "nbob@gmail.com" is reacted before unescaping of "\n"
# The last step must be "copy" or "unescape"
messageMode: CompressedPackedForward # fluentd forward modes: Forward, PackedForward, or CompressedPackedForward
# Use compression wherever possible, otherwise buffering would be done uncompressed.
# In our experiences the compression ratio is 20-50 times for logs, which is critical
# for (not running out of) disk space and IO performance.
upstream: # Forward destination, see https://docs.fluentd.org/input/forward for explanations
address: localhost:24224
tls: true
secret: guess
maxDuration: 30m
- name: datadogAPI
buffer:
type: hybridBuffer # see [hybridBuffer] section above
rootPath: /tmp/slog-buffer-datadog
maxBufSize: 5GB
output:
type: datadog # Datadog API, as described in https://docs.datadoghq.com/api/latest/logs/#send-logs
serialization: # Serialze all non-empty fields on top level
# "timestamp" is always added, formatted from the parsed timestamp (not part of schema)
hiddenFields: [host, vhost, app, source, task, class, pnum]
upstream:
address: https://http-intake.logs.datadoghq.eu/api/v2/logs
httpTimeout: 30s