From 5f1e656e06fc07a0dee6a180bcb381853bd741dd Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 4 Mar 2024 13:45:20 +0100 Subject: [PATCH] [AWS] [S3] Remove url.QueryUnescape() from aws-s3 input in polling mode (#38125) * Remove url.QueryUnescape() We introduced [^1] the `url.QueryUnescape()` function to unescape object keys from S3 notification in SQS messages. However, the object keys in the S3 list object responses do not require [^2] unescape. We must remove the unescape to avoid unintended changes to the S3 object key. [^1]: https://github.com/elastic/beats/pull/18370 [^2]: https://github.com/elastic/beats/issues/38012#issuecomment-1946440453 --------- Co-authored-by: Andrea Spacca --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/input/awss3/s3.go | 12 ++---------- x-pack/filebeat/input/awss3/s3_test.go | 9 +++++++++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index eef3e230416..7731d291ba4 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -97,6 +97,7 @@ fields added to events containing the Beats version. {pull}37553[37553] - [threatintel] MISP pagination fixes {pull}37898[37898] - Fix file handle leak when handling errors in filestream {pull}37973[37973] - Prevent HTTPJSON holding response bodies between executions. {issue}35219[35219] {pull}38116[38116] +- Fix "failed processing S3 event for object key" error on aws-s3 input when key contains the "+" character {issue}38012[38012] {pull}38125[38125] *Heartbeat* diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index 96be746c160..5aa8d31e95d 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "net/url" "sync" "time" @@ -208,14 +207,7 @@ func (p *s3Poller) GetS3Objects(ctx context.Context, s3ObjectPayloadChan chan<- // Metrics p.metrics.s3ObjectsListedTotal.Add(uint64(totListedObjects)) for _, object := range page.Contents { - // Unescape s3 key name. For example, convert "%3D" back to "=". - filename, err := url.QueryUnescape(*object.Key) - if err != nil { - p.log.Errorw("Error when unescaping object key, skipping.", "error", err, "s3_object", *object.Key) - continue - } - - state := newState(bucketName, filename, *object.ETag, p.listPrefix, *object.LastModified) + state := newState(bucketName, *object.Key, *object.ETag, p.listPrefix, *object.LastModified) if p.states.MustSkip(state, p.store) { p.log.Debugw("skipping state.", "state", state) continue @@ -240,7 +232,7 @@ func (p *s3Poller) GetS3Objects(ctx context.Context, s3ObjectPayloadChan chan<- s3ObjectHandler: s3Processor, s3ObjectInfo: s3ObjectInfo{ name: bucketName, - key: filename, + key: *object.Key, etag: *object.ETag, lastModified: *object.LastModified, listingID: listingID.String(), diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index 6f075a2f854..b94ba7cfb09 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -93,6 +93,11 @@ func TestS3Poller(t *testing.T) { Key: aws.String("key5"), LastModified: aws.Time(time.Now()), }, + { + ETag: aws.String("etag6"), + Key: aws.String("2024-02-08T08:35:00+00:02.json.gz"), + LastModified: aws.Time(time.Now()), + }, }, }, nil }) @@ -124,6 +129,10 @@ func TestS3Poller(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("key5")). Return(nil, errFakeConnectivityFailure) + mockAPI.EXPECT(). + GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("2024-02-08T08:35:00+00:02.json.gz")). + Return(nil, errFakeConnectivityFailure) + s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}, numberOfWorkers) receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, newStates(inputCtx), store, bucket, "key", "region", "provider", numberOfWorkers, pollInterval) require.Error(t, context.DeadlineExceeded, receiver.Poll(ctx))