Skip to content

Commit

Permalink
Add support in aws-s3 input for s3 notification from SNS to SQS (#28800
Browse files Browse the repository at this point in the history
…) (#28874)

(cherry picked from commit 2fe1842)

Co-authored-by: kaiyan-sheng <kaiyan.sheng@elastic.co>
  • Loading branch information
mergify[bot] and kaiyan-sheng committed Nov 10, 2021
1 parent c36cd13 commit a4b233f
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Update `aws-s3` input to connect to non AWS S3 buckets {issue}28222[28222] {pull}28234[28234]
- Sophos UTM: Support logs containing hostname in syslog header. {pull}28638[28638]
- Moving Oracle Filebeat module to GA. {pull}28754[28754]
- Add support in aws-s3 input for s3 notification from SNS to SQS. {pull}28800[28800]

*Heartbeat*

Expand Down
10 changes: 9 additions & 1 deletion x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The use of SQS notification is preferred: polling list of S3 objects is expensiv
in terms of performance and costs and should be preferably used only when no SQS
notification can be attached to the S3 buckets. This input can, for example, be
used to receive S3 access logs to monitor detailed records for the requests that
are made to a bucket.
are made to a bucket. This input also supports S3 notification from SNS to SQS.

SQS notification method is enabled by setting `queue_url` configuration value.
S3 bucket list polling method is enabled by setting `bucket_arn` configuration value.
Expand Down Expand Up @@ -386,6 +386,14 @@ create a notification through SQS. Please see
https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification[create-sqs-queue-for-notification]
for more details.

[float]
=== S3 -> SNS -> SQS setup
If you would like to use the bucket notification in multiple different consumers
(other than {beatname_lc}), you should use an SNS topic for the bucket notification.
Please see https://docs.aws.amazon.com/AmazonS3/latest/userguide/ways-to-add-notification-config-to-bucket.html#step1-create-sns-topic-for-notification[create-SNS-topic-for-notification]
for more details. The SQS queue will be configured as a
https://docs.aws.amazon.com/sns/latest/dg/sns-sqs-as-subscriber.html[subscriber to the SNS topic].

[float]
=== Parallel Processing

Expand Down
6 changes: 3 additions & 3 deletions x-pack/filebeat/input/awss3/_meta/terraform/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Terraform setup for AWS S3 Input Integration Tests

This directory contains a Terrafrom module that creates the AWS resources needed
This directory contains a Terraform module that creates the AWS resources needed
for executing the integration tests for the `aws-s3` Filebeat input. It creates
an S3 bucket and SQS queue and configures S3 `ObjectCreated:*` notifications to
be delivered to SQS.
be delivered to SQS. It also creates a second S3 bucket, an SNS topic, and an SQS queue; S3 `ObjectCreated:*` notifications for that bucket are delivered to the SNS topic, and a subscription from the SNS topic to the SQS queue automatically places messages sent to the topic onto the queue.

It outputs configuration information that is consumed by the tests to
`outputs.yml`. The AWS resources are randomly named to prevent name collisions
Expand Down Expand Up @@ -33,7 +33,7 @@ to match the AWS region of the profile you are using.
4. Execute the integration test.
```
cd x-pack/filebeat/inputs/awss3
cd x-pack/filebeat/input/awss3
go test -tags aws,integration -run TestInputRun.+ -v .
```

Expand Down
74 changes: 74 additions & 0 deletions x-pack/filebeat/input/awss3/_meta/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,77 @@ resource "aws_s3_bucket_notification" "bucket_notification" {
aws_sqs_queue.filebeat-integtest,
]
}

# SNS topic that receives the S3 bucket notifications for the
# S3 -> SNS -> SQS integration-test path. The inline policy permits the
# S3 service principal to publish to this topic, but only for events whose
# source is the companion test bucket (ArnEquals on aws:SourceArn).
resource "aws_sns_topic" "filebeat-integtest-sns" {
name = "filebeat-s3-integtest-sns-${random_string.random.result}"

policy = <<POLICY
{
"Version":"2012-10-17",
"Statement":[{
"Effect": "Allow",
"Principal": { "Service": "s3.amazonaws.com" },
"Action": "SNS:Publish",
"Resource": "arn:aws:sns:*:*:filebeat-s3-integtest-sns-${random_string.random.result}",
"Condition":{
"ArnEquals": { "aws:SourceArn": "${aws_s3_bucket.filebeat-integtest-sns.arn}" }
}
}]
}
POLICY

# The bucket must exist first so the policy can interpolate its ARN.
depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
]
}

# Second S3 bucket, dedicated to the SNS -> SQS notification path.
# force_destroy allows "terraform destroy" to remove the bucket even when
# test objects are still present in it.
resource "aws_s3_bucket" "filebeat-integtest-sns" {
bucket = "filebeat-s3-integtest-sns-${random_string.random.result}"
force_destroy = true
}

# Routes the test bucket's ObjectCreated events to the SNS topic
# (in contrast to the primary bucket's notification, which targets SQS
# directly).
resource "aws_s3_bucket_notification" "bucket_notification-sns" {
bucket = aws_s3_bucket.filebeat-integtest-sns.id

topic {
topic_arn = aws_sns_topic.filebeat-integtest-sns.arn
events = ["s3:ObjectCreated:*"]
}

depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
aws_sns_topic.filebeat-integtest-sns,
]
}

# SQS queue that receives the S3 notifications forwarded by the SNS topic.
# The inline policy allows any principal to send messages, but only when
# the message originates from our SNS topic (ArnEquals on aws:SourceArn).
resource "aws_sqs_queue" "filebeat-integtest-sns" {
name = "filebeat-s3-integtest-sns-${random_string.random.result}"

policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": "*",
"Action": "sqs:SendMessage",
"Resource": "arn:aws:sqs:*:*:filebeat-s3-integtest-sns-${random_string.random.result}",
"Condition": {
"ArnEquals": { "aws:SourceArn": "${aws_sns_topic.filebeat-integtest-sns.arn}" }
}
}
]
}
POLICY

# Trailing comma added for consistency with the other depends_on lists
# in this module.
depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
aws_sns_topic.filebeat-integtest-sns,
]
}

# Subscribes the SQS queue to the SNS topic so that every notification
# published to the topic is automatically delivered to the queue.
# NOTE(review): raw_message_delivery is left at its default (false), so
# messages arrive wrapped in the SNS envelope; the aws-s3 input's
# TopicArn/Message handling is expected to unwrap them — confirm.
resource "aws_sns_topic_subscription" "filebeat-integtest-sns" {
topic_arn = aws_sns_topic.filebeat-integtest-sns.arn
protocol = "sqs"
endpoint = aws_sqs_queue.filebeat-integtest-sns.arn
}
2 changes: 2 additions & 0 deletions x-pack/filebeat/input/awss3/_meta/terraform/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ resource "local_file" "secrets" {
"queue_url" : aws_sqs_queue.filebeat-integtest.url
"aws_region" : aws_s3_bucket.filebeat-integtest.region
"bucket_name" : aws_s3_bucket.filebeat-integtest.id
"bucket_name_for_sns" : aws_s3_bucket.filebeat-integtest-sns.id
"queue_url_for_sns" : aws_sqs_queue.filebeat-integtest-sns.url
})
filename = "${path.module}/outputs.yml"
file_permission = "0644"
Expand Down
106 changes: 88 additions & 18 deletions x-pack/filebeat/input/awss3/input_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ import (
"testing"
"time"

"github.com/aws/aws-sdk-go-v2/service/s3"

awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/aws/external"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/s3manager"
"github.com/aws/aws-sdk-go-v2/service/sqs"
"github.com/stretchr/testify/assert"
Expand All @@ -36,7 +39,6 @@ import (
pubtest "github.com/elastic/beats/v7/libbeat/publisher/testing"
"github.com/elastic/beats/v7/libbeat/statestore"
"github.com/elastic/beats/v7/libbeat/statestore/storetest"
awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws"
)

const (
Expand All @@ -48,9 +50,11 @@ const (
)

// terraformOutputData mirrors the outputs.yml file written by the
// Terraform module in _meta/terraform. The *ForSNS fields describe the
// second bucket/queue pair used for the S3 -> SNS -> SQS test path.
//
// NOTE(review): the diff rendering interleaved the old and new field
// lines, duplicating AWSRegion/BucketName/QueueURL; only the new field
// set is kept here.
type terraformOutputData struct {
	AWSRegion        string `yaml:"aws_region"`
	BucketName       string `yaml:"bucket_name"`
	QueueURL         string `yaml:"queue_url"`
	BucketNameForSNS string `yaml:"bucket_name_for_sns"`
	QueueURLForSNS   string `yaml:"queue_url_for_sns"`
}

func getTerraformOutputs(t *testing.T) terraformOutputData {
Expand Down Expand Up @@ -174,11 +178,11 @@ func newV2Context() (v2.Context, func()) {
func TestInputRunSQS(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and SQS and must be executed manually.
// Terraform is used to set up S3 and SQS and must be executed manually.
tfConfig := getTerraformOutputs(t)

// Ensure SQS is empty before testing.
drainSQS(t, tfConfig)
drainSQS(t, tfConfig.AWSRegion, tfConfig.QueueURL)

// Ensure metrics are removed before testing.
monitoring.GetNamespace("dataset").GetRegistry().Remove(inputID)
Expand Down Expand Up @@ -240,7 +244,7 @@ func TestInputRunSQS(t *testing.T) {
func TestInputRunS3(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

// Ensure metrics are removed before testing.
Expand Down Expand Up @@ -297,6 +301,7 @@ func TestInputRunS3(t *testing.T) {
assertMetric(t, snap, "s3_objects_acked_total", 6)
assertMetric(t, snap, "s3_events_created_total", 12)
}

func assertMetric(t *testing.T, snapshot common.MapStr, name string, value interface{}) {
n, _ := snapshot.GetValue(inputID + "." + name)
assert.EqualValues(t, value, n, name)
Expand Down Expand Up @@ -332,16 +337,16 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string)
}
}

func drainSQS(t *testing.T, tfConfig terraformOutputData) {
func drainSQS(t *testing.T, region string, queueURL string) {
cfg, err := external.LoadDefaultAWSConfig()
if err != nil {
t.Fatal(err)
}
cfg.Region = tfConfig.AWSRegion
cfg.Region = region

sqs := &awsSQSAPI{
client: sqs.New(cfg),
queueURL: tfConfig.QueueURL,
queueURL: queueURL,
apiTimeout: 1 * time.Minute,
visibilityTimeout: 30 * time.Second,
longPollWaitTime: 10,
Expand Down Expand Up @@ -370,13 +375,13 @@ func drainSQS(t *testing.T, tfConfig terraformOutputData) {

// TestGetBucketNameFromARN verifies that the bucket name is extracted
// from a full S3 bucket ARN.
//
// NOTE(review): the diff rendering kept both the old assertion (which
// omitted the *testing.T argument and would not compile) and its
// replacement; only the corrected call is kept here.
func TestGetBucketNameFromARN(t *testing.T) {
	bucketName := getBucketNameFromARN("arn:aws:s3:::my_corporate_bucket")
	assert.Equal(t, "my_corporate_bucket", bucketName)
}

func TestGetRegionForBucketARN(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

awsConfig, err := external.LoadDefaultAWSConfig()
Expand All @@ -393,7 +398,7 @@ func TestGetRegionForBucketARN(t *testing.T) {
func TestPaginatorListPrefix(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

uploadS3TestFiles(t, tfConfig.AWSRegion, tfConfig.BucketName,
Expand Down Expand Up @@ -440,8 +445,73 @@ func TestPaginatorListPrefix(t *testing.T) {
}

// TestGetProviderFromDomain exercises getProviderFromDomain with AWS
// endpoint domains (mapped to "aws") and with explicit provider
// overrides for non-AWS endpoints.
//
// NOTE(review): the diff rendering kept both the old assertions (which
// omitted the *testing.T argument and would not compile) and their
// replacements; only the corrected calls are kept here.
func TestGetProviderFromDomain(t *testing.T) {
	assert.Equal(t, "aws", getProviderFromDomain("", ""))
	assert.Equal(t, "aws", getProviderFromDomain("c2s.ic.gov", ""))
	assert.Equal(t, "abc", getProviderFromDomain("abc.com", "abc"))
	assert.Equal(t, "xyz", getProviderFromDomain("oraclecloud.com", "xyz"))
}

// TestInputRunSNS is an integration test for the S3 -> SNS -> SQS
// notification path: it uploads test files to the SNS-backed bucket and
// asserts the input's metrics after the queue has been consumed.
// NOTE(review): requires AWS credentials and the resources created by
// the Terraform module in _meta/terraform; intended for manual runs with
// the aws,integration build tags.
func TestInputRunSNS(t *testing.T) {
	logp.TestingSetup()

	// Terraform is used to set up S3, SNS and SQS and must be executed manually.
	tfConfig := getTerraformOutputs(t)

	// Ensure SQS is empty before testing.
	drainSQS(t, tfConfig.AWSRegion, tfConfig.QueueURLForSNS)

	// Ensure metrics are removed before testing.
	monitoring.GetNamespace("dataset").GetRegistry().Remove(inputID)

	uploadS3TestFiles(t, tfConfig.AWSRegion, tfConfig.BucketNameForSNS,
		"testdata/events-array.json",
		"testdata/invalid.json",
		"testdata/log.json",
		"testdata/log.ndjson",
		"testdata/multiline.json",
		"testdata/multiline.json.gz",
		"testdata/multiline.txt",
		"testdata/log.txt", // Skipped (no match).
	)

	// The same SQS-based config is used as for the direct S3 -> SQS test;
	// only the queue URL differs, since the input always reads from SQS.
	s3Input := createInput(t, makeTestConfigSQS(tfConfig.QueueURLForSNS))

	// Stop the input after a fixed window; by then the queue should have
	// been drained and the metrics settled.
	inputCtx, cancel := newV2Context()
	t.Cleanup(cancel)
	time.AfterFunc(15*time.Second, func() {
		cancel()
	})

	// ACK every published event so the input can delete the corresponding
	// SQS messages.
	client := pubtest.NewChanClient(0)
	defer close(client.Channel)
	go func() {
		for event := range client.Channel {
			event.Private.(*eventACKTracker).ACK()
		}
	}()

	// Run the input until the context above is cancelled.
	var errGroup errgroup.Group
	errGroup.Go(func() error {
		pipeline := pubtest.PublisherWithClient(client)
		return s3Input.Run(inputCtx, pipeline)
	})

	if err := errGroup.Wait(); err != nil {
		t.Fatal(err)
	}

	// Snapshot the input's metrics registry for the assertions below.
	snap := common.MapStr(monitoring.CollectStructSnapshot(
		monitoring.GetNamespace("dataset").GetRegistry(),
		monitoring.Full,
		false))
	t.Log(snap.StringToPrint())

	assertMetric(t, snap, "sqs_messages_received_total", 8) // S3 could batch notifications.
	assertMetric(t, snap, "sqs_messages_inflight_gauge", 0)
	assertMetric(t, snap, "sqs_messages_deleted_total", 7)
	assertMetric(t, snap, "sqs_messages_returned_total", 1) // Invalid JSON is returned so that it can eventually be DLQed.
	assertMetric(t, snap, "sqs_visibility_timeout_extensions_total", 0)
	assertMetric(t, snap, "s3_objects_inflight_gauge", 0)
	assertMetric(t, snap, "s3_objects_requested_total", 7)
	assertMetric(t, snap, "s3_events_created_total", 12)
}
19 changes: 17 additions & 2 deletions x-pack/filebeat/input/awss3/sqs_s3_event.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,12 @@ func nonRetryableErrorWrap(err error) error {
// s3EventsV2 is the notification message that Amazon S3 sends to notify of S3 changes.
// This was derived from the version 2.2 schema.
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/notification-content-structure.html
// If the notification message is sent from SNS to SQS, then Records will be
// replaced by TopicArn and Message fields.
//
// NOTE(review): the diff rendering interleaved the old and new Records
// field lines, duplicating the field; only one declaration is kept here.
type s3EventsV2 struct {
	// TopicArn is non-empty only when the notification was relayed
	// through an SNS topic.
	TopicArn string `json:"TopicArn"`
	// Message holds the embedded S3 notification JSON when the message
	// was delivered via SNS.
	Message string `json:"Message"`
	// Records is the list of S3 change events (direct S3 -> SQS path).
	Records []s3EventV2 `json:"Records"`
}

// s3EventV2 is a S3 change notification event.
Expand Down Expand Up @@ -189,6 +193,18 @@ func (p *sqsS3EventProcessor) getS3Notifications(body string) ([]s3EventV2, erro
return nil, fmt.Errorf("failed to decode SQS message body as an S3 notification: %w", err)
}

// Check if the notification is from S3 -> SNS -> SQS
if events.TopicArn != "" {
dec := json.NewDecoder(strings.NewReader(events.Message))
if err := dec.Decode(&events); err != nil {
p.log.Debugw("Invalid SQS message body.", "sqs_message_body", body)
return nil, fmt.Errorf("failed to decode SQS message body as an S3 notification: %w", err)
}
}
return p.getS3Info(events)
}

func (p *sqsS3EventProcessor) getS3Info(events s3EventsV2) ([]s3EventV2, error) {
var out []s3EventV2
for _, record := range events.Records {
if !p.isObjectCreatedEvents(record) {
Expand All @@ -211,7 +227,6 @@ func (p *sqsS3EventProcessor) getS3Notifications(body string) ([]s3EventV2, erro

out = append(out, record)
}

return out, nil
}

Expand Down
10 changes: 10 additions & 0 deletions x-pack/filebeat/input/awss3/sqs_s3_event_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,16 @@ func TestSqsProcessor_getS3Notifications(t *testing.T) {
require.NoError(t, err)
assert.Len(t, events, 0)
})

t.Run("sns-sqs notification", func(t *testing.T) {
msg := newSNSSQSMessage()
events, err := p.getS3Notifications(*msg.Body)
require.NoError(t, err)
assert.Len(t, events, 1)
assert.Equal(t, "test-object-key", events[0].S3.Object.Key)
assert.Equal(t, "arn:aws:s3:::vpc-flow-logs-ks", events[0].S3.Bucket.ARN)
assert.Equal(t, "vpc-flow-logs-ks", events[0].S3.Bucket.Name)
})
}

func TestNonRecoverableError(t *testing.T) {
Expand Down
Loading

0 comments on commit a4b233f

Please sign in to comment.