From 59813c1e75689f0fb41b5e4411889eb9ba3e21d8 Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Fri, 11 Aug 2023 01:23:34 +0530 Subject: [PATCH] x-pack/metricbeat/module/sql: Add option to execute SQL queries for all databases (#35688) --- CHANGELOG.next.asciidoc | 1 + metricbeat/docs/modules/sql.asciidoc | 327 ++++++++++++++++-- metricbeat/helper/sql/sql.go | 16 +- .../metricbeat/module/sql/_meta/docs.asciidoc | 327 ++++++++++++++++-- .../metricbeat/module/sql/docker-compose.yml | 5 + x-pack/metricbeat/module/sql/query/query.go | 194 +++++++++-- .../module/sql/query/test_sql_mssql.py | 59 ++++ 7 files changed, 829 insertions(+), 100 deletions(-) create mode 100644 x-pack/metricbeat/module/sql/query/test_sql_mssql.py diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2049ca28b4f3..3baff192e436 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -183,6 +183,7 @@ automatic splitting at root level, if root level element is an array. {pull}3415 - Fix EC2 host.cpu.usage {pull}35717[35717] - Resolve statsd module's prematurely halting of metrics parsing upon encountering an invalid packet. {pull}35075[35075] - Fix the gap in fetching forecast API metrics at the end of each month for Azure billing module {pull}36142[36142] +- Add option in SQL module to execute queries for all dbs. {pull}35688[35688] *Osquerybeat* diff --git a/metricbeat/docs/modules/sql.asciidoc b/metricbeat/docs/modules/sql.asciidoc index 879f80ab67fa..9c27c0bc4ba5 100644 --- a/metricbeat/docs/modules/sql.asciidoc +++ b/metricbeat/docs/modules/sql.asciidoc @@ -11,7 +11,7 @@ This file is generated! See scripts/mage/docs_collector.go == SQL module The SQL module allows you to execute custom queries against an SQL database and -store the results in {es}. It also enables developing various SQL metrics integrations, using sql query as input. +store the results in {es}. It also enables the development of various SQL metrics integrations, using SQL query as input. 
This module supports the databases that you can monitor with {metricbeat}, including: @@ -35,20 +35,21 @@ fields: `driver`:: The driver can be any driver that has a {metricbeat} module, such as `mssql` or `postgres`. -`raw_data.enabled`:: Expects either true or false. By default false. Marking as true will generate event results in new field format. +`fetch_from_all_databases`:: Expects either `true` or `false` and it is by default set to `false`. Marking as `true` will enable execution of `sql_queries` or `sql_query` for all databases in a server. Currently, only the `mssql` driver supports this feature. For other drivers, if enabled, the error "fetch from all databases feature is not supported for driver: <driver>" is logged. -Expects either `sql_queries` or `sql_query`. +`raw_data.enabled`:: Expects either `true` or `false` and it is by default set to `false`. Marking as `true` will generate event results in a new field format. -`sql_queries`:: Receives the list of queries to execute. `query` and `response_format` is repeated to get multiple query inputs. -`query`::: Expects sql query. -`response_format`::: Either `variables` or `table`: -`variables`:::: Expects a two-column table that looks like a key/value result. -The left column is considered a key and the right column the value. This mode -generates a single event on each fetch operation. -`table`:::: Expects any number of columns. This mode generates a single event for -each row. +Use `sql_queries` or `sql_query` depending on the use-case. -`sql_query`:: The single query you want to run. (`Backward Compatibility`). Also provide corresponding `sql_response_format`: either `variables` or `table` +`sql_queries`:: List of queries to execute. `query` and `response_format` fields are repeated to get multiple query inputs. + +`query`::: Single SQL query. + +`response_format`::: Either `variables` or `table`. +`variables`:::: Expects a two-column table that looks like a key-value result.
The left column is considered a key and the right column is the value. This mode generates a single event on each fetch operation. +`table`:::: Expects any number of columns. This mode generates a single event for each row. + +`sql_query` (`Backward Compatibility`):: Single query you want to run. Also, provide corresponding `sql_response_format` (value: `variables` or `table`) similar to `sql_queries`'s `response_format`. [float] == Example @@ -56,7 +57,7 @@ each row. Examples of configurations in `sql.yml` to connect with supported databases are mentioned below. [float] -=== Example: capture Innodb-related metrics +=== Example: Capture Innodb-related metrics This `sql.yml` configuration shows how to capture Innodb-related metrics that result from the query `SHOW GLOBAL STATUS LIKE 'Innodb_system%'` in a MySQL @@ -143,7 +144,7 @@ The example shown earlier generates this event: ---- [float] -=== Example: query PostgreSQL and generate a "table" result +=== Example: Query PostgreSQL and generate a "table" result This `sql.yml` configuration shows how to query PostgreSQL and generate a "table" result. This configuration generates a single event for each row @@ -231,7 +232,7 @@ for each row. For example, this event is created for the first row: ---- [float] -=== Example: get the buffer catch hit ratio in Oracle +=== Example: Get the buffer cache hit ratio in Oracle This `sql.yml` configuration shows how to get the buffer cache hit ratio: @@ -306,7 +307,7 @@ The example generates this event: ---- [float] -=== Example: get the buffer cache hit ratio for MSSQL +=== Example: Get the buffer cache hit ratio for MSSQL This `sql.yml` configuration gets the buffer cache hit ratio: @@ -356,7 +357,7 @@ The example generates this event: ---- [float] -=== Example: launch two or more queries. +=== Example: Launch two or more queries. To launch two or more queries, specify the full configuration for each query.
@@ -420,14 +421,14 @@ The example generates this event: The response event is generated in new format "version": "8.0.0" }, "host": { - "name": "Muthu-mps" + "name": "mps" }, "agent": { "type": "metricbeat", "version": "8.3.0", "ephemeral_id": "8decc9eb-5ea5-47d8-8a22-fac507a5521b", "id": "6bbf5058-afed-44c6-aa05-775ee14a2da4", - "name": "Muthu-mps" + "name": "mps" } } ---- @@ -477,26 +478,26 @@ The example generates this event: By disabling the flag `raw_data.enabled`, whic "version": "8.0.0" }, "host": { - "name": "Muthu-mps" + "name": "mps" }, "agent": { "version": "8.3.0", "ephemeral_id": "bc09584b-62db-4b45-bfe9-6b7e8e982361", "id": "6bbf5058-afed-44c6-aa05-775ee14a2da4", - "name": "Muthu-mps", + "name": "mps", "type": "metricbeat" } } ---- [float] -=== Example: Merge multiple queries to single event. +=== Example: Merge multiple queries into a single event. -Multiple queries will create multiple events, one for each query. It may be preferrable to create a single event by combining the metrics together in a single event. +Multiple queries will create multiple events, one for each query. It may be preferable to create a single event by combining the metrics together in a single event. This feature can be enabled using the `merge_results` config. -However, such a merge is possible only if the table queries being merged, each produce a single row. +However, such a merge is possible only if each of the table queries being merged produces a single row.
For example: @@ -525,7 +526,7 @@ This creates a combined event as below, where `blks_hit`, `blks_read`, `checkpoi { "@timestamp": "2022-07-21T07:07:06.747Z", "agent": { - "name": "Lalits-MBP-2", + "name": "MBP-2", "type": "metricbeat", "version": "8.4.0", "ephemeral_id": "b0867287-e56a-492f-b421-0ac870c426f9", @@ -556,6 +557,276 @@ This creates a combined event as below, where `blks_hit`, `blks_read`, `checkpoi } ---- +[float] +=== Example: Execute given queries for all database(s) present in a server + +A user could have hundreds of databases on their server, and adding them manually to the query becomes cumbersome. If `fetch_from_all_databases` is set to `true`, the SQL module fetches the database names automatically and prefixes +the database selector statement to the queries so that the queries can run against +each of those databases. + +Currently, this feature only works with the `mssql` driver. For example: + +[source,yaml] +---- +- module: sql + metricsets: + - query + period: 50s + hosts: ["sqlserver://<user>:<password>@<host>"] + raw_data.enabled: true + + fetch_from_all_databases: true + + driver: "mssql" + sql_queries: + - query: SELECT DB_NAME() AS 'database_name'; + response_format: table +---- + +By default, an mssql instance has only four databases, namely `master`, `model`, `msdb`, and `tempdb`. So, if `fetch_from_all_databases` is enabled, the query `SELECT DB_NAME() AS 'database_name'` runs for each one of them, i.e., there would be 4 documents in total (one for each of the 4 databases) for every scrape.
+ + +[source,json] +---- +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "service": { + "type": "sql", + "address": "localhost" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 40346375 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "sql": { + "metrics": { + "database_name": "master" + }, + "driver": "mssql", + "query": "USE [master]; SELECT DB_NAME() AS 'database_name';" + }, + "host": { + "os": { + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "", + "build": "" + }, + "name": "", + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "", + "architecture": "arm64" + }, + "agent": { + "name": "", + "type": "metricbeat", + "version": "8.10.0", + "ephemeral_id": "", + "id": "" + }, + "ecs": { + "version": "8.0.0" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "agent": { + "ephemeral_id": "", + "id": "", + "name": "", + "type": "metricbeat", + "version": "8.10.0" + }, + "event": { + "module": "sql", + "duration": 43147875, + "dataset": "sql.query" + }, + "metricset": { + "period": 50000, + "name": "query" + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "tempdb" + }, + "driver": "mssql", + "query": "USE [tempdb]; SELECT DB_NAME() AS 'database_name';" + }, + "ecs": { + "version": "8.0.0" + }, + "host": { + "name": "", + "architecture": "arm64", + "os": { + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "", + "build": "", + "type": "macos" + }, + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + 
"host": { + "os": { + "build": "", + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "" + }, + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "", + "name": "", + "architecture": "arm64" + }, + "agent": { + "ephemeral_id": "", + "id": "", + "name": "", + "type": "metricbeat", + "version": "8.10.0" + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "model" + }, + "driver": "mssql", + "query": "USE [model]; SELECT DB_NAME() AS 'database_name';" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 46623125 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "ecs": { + "version": "8.0.0" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "host": { + "architecture": "arm64", + "os": { + "kernel": "", + "build": "", + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS" + }, + "name": "", + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "" + }, + "agent": { + "type": "metricbeat", + "version": "8.10.0", + "ephemeral_id": "", + "id": "", + "name": "" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 49649250 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "msdb" + }, + "driver": "mssql", + "query": "USE [msdb]; SELECT DB_NAME() AS 'database_name';" + }, + "ecs": { + "version": "8.0.0" + } +} +---- + + === Host Setup Some drivers require additional configuration to work. Find here instructions for these drivers. @@ -575,9 +846,9 @@ Also, add `ORACLE_HOME/bin` to the `PATH` environment variable. 
===== Oracle Instant Client Installation -Oracle Instant Client enables development and deployment of applications that connect to Oracle Database. The Instant Client libraries provide the necessary network connectivity and advanced data features to make full use of Oracle Database. If you have OCI Oracle server which comes with these libraries pre-installed, you don't need a separate client installation. +Oracle Instant Client enables the development and deployment of applications that connect to the Oracle Database. The Instant Client libraries provide the necessary network connectivity and advanced data features to make full use of the Oracle Database. If you have an OCI Oracle server which comes with these libraries pre-installed, you don't need a separate client installation. -The OCI library install few Client Shared Libraries that must be referenced on the machine where Metricbeat is installed. Please follow https://docs.oracle.com/en/database/oracle/oracle-database/21/lacli/install-instant-client-using-zip.html#GUID-D3DCB4FB-D3CA-4C25-BE48-3A1FB5A22E84[this] link for OCI Instant Client set up. The OCI Instant Client is available with the Oracle Universal Installer, RPM file or ZIP file. Download links can be found https://www.oracle.com/database/technologies/instant-client/downloads.html[here]. +The OCI library installs a few Client Shared Libraries that must be referenced on the machine where Metricbeat is installed. Please follow https://docs.oracle.com/en/database/oracle/oracle-database/21/lacli/install-instant-client-using-zip.html#GUID-D3DCB4FB-D3CA-4C25-BE48-3A1FB5A22E84[this] link for OCI Instant Client set up. The OCI Instant Client is available with the Oracle Universal Installer, RPM file or ZIP file. Download links can be found https://www.oracle.com/database/technologies/instant-client/downloads.html[here].
===== Enable Oracle Listener @@ -612,7 +883,7 @@ The following two types of host configurations are supported: Note: If the password contains the backslash (`\`) character, it must be escaped with a backslash. For example, if the password is `my\_password`, it should be written as `my\\_password`. -Username and Password to connect to the database can be provided as values to `username` and `password` keys of `sql.yml`. +The username and password to connect to the database can be provided as values to `username` and `password` keys of `sql.yml`. [source,yml] ---- diff --git a/metricbeat/helper/sql/sql.go b/metricbeat/helper/sql/sql.go index 90ba5d960622..a0d4ebbd36be 100644 --- a/metricbeat/helper/sql/sql.go +++ b/metricbeat/helper/sql/sql.go @@ -44,14 +44,17 @@ type sqlRow interface { // NewDBClient gets a client ready to query the database func NewDBClient(driver, uri string, l *logp.Logger) (*DbClient, error) { - dbx, err := sql.Open(switchDriverName(driver), uri) + dbx, err := sql.Open(SwitchDriverName(driver), uri) if err != nil { return nil, fmt.Errorf("opening connection: %w", err) } err = dbx.Ping() if err != nil { if closeErr := dbx.Close(); closeErr != nil { - return nil, fmt.Errorf("failed to close with %s, after connection test failed: %w", closeErr, err) + // NOTE(SS): Support for wrapping multiple errors is there in Go 1.20+. + // TODO(SS): When beats module starts using Go 1.20+, use: https://pkg.go.dev/errors#Join + // and until then, let's use the following workaround. + return nil, fmt.Errorf(fmt.Sprintf("failed to close with: %s", closeErr.Error())+" after connection test failed: %w", err) } return nil, fmt.Errorf("testing connection: %w", err) } @@ -59,7 +62,7 @@ func NewDBClient(driver, uri string, l *logp.Logger) (*DbClient, error) { return &DbClient{DB: dbx, logger: l}, nil } -// fetchTableMode scan the rows and publishes the event for querys that return the response in a table format. 
+// FetchTableMode scan the rows and publishes the event for querys that return the response in a table format. func (d *DbClient) FetchTableMode(ctx context.Context, q string) ([]mapstr.M, error) { rows, err := d.QueryContext(ctx, q) if err != nil { @@ -144,7 +147,8 @@ func (d *DbClient) fetchVariableMode(rows sqlRow) (mapstr.M, error) { r := mapstr.M{} for key, value := range data { - value := getValue(&value) + value := value + value = getValue(&value) r.Put(key, value) } @@ -187,9 +191,9 @@ func getValue(pval *interface{}) interface{} { } } -// switchDriverName switches between driver name and a pretty name for a driver. For example, 'oracle' driver is called +// SwitchDriverName switches between driver name and a pretty name for a driver. For example, 'oracle' driver is called // 'godror' so this detail implementation must be hidden to the user, that should only choose and see 'oracle' as driver -func switchDriverName(d string) string { +func SwitchDriverName(d string) string { switch d { case "oracle": return "godror" diff --git a/x-pack/metricbeat/module/sql/_meta/docs.asciidoc b/x-pack/metricbeat/module/sql/_meta/docs.asciidoc index 9286e043d60b..17175cb58780 100644 --- a/x-pack/metricbeat/module/sql/_meta/docs.asciidoc +++ b/x-pack/metricbeat/module/sql/_meta/docs.asciidoc @@ -1,5 +1,5 @@ The SQL module allows you to execute custom queries against an SQL database and -store the results in {es}. It also enables developing various SQL metrics integrations, using sql query as input. +store the results in {es}. It also enables the development of various SQL metrics integrations, using SQL query as input. This module supports the databases that you can monitor with {metricbeat}, including: @@ -23,20 +23,21 @@ fields: `driver`:: The driver can be any driver that has a {metricbeat} module, such as `mssql` or `postgres`. -`raw_data.enabled`:: Expects either true or false. By default false. Marking as true will generate event results in new field format. 
+`fetch_from_all_databases`:: Expects either `true` or `false` and it is by default set to `false`. Marking as `true` will enable execution of `sql_queries` or `sql_query` for all databases in a server. Currently, only the `mssql` driver supports this feature. For other drivers, if enabled, the error "fetch from all databases feature is not supported for driver: <driver>" is logged. -Expects either `sql_queries` or `sql_query`. +`raw_data.enabled`:: Expects either `true` or `false` and it is by default set to `false`. Marking as `true` will generate event results in a new field format. -`sql_queries`:: Receives the list of queries to execute. `query` and `response_format` is repeated to get multiple query inputs. -`query`::: Expects sql query. -`response_format`::: Either `variables` or `table`: -`variables`:::: Expects a two-column table that looks like a key/value result. -The left column is considered a key and the right column the value. This mode -generates a single event on each fetch operation. -`table`:::: Expects any number of columns. This mode generates a single event for -each row. +Use `sql_queries` or `sql_query` depending on the use-case. -`sql_query`:: The single query you want to run. (`Backward Compatibility`). Also provide corresponding `sql_response_format`: either `variables` or `table` +`sql_queries`:: List of queries to execute. `query` and `response_format` fields are repeated to get multiple query inputs. + +`query`::: Single SQL query. + +`response_format`::: Either `variables` or `table`. +`variables`:::: Expects a two-column table that looks like a key-value result. The left column is considered a key and the right column is the value. This mode generates a single event on each fetch operation. +`table`:::: Expects any number of columns. This mode generates a single event for each row. + +`sql_query` (`Backward Compatibility`):: Single query you want to run.
Also, provide corresponding `sql_response_format` (value: `variables` or `table`) similar to `sql_queries`'s `response_format`. [float] == Example @@ -44,7 +45,7 @@ each row. Examples of configurations in `sql.yml` to connect with supported databases are mentioned below. [float] -=== Example: capture Innodb-related metrics +=== Example: Capture Innodb-related metrics This `sql.yml` configuration shows how to capture Innodb-related metrics that result from the query `SHOW GLOBAL STATUS LIKE 'Innodb_system%'` in a MySQL @@ -131,7 +132,7 @@ The example shown earlier generates this event: ---- [float] -=== Example: query PostgreSQL and generate a "table" result +=== Example: Query PostgreSQL and generate a "table" result This `sql.yml` configuration shows how to query PostgreSQL and generate a "table" result. This configuration generates a single event for each row @@ -219,7 +220,7 @@ for each row. For example, this event is created for the first row: ---- [float] -=== Example: get the buffer catch hit ratio in Oracle +=== Example: Get the buffer cache hit ratio in Oracle This `sql.yml` configuration shows how to get the buffer cache hit ratio: @@ -294,7 +295,7 @@ The example generates this event: ---- [float] -=== Example: get the buffer cache hit ratio for MSSQL +=== Example: Get the buffer cache hit ratio for MSSQL This `sql.yml` configuration gets the buffer cache hit ratio: @@ -344,7 +345,7 @@ The example generates this event: ---- [float] -=== Example: launch two or more queries. +=== Example: Launch two or more queries. To launch two or more queries, specify the full configuration for each query.
@@ -408,14 +409,14 @@ The example generates this event: The response event is generated in new format "version": "8.0.0" }, "host": { - "name": "Muthu-mps" + "name": "mps" }, "agent": { "type": "metricbeat", "version": "8.3.0", "ephemeral_id": "8decc9eb-5ea5-47d8-8a22-fac507a5521b", "id": "6bbf5058-afed-44c6-aa05-775ee14a2da4", - "name": "Muthu-mps" + "name": "mps" } } ---- @@ -465,26 +466,26 @@ The example generates this event: By disabling the flag `raw_data.enabled`, whic "version": "8.0.0" }, "host": { - "name": "Muthu-mps" + "name": "mps" }, "agent": { "version": "8.3.0", "ephemeral_id": "bc09584b-62db-4b45-bfe9-6b7e8e982361", "id": "6bbf5058-afed-44c6-aa05-775ee14a2da4", - "name": "Muthu-mps", + "name": "mps", "type": "metricbeat" } } ---- [float] -=== Example: Merge multiple queries to single event. +=== Example: Merge multiple queries into a single event. -Multiple queries will create multiple events, one for each query. It may be preferrable to create a single event by combining the metrics together in a single event. +Multiple queries will create multiple events, one for each query. It may be preferable to create a single event by combining the metrics together in a single event. This feature can be enabled using the `merge_results` config. -However, such a merge is possible only if the table queries being merged, each produce a single row. +However, such a merge is possible only if each of the table queries being merged produces a single row.
For example: @@ -513,7 +514,7 @@ This creates a combined event as below, where `blks_hit`, `blks_read`, `checkpoi { "@timestamp": "2022-07-21T07:07:06.747Z", "agent": { - "name": "Lalits-MBP-2", + "name": "MBP-2", "type": "metricbeat", "version": "8.4.0", "ephemeral_id": "b0867287-e56a-492f-b421-0ac870c426f9", @@ -544,6 +545,276 @@ This creates a combined event as below, where `blks_hit`, `blks_read`, `checkpoi } ---- +[float] +=== Example: Execute given queries for all database(s) present in a server + +A user could have hundreds of databases on their server, and adding them manually to the query becomes cumbersome. If `fetch_from_all_databases` is set to `true`, the SQL module fetches the database names automatically and prefixes +the database selector statement to the queries so that the queries can run against +each of those databases. + +Currently, this feature only works with the `mssql` driver. For example: + +[source,yaml] +---- +- module: sql + metricsets: + - query + period: 50s + hosts: ["sqlserver://<user>:<password>@<host>"] + raw_data.enabled: true + + fetch_from_all_databases: true + + driver: "mssql" + sql_queries: + - query: SELECT DB_NAME() AS 'database_name'; + response_format: table +---- + +By default, an mssql instance has only four databases, namely `master`, `model`, `msdb`, and `tempdb`. So, if `fetch_from_all_databases` is enabled, the query `SELECT DB_NAME() AS 'database_name'` runs for each one of them, i.e., there would be 4 documents in total (one for each of the 4 databases) for every scrape.
+ + +[source,json] +---- +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "service": { + "type": "sql", + "address": "localhost" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 40346375 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "sql": { + "metrics": { + "database_name": "master" + }, + "driver": "mssql", + "query": "USE [master]; SELECT DB_NAME() AS 'database_name';" + }, + "host": { + "os": { + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "", + "build": "" + }, + "name": "", + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "", + "architecture": "arm64" + }, + "agent": { + "name": "", + "type": "metricbeat", + "version": "8.10.0", + "ephemeral_id": "", + "id": "" + }, + "ecs": { + "version": "8.0.0" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "agent": { + "ephemeral_id": "", + "id": "", + "name": "", + "type": "metricbeat", + "version": "8.10.0" + }, + "event": { + "module": "sql", + "duration": 43147875, + "dataset": "sql.query" + }, + "metricset": { + "period": 50000, + "name": "query" + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "tempdb" + }, + "driver": "mssql", + "query": "USE [tempdb]; SELECT DB_NAME() AS 'database_name';" + }, + "ecs": { + "version": "8.0.0" + }, + "host": { + "name": "", + "architecture": "arm64", + "os": { + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "", + "build": "", + "type": "macos" + }, + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + 
"host": { + "os": { + "build": "", + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS", + "kernel": "" + }, + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "", + "name": "", + "architecture": "arm64" + }, + "agent": { + "ephemeral_id": "", + "id": "", + "name": "", + "type": "metricbeat", + "version": "8.10.0" + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "model" + }, + "driver": "mssql", + "query": "USE [model]; SELECT DB_NAME() AS 'database_name';" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 46623125 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "ecs": { + "version": "8.0.0" + } +} +{ + "@timestamp": "2023-07-16T22:05:26.976Z", + "@metadata": { + "beat": "metricbeat", + "type": "_doc", + "version": "8.10.0" + }, + "host": { + "architecture": "arm64", + "os": { + "kernel": "", + "build": "", + "type": "macos", + "platform": "darwin", + "version": "13.3.1", + "family": "darwin", + "name": "macOS" + }, + "name": "", + "id": "", + "ip": [ + "" + ], + "mac": [ + "" + ], + "hostname": "" + }, + "agent": { + "type": "metricbeat", + "version": "8.10.0", + "ephemeral_id": "", + "id": "", + "name": "" + }, + "event": { + "dataset": "sql.query", + "module": "sql", + "duration": 49649250 + }, + "metricset": { + "name": "query", + "period": 50000 + }, + "service": { + "address": "localhost", + "type": "sql" + }, + "sql": { + "metrics": { + "database_name": "msdb" + }, + "driver": "mssql", + "query": "USE [msdb]; SELECT DB_NAME() AS 'database_name';" + }, + "ecs": { + "version": "8.0.0" + } +} +---- + + === Host Setup Some drivers require additional configuration to work. Find here instructions for these drivers. @@ -563,9 +834,9 @@ Also, add `ORACLE_HOME/bin` to the `PATH` environment variable. 
===== Oracle Instant Client Installation -Oracle Instant Client enables development and deployment of applications that connect to Oracle Database. The Instant Client libraries provide the necessary network connectivity and advanced data features to make full use of Oracle Database. If you have OCI Oracle server which comes with these libraries pre-installed, you don't need a separate client installation. +Oracle Instant Client enables the development and deployment of applications that connect to the Oracle Database. The Instant Client libraries provide the necessary network connectivity and advanced data features to make full use of the Oracle Database. If you have an OCI Oracle server which comes with these libraries pre-installed, you don't need a separate client installation. -The OCI library install few Client Shared Libraries that must be referenced on the machine where Metricbeat is installed. Please follow https://docs.oracle.com/en/database/oracle/oracle-database/21/lacli/install-instant-client-using-zip.html#GUID-D3DCB4FB-D3CA-4C25-BE48-3A1FB5A22E84[this] link for OCI Instant Client set up. The OCI Instant Client is available with the Oracle Universal Installer, RPM file or ZIP file. Download links can be found https://www.oracle.com/database/technologies/instant-client/downloads.html[here]. +The OCI library installs a few Client Shared Libraries that must be referenced on the machine where Metricbeat is installed. Please follow https://docs.oracle.com/en/database/oracle/oracle-database/21/lacli/install-instant-client-using-zip.html#GUID-D3DCB4FB-D3CA-4C25-BE48-3A1FB5A22E84[this] link for OCI Instant Client set up. The OCI Instant Client is available with the Oracle Universal Installer, RPM file or ZIP file. Download links can be found https://www.oracle.com/database/technologies/instant-client/downloads.html[here].
===== Enable Oracle Listener @@ -600,7 +871,7 @@ The following two types of host configurations are supported: Note: If the password contains the backslash (`\`) character, it must be escaped with a backslash. For example, if the password is `my\_password`, it should be written as `my\\_password`. -Username and Password to connect to the database can be provided as values to `username` and `password` keys of `sql.yml`. +The username and password to connect to the database can be provided as values to `username` and `password` keys of `sql.yml`. [source,yml] ---- diff --git a/x-pack/metricbeat/module/sql/docker-compose.yml b/x-pack/metricbeat/module/sql/docker-compose.yml index fac50c274b1e..184a340f1b52 100644 --- a/x-pack/metricbeat/module/sql/docker-compose.yml +++ b/x-pack/metricbeat/module/sql/docker-compose.yml @@ -15,3 +15,8 @@ services: extends: file: ../../../../x-pack/metricbeat/module/oracle/docker-compose.yml service: oracle + + mssql: + extends: + file: ../../../../x-pack/metricbeat/module/mssql/docker-compose.yml + service: mssql diff --git a/x-pack/metricbeat/module/sql/query/query.go b/x-pack/metricbeat/module/sql/query/query.go index e80c706b3494..f521acbe133a 100644 --- a/x-pack/metricbeat/module/sql/query/query.go +++ b/x-pack/metricbeat/module/sql/query/query.go @@ -6,6 +6,7 @@ package query import ( "context" + "errors" "fmt" "github.com/jmoiron/sqlx" @@ -44,12 +45,15 @@ type config struct { Driver string `config:"driver" validate:"nonzero,required"` - // Support either the previous query / or the new list of queries. - ResponseFormat string `config:"sql_response_format"` - Query string `config:"sql_query" ` + // Support either the query or list of queries. 
+ ResponseFormat string `config:"sql_response_format"` + Query string `config:"sql_query"` + Queries []query `config:"sql_queries"` + MergeResults bool `config:"merge_results"` - Queries []query `config:"sql_queries" ` - MergeResults bool `config:"merge_results"` + // Support fetch response for given queries from all databases. + // NOTE: Currently, mssql driver only respects FetchFromAllDatabases. + FetchFromAllDatabases bool `config:"fetch_from_all_databases"` } // MetricSet holds any configuration or state information. It must implement @@ -82,7 +86,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, fmt.Errorf("invalid sql_response_format value: %s", b.Config.ResponseFormat) } } else { - // Backword compartibility, if no value is provided + // Backward compatibility, if no value is provided. // This will ensure there is no braking change, as the previous code worked with no ResponseFormat b.Config.ResponseFormat = variableResponseFormat } @@ -104,76 +108,190 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return b, nil } -// Fetch methods implements the data gathering and data conversion to the right -// format. It publishes the event which is then forwarded to the output. In case -// of an error set the Error field of mb.Event or simply call report.Error(). -// It calls m.fetchTableMode() or m.fetchVariableMode() depending on the response -// format of the query. -func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { - db, err := sql.NewDBClient(m.Config.Driver, m.HostData().URI, m.Logger()) - if err != nil { - return fmt.Errorf("could not open connection: %w", err) +// queryDBNames returns the query to list databases present in a server +// as per the driver name. If the given driver is not supported, queryDBNames +// returns an empty query. +func queryDBNames(driver string) string { + switch sql.SwitchDriverName(driver) { + // NOTE: Add support for other drivers in future as when the need arises. 
+ // dbSelector function would also need to be modified in order to add + // support for a new driver. + case "mssql": + return "SELECT [name] FROM sys.databases WITH (NOLOCK) WHERE state = 0 AND HAS_DBACCESS([name]) = 1" + // case "mysql": + // return "SHOW DATABASES" + // case "godror": + // // NOTE: Requires necessary privileges to access DBA_USERS + // // Ref: https://stackoverflow.com/a/3005623/5821408 + // return "SELECT * FROM DBA_USERS" + // case "postgres": + // return "SELECT datname FROM pg_database" } - defer db.Close() - queries := m.Config.Queries - if len(queries) == 0 { - one_query := query{Query: m.Config.Query, ResponseFormat: m.Config.ResponseFormat} - queries = append(queries, one_query) - } + return "" +} - merged := mapstr.M{} +// dbSelector returns the statement to select a named database to run the +// subsequent statements. If the given driver is not supported, dbSelector +// returns an empty statement. +func dbSelector(driver, dbName string) string { + switch sql.SwitchDriverName(driver) { + // NOTE: Add support for other drivers in the future as and when the need arises. + // queryDBNames function would also need to be modified in order to add + // support for a new driver.
+ // + case "mssql": + return fmt.Sprintf("USE [%s];", dbName) + } + return "" +} +func (m *MetricSet) fetch(ctx context.Context, db *sql.DbClient, reporter mb.ReporterV2, queries []query) (bool, error) { + var ok bool + merged := make(mapstr.M, 0) for _, q := range queries { if q.ResponseFormat == tableResponseFormat { // Table format mss, err := db.FetchTableMode(ctx, q.Query) if err != nil { - return fmt.Errorf("fetch table mode failed: %w", err) + return ok, fmt.Errorf("fetch table mode failed: %w", err) } for _, ms := range mss { if m.Config.MergeResults { if len(mss) > 1 { - return fmt.Errorf("can not merge query resulting with more than one rows: %s", q) + return ok, fmt.Errorf("cannot merge query resulting with more than one rows: %s", q) } else { for k, v := range ms { _, ok := merged[k] if ok { - m.Logger().Warn("overwriting duplicate metrics: ", k) + m.Logger().Warn("overwriting duplicate metrics:", k) } merged[k] = v } } } else { // Report immediately for non-merged cases. - m.reportEvent(ms, reporter, q.Query) + ok = m.reportEvent(ms, reporter, q.Query) } } } else { // Variable format ms, err := db.FetchVariableMode(ctx, q.Query) if err != nil { - return fmt.Errorf("fetch variable mode failed: %w", err) + return ok, fmt.Errorf("fetch variable mode failed: %w", err) } if m.Config.MergeResults { for k, v := range ms { _, ok := merged[k] if ok { - m.Logger().Warn("overwriting duplicate metrics: ", k) + m.Logger().Warn("overwriting duplicate metrics:", k) } merged[k] = v } } else { // Report immediately for non-merged cases. - m.reportEvent(ms, reporter, q.Query) + ok = m.reportEvent(ms, reporter, q.Query) } } } + if m.Config.MergeResults { // Report here for merged case. - m.reportEvent(merged, reporter, "") + ok = m.reportEvent(merged, reporter, "") + } + + return ok, nil +} + +// Fetch method implements the data gathering and data conversion to the right +// format. It publishes the event which is then forwarded to the output. 
In case +// of an error set the Error field of mb.Event or simply call report.Error(). +// It calls m.fetchTableMode() or m.fetchVariableMode() depending on the response +// format of the query. +func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { + db, err := sql.NewDBClient(m.Config.Driver, m.HostData().URI, m.Logger()) + if err != nil { + return fmt.Errorf("cannot open connection: %w", err) + } + defer db.Close() + + queries := m.Config.Queries + if len(queries) == 0 { + one_query := query{Query: m.Config.Query, ResponseFormat: m.Config.ResponseFormat} + queries = append(queries, one_query) + } + + if !m.Config.FetchFromAllDatabases { + reported, err := m.fetch(ctx, db, reporter, queries) + if err != nil { + m.Logger().Warn("error while fetching:", err) + } + if !reported { + m.Logger().Debug("error trying to emit event") + } + return nil + } + + // NOTE: Only mssql driver is supported for now because: + // + // * Difference in queries to fetch the name of the databases + // * The statement to select a named database (for subsequent statements + // to be executed) may not be generic i.e., USE statement (e.g., USE <database_name>) + // works for MSSQL but not Oracle. + // + // TODO: Add the feature for other drivers when need arises. + validQuery := queryDBNames(m.Config.Driver) + if validQuery == "" { + return fmt.Errorf("fetch from all databases feature is not supported for driver: %s", m.Config.Driver) + } + + // Discover all databases in the server and execute given queries on each + // of the databases. + dbNames, err := db.FetchTableMode(ctx, queryDBNames(m.Config.Driver)) + if err != nil { + return fmt.Errorf("cannot fetch database names: %w", err) + } + + if len(dbNames) == 0 { + return errors.New("no database names found") + } + + qs := make([]query, 0, len(queries)) + + for i := range dbNames { + // Create a copy of the queries as query would be modified on every + // iteration. + qs = qs[:0] // empty slice + qs = append(qs, queries...)
// copy queries + + val, err := dbNames[i].GetValue("name") + if err != nil { + m.Logger().Warn("error with database name:", err) + continue + } + dbName, ok := val.(string) + if !ok { + m.Logger().Warn("error with database name's type") + continue + } + + // Prefix dbSelector to the query based on the driver + // provided. + // Example: USE [<dbName>]; @command (or @query) + for i := range qs { + qs[i].Query = dbSelector(m.Config.Driver, dbName) + " " + qs[i].Query + } + + reported, err := m.fetch(ctx, db, reporter, qs) + if err != nil { + m.Logger().Warn("error while fetching:", err) + } + if !reported { + m.Logger().Debug("error trying to emit event") + return nil + } } return nil @@ -181,15 +299,15 @@ func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { // reportEvent using 'user' mode with keys under `sql.metrics.*` or using Raw data mode (module and metricset key spaces // provided by the user) -func (m *MetricSet) reportEvent(ms mapstr.M, reporter mb.ReporterV2, qry string) { +func (m *MetricSet) reportEvent(ms mapstr.M, reporter mb.ReporterV2, qry string) bool { + var ok bool if m.Config.RawData.Enabled { - // New usage. // Only driver & query field mapped. // metrics to be mapped by end user. if len(qry) > 0 { // set query. - reporter.Event(mb.Event{ + ok = reporter.Event(mb.Event{ ModuleFields: mapstr.M{ "metrics": ms, // Individual metric "driver": m.Config.Driver, @@ -197,19 +315,18 @@ func (m *MetricSet) reportEvent(ms mapstr.M, reporter mb.ReporterV2, qry string) }, }) } else { - reporter.Event(mb.Event{ + ok = reporter.Event(mb.Event{ // Do not set query. ModuleFields: mapstr.M{ "metrics": ms, // Individual metric "driver": m.Config.Driver, }, }) - } } else { - // Previous usage. Backword compartibility. + // Previous usage. Backward compatibility. // Supports field mapping.
- reporter.Event(mb.Event{ + ok = reporter.Event(mb.Event{ ModuleFields: mapstr.M{ "driver": m.Config.Driver, "query": qry, @@ -217,6 +334,7 @@ func (m *MetricSet) reportEvent(ms mapstr.M, reporter mb.ReporterV2, qry string) }, }) } + return ok } // inferTypeFromMetrics to organize the output event into 'numeric', 'strings', 'floats' and 'boolean' values @@ -237,7 +355,7 @@ func inferTypeFromMetrics(ms mapstr.M) mapstr.M { case bool: boolMetrics[k] = v case nil: - //Ignore because a nil has no data type and thus cannot be indexed + // Ignore because a nil has no data type and thus cannot be indexed default: stringMetrics[k] = v } diff --git a/x-pack/metricbeat/module/sql/query/test_sql_mssql.py b/x-pack/metricbeat/module/sql/query/test_sql_mssql.py new file mode 100644 index 000000000000..c212830fe0c8 --- /dev/null +++ b/x-pack/metricbeat/module/sql/query/test_sql_mssql.py @@ -0,0 +1,59 @@ +import os +import sys +import unittest +import time +from xpack_metricbeat import XPackTest, metricbeat + + +class Test(XPackTest): + COMPOSE_SERVICES = ['mssql'] + + @unittest.skipUnless(metricbeat.INTEGRATION_TESTS, "integration test") + def test_query_without_fetch_from_all_databases(self): + """ + SQL MSSQL custom query with fetch_from_all_databases=false + + 1 document will be received from the default selected database: 'master' in MSSQL. + """ + self.run_query_test(fetch_from_all_databases=False, expected_output_count=1) + + @unittest.skipUnless(metricbeat.INTEGRATION_TESTS, "integration test") + def test_query_with_fetch_from_all_databases(self): + """ + SQL MSSQL custom query with fetch_from_all_databases=true + + 4 documents will be received, each corresponding to one of the four default databases in MSSQL. 
+ """ + self.run_query_test(fetch_from_all_databases=True, expected_output_count=4) + + def run_query_test(self, fetch_from_all_databases: bool, expected_output_count: int) -> list: + self.render_config_template(modules=[{ + "name": "sql", + "metricsets": ["query"], + "hosts": ['sqlserver://{}:{}@{}'.format(self.get_username(), self.get_password(), self.compose_host())], + "period": "5s", + "additional_content": f""" + driver: mssql + fetch_from_all_databases: {str(fetch_from_all_databases).lower()} + sql_query: SELECT DB_NAME() AS 'database_name'; + sql_response_format: table""" + }]) + + proc = self.start_beat() + self.wait_until(lambda: self.output_lines() > 0) + proc.check_kill_and_wait() + self.assert_no_logged_warnings() + + output = self.read_output_json() + self.assertEqual(len(output), expected_output_count) + + for evt in output: + self.assert_fields_are_documented(evt) + self.assertIn("sql", evt.keys(), evt) + self.assertIn("query", evt["sql"].keys(), evt) + + def get_username(self): + return os.getenv('MSSQL_USERNAME', 'SA') + + def get_password(self): + return os.getenv('MSSQL_PASSWORD', '1234_asdf')