From c80dc4f85ac08c56ea9373cee3b446262c398480 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Fri, 5 Nov 2021 15:02:18 -0400 Subject: [PATCH] Add crawler metrics into the stats metricset for Enterprise Search (#28790) * Add crawler metrics into the stats metricset for Enterprise Search * Adjust ent-search docker testing setup to use 8.0 images + update configs to align with recent deprecations * Spelling, etc fixes * Better field description --- metricbeat/docs/fields.asciidoc | 367 +++++++++++++++++- .../module/enterprisesearch/_meta/Dockerfile | 2 +- .../enterprisesearch/docker-compose.yml | 6 +- .../module/enterprisesearch/fields.go | 2 +- .../enterprisesearch/stats/_meta/fields.yml | 155 +++++++- .../stats/_meta/testdata/stats.json | 36 ++ .../_meta/testdata/stats.json-expected.json | 36 ++ .../module/enterprisesearch/stats/data.go | 57 +++ 8 files changed, 652 insertions(+), 9 deletions(-) diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 875f8672214..cba51ff349f 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -32130,7 +32130,7 @@ Workplace Search worker pools stats. [float] === extract_worker_pool -Status information for the extrator workers pool. +Status information for the extractor workers pool. *`enterprisesearch.stats.connectors.pool.extract_worker_pool.size`*:: @@ -32190,7 +32190,7 @@ type: long [float] === subextract_worker_pool -Status information for the sub-extrator workers pool. +Status information for the sub-extractor workers pool. *`enterprisesearch.stats.connectors.pool.subextract_worker_pool.size`*:: @@ -32522,6 +32522,369 @@ type: long -- +[float] +=== crawler + +Aggregate stats on the functioning of the crawler subsystem within Enterprise Search. + + +[float] +=== global + +Global deployment-wide metrics for the crawler. + + +[float] +=== crawl_requests + +Crawl request summary for the deployment. + + +*`enterprisesearch.stats.crawler.global.crawl_requests.pending`*:: ++ +-- +Total number of crawl requests waiting to be processed. + +type: long + +-- + +*`enterprisesearch.stats.crawler.global.crawl_requests.active`*:: ++ +-- +Total number of crawl requests currently being processed (running crawls). + +type: long + +-- + +*`enterprisesearch.stats.crawler.global.crawl_requests.successful`*:: ++ +-- +Total number of crawl requests that have succeeded. + +type: long + +-- + +*`enterprisesearch.stats.crawler.global.crawl_requests.failed`*:: ++ +-- +Total number of failed crawl requests. + +type: long + +-- + +[float] +=== node + +Node-level statistics for the crawler. + + +*`enterprisesearch.stats.crawler.node.pages_visited`*:: ++ +-- +Total number of pages visited by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_allowed`*:: ++ +-- +Total number of URLs allowed by the crawler during discovery since the instance start. + +type: long + +-- + +[float] +=== urls_denied + +Total number of URLs denied by the crawler during discovery since the instance start, broken down by deny reason. + + +*`enterprisesearch.stats.crawler.node.urls_denied.already_seen`*:: ++ +-- +Total number of URLs not followed because of URL de-duplication (each URL is visited only once). + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_denied.domain_filter_denied`*:: ++ +-- +Total number of URLs denied because of an unknown domain. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_denied.incorrect_protocol`*:: ++ +-- +Total number of URLs with incorrect/invalid/unsupported protocols. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_denied.link_too_deep`*:: ++ +-- +Total number of URLs not followed due to crawl depth limits. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_denied.nofollow`*:: ++ +-- +Total number of URLs denied due to a nofollow meta tag or an HTML link attribute. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.urls_denied.unsupported_content_type`*:: ++ +-- +Total number of URLs denied due to an unsupported content type. + +type: long + +-- + +[float] +=== status_codes + +HTTP request result counts, by status code. + + +*`enterprisesearch.stats.crawler.node.status_codes.200`*:: ++ +-- +Total number of HTTP 200 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.301`*:: ++ +-- +Total number of HTTP 301 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.302`*:: ++ +-- +Total number of HTTP 302 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.400`*:: ++ +-- +Total number of HTTP 400 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.401`*:: ++ +-- +Total number of HTTP 401 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.402`*:: ++ +-- +Total number of HTTP 402 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.403`*:: ++ +-- +Total number of HTTP 403 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.404`*:: ++ +-- +Total number of HTTP 404 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.405`*:: ++ +-- +Total number of HTTP 405 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.410`*:: ++ +-- +Total number of HTTP 410 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.422`*:: ++ +-- +Total number of HTTP 422 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.429`*:: ++ +-- +Total number of HTTP 429 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.500`*:: ++ +-- +Total number of HTTP 500 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.501`*:: ++ +-- +Total number of HTTP 501 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.502`*:: ++ +-- +Total number of HTTP 502 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.503`*:: ++ +-- +Total number of HTTP 503 responses seen by the crawler since the instance start. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.status_codes.504`*:: ++ +-- +Total number of HTTP 504 responses seen by the crawler since the instance start. + +type: long + +-- + +[float] +=== queue_size + +Total current URL queue size for the instance. + + +*`enterprisesearch.stats.crawler.node.queue_size.primary`*:: ++ +-- +Total number of URLs waiting to be crawled by the instance. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.queue_size.purge`*:: ++ +-- +Total number of URLs waiting to be checked by the purge crawl phase. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.active_threads`*:: ++ +-- +Total number of crawler worker threads currently active on the instance. + +type: long + +-- + +[float] +=== workers + +Crawler workers information for the instance. + + +*`enterprisesearch.stats.crawler.node.workers.pool_size`*:: ++ +-- +Total size of the crawl workers pool (number of concurrent crawls possible) for the instance. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.workers.active`*:: ++ +-- +Total number of currently active crawl workers (running crawls) for the instance. + +type: long + +-- + +*`enterprisesearch.stats.crawler.node.workers.available`*:: ++ +-- +Total number of currently available (free) crawl workers for the instance. + +type: long + +-- + [float] === product_usage diff --git a/x-pack/metricbeat/module/enterprisesearch/_meta/Dockerfile b/x-pack/metricbeat/module/enterprisesearch/_meta/Dockerfile index dc35294c947..57546590cf6 100644 --- a/x-pack/metricbeat/module/enterprisesearch/_meta/Dockerfile +++ b/x-pack/metricbeat/module/enterprisesearch/_meta/Dockerfile @@ -9,4 +9,4 @@ COPY docker-entrypoint-dependencies.sh /usr/local/bin/ ENTRYPOINT ["tini", "--", "/usr/local/bin/docker-entrypoint-dependencies.sh"] HEALTHCHECK --interval=1s --retries=300 --start-period=60s \ - CMD curl --user elastic:changeme --fail --silent http://localhost:3002/api/as/v1/internal/health + CMD curl --user elastic:changeme --fail --silent http://localhost:3002/api/ent/v1/internal/health diff --git a/x-pack/metricbeat/module/enterprisesearch/docker-compose.yml b/x-pack/metricbeat/module/enterprisesearch/docker-compose.yml index 88f8ce3c38d..3e9dbfdf9bf 100644 --- a/x-pack/metricbeat/module/enterprisesearch/docker-compose.yml +++ b/x-pack/metricbeat/module/enterprisesearch/docker-compose.yml @@ -2,11 +2,11 @@ version: '2.3' services: enterprise_search: - image: docker.elastic.co/integrations-ci/beats-enterprisesearch:${ENT_VERSION:-7.16.0-SNAPSHOT} + image: docker.elastic.co/integrations-ci/beats-enterprisesearch:${ENT_VERSION:-8.0.0-SNAPSHOT} build: context: ./_meta args: - ENT_VERSION: ${ENT_VERSION:-7.16.0-SNAPSHOT} + ENT_VERSION: ${ENT_VERSION:-8.0.0-SNAPSHOT} depends_on: - "elasticsearch" environment: @@ -15,8 +15,8 @@ services: - "elasticsearch.password=changeme" - "elasticsearch.host=http://elasticsearch:9200" - "allow_es_settings_modification=true" - - "ent_search.auth.native1.source=elasticsearch-native" - "secret_management.encryption_keys=[4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09]" + - "kibana.host=http://localhost:5601" - "JAVA_OPTS=-Xms2g -Xmx2g" # Make it possible to run against slightly older ES versions - "elasticsearch.ignore_version_mismatch=true" diff --git a/x-pack/metricbeat/module/enterprisesearch/fields.go b/x-pack/metricbeat/module/enterprisesearch/fields.go index 16e172b5c45..061ec34b5cf 100644 --- a/x-pack/metricbeat/module/enterprisesearch/fields.go +++ b/x-pack/metricbeat/module/enterprisesearch/fields.go @@ -19,5 +19,5 @@ func init() { // AssetEnterprisesearch returns asset data. // This is the base64 encoded zlib format compressed contents of module/enterprisesearch. func AssetEnterprisesearch() string { - return "eJzsnEuP47gRx+/9KQp9SoB0B/u69GGByewGs4vMYIGZzR6CQEuRZZtjidTwYbf30wckJZu2KFmSZXeQdB8WCz+qfv9isVik6HmANe6eAIVBVSmuUSNRdHUHYLgp8Anuf9y/BR/9e/d3AAoLJBqfIEdD7gA0GsPFUj/Bv+61Lu7/AvcrY6r7f98BMNRU8cpwKZ7g+zsAgJZNKCWzBd4BLDgWTD/5jz2AICUm6dyf2VX4BEslbVW/knDl/n4/NfA7UCkM4UJDiUZxqoEIBiskhVkBFwupSuKswEKqNm1tOEaNcWlhtUGVWcvZ/s0Gd427rVTx60fQb8N34ddff/rBOzcrhB8Log2ngb0xD1YjA6L9JxgxBLSRCtPEjy3IoLWFF0fT/Z0OdBK6PZwt66exilHcf4/e6I5Vy/U7qY03cAhWC0ZIdrCfJNig0lyKJMRpRAaor83FiXRiIBWOo5DYMkfVers/MCPIgn2wmoulj5rGkgjDafMJ93qAT0cuhs0tL1i2Inp1KfAbsIJ/sRhMgjPZM64VoWuyPDO0lZIUtZ5raGtzFwxtxVNhCECFFMtzMfqlJojrQwuTC22IoB2xiXFsZXiJjxrpLFTB3CxkC15gjlECnnKlhq8F9i5d0x0cljkyhgz+Xntqwx3+uoYVzg7t2UB2BjMa4gGsvWgKtSHKZFRakYroeMgPoYTIBbgR1z2EsCIMjIQcGwxkwCy617QsEbjWFvUwIc5ZprmgmLlVMastdiTweFmfXPp6+z7uzkeftNo9/OnhK+ALELhBdVD55/7a9HlTTqtLP//zfXuJhQHFJ73Qwdg67fw3S4lvQ4wEZUVnp9Qzy5fddWfQ/P6ZbAgsicrJEoHKokDqJ3nd102cyQdDM86YT9KQoll65SKJLhVwsZHUVyodJWJIMzfbVtha1IZq8aW+TAVlqhpf7XWFwrgE8G1Dh6ypUhoZJZZS7TKr4yX/FH5Qyrz3lqDL0pD8WCGpMi64ecx3BicHNKxJT9Bn5HQ1q8D5DdMu3/kYuunIRbDyOCwpvABn4yUEzMFOZVlyY24u4G3j12O4wneJipI835j/PXkO5F2DMIxeSJG91CT4IMXDfiJwUtSl4dK02kt6qdTa69oDzKRM5p+RmqxCwbhYZgsuSMH/IDMvb993fAjgB66rguzCUQWpKiWfeUkMRmthQNS+392uOF1BTAlcQ03fI3nfIq4UEpYK+YhF4lMw0rPRg6G9hFUKZ4rx22ArClzBN9goHpgOjGCZVDMbT/AwEqskz/PsnpCs47CAT/Go/3AzqNmG1Nuryn1nS7Rr3dEMBDauA8pqS1fpEOsAAlVIHC0R7K+ularhk5I64PfZqMi2aB0tDdx3vA1fnrb32Eq1RnXhtGwIuo0N2qhLWWSa/5Fq/2D0qP0WWLxVcFYH5g+hhm/mQThsxoPRJj5DSTaEFyQvZodp7LZ5GtfaEBMP46zH0N74Y/T5vmNoKoXwW5WJh4UuDaqC0L33g0HQNtc7bbBsI3VhxWifZZ750/3LJk8f4WeZ1w8QkoR9lDHplnDDO9Nkah4dc+rGS32oVG8hO0tfi1Gq9fUZ62W/2EGOjnUspRtzx9Lff3aNewv0bwrJmsmtSLG61tK5So06nBn5mJlhgaariMCQ4La4DwEOxgMwF37Z8//vs/ZMUGPIhS2KKyE606B3gl5OyQVVWKIw5FqwkYeZmCtUJdeaS9GVtJcyRx4mMsctwMzVNKxx3rK+rIris1GEmixYzDpYz/O2mD8aYqxOPw1xPo3ct1Zex6UFoafHgkmp8NshxkNaLTh6TKl3V0pLZ3pgyxUTfbFoMWNYtXrpucC4wXLqchmTctbZG16MyFLt4VmisPmisqzcqtC1/boULqzkjZPTM+zRuJqukNniurh7JyNx93PW5i9Rf7TNH15r0GsN6iR9rUH/PzWosnnB9eqmBaj2+Vp7XmtPi/S19vzv1p79A1ljTkvIwJO/nwSVpUuwd58+/QIKv1jUprmBMvasrz6b6dhEj9ifHlMdzPqtKfcXiqfuT6/7SOuEV4OsUIRLRv13Cm/8oOk9eealLSNyKkUdmrSI8xImrpd+es0i6vQR1OTBGKFk/wQejSuEl6X9h2AEjCKLBadd0xAGprpCinxz87sAp8Pgv7iHae4CzJ5HGsWtb3OklTqQq6k8HtOsQpVdcH10tOI3G1RkiZeMboUKNFIpJm/y9wP9XyN/4JBPkn4Yeb82Z8yqrisdI0rNm+VS4ZIYPF74G+MzLLQleZ7rvuY/pFg6utNu4ECbvqA5dnFFIuZCbjLlysjasIzhZi7qj4YIRhQDhht+2GJ2itiQwmLnVd8BWqI7/pUUOjkLL8nqYDVco9GQK7lGAf5RYr4Ln9FhV+2cTM30r57nacqS/Us9MzUsuOB6hQy23KyAOK+xQNZaUSam1NcvoubrK6n55kXUfHMlNd++iJpvr6TmuxdR891Mao5OgibeuznUK//MFWR4GrywwhdatwOuYXJC186cYOGkoD4J8rczeIUFF+gF8sQvWcZu41EsucCMoTZK7lA9dl20Hf4bu9MhijUggw0n4UHuiecQ3BnWl5Lw4pZCgr/Z8GsHmb8BeTsVR25nE7NwsWHXUtEcmNYXK4KzwH5mIldKMktN8mc6o+dzbSz8Uifqpnt+XsqwKuSudLubkROWVFWmj3+0NgS9jV9V0Y+Fk/zjeqTjI9Qwu9Od6pmKf+bYLQKvnTTF0P8LA4nAdsZy21yNmSWirYs288T1NLJSLTMtraLTDz/ORLglRKrlw5Yz9P8chfts7X904NtqKsU3xOCtFdVuhwr6TwAAAP//L5rceg==" + return "eJzsXM2O4zYSvvdTFPo0A6Qn/edD+hAgO8nuJJgJgqRnc1gsFEos2xxTpEJS7naefkFSkmX92JIsqRe73YcgGNtV31csVpGsIq9gg7sHQGFQJYpp1EhUtL4AMMxwfIDLH4qP4Df32eUFgEKOROMDhGjIBYBGY5hY6Qf416XW/PIruFwbk1z++wKAoo4USwyT4gG+vQAAqMmEWNKU4wXAkiGn+sF97QoEibERnf0zuwQfYKVkmmT/0qDK/v1RFfAHRFIYwoSGGI1ikQYiKKyRcLMGJpZSxcRKgaVUdbSZ4DLUMtyIp9qgCtKU0eLDHO4Gd09Slf/9APR7/1v4/PnH751ys0b4gRNtWOSx5+Ih1UiBaPcNSgwBbaTCZsTvaiA91xq8sjXtX3WgG0HXh7MmvWqrMhT734MP2m1VU/1BauME7I1VAyMk3ctvRLBFpZkUjSCqFunAPhNXdqSKgCZzHJgkjUNUtY+PG6YHMi8fUs3EyllNY0yEYVH+DfvvHnyz5cpgw5RxGqyJXp8L+DtIBfszRS8SrMgj45qQaENWJ4Y2UTJCrcca2kzcGUObsCYzeEBcitUpG/2SISjHhxpMJrQhImqxTRlOmhgW4zuN0SiovLhRkC0ZxxBLDljF1TR8NWAfmmO6BYdxiJQihb9nmurg9n9twwonh/akIVuNWRriDliPQlOoDVEmiGQqmizaH+TPPoTIJdgR10cQwppQMBJCzGEgBZqi/TctYwSmdYq6GxGrLNBMRBjYrBhkElscuD+tR+u+Tr6zu9VxjFqmHt5c3QBbgsAtqj3Lt8dj05dtPCwu/fTPT/UUCx2CT3Oig75x2urPU4lbhhgJKhWtK6Ujs3zVHnc6ze+fyJbAiqiQrBAiyTlGbpJn67qBM3kvaMQZ8ygN4XnqlctG6FIBE1sZuUilS47o3czOtjXWklpXLi7Ux01GGcrGRXudoDDWAdyyoYXWUCo5jRhjqXZBqsspvwq+k8t8cpKgTVIX/1gjSQImmHkX7gwONqjPSQ9wTEg1myVg9fppF+6cDe10ZMJLedfNKRwBK+MlCIyBPZJxzIyZncD7XK+DYQPfOSxi8jwz/k/k2SNvG4Ru6IUUwUtNgp+luComAiM8Cw3nulVB6aVcq+BVABiJmQy/YGSCBAVlYhUsmSCc/UVGTm/ftnwJ4HumE052/qiCJImSzywmBku50EPUbr37tGbRGsoogWnI0B+hXCwR1woJbTJ5jyTx6IUc2ehB17VEqhSOZOP3XlbJcJxtMWfc0R0owbiRzWh4vIaesGLyPM7uCcmmbBZwLl5af9gZlG9Dsu1VYn/zRLRduqPpCNjYFVCQSZpkhZgZECKFxKIlgn5tl1IZ+EZKLeALb1TkideOljruO977Hw/bezxJtUF15rTMEbQL67RRl5IHmv3VtPyD3qP2u8fipIKV2tF/SGTYdhwI+824F5rbpyuSLWGchHx0MLncOp5ctTbElIdx1GNoJ/xd6fvHjqEjKYTbqgw8LLRukHASFdr3AkGnod5pg3EdUhusMrQvMgzc6f55k+cYwi8yzAoIjQiPoSwjfSLMsFY3GepHhzh1riU7VMq2kK2hr4ZRqs30GLO0z3cQosXaF6Udc4vl+PqzbdxrQP+mkGyofBJNWO3S0qpqGnU4MfJlzBQ5mrYgAl2MW8O9N7AX7gEz4dKe+3/ntSeMWga5TDmfCKIVDXonovNRMhEpjFEYMhXYkoaRMCeoYqY1k6LNac/FXNIwEHN5CTByNPU5zknW50VRfDaKRCbwEoMWrKfx1jD/ZohJdXM1xOuUxdrKETk3IhxZZMEgX/h9b+Quay04qFPq3UR+aUV3XHOVEf2ZYooBxaS2mB4LGDMYD82XZaSMti4Oz4ZIm9aHJxH53Vck48Smhbb917ngfCrPlVQPsXvD1dEaacqnhVso6Qm3mLNp+BIBSKfh1WsQeg1C7Uhfg9D/TxBK0pAzvZ41AmU6X2PPa+ypIX2NPf+7sacoyRpTDSEdz/5+FJGMrYN9eHz8BRT+maI2eQ9K39O+7HSmZRvdY4d6iGov1m1OmWspHrpDnbaoVcGrQSYofJvR8a7CmUtNn8gzi9O4hDySIjNNM4nTFAbmSze9RiFVLUINHoweTIoaPBobCM9z+5+9EDCKLJcsapuG0NHVFUbItrN3A1SHwf2wAJN3A4zuRxrF3P0czUwtkMlYHo5pkKAKzmgg7c34uy0qssJzRjdBBRojKQbv8ouB/q+h33HIB1Hfj7zLzQFNVVtTR49Q891qpXBFDB4m/lz4CIk2Js9jdWx+lGJl0VVXA3u0zS2afZMrEjEW5NxTJoasDQ0obsdC/ZshghJFgeKW7beYrSS2hKfY2uzbgUupyz+RQjfOwnO82kv1jTQaQiU3KMAVE8Od/472u2qrZKin3zyPsyhrXL9kM1PDkgmm10jhiZk1EKu1TJDWMspAl7p9ETa3E7G5exE2dxOxuX8RNvcTsVm8CJvFSGwOToIGdt7s45WruoL09eBlKlygtTvgDExIoo0VJ6g/KchOglx/BkuQM4GOIGu4y9J3G49ixQQGFLVRcofqXVurbfdbdtUhKnNACltGfCm3otkbd4T8EhPG5yTi9Y0GP1MQuB7I+VgcqB2NzNLahk7FIj8wzVorvDKPvQFb44w+q9O0+5TO1JRa7MaawCsuw8YzlR5LqX84GUAx4XIXozBXT4xi8epAXnnISAw+jbM/D/JYfTQbdK6TuE7bYkej0zgmalcA3hM6t0CS9fWPeh5cdemozKVDNaAN69GO3UmgtnYRwpv8vp37hX57Gr1OI/vbZTpuR9kJBmZNDKzJFr1+pF0M7ePNpDCzkHaItssRZfk1iSqgbueTkuIVxy3y0vnAWKEgISvUwZZpNtWlBKcBMg35cU0RhYu8tj+h63EolSquA8K5fJoI/OdfP2rIFFSx01TZ+USZjuQW1W4UMhQFO8Glc0huJOMVDObyVXU7T1HsQCHRUpwb2glXSOgu0Iht141glLnsDCGkgaXMhxYjkmrMPgSKVzRNOPP3quENkmjtPmB7T5aC70CKCN/2qGpSGRMmgiXjBtXxwR6Raj7me5JEQCo2wo6hh9Sv1Vgq5W4IKmlk1NppMSIDt4ssFH/NxJZwRr9OhU6TRLrbTTmYPjVxzsQmMFIGFLFtSo3I4sDlshctfEJxLQ3AWcyOJpUqfiG9uNlcKANNCs12aUrAkBVIZb3qw+Onj86sQIxRLExNn46T0ngGkRQGhXGXGmbnJ8pQIINSOa48cURMTKqDSNKxbmQcVAwU6pSb7IT1KxuHsxNVq/DcOHx5e319OanJHZfb6+v9CTTYoN9/ddBK4e76ZgYKd9c3U1K4nYXC7XQU7mdxpPspHel+Fke6n9KR7mdxpPtpHeluFgp3U1K4n4XC/ZQUFrNQWExI4WaWiHQzZUS6nWU63045nW+/mYXCN9NRWMyS2hZTprbFLKltMWVqW8yS2hZTprbFLKltMWVqW8yS2hYjpLajezZ/u+DkOx89z9/y/t7Pv34sinl/7Z88rZfwDv8610UUi4ka975G81HMQTXEm744TzzFpnxhRzU+pzct2jVGmz1ahyE7gUnWRPd7iSVofziqE4HTVZHi4Zr9yz5FfSd7uCUrdPbsAWl/DgcGVv72b+w0XpUazctPvMQDZ3iOm5blKvHBxS5409i176tpkEitWcjxbWfG81YJq25zyK9aGxxC4sSTQKPzKJ4KerNUiG8rjNoJXDShT5SkaWQaXwvt3YGQCfMPhjaV7Oq9yu3l8VNNByRJAn34dm4X6HX4SVJ6s7wRf78S4+E9Lt9i1BxwTrjFibs/JeCZkryho63voNWWT/kLHaNYtPbexzh2rVpWqlWgZaqi4TcwTli4RkSqlW9Lyc/FM/29DV9nkyi2JQbnZpSp7UroPwEAAP//yshTWw==" } diff --git a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/fields.yml b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/fields.yml index 606d94514e3..74b36eb9b63 100644 --- a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/fields.yml +++ b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/fields.yml @@ -45,7 +45,7 @@ fields: - name: extract_worker_pool type: group - description: Status information for the extrator workers pool. + description: Status information for the extractor workers pool. fields: - name: size type: long @@ -73,7 +73,7 @@ - name: subextract_worker_pool type: group - description: Status information for the sub-extrator workers pool. + description: Status information for the sub-extractor workers pool. fields: - name: size type: long @@ -231,6 +231,157 @@ type: long description: Total number of jobs waiting in the failed queue. + + - name: crawler + type: group + description: Aggregate stats on the functioning of the crawler subsystem within Enterprise Search. + fields: + - name: global + type: group + description: Global deployment-wide metrics for the crawler. + fields: + - name: crawl_requests + type: group + description: Crawl request summary for the deployment. + fields: + - name: pending + type: long + description: Total number of crawl requests waiting to be processed. + - name: active + type: long + description: Total number of crawl requests currently being processed (running crawls). + - name: successful + type: long + description: Total number of crawl requests that have succeeded. + - name: failed + type: long + description: Total number of failed crawl requests. + + - name: node + type: group + description: Node-level statistics for the crawler. + fields: + - name: pages_visited + type: long + description: Total number of pages visited by the crawler since the instance start. + + - name: urls_allowed + type: long + description: Total number of URLs allowed by the crawler during discovery since the instance start. + + - name: urls_denied + type: group + description: Total number of URLs denied by the crawler during discovery since the instance start, broken down by deny reason. + fields: + - name: already_seen + type: long + description: Total number of URLs not followed because of URL de-duplication (each URL is visited only once). + + - name: domain_filter_denied + type: long + description: Total number of URLs denied because of an unknown domain. + + - name: incorrect_protocol + type: long + description: Total number of URLs with incorrect/invalid/unsupported protocols. + + - name: link_too_deep + type: long + description: Total number of URLs not followed due to crawl depth limits. + + - name: nofollow + type: long + description: Total number of URLs denied due to a nofollow meta tag or an HTML link attribute. + + - name: unsupported_content_type + type: long + description: Total number of URLs denied due to an unsupported content type. + + - name: status_codes + type: group + description: HTTP request result counts, by status code. + fields: + - name: "200" + type: long + description: Total number of HTTP 200 responses seen by the crawler since the instance start. + - name: "301" + type: long + description: Total number of HTTP 301 responses seen by the crawler since the instance start. + - name: "302" + type: long + description: Total number of HTTP 302 responses seen by the crawler since the instance start. + - name: "400" + type: long + description: Total number of HTTP 400 responses seen by the crawler since the instance start. + - name: "401" + type: long + description: Total number of HTTP 401 responses seen by the crawler since the instance start. + - name: "402" + type: long + description: Total number of HTTP 402 responses seen by the crawler since the instance start. + - name: "403" + type: long + description: Total number of HTTP 403 responses seen by the crawler since the instance start. + - name: "404" + type: long + description: Total number of HTTP 404 responses seen by the crawler since the instance start. + - name: "405" + type: long + description: Total number of HTTP 405 responses seen by the crawler since the instance start. + - name: "410" + type: long + description: Total number of HTTP 410 responses seen by the crawler since the instance start. + - name: "422" + type: long + description: Total number of HTTP 422 responses seen by the crawler since the instance start. + - name: "429" + type: long + description: Total number of HTTP 429 responses seen by the crawler since the instance start. + - name: "500" + type: long + description: Total number of HTTP 500 responses seen by the crawler since the instance start. + - name: "501" + type: long + description: Total number of HTTP 501 responses seen by the crawler since the instance start. + - name: "502" + type: long + description: Total number of HTTP 502 responses seen by the crawler since the instance start. + - name: "503" + type: long + description: Total number of HTTP 503 responses seen by the crawler since the instance start. + - name: "504" + type: long + description: Total number of HTTP 504 responses seen by the crawler since the instance start. + + - name: queue_size + type: group + description: Total current URL queue size for the instance. + fields: + - name: primary + type: long + description: Total number of URLs waiting to be crawled by the instance. + - name: purge + type: long + description: Total number of URLs waiting to be checked by the purge crawl phase. + + - name: active_threads + type: long + description: Total number of crawler worker threads currently active on the instance. + + - name: workers + type: group + description: Crawler workers information for the instance. + fields: + - name: pool_size + type: long + description: Total size of the crawl workers pool (number of concurrent crawls possible) for the instance. + - name: active + type: long + description: Total number of currently active crawl workers (running crawls) for the instance. + - name: available + type: long + description: Total number of currently available (free) crawl workers for the instance. + - name: product_usage type: group description: Aggregate product usage statistics for the Enterprise Search deployment. diff --git a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json index d042f260364..77741d83159 100644 --- a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json +++ b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json @@ -105,6 +105,42 @@ } } }, + "crawler": { + "global": { + "crawl_requests": { + "pending": 0, + "active": 2, + "successful": 2, + "failed": 0 + } + }, + "node": { + "pages_visited": 385, + "urls_allowed": 478, + "urls_denied": { + "nofollow": 98, + "already_seen": 8466, + "domain_filter_denied": 5286, + "incorrect_protocol": 23, + "unsupported_content_type": 4, + "link_too_deep": 45 + }, + "status_codes": { + "200": 367, + "301": 18 + }, + "queue_size": { + "primary": 91, + "purge": 0 + }, + "active_threads": 18, + "workers": { + "pool_size": 16, + "active": 2, + "available": 14 + } + } + }, "product_usage": { "app_search": { "total_engines": 1 diff --git a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json-expected.json b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json-expected.json index ca69493d341..379ae110479 100644 --- a/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json-expected.json +++ b/x-pack/metricbeat/module/enterprisesearch/stats/_meta/testdata/stats.json-expected.json @@ -41,6 +41,42 @@ } } }, + "crawler": { + "global": { + "crawl_requests": { + "active": 2, + "failed": 0, + "pending": 0, + "successful": 2 + } + }, + "node": { + "active_threads": 18, + "pages_visited": 385, + "queue_size": { + "primary": 91, + "purge": 0 + }, + "status_codes": { + "200": 367, + "301": 18 + }, + "urls_allowed": 478, + "urls_denied": { + "already_seen": 8466, + "domain_filter_denied": 5286, + "incorrect_protocol": 23, + "link_too_deep": 45, + "nofollow": 98, + "unsupported_content_type": 4 + }, + "workers": { + "active": 2, + "available": 14, + "pool_size": 16 + } + } + }, "http": { "connections": { "current": 1, diff --git a/x-pack/metricbeat/module/enterprisesearch/stats/data.go b/x-pack/metricbeat/module/enterprisesearch/stats/data.go index 45541fda1ad..049145dda19 100644 --- a/x-pack/metricbeat/module/enterprisesearch/stats/data.go +++ b/x-pack/metricbeat/module/enterprisesearch/stats/data.go @@ -86,6 +86,63 @@ var ( }), }), + "crawler": c.Dict("crawler", s.Schema{ + "global": c.Dict("global", s.Schema{ + "crawl_requests": c.Dict("crawl_requests", s.Schema{ + "pending": c.Int("pending"), + "active": c.Int("active"), + "successful": c.Int("successful"), + "failed": c.Int("failed"), + }), + }), + + "node": c.Dict("node", s.Schema{ + "pages_visited": c.Int("pages_visited"), + "urls_allowed": c.Int("urls_allowed"), + "urls_denied": c.Dict("urls_denied", s.Schema{ + "already_seen": c.Int("already_seen", s.Optional), + "domain_filter_denied": c.Int("domain_filter_denied", s.Optional), + "incorrect_protocol": c.Int("incorrect_protocol", s.Optional), + "link_too_deep": c.Int("link_too_deep", s.Optional), + "nofollow": c.Int("nofollow", s.Optional), + "unsupported_content_type": c.Int("unsupported_content_type", s.Optional), + }), + + "status_codes": c.Dict("status_codes", s.Schema{ + "200": c.Int("200", s.Optional), + "301": c.Int("301", s.Optional), + "302": c.Int("302", s.Optional), + "304": c.Int("304", s.Optional), + "400": c.Int("400", s.Optional), + "401": c.Int("401", s.Optional), + "402": c.Int("402", s.Optional), + "403": c.Int("403", s.Optional), + "404": c.Int("404", s.Optional), + "405": c.Int("405", s.Optional), + "410": c.Int("410", s.Optional), + "422": c.Int("422", s.Optional), + "429": c.Int("429", s.Optional), + "500": c.Int("500", s.Optional), + "501": c.Int("501", s.Optional), + "502": c.Int("502", s.Optional), + "503": c.Int("503", s.Optional), + "504": c.Int("504", s.Optional), + }), + + "queue_size": c.Dict("queue_size", s.Schema{ + "primary": c.Int("primary"), + "purge": c.Int("purge"), + }), + + "active_threads": c.Int("active_threads"), + "workers": c.Dict("workers", s.Schema{ + "pool_size": c.Int("pool_size"), + "active": c.Int("active"), + "available": c.Int("available"), + }), + }), + }), + "product_usage": c.Dict("product_usage", s.Schema{ "app_search": c.Dict("app_search", s.Schema{ "total_engines": c.Int("total_engines"),