Elasticsearch v2 Upgrade #315

Merged · 2 commits · Dec 15, 2016
1 change: 1 addition & 0 deletions build/cmake/rsyslog.cmake
@@ -91,6 +91,7 @@ list(APPEND RSYSLOG_CONFIGURE_CMD --prefix=${INSTALL_PREFIX_EMBEDDED})
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-liblogging-stdlog)
list(APPEND RSYSLOG_CONFIGURE_CMD --disable-libgcrypt)
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-imptcp)
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-impstats)
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-mmjsonparse)
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-mmutf8fix)
list(APPEND RSYSLOG_CONFIGURE_CMD --enable-elasticsearch)
4 changes: 2 additions & 2 deletions build/cmake/versions.cmake
@@ -3,8 +3,8 @@ set(API_UMBRELLA_STATIC_SITE_VERSION e54283244890e3d5ffeb7ec00dde2f5eaa778d28)
set(API_UMBRELLA_STATIC_SITE_HASH 037b9317f6b08eb2073060c1beed16e2)
set(BUNDLER_VERSION 1.13.6)
set(BUNDLER_HASH fafd22dfed658ca0603f321bdd168ed709d7c682e61273b55637716459f2d0f7)
set(ELASTICSEARCH_VERSION 1.7.6)
set(ELASTICSEARCH_HASH 0b6ec9fe34b29e6adc4d8481630bf1f69cb04aa9)
set(ELASTICSEARCH_VERSION 2.4.3)
set(ELASTICSEARCH_HASH 7f415b3598315c017b0733f50cd47aa837886a7f)
set(FLUME_VERSION 1.7.0)
set(FLUME_HASH 12496e632a96d7ca823ab3c239a2a7d2)
set(JSON_C_VERSION 0.12.1)
5 changes: 0 additions & 5 deletions circle.yml
@@ -28,11 +28,6 @@ dependencies:
# fresh clone.
- if [[ ! -e build/work/git-restore-mtime ]]; then mkdir -p build/work && curl -f -L https://github.com/raw/MestreLion/git-tools/0fc841a3e49d041576e5b21d1644c8df2d2ef801/git-restore-mtime > build/work/git-restore-mtime && chmod +x build/work/git-restore-mtime; fi
- ./build/work/git-restore-mtime -f .
- if [[ ! -e build/work/elasticsearch-2.3.3/bin/elasticsearch ]]; then mkdir -p build/work/elasticsearch-2.3.3 && curl -f -L https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.3.3/elasticsearch-2.3.3.tar.gz | tar --strip-components 1 -C build/work/elasticsearch-2.3.3 -xz; fi
post:
- rm -rf /tmp/elasticsearch-v2-data
- ./build/work/elasticsearch-2.3.3/bin/elasticsearch --path.data=/tmp/elasticsearch-v2-data:
background: true
override:
# Build all the API Umbrella software dependencies.
- ./configure --enable-test-dependencies
9 changes: 8 additions & 1 deletion config/default.yml
@@ -144,7 +144,7 @@ elasticsearch:
- "http://127.0.0.1:14002"
embedded_server_env:
heap_size: 512m
api_version: 1
api_version: 2
embedded_server_config:
network:
host: 127.0.0.1
@@ -162,6 +162,13 @@
breaker:
fielddata:
limit: 60%
index:
translog:
# Sync the data to disk asynchronously on a fixed interval, rather than
# for every request. This significantly helps indexing throughput (at
# the risk of losing a few seconds of data if things crash).
durability: async
sync_interval: 10s
analytics:
adapter: elasticsearch
timezone: UTC
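For context, the translog block added above corresponds to Elasticsearch 2.x's per-index translog settings. A minimal sketch of how they would appear inside an index template's settings (the template name and structure here are illustrative, not taken from this diff):

    {
      "settings": {
        "index": {
          "translog": {
            "durability": "async",
            "sync_interval": "10s"
          }
        }
      }
    }

With durability set to async, Elasticsearch fsyncs the translog every sync_interval instead of on every request, trading up to roughly 10 seconds of data loss on a crash for higher indexing throughput.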
132 changes: 114 additions & 18 deletions config/elasticsearch_templates.json
@@ -81,66 +81,162 @@
"type": "string",
"index": "not_analyzed"
},
"user_id": {
"backend_response_time": {
"type": "integer"
},
"gatekeeper_denied_code": {
"type": "string",
"index": "not_analyzed"
"analyzer": "keyword_lowercase"
},
"internal_gatekeeper_time": {
"type": "float"
},
"internal_response_time": {
"type": "float"
},
"proxy_overhead": {
"type": "integer"
},
"request_accept": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_accept_encoding": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_at": {
"type": "date"
},
"request_basic_auth_username": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_connection": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_content_type": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_hierarchy": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_host": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_ip": {
"type": "string",
"index": "not_analyzed"
},
"request_ip_city": {
"type": "string",
"index": "not_analyzed"
},
"request_ip_country": {
"type": "string",
"index": "not_analyzed"
},
"request_ip_location": {
"type": "geo_point",
"lat_lon": true
},
"request_ip_region": {
"type": "string",
"index": "not_analyzed"
},
"request_ip_city": {
"request_method": {
"type": "string",
"index": "not_analyzed"
"analyzer": "keyword_lowercase"
},
"request_origin": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_path": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_path_hierarchy": {
"type": "string",
"analyzer": "path_hierarchy_lowercase"
},
"request_referer": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_scheme": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_size": {
"type": "integer"
},
"response_status": {
"type": "short"
"request_url": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_user_agent": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_user_agent_family": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"request_user_agent_type": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"response_age": {
"type": "integer"
},
"response_cache": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"response_content_encoding": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"response_content_length": {
"type": "integer"
},
"response_content_type": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"response_server": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"response_size": {
"type": "integer"
},
"response_time": {
"type": "integer"
"response_status": {
"type": "short"
},
"backend_response_time": {
"response_time": {
"type": "integer"
},
"proxy_overhead": {
"type": "integer"
"response_transfer_encoding": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"internal_response_time": {
"type": "float"
"user_email": {
"type": "string",
"analyzer": "keyword_lowercase"
},
"internal_gatekeeper_time": {
"type": "float"
"user_id": {
"type": "string",
"index": "not_analyzed"
},
"request_ip_location": {
"type": "geo_point",
"lat_lon": true
"user_registration_source": {
"type": "string",
"analyzer": "keyword_lowercase"
}
}
}
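Note that the mapping above relies on custom keyword_lowercase and path_hierarchy_lowercase analyzers, whose definitions live in the template's settings section and are outside this hunk. A plausible sketch of such definitions, assuming the standard keyword and path_hierarchy tokenizers combined with a lowercase filter:

    "settings": {
      "analysis": {
        "analyzer": {
          "keyword_lowercase": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["lowercase"]
          },
          "path_hierarchy_lowercase": {
            "type": "custom",
            "tokenizer": "path_hierarchy",
            "filter": ["lowercase"]
          }
        }
      }
    }

This pattern keeps the exact-match behavior of not_analyzed fields while normalizing case, which appears to be why fields like request_host and request_path switch from index: not_analyzed to these analyzers.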
1 change: 0 additions & 1 deletion config/test.yml
@@ -5,7 +5,6 @@ http_port: 9080
https_port: 9081
analytics:
timezone: America/Denver
log_request_url_query_params_separately: true
static_site:
port: 13013
nginx:
2 changes: 1 addition & 1 deletion scripts/rake/outdated_packages.rb
@@ -13,7 +13,7 @@ class OutdatedPackages
},
"elasticsearch" => {
:git => "https://github.com/elasticsearch/elasticsearch.git",
:constraint => "~> 1.7.5",
:constraint => "~> 2.4.3",
},
"flume" => {
:git => "https://github.com/apache/flume.git",
1 change: 1 addition & 0 deletions src/api-umbrella/cli/setup.lua
@@ -96,6 +96,7 @@ local function prepare()
config["run_dir"],
config["tmp_dir"],
path.join(config["db_dir"], "elasticsearch"),
path.join(config["etc_dir"], "elasticsearch_scripts"),
path.join(config["db_dir"], "mongodb"),
path.join(config["db_dir"], "rsyslog"),
path.join(config["etc_dir"], "trafficserver/snapshots"),
2 changes: 1 addition & 1 deletion templates/etc/perp/elasticsearch/rc.env.mustache
@@ -1,2 +1,2 @@
ES_JAVA_OPTS=-Des.default.path.conf={{etc_dir}}/elasticsearch -XX:-HeapDumpOnOutOfMemoryError {{elasticsearch.embedded_server_env.java_opts}}
ES_JAVA_OPTS=-Des.default.path.conf={{etc_dir}}/elasticsearch -Dmapper.allow_dots_in_name=true -XX:-HeapDumpOnOutOfMemoryError {{elasticsearch.embedded_server_env.java_opts}}
ES_HEAP_SIZE={{elasticsearch.embedded_server_env.heap_size}}
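The new -Dmapper.allow_dots_in_name=true system property relates to Elasticsearch 2.x rejecting field names that contain dots (an escape hatch reintroduced in 2.4). A hypothetical document that would otherwise fail to index:

    {
      "request_at": "2016-12-15T12:00:00Z",
      "foo.bar": "value with a dotted key"
    }

The foo.bar field name is illustrative only; in this codebase the risk would presumably come from dynamically named fields, such as logged query-parameter names.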
45 changes: 41 additions & 4 deletions templates/etc/rsyslog.conf.mustache
@@ -4,6 +4,16 @@ global(maxMessageSize="32k")

global(workDirectory="{{db_dir}}/rsyslog")

# Output queue statistics periodically so the health of the queue can be
# inspected.
module(
load="impstats"
interval="60"
resetCounters="on"
log.syslog="off"
log.file="{{log_dir}}/rsyslog/stats.log"
)

# Load Modules
module(load="imtcp" MaxSessions="500")
module(load="mmjsonparse")
@@ -160,36 +170,59 @@ template(name="elasticsearch-json-record" type="subtree" subtree="$!usr!es")
# Output to Elasticsearch.
# A disk-assisted memory queue is used for buffering.
local0.info action(
name="output-elasticsearch"
type="omelasticsearch"
server="{{elasticsearch._first_server.host}}"
serverport="{{elasticsearch._first_server.port}}"
searchIndex="elasticsearch-index"
dynSearchIndex="on"
searchType="log"
template="elasticsearch-json-record"

# Enable bulk indexing, so batches of records are sent as a single HTTP
# request.
bulkmode="on"
bulkId="elasticsearch-id"
dynBulkId="on"

# Allow bulk indexing of batches *up to* this size.
#
# Note that as long as Elasticsearch is keeping up and rsyslog isn't
# queuing data, rsyslog sends records to Elasticsearch as quickly as it
# can (so the batch sizes might be much smaller). See
# http://www.gossamer-threads.com/lists/rsyslog/users/17550
queue.dequeuebatchsize="5000"

# Require indexing by all replica shards.
asyncrepl="off"

# For the in-memory queue, use a linked-list (so the memory doesn't have to
# be pre-allocated based on a fixed size).
queue.type="LinkedList"

# Set a filename so the queue is disk-assisted. This allows offloading
# data from the memory queue to disk if the queue grows bigger than
# expected.
queue.filename="queue-elasticsearch"

# Set thresholds for when the memory queue is too big and should spill to
# disk (note the disk queue size is not bounded by queue.size; that only
# applies to the in-memory portion).
queue.size="15000"
queue.highwatermark="10000"
queue.lowwatermark="2000"

# Persist data to disk on this interval (in seconds). We're okay with some
# loss in the event of unexpected failures.
queue.checkpointinterval="10"

# Persist data to disk on graceful shutdowns.
queue.saveonshutdown="on"
# Set thresholds for when the memory queue is too big and should use the
# disk.
queue.highwatermark="10000"
queue.lowwatermark="2000"

# If Elasticsearch is inaccessible, retry on this interval (in seconds)
# indefinitely (so we don't stop logging to Elasticsearch in case it goes
# down for a longer period of time).
action.resumeInterval="30"
action.resumeRetryCount="-1"
)
{{/analytics._output_elasticsearch?}}
@@ -361,6 +394,7 @@ template(name="sql-json-record" type="subtree" subtree="$!usr!sql")
# Output to Kafka.
# A disk-assisted memory queue is used for buffering.
local0.info action(
name="output-kafka"
type="omkafka"
broker=[{{kafka._rsyslog_broker}}]
topic="{{kafka.topic}}"
@@ -370,6 +404,7 @@ local0.info action(
queue.filename="queue-kafka"
queue.checkpointinterval="10"
queue.saveonshutdown="on"
queue.size="15000"
queue.highwatermark="10000"
queue.lowwatermark="2000"
action.resumeRetryCount="-1"
@@ -381,13 +416,15 @@ template(name="all-json-record" type="list") {
property(name="$!raw") constant(value="\n")
}
local0.info action(
name="output-file"
type="omfile"
file="{{log_dir}}/rsyslog/requests.log.gz"
template="all-json-record"
queue.type="LinkedList"
queue.filename="queue-file"
queue.checkpointinterval="10"
queue.saveonshutdown="on"
queue.size="15000"
queue.highwatermark="10000"
queue.lowwatermark="2000"
zipLevel="3"
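With the impstats module enabled at the top of this file, queue health can be checked by tailing {{log_dir}}/rsyslog/stats.log. An illustrative (hypothetical) legacy-format line for a disk-assisted action queue:

    Dec 15 12:00:00 rsyslogd-pstats: output-elasticsearch queue: size=42 enqueued=125000 full=0 discarded.full=0 discarded.nf=0 maxqsize=9800

A steadily growing size or nonzero discarded.* counters would indicate Elasticsearch falling behind; the exact queue name and field set depend on the rsyslog version and the impstats format setting.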
2 changes: 1 addition & 1 deletion test/admin_ui/test_elasticsearch_proxy.rb
@@ -54,7 +54,7 @@ def test_allowed_for_superuser_admins
}))
assert_response_code(301, response)
assert_equal("/admin/elasticsearch/_plugin/foobar/", response.headers["Location"])
assert_match(%r{URL=/admin/elasticsearch/_plugin/foobar/>}, response.body)
assert_match(%r{URL=/admin/elasticsearch/_plugin/foobar/}, response.body)
assert_equal(response.body.bytesize, response.headers["Content-Length"].to_i)
end
end