Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Common modularization, custom emitters #112

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,15 @@ def run(self, agentConfig=None, run_forever=True):
else:
agentLogger.info('Not running on EC2, using hostname to identify this server')

emitter = http_emitter
emitters = [http_emitter]
for emitter_spec in [s.strip() for s in agentConfig.get('custom_emitters', '').split(',')]:
if len(s) == 0: continue
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@charles-dyfis-net not sure I get this line. Did you mean?

if len(emitter_spec) == 0: continue

My little test

agentConfig = {'custom_emitters': "2,3 , 4 ,,,,," }
print agentConfig.get('custom_emitters')
emitters = [1]
print [s.strip() for s in agentConfig.get('custom_emitters', '').split(',')]
for emitter_spec in [s.strip() for s in agentConfig.get('custom_emitters', '').split(',')]:
    if len(emitter_spec) == 0: continue
    emitters.append(emitter_spec)
print emitters

gets me [1, '2', '3', '4'] whereas with len(s) == 0 I only get [1].

Did I get this right?

Interestingly enough:

Python 2.7.1 (r271:86832, Jun 25 2011, 05:09:01)
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> for x in [s for s in [1, 2, 3]]:
...     print x, s
...
1 3
2 3
3 3

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to go listcomp all the way and replace the for loop + if test.

emitters.extend([s.strip() for s in agentConfig.get('custom_emitters', '').split(',') if len(s.strip()) > 0])

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, thanks!

As for the condensed version, that's what I would have done if it didn't mean calling strip() twice.

emitters.append(modules.load(emitter_spec, 'emitter'))

checkFreq = int(agentConfig['checkFreq'])

# Checks instance
c = checks(agentConfig, emitter)
c = checks(agentConfig, emitters)

# Watchdog
watchdog = None
Expand Down
7 changes: 4 additions & 3 deletions checks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def wrapper(*args, **kwargs):
return wrapper

class checks(object):
def __init__(self, agentConfig, emitter):
def __init__(self, agentConfig, emitters):
self.agentConfig = agentConfig
self.plugins = None
self.emitter = emitter
self.emitters = emitters
self.os = None

self.checksLogger = logging.getLogger('checks')
Expand Down Expand Up @@ -416,5 +416,6 @@ def doChecks(self, firstRun=False, systemStats=False):

# Send back data
self.checksLogger.debug("checksData: %s" % checksData)
self.emitter(checksData, self.checksLogger, self.agentConfig)
for emitter in self.emitters:
emitter(checksData, self.checksLogger, self.agentConfig)
self.checksLogger.info("Checks done")
10 changes: 4 additions & 6 deletions checks/datadog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from checks.utils import TailFile
import modules
import os
import sys
import traceback
Expand Down Expand Up @@ -98,17 +99,14 @@ def init(cls, logger, log_path, parser_spec=None):

if parser_spec:
try:
module_name, func_name = parser_spec.split(':')
__import__(module_name)
parse_func = getattr(sys.modules[module_name], func_name,
None)
parse_func = modules.load(parser_spec, 'parser')
except:
logger.exception(traceback.format_exc())
logger.error('Could not load Dogstream line parser "%s" PYTHONPATH=%s' % (
parser_spec,
os.environ.get('PYTHONPATH', ''))
)
logger.info("dogstream: parsing %s with %s" % (log_path, parse_func))
logger.info("dogstream: parsing %s with %s (requested %s)" % (log_path, parse_func, parser_spec))
else:
logger.info("dogstream: parsing %s with default parser" % log_path)

Expand Down Expand Up @@ -236,7 +234,7 @@ def _default_line_parser(self, logger, line):
try:
while line:
keyval, _, line = partition(line.strip(), sep)
key, val = keyval.split('=')
key, val = keyval.split('=', 1)
attributes[key] = val
except Exception, e:
logger.debug(traceback.format_exc())
Expand Down
28 changes: 23 additions & 5 deletions datadog.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,11 @@ use_mount: no
# Comma-separated list of logs to parse and optionally custom parsers to use.
# The form should look like this:
#
# dogstreams: /path/to/log1:parsers:custom_parser, /path/to/log2, /path/to/log3, ...
# dogstreams: /path/to/log1:parsers_module:custom_parser, /path/to/log2, /path/to/log3, ...
#
# Or this:
#
# dogstreams: /path/to/log1:/path/to/my/parsers_module.py:custom_parser, /path/to/log2, /path/to/log3, ...
#
# Each entry is a path to a log file and optionally a Python module/function pair
# separated by colons.
Expand All @@ -270,9 +274,10 @@ use_mount: no
# where attributes should at least contain the key 'metric_type', specifying
# whether the given metric is a 'counter' or 'gauge'.
#
# In order for the agent to find the custom line parser functions, the modules
# must exist in the agent's PYTHONPATH. You can set this as an environment
# variable when starting the agent.
# Unless parsers are specified with an absolute path, the modules must exist in
# the agent's PYTHONPATH. You can set this as an environment variable when
# starting the agent. If the name of the custom parser function is not passed,
# 'parser' is assumed.
#
# If this value isn't specified, the default parser assumes this log format:
# metric timestamp value key0=val0 key1=val1 ...
Expand Down Expand Up @@ -310,6 +315,19 @@ use_mount: no
# -------------------------------------------------------------------------- #
#elasticsearch: http://localhost:9200/_cluster/nodes/stats?all=true

# ========================================================================== #
# Custom Emitters #
# ========================================================================== #

# Comma-separated list of emitters to be used in addition to the standard one
#
# Expected to be passed as a colon-separated file (or module) / classname list.
#
# custom_emitters: /usr/local/my-code/emitters/rabbitmq.py:RabbitMQEmitter
#
# If the name of the emitter function is not passed, 'emitter' is assumed.


# -------------------------------------------------------------------------- #
# Logging Configuration
# -------------------------------------------------------------------------- #
Expand Down Expand Up @@ -345,4 +363,4 @@ args:('/tmp/dogstatsd.log', 'a')

[formatter_ddagent]
format: %(asctime)s | %(name)s | %(levelname)s | %(message)s
class:logging.Formatter
class:logging.Formatter
71 changes: 71 additions & 0 deletions modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
""" Tools for loading Python modules from arbitrary locations.
"""

import os
import imp
import sys

def imp_type_for_filename(filename):
    """Look up the ``imp`` suffix description that applies to a file name.

    The entries of ``imp.get_suffixes()`` are examined in order and the first
    one whose suffix (element 0 of the entry) ends ``filename`` is returned
    unchanged, so that it can be handed to ``imp.load_module()``.

    Returns ``None`` when no registered suffix matches.
    """
    matching = (entry for entry in imp.get_suffixes()
                if filename.endswith(entry[0]))
    return next(matching, None)

def load_qualified_module(full_module_name, path=None):
    """Load a (possibly dotted) module name one component at a time.

    Each component of ``full_module_name`` is located with
    ``imp.find_module()`` and loaded with ``imp.load_module()`` under its
    dotted name so far. The search path for the next component is the
    ``__path__`` of the module just loaded, or ``[filename]`` when it has none.

    ``path`` is the initial search path given to ``imp.find_module()`` for the
    first component (``None`` by default).

    Returns the module loaded for the last component. Exceptions raised by
    ``imp`` propagate to the caller.

    NOTE(review): the ``imp`` documentation says ``load_module()`` behaves
    like ``reload()`` for a module that is already imported, so parent
    packages may be re-executed here -- confirm that this is intended.
    """
    loaded = None
    handle = None
    seen = []
    for piece in full_module_name.split('.'):
        seen.append(piece)
        dotted_name = '.'.join(seen)
        try:
            handle, filename, description = imp.find_module(piece, path)
            loaded = imp.load_module(
                dotted_name, handle, filename, description)
            # Packages expose __path__; anything else falls back to its file.
            path = getattr(loaded, '__path__', None) or [filename]
        finally:
            # find_module() hands back an open file for plain modules.
            if handle:
                handle.close()
    return loaded

def module_name_for_filename(filename):
    """Given the name of a Python file, find an appropriate module name.

    This involves determining whether the file is within a package, and
    determining the name of same: starting from the file's directory, every
    enclosing directory that contains an ``__init__.py`` is treated as a
    package and prepended to the module name.

    Returns a ``(basename, modulename)`` tuple, where ``basename`` is the
    directory left over once the package directories have been consumed
    (joined with '/') and ``modulename`` is the dotted module name with the
    file's last extension removed.
    """
    all_segments = filename.split(os.sep)
    path_elements = all_segments[:-1]
    module_elements = [all_segments[-1].rsplit('.', 1)[0]]
    # Stop once the directory components run out. Without this guard a bare
    # relative filename checked from inside a package directory would find
    # './__init__.py' and then pop from an empty list (IndexError).
    while path_elements and os.path.exists(
            '/'.join(path_elements + ['__init__.py'])):
        module_elements.insert(0, path_elements.pop())
    modulename = '.'.join(module_elements)
    basename = '/'.join(path_elements)
    return (basename, modulename)

def get_module(name):
    """Return the module identified by a file path or a dotted module name.

    A ``name`` beginning with '/' is treated as the path of a Python file:
    ``module_name_for_filename()`` supplies the dotted module name and the
    directory to search. Any other ``name`` is used as the module name
    directly, with no explicit search path.

    When that module name is already present in ``sys.modules`` the cached
    entry is returned and nothing is loaded; otherwise the module is loaded
    with ``load_qualified_module()``.
    """
    given_as_path = name.startswith('/')
    if given_as_path:
        search_dir, modulename = module_name_for_filename(name)
        search_path = [search_dir]
    else:
        modulename, search_path = name, None

    if modulename not in sys.modules:
        return load_qualified_module(modulename, search_path)
    return sys.modules[modulename]

def load(config_string, default_name=None):
    """Resolve a ``module[:object]`` specifier to a module or one of its
    attributes.

    ``config_string`` is split at its last ':'. The part before it is handed
    to ``get_module()``; the part after it, when present, names the attribute
    to fetch from that module. When the specifier contains no ':',
    ``default_name`` is used as the attribute name instead.

    Returns the named attribute, or the module itself when the resulting
    attribute name is empty or ``None``. A missing attribute raises
    ``AttributeError`` via ``getattr``.
    """
    pieces = config_string.rsplit(':', 1)
    module_name = pieces[0]
    if len(pieces) > 1:
        object_name = pieces[1]
    else:
        object_name = default_name

    module = get_module(module_name)
    if not object_name:
        return module
    return getattr(module, object_name)
2 changes: 2 additions & 0 deletions tests/target_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Fixture values for tests/test_modules.py, which loads this module by the
# name 'tests.target_module' and by file path, and compares the attributes
# it gets back against these literal strings.
default_target = 'DEFAULT'
specified_target = 'SPECIFIED'
51 changes: 51 additions & 0 deletions tests/test_modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys
import os
import logging
import unittest

import modules

# Logger for this test module; not referenced by the tests visible here.
log = logging.getLogger('datadog.test')

# Same names and values as in tests/target_module.py. The tests visible here
# read these names from 'tests.target_module', not from this module.
default_target = 'DEFAULT'
specified_target = 'SPECIFIED'
# Set to True by TestModuleLoad.setUp and back to False by tearDown;
# test_cached_module reads it back through modules.load().
has_been_mutated = False

class TestModuleLoad(unittest.TestCase):
    """Tests for ``modules.load``: cache reuse, cache population, default
    versus explicit object names, and loading by absolute file path."""

    def setUp(self):
        # Mutate this already-imported module so test_cached_module can tell
        # a reused module (flag is True) from a freshly re-executed one
        # (flag would be back at its initial False).
        sys.modules[__name__].has_been_mutated = True
        # Every test starts with the target module unloaded.
        if 'tests.target_module' in sys.modules:
            del sys.modules['tests.target_module']

    def tearDown(self):
        sys.modules[__name__].has_been_mutated = False

    def test_cached_module(self):
        """Modules already in the cache should be reused"""
        self.assertTrue(modules.load('%s:has_been_mutated' % __name__))

    def test_cache_population(self):
        """Python module cache should be populated"""
        self.assertFalse('tests.target_module' in sys.modules)
        modules.load('tests.target_module')
        self.assertTrue('tests.target_module' in sys.modules)

    def test_modname_load_default(self):
        """When the specifier contains no object name, any provided default
        should be used"""
        self.assertEqual(
            modules.load('tests.target_module', 'default_target'),
            'DEFAULT'
        )

    def test_modname_load_specified(self):
        """When the specifier contains an object name, any provided default
        should be overridden"""
        self.assertEqual(
            modules.load(
                'tests.target_module:specified_target',
                'default_target'),
            'SPECIFIED'
        )

    def test_pathname_load_finds_package(self):
        """Loading modules by absolute path should correctly set the name of
        the loaded module to include any package containing it."""
        # Locate the fixture next to this test file rather than relative to
        # the current working directory, so the test does not depend on where
        # the test runner was started.
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        m = modules.load(os.path.join(tests_dir, 'target_module.py'))
        self.assertEqual(m.__name__, 'tests.target_module')