DataDog · charles-dyfis-net · Jul 19, 2012 · Jul 19, 2012 · alq666 · Jul 20, 2012
@@ -60,12 +60,15 @@ def run(self, agentConfig=None, run_forever=True):
             else:
                 agentLogger.info('Not running on EC2, using hostname to identify this server')
 
-        emitter = http_emitter
+        emitters = [http_emitter]
+        for emitter_spec in [s.strip() for s in agentConfig.get('custom_emitters', '').split(',')]:
+            if len(s) == 0: continue
+            emitters.append(modules.load(emitter_spec, 'emitter'))
 
         checkFreq = int(agentConfig['checkFreq'])
 
         # Checks instance
-        c = checks(agentConfig, emitter)
+        c = checks(agentConfig, emitters)
 
         # Watchdog
         watchdog = None

@@ -68,10 +68,10 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 class checks(object):
-    def __init__(self, agentConfig, emitter):
+    def __init__(self, agentConfig, emitters):
         self.agentConfig = agentConfig
         self.plugins = None
-        self.emitter = emitter
+        self.emitters = emitters
         self.os = None
 
         self.checksLogger = logging.getLogger('checks')
@@ -416,5 +416,6 @@ def doChecks(self, firstRun=False, systemStats=False):
 
         # Send back data
         self.checksLogger.debug("checksData: %s" % checksData)
-        self.emitter(checksData, self.checksLogger, self.agentConfig)
+        for emitter in self.emitters:
+            emitter(checksData, self.checksLogger, self.agentConfig)
         self.checksLogger.info("Checks done")
@@ -1,4 +1,5 @@
 from checks.utils import TailFile
+import modules
 import os
 import sys
 import traceback
@@ -98,17 +99,14 @@ def init(cls, logger, log_path, parser_spec=None):
 
         if parser_spec:
             try:
-                module_name, func_name = parser_spec.split(':')
-                __import__(module_name)
-                parse_func = getattr(sys.modules[module_name], func_name, 
-                    None)
+                parse_func = modules.load(parser_spec, 'parser')
             except:
                 logger.exception(traceback.format_exc())
                 logger.error('Could not load Dogstream line parser "%s" PYTHONPATH=%s' % (
                     parser_spec, 
                     os.environ.get('PYTHONPATH', ''))
                 )
-            logger.info("dogstream: parsing %s with %s" % (log_path, parse_func))
+            logger.info("dogstream: parsing %s with %s (requested %s)" % (log_path, parse_func, parser_spec))
         else:
             logger.info("dogstream: parsing %s with default parser" % log_path)
 
@@ -236,7 +234,7 @@ def _default_line_parser(self, logger, line):
         try:
             while line:
                 keyval, _, line = partition(line.strip(), sep)
-                key, val = keyval.split('=')
+                key, val = keyval.split('=', 1)
                 attributes[key] = val
         except Exception, e:
             logger.debug(traceback.format_exc())

@@ -258,7 +258,11 @@ use_mount: no
 # Comma-separated list of logs to parse and optionally custom parsers to use.
 # The form should look like this:
 #
-#   dogstreams: /path/to/log1:parsers:custom_parser, /path/to/log2, /path/to/log3, ...
+#   dogstreams: /path/to/log1:parsers_module:custom_parser, /path/to/log2, /path/to/log3, ...
+#
+# Or this:
+#
+#   dogstreams: /path/to/log1:/path/to/my/parsers_module.py:custom_parser, /path/to/log2, /path/to/log3, ...
 #
 # Each entry is a path to a log file and optionally a Python module/function pair
 # separated by colons.
@@ -270,9 +274,10 @@ use_mount: no
 # where attributes should at least contain the key 'metric_type', specifying
 # whether the given metric is a 'counter' or 'gauge'.
 #
-# In order for the agent to find the custom line parser functions, the modules
-# must exist in the agent's PYTHONPATH. You can set this as an environment
-# variable when starting the agent.
+# Unless parsers are specified with an absolute path, the modules must exist in
+# the agent's PYTHONPATH. You can set this as an environment variable when
+# starting the agent. If the name of the custom parser function is not passed,
+# 'parser' is assumed.
 #
 # If this value isn't specified, the default parser assumes this log format:
 #     metric timestamp value key0=val0 key1=val1 ...
@@ -310,6 +315,19 @@ use_mount: no
 # -------------------------------------------------------------------------- #
 #elasticsearch: http://localhost:9200/_cluster/nodes/stats?all=true
 
+# ========================================================================== #
+# Custom Emitters                                                            #
+# ========================================================================== #
+
+# Comma-separated list of emitters to be used in addition to the standard one
+#
+# Each entry is a file path (or module name) and a class/function name, separated by a colon.
+#
+# custom_emitters: /usr/local/my-code/emitters/rabbitmq.py:RabbitMQEmitter
+#
+# If the name of the emitter function is not specified, 'emitter' is assumed.
+
+
 # -------------------------------------------------------------------------- #
 #  Logging Configuration
 # -------------------------------------------------------------------------- #
@@ -345,4 +363,4 @@ args:('/tmp/dogstatsd.log', 'a')
 
 [formatter_ddagent]
 format: %(asctime)s | %(name)s | %(levelname)s | %(message)s
-class:logging.Formatter
+class:logging.Formatter
@@ -0,0 +1,71 @@
+""" Tools for loading Python modules from arbitrary locations.
+"""
+
+import os
+import imp
+import sys
+
+def imp_type_for_filename(filename):
+    """Given the name of a Python module, return a type description suitable to
+    be passed to imp.load_module()"""
+    for type_data in imp.get_suffixes():
+        extension = type_data[0]
+        if filename.endswith(extension):
+            return type_data
+    return None
+
+def load_qualified_module(full_module_name, path=None):
+    """Load a module which may be within a package"""
+    remaining_pieces = full_module_name.split('.')
+    done_pieces = []
+    file_obj = None
+    while remaining_pieces:
+        try:
+            done_pieces.append(remaining_pieces.pop(0))
+            curr_module_name = '.'.join(done_pieces)
+            (file_obj, filename, description) = imp.find_module(
+                    done_pieces[-1], path)
+            package_module = imp.load_module(
+                    curr_module_name, file_obj, filename, description)
+            path = getattr(package_module, '__path__', None) or [filename]
+        finally:
+            if file_obj:
+                file_obj.close()
+    return package_module
+
+def module_name_for_filename(filename):
+    """Given the name of a Python file, find an appropropriate module name.
+
+    This involves determining whether the file is within a package, and
+    determining the name of same."""
+    all_segments = filename.split(os.sep)
+    path_elements = all_segments[:-1]
+    module_elements = [all_segments[-1].rsplit('.', 1)[0]]
+    while os.path.exists('/'.join(path_elements + ['__init__.py'])):
+        module_elements.insert(0, path_elements.pop())
+    modulename = '.'.join(module_elements)
+    basename = '/'.join(path_elements)
+    return (basename, modulename)
+
+def get_module(name):
+    """Given either an absolute path to a Python file or a module name, load
+    and return a Python module.
+
+    If the module is already loaded, takes no action."""
+    if name.startswith('/'):
+        basename, modulename = module_name_for_filename(name)
+        path = [basename]
+    else:
+        modulename = name
+        path = None
+    if modulename in sys.modules:
+        return sys.modules[modulename]
+    return load_qualified_module(modulename, path)
+
+def load(config_string, default_name=None):
+    """Given a module name and an object expected to be contained within,
+    return said object"""
+    (module_name, object_name) = \
+            (config_string.rsplit(':', 1) + [default_name])[:2]
+    module = get_module(module_name)
+    return getattr(module, object_name) if object_name else module
@@ -0,0 +1,2 @@
+# Fixture values; the module-loading tests fetch attributes with these names
+# and values from 'tests.target_module' (presumably this file -- TODO confirm).
+default_target = 'DEFAULT'
+specified_target = 'SPECIFIED'
@@ -0,0 +1,51 @@
+import sys
+import os
+import logging
+import unittest
+
+import modules
+
+log = logging.getLogger('datadog.test')
+
+# NOTE(review): these carry the same names and values the tests expect from
+# tests.target_module; nothing visible here reads them from this module.
+default_target = 'DEFAULT'
+specified_target = 'SPECIFIED'
+# Set to True by setUp() and back to False by tearDown(); test_cached_module
+# reads it back through modules.load().
+has_been_mutated = False
+
+class TestModuleLoad(unittest.TestCase):
+    def setUp(self):
+        sys.modules[__name__].has_been_mutated = True
+        if 'tests.target_module' in sys.modules:
+            del sys.modules['tests.target_module']
+    def tearDown(self):
+        sys.modules[__name__].has_been_mutated = False
+    def test_cached_module(self):
+        """Modules already in the cache should be reused"""
+        self.assertTrue(modules.load('%s:has_been_mutated' % __name__))
+    def test_cache_population(self):
+        """Python module cache should be populated"""
+        self.assertTrue(not 'tests.target_module' in sys.modules)
+        modules.load('tests.target_module')
+        self.assertTrue('tests.target_module' in sys.modules)
+    def test_modname_load_default(self):
+        """When the specifier contains no module name, any provided default
+        should be used"""
+        self.assertEquals(
+            modules.load(
+                'tests.target_module',
+                'default_target'),
+            'DEFAULT'
+        )
+    def test_modname_load_specified(self):
+        """When the specifier contains a module name, any provided default
+        should be overridden"""
+        self.assertEquals(
+            modules.load(
+                'tests.target_module:specified_target',
+                'default_target'),
+            'SPECIFIED'
+        )
+    def test_pathname_load_finds_package(self):
+        """"Loading modules by absolute path should correctly set the name of
+        the loaded module to include any package containing it."""
+        m = modules.load(os.getcwd() + '/tests/target_module.py')
+        self.assertEquals(m.__name__, 'tests.target_module')