Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metrics Tags and Redis #81

Merged
merged 10 commits into from
Jun 19, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ install:
script: nosetests tests
before_script:
- sudo apt-get install sysstat

env:
- DB=redis

106 changes: 69 additions & 37 deletions checks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""
Datadog agent
"""Base class for Checks.

If you are writing your own checks you should subclass the Check class.

The typical workflow works like this:
1. Create your Check class
2. Declare your metrics as gauges or counters
3. Call save_sample for each metric
4. Call get_metrics() to get results
5. Plug the results into checks/common.py

Licensed under Simplified BSD License (see LICENSE)
(C) Boxed Ice 2010 all rights reserved
(C) Datadog, Inc 2011 All Rights Reserved
"""

import logging
Expand Down Expand Up @@ -53,12 +58,17 @@ def filter(self, record):
class Check(object):
"""
(Abstract) class for all checks with the ability to:
* store 1 (and only 1) sample for gauges per metric/tag combination
* compute rates for counters
* only log error messages once (instead of each time they occur)

"""
def __init__(self, logger):
# where to store samples, indexed by metric_name
# metric_name: [(ts, value), (ts, value)]
# metric_name: {("sorted", "tags"): [(ts, value), (ts, value)],
# tuple(tags) is stored as the key since lists are not hashable
# None: [(ts, value), (ts, value)]}
# untagged values are indexed by None
self._sample_store = {}
self._counters = {} # metric_name: bool
self.logger = logger
Expand Down Expand Up @@ -92,7 +102,7 @@ def counter(self, metric):
ACHTUNG: Resets previous values associated with this metric.
"""
self._counters[metric] = True
self._sample_store[metric] = []
self._sample_store[metric] = {}

def is_counter(self, metric):
"Is this metric a counter?"
Expand All @@ -103,7 +113,7 @@ def gauge(self, metric):
Treats the metric as a gauge, i.e. keep the data as is
ACHTUNG: Resets previous values associated with this metric.
"""
self._sample_store[metric] = []
self._sample_store[metric] = {}

def is_metric(self, metric):
return metric in self._sample_store
Expand All @@ -116,32 +126,47 @@ def get_metric_names(self):
"Get all metric names"
return self._sample_store.keys()

def save_sample(self, metric, value, timestamp=None):
"""Save a simple sample, evict old values if needed"""
def save_gauge(self, metric, value, timestamp=None, tags=None):
    """Record one gauge data point.

    Declares *metric* as a gauge on first use, then delegates the
    actual storage to save_sample().
    """
    if not self.is_gauge(metric):
        # First time we see this metric: register it as a gauge so
        # save_sample() will accept the value.
        self.gauge(metric)
    self.save_sample(metric, value, timestamp, tags)

def save_sample(self, metric, value, timestamp=None, tags=None):
    """Save a single sample for a declared metric, evicting old values.

    Gauges keep only the latest (timestamp, value) pair per tag
    combination; counters keep the last two so that a rate can be
    computed later.

    @param metric: name of a metric previously declared via gauge() or counter()
    @param value: sample value, coerced to float
    @param timestamp: epoch seconds; defaults to now
    @param tags: optional list or tuple of tag strings
    @raise CheckException: if the metric is undeclared, tags is not a
        list/tuple, or the metric is neither gauge nor counter
    @raise NaN: if value cannot be converted to float
    """
    if timestamp is None:
        timestamp = time.time()
    if metric not in self._sample_store:
        raise CheckException("Saving a sample for an undefined metric: %s" % metric)
    try:
        value = float(value)
    except ValueError as ve:
        # "except ... as" works on Python 2.6+ and 3.x alike
        raise NaN(ve)

    # Sort and validate tags. Sorting makes the tuple key canonical so
    # ["a", "b"] and ["b", "a"] land in the same bucket.
    if tags is not None:
        if not isinstance(tags, (list, tuple)):
            raise CheckException("Tags must be a list or tuple of strings")
        tags = tuple(sorted(tags))

    # Data eviction rules
    if self.is_gauge(metric):
        self._sample_store[metric][tags] = ((timestamp, value), )
    elif self.is_counter(metric):
        if self._sample_store[metric].get(tags) is None:
            self._sample_store[metric][tags] = [(timestamp, value)]
        else:
            # Keep at most the previous sample plus the new one
            self._sample_store[metric][tags] = self._sample_store[metric][tags][-1:] + [(timestamp, value)]
    else:
        raise CheckException("%s must be either gauge or counter, skipping sample at %s" % (metric, time.ctime(timestamp)))

    if self.is_gauge(metric):
        # store[metric][tags] = (ts, val) - only 1 value allowed
        assert len(self._sample_store[metric][tags]) == 1, self._sample_store[metric]
    elif self.is_counter(metric):
        assert len(self._sample_store[metric][tags]) in (1, 2), self._sample_store[metric]

@classmethod
def _rate(cls, sample1, sample2):
Expand All @@ -163,33 +188,39 @@ def _rate(cls, sample1, sample2):
except Exception, e:
raise NaN(e)

def get_sample_with_timestamp(self, metric, tags=None):
    """Return (timestamp-epoch-style, value) for a metric/tags combination.

    For counters this is the rate computed from the last two samples;
    for gauges it is the most recent sample.

    @raise UnknownValue: if the metric or its tags combination has never
        been sampled, or a counter does not yet have two samples
    """
    # Canonicalize tags to the same sorted-tuple key used by
    # save_sample, without mutating the caller's list.
    if tags is not None and isinstance(tags, (list, tuple)):
        tags = tuple(sorted(tags))

    # Never seen this metric
    if metric not in self._sample_store:
        raise UnknownValue()

    samples = self._sample_store[metric].get(tags)
    # Never seen this tags combination for the metric: report it as an
    # unknown value instead of leaking a KeyError to the caller.
    if samples is None:
        raise UnknownValue()

    if self.is_counter(metric):
        # Not enough values to compute a rate
        if len(samples) < 2:
            raise UnknownValue()
        return self._rate(samples[-2], samples[-1])
    elif self.is_gauge(metric) and len(samples) >= 1:
        return samples[-1]
    else:
        raise UnknownValue()

def get_sample(self, metric, tags=None):
    """Return only the value (no timestamp) of the latest sample.

    @raise UnknownValue: propagated from get_sample_with_timestamp
    """
    x = self.get_sample_with_timestamp(metric, tags)
    # types.TupleType is just an alias of tuple (and is gone in
    # Python 3); compare against the builtin directly.
    assert type(x) == tuple and len(x) == 2, x
    return x[1]

def get_samples_with_timestamps(self):
"Return all values {metric: (ts, value)}"
"Return all values {metric: (ts, value)} for non-tagged metrics"
values = {}
for m in self._sample_store:
try:
Expand All @@ -199,7 +230,7 @@ def get_samples_with_timestamps(self):
return values

def get_samples(self):
"Return all values {metric: value}"
"Return all values {metric: value} for non-tagged metrics"
values = {}
for m in self._sample_store:
try:
Expand All @@ -209,21 +240,22 @@ def get_samples(self):
pass
return values

def get_metadata(self):
    """Return metadata as a dict of key/value pairs.

    The base implementation has none; subclasses may override.  How the
    metadata is interpreted and processed is not defined here.
    """
    metadata = {}
    return metadata

def get_metrics(self):
    """Get all metrics, including the ones that are tagged.
    This is the preferred method to retrieve metrics.

    @return the list of samples
    @rtype [(metric_name, timestamp, value, {"tags": ["tag1", "tag2"]}), ...]
    """
    metrics = []
    for m in self._sample_store:
        for t in self._sample_store[m]:
            # try/except sits inside the loop so one unresolved
            # metric/tags pair (e.g. a counter with a single sample)
            # does not silently drop the remaining tag combinations.
            try:
                ts, val = self.get_sample_with_timestamp(m, t)
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit still propagate.
                continue
            if t is None:
                metrics.append((m, int(ts), val, {}))
            else:
                metrics.append((m, int(ts), val, {"tags": list(t)}))
    return metrics
Expand Down
15 changes: 6 additions & 9 deletions checks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ def __init__(self, agentConfig, emitter):
self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

self._metrics_checks = [Cacti(self.checksLogger), Varnish(self.checksLogger)]
# All new checks should be metrics checks:
self._metrics_checks = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I didn't know we were starting to move to this pattern. That means we're in a position to implement conf.d-style custom checks.

Cacti(self.checksLogger),
Redis(self.checksLogger),
Varnish(self.checksLogger)
]
self._event_checks = [Hudson(), Nagios(socket.gethostname())]
self._resources_checks = [ResProcesses(self.checksLogger,self.agentConfig)]

Expand Down Expand Up @@ -177,10 +182,6 @@ def getGangliaData(self):
def getCassandraData(self):
return self._cassandra.check(self.checksLogger, self.agentConfig)

@recordsize
def getRedisData(self):
return self._redis.check(self.agentConfig)

@recordsize
def getJvmData(self):
return self._jvm.check(self.agentConfig)
Expand Down Expand Up @@ -234,7 +235,6 @@ def doChecks(self, firstRun=False, systemStats=False):
cpuStats = self.getCPUStats()
gangliaData = self.getGangliaData()
cassandraData = self.getCassandraData()
redisData = self.getRedisData()
jvmData = self.getJvmData()
tomcatData = self.getTomcatData()
activeMQData = self.getActiveMQData()
Expand Down Expand Up @@ -315,9 +315,6 @@ def doChecks(self, firstRun=False, systemStats=False):
if ioStats:
checksData['ioStats'] = ioStats

if redisData:
checksData['redis'] = redisData

if jvmData:
checksData['jvm'] = jvmData

Expand Down
Loading