#!/usr/bin/env python
try:
    import json
except:
    import simplejson as json
import getopt
import os
import pycurl
import sys
from StringIO import StringIO

# Nagios plugin exit statuses: OK = 0, WARNING = 1, CRITICAL = 2, UNKNOWN = 3.
# The probe returns one of these and the script uses it as its exit code.
EX_OK, EX_WARNING, EX_CRITICAL, EX_UNKNOWN = range(4)

class FtsStalledServerProbe:
    """
    Check if the given server is stalled.
    The server is in a critical state if:
        - Server is not transferring anything
        - And the total active / number of hosts > max(number of hosts, 10)
    The server is in a warning state if:
        - It has active transfers
        - But they are less than total (active / number of hosts) * 0.8
    """

    # NOTE: the class docstring above is also the description printed by
    # usage(), so editing it changes the --help output.

    def __init__(self, argv):
        """
        Parse the command line.

        argv is the full argument vector, program name included (argv[0]).
        Parsing problems are not raised: the message is stored in
        self.abortMsg and reported when the probe is called.
        """
        # __doc__ is None when running with "python -OO"
        self.description = ' '.join((self.__doc__ or '').split())
        self.scriptName  = os.path.basename(argv[0])

        # Defaults
        self.abortMsg   = None
        self.help       = False
        self.host       = None
        self.monitoring = None
        self.cert       = None

        # Process args
        try:
            opts, _ = getopt.getopt(argv[1:], 'hH:m:',
                                    ['help', 'host=', 'mon=', 'cert='])
        except Exception as e:
            self.abortMsg = str(e)
            opts = []

        for opt, arg in opts:
            if opt in ('-h', '--help'):
                self.help = True
            elif opt in ('-H', '--host'):
                self.host = arg
            elif opt in ('-m', '--mon'):
                self.monitoring = arg
            elif opt == '--cert':
                self.cert = arg

        if not self.abortMsg and self.host is None:
            self.abortMsg = "-H must be specified"

        if self.monitoring:
            # Tolerate a trailing slash on the user supplied base url
            self.monitoring = self.monitoring.rstrip('/') + '/stats/servers'
        elif self.host is not None:
            # Only derive the default url when there is a host to build it from
            self.monitoring = "https://%s:8449/fts3/ftsmon/stats/servers" % self.host

    def usage(self):
        """
        Return the help text: description, synopsis and option list.
        """
        return \
"""%s

Usage:
    %s -H <host> [-m <monitoring base url>] [--cert <certificate>]

Options:
    -h, --help Shows this help
    -H, --host Specifies the host to check
    -m, --mon  Monitoring base url. i.e. https://fts3-pilot-mon.cern.ch:8449/fts3/ftsmon
        --cert Certificate file, passed to curl as client certificate and as CA bundle
""" % (self.description, self.scriptName)


    def __call__(self):
        """
        Run the probe.

        Fetches the per-server statistics from the monitoring url, and
        compares the number of active transfers of the checked host with
        the average across all the reported hosts.

        Returns a tuple (status, message), where status is one of the
        EX_* exit codes.
        """
        if self.help:
            return (EX_OK, self.usage())
        if self.abortMsg:
            return (EX_CRITICAL, self.abortMsg)

        body = StringIO()

        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.WRITEFUNCTION, body.write)
            curl.setopt(pycurl.FOLLOWLOCATION, True)
            curl.setopt(pycurl.URL, self.monitoring)
            curl.setopt(pycurl.CAPATH, '/etc/grid-security/certificates/')

            if self.cert:
                # The same file is used as client certificate and CA bundle
                curl.setopt(pycurl.SSLCERT, self.cert)
                curl.setopt(pycurl.CAINFO, self.cert)

            try:
                curl.perform()
            except Exception as e:
                return (EX_CRITICAL, str(e))

            # Must be read before the handle is closed
            httpCode = curl.getinfo(pycurl.RESPONSE_CODE)
        finally:
            # Release the handle on every path, including the early return
            curl.close()

        if httpCode != 200:
            return (EX_CRITICAL, "Got %d" % httpCode)

        try:
            response = json.loads(body.getvalue())
        except Exception as e:
            return (EX_CRITICAL, 'Could not retrieve the status from the monitoring (%s)' % str(e))

        if self.host not in response:
            return (EX_CRITICAL, 'The host is not visible in the monitoring. Maybe it is down?')

        # response maps each host to its statistics; it has at least one
        # entry here, since the checked host is known to be in it
        nHosts = len(response)
        totalActive = sum(h.get('active', 0) for h in response.values())
        thisHostActive = response[self.host].get('active', 0)

        # Floor division, spelled explicitly so the thresholds do not depend
        # on the interpreter's "/" semantics for integers
        averageActive = totalActive // nHosts

        msg = "Running %d out of %d" % (thisHostActive, totalActive)
        if thisHostActive == 0 and averageActive > max(nHosts, 10):
            return (EX_CRITICAL, msg)
        elif thisHostActive < averageActive * 0.8:
            return (EX_WARNING, msg)

        return (EX_OK, msg)


if __name__ == '__main__':
    # Run the probe, print a single "<LABEL> - <message>" line and exit
    # with the status returned by the probe.
    status, msg = FtsStalledServerProbe(sys.argv)()

    labels = {
        EX_OK:       "OK",
        EX_WARNING:  "WARNING",
        EX_CRITICAL: "CRITICAL",
    }
    # Any status without its own label is reported as UNKNOWN
    print("%s - %s" % (labels.get(status, "UNKNOWN"), msg))

    sys.exit(status)

