#!/usr/bin/env python
try:
    import json
except:
    import simplejson as json
import getopt
import os
import pycurl
import sys
import time
from datetime import datetime, timedelta
from StringIO import StringIO

# Exit codes (NAGIOS compliant)
# The probe returns one of these together with a message; the script then
# uses it as the process exit status.
EX_OK       = 0  # nothing to report
EX_WARNING  = 1  # warning threshold exceeded
EX_CRITICAL = 2  # critical threshold exceeded, or the check itself failed
EX_UNKNOWN  = 3  # status could not be determined

class FtsStalledTransfersProbe:
    """
    Check if there are any transfers that have been waiting for too long
    """
    # The docstring above doubles as the description printed by usage(), so
    # developer-facing notes are kept in comments.
    #
    # An instance is built from the command line and then called without
    # arguments; the call returns a (status, message) tuple where status is
    # one of the EX_* exit codes.

    # Description used when docstrings have been stripped (python -OO)
    DEFAULT_DESCRIPTION = 'Check if there are any transfers that have been waiting for too long'

    # Layout of the 'submit_time' field of each transfer in the response
    SUBMIT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self, argv):
        """
        Parse the command line.

        argv is the full argument vector, program name included. Errors are
        never raised from here: they are stored in self.abortMsg and reported
        when the probe is called.
        """
        self.description = ' '.join((self.__doc__ or self.DEFAULT_DESCRIPTION).split())
        self.scriptName  = os.path.basename(argv[0])

        # Defaults
        self.abortMsg   = None
        self.help       = False
        self.host       = None
        self.monitoring = None
        self.warning    = timedelta(days = 2)
        self.critical   = timedelta(days = 7)
        self.cert       = None

        # Process args
        try:
            opts, args = getopt.getopt(argv[1:], 'hH:m:w:c:',
                                    ['help', 'host=', 'mon=', 'cert='])
            for opt, arg in opts:
                if opt in ('-h', '--help'):
                    self.help = True
                elif opt in ('-H', '--host'):
                    self.host = arg
                elif opt in ('-m', '--mon'):
                    self.monitoring = arg
                elif opt == '-w':
                    self.warning = timedelta(days = int(arg))
                elif opt == '-c':
                    self.critical = timedelta(days = int(arg))
                elif opt == '--cert':
                    self.cert = arg
        except Exception:
            # Unknown option, missing argument or non-numeric threshold.
            # sys.exc_info() keeps this valid on every Python version.
            self.abortMsg = str(sys.exc_info()[1])

        # Resolve the URL of the job listing. An explicit monitoring base url
        # wins over the host; without either there is nothing to query, so
        # self.monitoring is left as None rather than built from a None host.
        if self.monitoring:
            # Tolerate a trailing slash in the base url
            self.monitoring = self.monitoring.rstrip('/') + '/jobs'
        elif self.host is not None:
            self.monitoring = "https://%s:8449/fts3/ftsmon/jobs" % self.host
        elif not self.abortMsg:
            self.abortMsg = "-H OR -m must be specified"

    def usage(self):
        """
        Return the help text: description, synopsis and option list.
        """
        return \
"""%s

Usage:
    %s (-H <host> | -m <monitoring base url>) [-w <number of days>] [-c <number of days>] [--cert <file>]

Options:
    -h, --help Shows this help
    -H, --host Specifies the host to check
    -m, --mon  Monitoring base url. i.e. https://fts3-pilot-mon.cern.ch:8449/fts3/ftsmon
    -w         Warning threshold, in days (default 2).
    -c         Critical threshold, in days (default 7).
    --cert     File used both as client certificate and as CA bundle.
""" % (self.description, self.scriptName)


    def __call__(self):
        """
        Run the check and return a (status, message) tuple.

        Returns EX_OK with the usage text when help was requested, and
        EX_CRITICAL when the command line was invalid, the monitoring could
        not be queried, or its answer could not be understood. Otherwise the
        status reflects how long the oldest submitted transfer has waited.
        """
        if self.help:
            return (EX_OK, self.usage())
        elif self.abortMsg:
            return (EX_CRITICAL, self.abortMsg)

        try:
            http_code, body = self._fetch()
        except Exception:
            return (EX_CRITICAL, str(sys.exc_info()[1]))

        if http_code != 200:
            return (EX_CRITICAL, "Got %d" % http_code)

        try:
            response = json.loads(body)
        except Exception:
            return (EX_CRITICAL, 'Could not retrieve the status from the monitoring')

        # A response that parses but lacks the expected fields must still end
        # in a proper status: an uncaught exception would terminate the
        # interpreter with exit status 1, which equals EX_WARNING.
        try:
            return self._evaluate(response)
        except (KeyError, TypeError, ValueError):
            error = sys.exc_info()[1]
            return (EX_CRITICAL, 'Unexpected response from the monitoring (%s: %s)'
                                 % (error.__class__.__name__, error))

    def _fetch(self):
        """
        Download the list of submitted jobs, ordered by submit time.

        Returns (http status code, response body). Errors raised by pycurl
        are propagated to the caller; the curl handle is always released.
        """
        body = StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.WRITEFUNCTION, body.write)
            curl.setopt(pycurl.FOLLOWLOCATION, True)
            curl.setopt(pycurl.CAPATH, '/etc/grid-security/certificates')
            curl.setopt(pycurl.URL, self.monitoring + '?state=SUBMITTED&orderby=submit_time&page=all')

            if self.cert:
                # The same file is given as client certificate and CA bundle.
                # NOTE(review): presumably a proxy file carrying its own
                # chain - confirm.
                curl.setopt(pycurl.SSLCERT, self.cert)
                curl.setopt(pycurl.CAINFO, self.cert)

            curl.perform()
            return (curl.getinfo(pycurl.RESPONSE_CODE), body.getvalue())
        finally:
            curl.close()

    def _evaluate(self, response):
        """
        Turn the decoded monitoring response into a (status, message) tuple.

        Reads response['count'] and response['items']; each item must carry
        'submit_time' and 'job_id'. Raises KeyError, TypeError or ValueError
        when the response does not have that shape.
        """
        submittedCount = response['count']
        if submittedCount == 0:
            return (EX_OK, "No transfers in submitted state")

        now = datetime.utcnow()
        oldest, oldest_time = self._find_oldest(response['items'], now)
        elapsed = now - oldest_time

        # oldest is None when no item predates 'now' (e.g. empty item list);
        # there is then no job to blame, whatever the thresholds are.
        if oldest is not None:
            if elapsed > self.critical:
                return (EX_CRITICAL, "Transfer stalled for %s (Job %s)" % (elapsed, oldest['job_id']))
            elif elapsed > self.warning:
                return (EX_WARNING, "Transfer stalled for %s (Job %s)" % (elapsed, oldest['job_id']))
        return (EX_OK, "%d in submitted state (oldest queued for %s)" % (submittedCount, elapsed))

    def _find_oldest(self, items, now):
        """
        Return (item, submit time) for the item submitted the longest ago.

        Only items submitted strictly before 'now' are considered; when there
        is none the result is (None, now).
        """
        oldest = None
        oldest_time = now
        for item in items:
            submitted = self._parse_submit_time(item['submit_time'])
            if submitted < oldest_time:
                oldest = item
                oldest_time = submitted
        return (oldest, oldest_time)

    def _parse_submit_time(self, value):
        """
        Parse a 'YYYY-MM-DDTHH:MM:SS' string into a naive datetime.

        The fields are used exactly as written. Going through time.mktime()
        and datetime.fromtimestamp() would reinterpret them in the local time
        zone of the host, which shifts values that fall into a DST gap.
        NOTE(review): the result is compared against utcnow(), so the
        monitoring is assumed to report UTC - confirm.
        """
        return datetime(*time.strptime(value, self.SUBMIT_TIME_FORMAT)[:6])


if __name__ == '__main__':
    # Run the probe and report its result as "<LABEL> - <message>" on stdout,
    # using the status as the process exit code.
    #
    # An exception escaping from the probe would make the interpreter print a
    # traceback and exit with status 1, which is the value of EX_WARNING.
    # Report such failures as UNKNOWN instead.
    try:
        probe = FtsStalledTransfersProbe(sys.argv)
        (status, msg) = probe()
    except Exception:
        status = EX_UNKNOWN
        msg = 'Probe failed: %s' % (sys.exc_info()[1],)

    labels = {
        EX_OK:       'OK',
        EX_WARNING:  'WARNING',
        EX_CRITICAL: 'CRITICAL',
    }
    # Any status without a label of its own is reported as UNKNOWN.
    # sys.stdout.write is used because it behaves the same whether or not
    # print is a statement.
    sys.stdout.write('%s - %s\n' % (labels.get(status, 'UNKNOWN'), msg))

    sys.exit(status)

