Group By with Deadman

#1

I have dnsmasq running on all my hosts and would like to detect when it goes down and be notified for each host individually. So far I haven’t had any luck getting this to work. Any help is appreciated.

// Dnsmasq deadman alert.
// Watches the procstat series for the dnsmasq process, grouped by host, and
// raises an alert through Slack and a log file when the deadman condition
// is met. Alert events are also written back to InfluxDB and exposed over HTTP.

// --- Source selection ---
var db = 'telegraf'

var rp = 'autogen'

var measurement = 'procstat'

// Tags to group the stream by. Each host forms its own group, which is what
// '{{.Group}}' in the alert ID and the "host" tag in the message refer to.
var groupBy = ['host']

var whereFilter = lambda: ("process_name" == 'dnsmasq')

// --- Deadman parameters ---
// Interval passed to deadman(): how often throughput is checked.
var period = 1m

// Throughput threshold passed to deadman(), in points per `period`.
var threshold = 0.0

// --- Alert identity and text ---
var name = 'Dnsmasq Deadman'

// The group is part of the ID so that each group has its own alert state.
var idVar = name + ':{{.Group}}'

var message = 'Dnsmasq not responding. {{.Time}} Restarting dnsmasq. {{ index .Tags "host"}}'

// Names of the tags/fields the alert node attaches to the points it emits.
var idTag = 'alertID'

var levelTag = 'level'

var messageField = 'message'

var durationField = 'duration'

// --- Alert history output ---
var outputDB = 'chronograf'

var outputRP = 'autogen'

var outputMeasurement = 'alerts'

var triggerType = 'deadman'

// Stream of dnsmasq procstat points, one group per host.
var data = stream
    |from()
        .database(db)
        .retentionPolicy(rp)
        .measurement(measurement)
        .groupBy(groupBy)
        .where(whereFilter)

// Deadman alert on the grouped stream; only state changes are sent to handlers.
var trigger = data
    |deadman(threshold, period)
        .stateChangesOnly()
        .message(message)
        .id(idVar)
        .idTag(idTag)
        .levelTag(levelTag)
        .messageField(messageField)
        .durationField(durationField)
        // Slack handler; channel and username are properties of this handler.
        .slack()
        .channel('#embassy')
        .username('kapacitor')
        // Log handler on the alert node.
        .log('/tmp/alerts.log')

// Record each alert event in InfluxDB, copying "emitted" into 'value' and
// keeping only the value, message and duration fields.
trigger
    |eval(lambda: "emitted")
        .as('value')
        .keep('value', messageField, durationField)
    |influxDBOut()
        .create()
        .database(outputDB)
        .retentionPolicy(outputRP)
        .measurement(outputMeasurement)
        .tag('alertName', name)
        .tag('triggerType', triggerType)

// Expose the alert data on the task's HTTP endpoint named 'output'.
trigger
    |httpOut('output')
#2

Did you ever figure out a solution for this? I have the same question…

#3

Yes, you should group by host.