Hi Team,
We have a TICKscript that alerts when the healthy host count value changes, but it is giving incorrect data to Kapacitor.
It is querying data per availability zone instead of per region. I have attached the TICKscript query we are using.
Could we get some help with this? It is critical monitoring that our business requires.
Part of the TICKscript:
// Configuration for the production healthy-host-count alert.
// TICKscript accepts only ASCII quotes: single quotes delimit string literals,
// double quotes reference a tag or field. The typographic quotes from the
// pasted script have been replaced accordingly.

// Keep only prd points that carry every tag/field listed here and whose
// load balancer and target group names match the patterns.
// NOTE(review): the "load_balancer" pattern may have lost its `*` quantifiers
// when the script was pasted (compare the "target_group" pattern) - confirm
// against the deployed task before relying on it.
var whereFilter = lambda: (isPresent("account") AND "account" =~ /^prd$/ AND isPresent("target_group") AND isPresent("availability_zone") AND isPresent("region") AND isPresent("load_balancer") AND isPresent("healthy_host_count_minimum") AND isPresent("healthy_host_count_average") AND "load_balancer" =~ /^.XXX-XXX-./ AND "target_group" =~ /^.*XXX-prd.*/)

// Field the alert is based on.
var fieldToEvaluate = lambda: "healthy_host_count_minimum"

// Thresholds used by the alert expressions.
//var warnThreshold = 2
var warnThreshold = 6

var critThreshold = 0
//var critThreshold = 3

// Window length and how often a window is emitted.
var period = 15m
//TODO:we can change this to 5m frequency
var every = 1m

// Tags that define one alert series. availability_zone is not in this list,
// so points from every AZ of a target group fall into the same group.
var group_By = ['account', 'region', 'load_balancer', 'target_group']

// Source database and retention policy.
var db = 'telegraf'
var rp = 'autogen'

// Alert ID and message templates (Go template syntax; `name` is defined
// outside this excerpt).
var idVar = name + ': {{ index .Tags "target_group" }}'
// NOTE(review): this template reads a field called "value"; the pipeline in
// this script emits its result as 'stat' - confirm which one is intended.
var message = '{{.ID}} is {{.Level}}: {{.Name}} = {{ index .Fields "value" }}'

// Names of the tags/fields the alert nodes attach to each alert point.
var idTag = 'alertID'
var levelTag = 'level'
var messageField = 'message'
var durationField = 'duration'

// Destination for recorded alert events.
var outputDB = 'chronograf'
var outputRP = 'autogen'
var outputMeasurement = 'alerts'
var triggerType = 'threshold'
// Select the matching points from the stream, group them by the tags in
// group_By, and reduce each window to the maximum healthy_host_count_minimum.
// The result is exposed to downstream nodes as the field 'stat'.
// `measurement` is defined outside this excerpt.
//
// NOTE(review): group_By does not contain availability_zone, so max() returns
// the largest single value seen within a group during the window. If the
// source reports one point per AZ, that is the highest per-AZ count, not a
// regional total. For a regional figure the per-AZ values would have to be
// combined (e.g. summed) before alerting - confirm the intended aggregation.
var data = stream
    |from()
        .database(db)
        .retentionPolicy(rp)
        .measurement(measurement)
        .groupBy(group_By)
        .where(whereFilter)
    |window()
        .align()
        .period(period)
        .every(every)
        .fillPeriod()
    |max('healthy_host_count_minimum')
        .as('stat')
// Warning alert: fires when 'stat' is below warnThreshold and is not equal to
// critThreshold. The alert is emailed and also written to
// outputDB/outputRP/outputMeasurement, tagged with the alert name and trigger type.
// `subject`, `details` and `name` are defined outside this excerpt.
data
    |alert()
        .warn(lambda: "stat" < warnThreshold AND "stat" != critThreshold)
        .stateChangesOnly()
        .message(subject)
        .id(idVar)
        .idTag(idTag)
        .levelTag(levelTag)
        .messageField(messageField)
        .durationField(durationField)
        .details(details)
        .email()
    |influxDBOut()
        .create()
        .database(outputDB)
        .retentionPolicy(outputRP)
        .measurement(outputMeasurement)
        .tag('alertName', name)
        .tag('triggerType', triggerType)
// Critical alert: fires when prd_crit_threshold_lambda evaluates to true.
// The alert is emailed and also written to
// outputDB/outputRP/outputMeasurement, tagged with the alert name and trigger type.
// `prd_crit_threshold_lambda`, `subject`, `details` and `name` are defined
// outside this excerpt.
data
    |alert()
        .crit(prd_crit_threshold_lambda)
        .stateChangesOnly()
        .message(subject)
        .id(idVar)
        .idTag(idTag)
        .levelTag(levelTag)
        .messageField(messageField)
        .durationField(durationField)
        .details(details)
        // PagerDuty handler is currently disabled.
        // .pagerDuty2()
        .email()
    |influxDBOut()
        .create()
        .database(outputDB)
        .retentionPolicy(outputRP)
        .measurement(outputMeasurement)
        .tag('alertName', name)
        .tag('triggerType', triggerType)
Could you point us to any references?
Thanks,
karthik.