Derivativenode-in-kapacitor-batch

I am using a derivative node to calculate the bandwidth utilization of network devices; below is the script.

// Batch task: queries mean("bandwidth_in") * 8 for two interfaces of one host,
// takes the per-second non-negative derivative of that value, raises a
// critical alert when it exceeds `crit`, and writes the alert node's output
// to the "chronograf" database.

// Source database name.
// NOTE(review): not referenced below; the query hard-codes "router".
var database = 'router'

// Source measurement name.
// NOTE(review): not referenced below; the query hard-codes "cisco_router".
var measurement = 'cisco_router'

// Retention policy. In this script it is only passed to influxDBOut();
// the query hard-codes "autogen" for the source side.
var RP = 'autogen'

// InfluxDB cluster name.
// NOTE(review): not referenced below; .cluster() is given the literal 'network'.
var clus = 'network'

// Durations.
// NOTE(review): neither variable is referenced below; the query node uses the
// literals .period(7m) and .every(6m), so `every = 10s` has no effect.
var period = 7m

var every = 10s

// Critical threshold compared against the derivative output in the alert node.
var crit = 320

// Written as the 'alertName' tag on the points stored by influxDBOut().
var alertName = 'cisco_router_bandwidth_alert'

// NOTE(review): not referenced anywhere in this script.
var triggerType = 'threshold'

batch
// Every 6m, query the last 7m of data, grouped by all tags.
// NOTE(review): the SELECT uses the aggregate mean() but has no
// GROUP BY time(...), so each batch presumably contains a single point per
// group. A derivative needs at least two points in a batch to emit a value,
// which would explain an alert that never fires - confirm against the
// DerivativeNode documentation.
|query(''' SELECT (mean("bandwidth_in") * 8) as "value" FROM "router"."autogen"."cisco_router" where  host = '10.1.11.1' and ( interface_name = 'GigabitEthernet0/0/0' or  interface_name = 'GigabitEthernet0/0/1')  ''')
    .cluster('network')
    .period(7m)
    .every(6m)
    .groupBy(*)
// Rate of change of "value" per 1s, negative results dropped; the result
// overwrites the "value" field.
|derivative('value')
    .unit(1s)
    .nonNegative()
    .as('value')
// Critical when the derivative exceeds `crit`; only state changes are emitted.
// The alert is logged to a file and e-mailed.
// NOTE(review): the templates read the tags "device_name", "name" and "type",
// while the query filters on "host" and "interface_name" - verify that those
// tags exist on the measurement, otherwise they render empty.
|alert()
    .crit(lambda: "value" > crit)
    .stateChangesOnly()
    .message(' {{.Level}}  for {{ index .Tags "device_name" }} on Port {{ index .Tags "name" }} {{ .Time.Local.Format "2006.01.02 - 15:04:05" }} ')
    .details('''

 <pre>
 ------------------------------------------------------------------
 CLIENT NAME : XXXXXXXX
 ENVIRONMENT : Prod
 DEVICE TYPE : Router
 CATEGORY : {{ index .Tags "type" }}
 IP ADDRESS : {{ index .Tags "host" }}
 DATE : {{ .Time.Local.Format "2006.01.02 - 15:04:05" }}
 INTERFACE NAME : {{ index .Tags "name" }}
 VALUE : {{ index .Fields "value" }}
 SEVERITY : {{.Level}}
 ------------------------------------------------------------------
 </pre>
 
''')
    .log('/tmp/chronograf/cisco_router_interface_alert.log')
    .levelTag('level')
    .idTag('id')
    .messageField('message')
    .email()
    .to('XXXXXXX')
// Store what the alert node emits in chronograf.<RP>.alerts, tagged with the
// alert name.
|influxDBOut()
    .database('chronograf')
    .retentionPolicy(RP)
    .measurement('alerts')
    .tag('alertName', alertName)

But it does not show anything when I run `kapacitor watch`, and there are no errors in the logs.

Hi Amit ,

there will only be an alert when, for mean("bandwidth_in") * 8,
the value (current - previous) / (time_difference / unit) is > 320 …

best regards ,
Marc

But the thing is, when I run `kapacitor watch <task name>` it does not show anything; it just stays blank.

Hi Amit ,
what is the output of

  kapacitor list tasks

  kapacitor show your_task

Below is the output.


and for show task

kapacitor show cisco_router_bandwidth_alert
ID: cisco_router_bandwidth_alert
Error: 
Template: 
Type: batch
Status: enabled
Executing: true
Created: 14 Mar 19 16:22 IST
Modified: 01 Apr 19 12:35 IST
LastEnabled: 01 Apr 19 12:35 IST
Databases Retention Policies: ["router"."autogen"]
TICKscript:
// database
var database = 'router'

// measurement from where data is coming
var measurement = 'cisco_router'

// RP from where data is coming
var RP = 'autogen'

// which influx cluster to use
var clus = 'network'

// durations
var period = 7m

var every = 10s

// alerts
var crit = 320

var alertName = 'cisco_router_bandwidth_alert'

var triggerType = 'threshold'

batch
    |query(''' SELECT (mean("bandwidth_in") * 8) as "value" FROM "router"."autogen"."cisco_router" where  host = '10.1.11.1' and ( interface_name = 'GigabitEthernet0/0/0' or  interface_name = 'GigabitEthernet0/0/1')  ''')
        .cluster('network')
        .period(7m)
        .every(6m)
        .groupBy(*)
    |derivative('value')
        .unit(1s)
        .nonNegative()
        .as('value')
    |alert()
        .crit(lambda: "value" > crit)
        .stateChangesOnly()
        .message(' {{.Level}}  for {{ index .Tags "device_name" }} on Port {{ index .Tags "name" }} {{ .Time.Local.Format "2006.01.02 - 15:04:05" }} ')
        .details('''

 <pre>
 ------------------------------------------------------------------
 CLIENT NAME : XXXX
 ENVIRONMENT : Prod
 DEVICE TYPE : Router
 CATEGORY : {{ index .Tags "type" }}
 IP ADDRESS : {{ index .Tags "host" }}
 DATE : {{ .Time.Local.Format "2006.01.02 - 15:04:05" }}
 INTERFACE NAME : {{ index .Tags "name" }}
 VALUE : {{ index .Fields "value" }}
 SEVERITY : {{.Level}}
 ------------------------------------------------------------------
 </pre>
 
''')
        .log('/tmp/chronograf/cisco_router_interface_alert.log')
        .levelTag('level')
        .idTag('id')
        .messageField('message')
        .email()
        .to('XXXXXX')
    |influxDBOut()
        .database('chronograf')
        .retentionPolicy(RP)
        .measurement('alerts')
        .tag('alertName', alertName)

DOT:
digraph cisco_router_bandwidth_alert {
graph [throughput="0.00 batches/s"];

query1 [avg_exec_time_ns="2.686601ms" batches_queried="864" errors="0" points_queried="864" working_cardinality="0" ];
query1 -> derivative2 [processed="864"];

derivative2 [avg_exec_time_ns="1.408µs" errors="0" working_cardinality="2" ];
derivative2 -> alert3 [processed="864"];

alert3 [alerts_inhibited="0" alerts_triggered="0" avg_exec_time_ns="17.881µs" crits_triggered="0" errors="0" infos_triggered="0" oks_triggered="0" warns_triggered="0" working_cardinality="2" ];
alert3 -> influxdb_out4 [processed="0"];

influxdb_out4 [avg_exec_time_ns="0s" errors="0" points_written="0" working_cardinality="0" write_errors="0" ];
}

Hi Amit ,
Seems to work fine …
The DOT section shows that the derivative node has processed 864 data points
and none of them triggered an alert ,
It means the result of the DerivativeNode never passes 320 …

I changed the script to send mail when the bandwidth is less than 320, but the alert is still not being triggered.

image

It stays like this for minutes, whereas every other task shows its log and query every time it runs.

Hi Amit ,

can you try with

|alert()
.crit(lambda: "value" > 0)

best regards ,

I did; nothing changed. I think it is something else: Kapacitor is not executing the script, otherwise it would show up in `kapacitor watch taskname`.

What could be the reason for `kapacitor watch taskname` not showing anything in its output?

and thanks for the help.