Hello, Im trying to solve the following problem. I have a measurement from kubernetes wich exposes the max number of instance from hpa and the other one which exposes the current number of instances running, both metrics have a tag “hpa” and “deployment” wich content its the same. Im trying to obtain the percent running respect the total, everything its working fine until the join node.
I obtain the metrics from prometheus via telegraf so maybe the problem its from here. This is the tickscript code and task results.
var measurement_replicas = ‘kube_deployment_status_replicas’
var measurement_total = ‘kube_hpa_spec_max_replicas’
var name = ‘Porting - 80% instances’
var message = '{{ .ID }} is {{ .Level }} value : {{ index .Fields “percent_instance” }} ’
var idTag = ‘alertID’
var filter = lambda: “job” == ‘kubernetes-pods’ AND “deployment” == ‘atp’ OR “deployment” == ‘crossbenefits’ OR “deployment” == ‘servicetestmanagement’ OR “deployment” == ‘products’ OR “deployment” == ‘salesmanagement’ OR “deployment” == ‘mdggateway’ OR “deployment” == ‘customerviewmdg’ OR “deployment” == ‘xplmobileinsurance’
var filter2 = lambda: “job” == ‘kubernetes-service-endpoints’
var idVar = name + ‘-{{.Group}}’
var levelTag = ‘level’
var messageField = ‘message’
var durationField = ‘duration’
var triggerType = ‘threshold’
var crit = 0
var replicas_running = stream
|from()
.database(db)
.retentionPolicy(rp)
.measurement(measurement_replicas)
.where(filter)
.groupBy(‘deployment’)
|window()
.period(5m)
.every(1m)
|last(‘value’)
.as(‘value’)
var replicas_total = stream
|from()
.database(db)
.retentionPolicy(rp)
.measurement(measurement_total)
.where(filter2)
.groupBy(‘hpa’)
|window()
.period(10m)
.every(1m)
|last(‘value’)
.as(‘value’)
|eval(lambda: “hpa”)
.as(‘deployment’)
.keep()
.tags(‘deployment’)
|delete()
.tag(‘hpa’)
replicas_total
|join(replicas_running)
.as(‘replicas_total’, ‘replicas_running’)
.on(‘deployment’)
.tolerance(2m)
.fill(0.0)
|eval(lambda: float(100.0 * “replicas_running.value”) / float(“replicas_total.value”))
.as(‘percent_instance’)
|alert()
.crit(lambda: “percent_instance” >= crit)
.stateChangesOnly()
.message(message)
.id(idVar)
.idTag(idTag)
.levelTag(levelTag)
.messageField(messageField)
.durationField(durationField)
.log(’/var/lib/kapacitor/test.log’)
DOT:
digraph TEST-PODS-STREAM {
graph [throughput=“0.00 points/s”];
stream0 [avg_exec_time_ns=“0s” errors=“0” working_cardinality=“0” ];
stream0 -> from4 [processed=“928”];
stream0 -> from1 [processed=“928”];
from4 [avg_exec_time_ns=“1.934µs” errors=“0” working_cardinality=“0” ];
from4 -> window5 [processed=“232”];
window5 [avg_exec_time_ns=“1.447µs” errors=“0” working_cardinality=“8” ];
window5 -> last6 [processed=“64”];
last6 [avg_exec_time_ns=“9.236µs” errors=“0” working_cardinality=“8” ];
last6 -> eval7 [processed=“64”];
eval7 [avg_exec_time_ns=“8.885µs” errors=“0” working_cardinality=“8” ];
eval7 -> delete8 [processed=“64”];
delete8 [avg_exec_time_ns=“5.446µs” errors=“0” fields_deleted=“0” tags_deleted=“64” working_cardinality=“0” ];
delete8 -> join10 [processed=“64”];
from1 [avg_exec_time_ns=“3.328µs” errors=“0” working_cardinality=“0” ];
from1 -> window2 [processed=“232”];
window2 [avg_exec_time_ns=“2.747µs” errors=“0” working_cardinality=“8” ];
window2 -> last3 [processed=“72”];
last3 [avg_exec_time_ns=“10.633µs” errors=“0” working_cardinality=“8” ];
last3 -> join10 [processed=“72”];
join10 [avg_exec_time_ns=“4.794µs” errors=“0” working_cardinality=“0” ];
join10 -> eval11 [processed=“0”];
eval11 [avg_exec_time_ns=“0s” errors=“0” working_cardinality=“0” ];
eval11 -> alert12 [processed=“0”];
alert12 [alerts_inhibited=“0” alerts_triggered=“0” avg_exec_time_ns=“0s” crits_triggered=“0” errors=“0” infos_triggered=“0” oks_triggered=“0” warns_triggered=“0” working_cardinality=“0” ];
Thanks!