Hello @debnath,
Sorry for the delay, my brain was refusing to cooperate yesterday…anywho you could do:
- a check and notification combined into one task.
import "influxdata/influxdb/monitor"
import "influxdata/influxdb/v1"
import "slack"
import "experimental"
import "array"
// Task definition: this script is scheduled to run once per hour with no offset.
option task = {name: "check_cpu", every: 1h, offset: 0s}

// Identifying metadata handed to monitor.notify() as its `data` argument.
notification_data = {
    _notification_rule_id: "0000000000000001",
    _notification_rule_name: "cpu_check",
    _notification_endpoint_id: "0000000000000002",
    _notification_endpoint_name: "cpu_check_slack",
}

// Slack endpoint: slack.endpoint() is first bound to the webhook URL, and the
// function it returns is then given the mapping from a status row to the
// Slack message fields (the message text comes from the row's _message).
endpoint =
    slack.endpoint(url: "https://hooks.slack.com/services/xxx/xxx")(
        mapFn: (r) => ({channel: "", text: r._message, color: "danger"}),
    )
// Read the last hour of jetson_CPU1 readings and grade every row with
// monitor.check(), which assigns a level and a message per row.
status =
    from(bucket: "jetson-stats")
        |> range(start: -1h)
        // Keep only the jetson_CPU1 field of the exec_jetson_stats measurement.
        |> filter(
            fn: (r) => r._measurement == "exec_jetson_stats" and r._field == "jetson_CPU1",
        )
        |> group(columns: ["_measurement"])
        |> monitor.check(
            data: {
                _check_name: "CPU Utilization (Used Percentage)",
                _check_id: "cpu_used_percent",
                _type: "threshold",
                tags: {},
            },
            // Level predicates on the CPU reading.
            // NOTE(review): values in (60.0, 70.0] match none of these four
            // predicates -- confirm that gap is intended.
            crit: (r) => r._value > 90.0,
            warn: (r) => r._value > 80.0,
            info: (r) => r._value > 70.0,
            ok: (r) => r._value <= 60.0,
            // Message text per level; any level other than crit/warn/info
            // falls through to the final "looking good" message.
            messageFn: (r) =>
                if r._level == "crit" then
                    "Critical alert!! CPU usage is at ${r._value}%!"
                else if r._level == "warn" then
                    "Warning! CPU usage is at ${r._value}%."
                else if r._level == "info" then
                    "CPU usage is at ${r._value}%."
                else
                    "Things are looking good.",
        )
// Keep only the rows graded as critical. (The filter previously matched
// "ok", which made the task count and alert on healthy readings even though
// the stream is named `crit`.)
crit = status |> filter(fn: (r) => r._level == "crit")

// Restrict to rows whose _time falls within the last 10 minutes.
all_statuses =
    crit
        |> filter(fn: (r) => r["_time"] >= experimental.subDuration(d: 10m, from: now()))

// Count the remaining rows and expose the count as the "count" result.
count_statuses =
    all_statuses
        // Why is a _time column added here? Because of
        // https://github.com/influxdata/flux/blob/f88b6c8560b248f93e6a9435c6b7278f915b547f/stdlib/influxdata/influxdb/monitor/monitor.flux#L126
        |> map(fn: (r) => ({r with _time: now()}))
        |> count()
        |> yield(name: "count")

// Pull the first row of the table whose group key has _type == "threshold"
// so that its _value (the count) can be used in a plain `if` expression.
record = count_statuses |> findRecord(fn: (key) => key._type == "threshold", idx: 0)
// Pipe-receiving wrapper around monitor.notify(): returns the notify output
// when the count held in `record` is greater than 3, and a placeholder table
// otherwise.
notify = (tables=<-) => {
    notified = tables |> monitor.notify(endpoint: endpoint, data: notification_data)

    // Placeholder for the "threshold not met" case. It spells out the same
    // columns as the notify output because both branches of an `if`
    // expression must have the same type.
    placeholder =
        array.from(
            rows: [
                {
                    _time: now(),
                    _field: "myCount",
                    _value: 0,
                    _level: "",
                    _notification_rule_name: "",
                    _check_name: "",
                    _sent: "",
                    _message: "",
                    _measurement: "",
                    _status_timestamp: 0,
                },
            ],
        )

    return if record._value > 3 then notified else placeholder
}

count_statuses |> notify()
Some things to note here:
Why can’t I just return a minimal noneTable such as noneTable = array.from(rows: [{ _time: now(), value:0.0 }])? In other words, why does the table in the else branch of the if expression have to match the table in the then branch?
The reason is that the if expression requires its two branches to have the same type, so noneTable has to have the same type as final_status (the output of monitor.notify()). This is why I also can’t do the following:
// Counter-example (does NOT work): the else branch returns the untouched
// input `tables`, whose type differs from the monitor.notify() output in the
// then branch, and an `if` expression needs both branches to match in type.
notify = (tables=<-) => {
final_status = tables
|> monitor.notify(
endpoint: endpoint,
data: notification_data,
)
// Originally I wanted to return the input `tables` in the else branch, but it no longer matches final_status once monitor.notify() has added its metadata.
condition_met = if record._value > 3 then final_status else tables
return condition_met
}
Also Note:
I don’t need to write a custom function for this. I took that approach originally because I wanted to compare two tables with different columns (as described in the note above).
Instead you could do:
import "influxdata/influxdb/monitor"
import "influxdata/influxdb/v1"
import "slack"
import "experimental"
import "array"
// Task definition: this script is scheduled to run once per hour with no offset.
option task = {name: "check_cpu", every: 1h, offset: 0s}

// Identifying metadata handed to monitor.notify() as its `data` argument.
notification_data = {
    _notification_rule_id: "0000000000000001",
    _notification_rule_name: "cpu_check",
    _notification_endpoint_id: "0000000000000002",
    _notification_endpoint_name: "cpu_check_slack",
}

// Slack endpoint: slack.endpoint() is first bound to the webhook URL, and the
// function it returns is then given the mapping from a status row to the
// Slack message fields (the message text comes from the row's _message).
endpoint =
    slack.endpoint(url: "https://hooks.slack.com/services/xxx/xxx")(
        mapFn: (r) => ({channel: "", text: r._message, color: "danger"}),
    )
// Read the last hour of jetson_CPU1 readings and grade every row with
// monitor.check(), which assigns a level and a message per row.
status =
    from(bucket: "jetson-stats")
        |> range(start: -1h)
        // Keep only the jetson_CPU1 field of the exec_jetson_stats measurement.
        |> filter(
            fn: (r) => r._measurement == "exec_jetson_stats" and r._field == "jetson_CPU1",
        )
        |> group(columns: ["_measurement"])
        |> monitor.check(
            data: {
                _check_name: "CPU Utilization (Used Percentage)",
                _check_id: "cpu_used_percent",
                _type: "threshold",
                tags: {},
            },
            // Level predicates on the CPU reading.
            // NOTE(review): values in (60.0, 70.0] match none of these four
            // predicates -- confirm that gap is intended.
            crit: (r) => r._value > 90.0,
            warn: (r) => r._value > 80.0,
            info: (r) => r._value > 70.0,
            ok: (r) => r._value <= 60.0,
            // Message text per level; any level other than crit/warn/info
            // falls through to the final "looking good" message.
            messageFn: (r) =>
                if r._level == "crit" then
                    "Critical alert!! CPU usage is at ${r._value}%!"
                else if r._level == "warn" then
                    "Warning! CPU usage is at ${r._value}%."
                else if r._level == "info" then
                    "CPU usage is at ${r._value}%."
                else
                    "Things are looking good.",
        )
// Keep only the rows graded as critical. (The filter previously matched
// "ok", which made the task count and alert on healthy readings even though
// the stream is named `crit`.)
crit = status |> filter(fn: (r) => r._level == "crit")

// Restrict to rows whose _time falls within the last 10 minutes.
all_statuses =
    crit
        |> filter(fn: (r) => r["_time"] >= experimental.subDuration(d: 10m, from: now()))

// Count the remaining rows and expose the count as the "count" result.
count_statuses =
    all_statuses
        // Why is a _time column added here? Because of
        // https://github.com/influxdata/flux/blob/f88b6c8560b248f93e6a9435c6b7278f915b547f/stdlib/influxdata/influxdb/monitor/monitor.flux#L126
        |> map(fn: (r) => ({r with _time: now()}))
        |> count()
        |> yield(name: "count")

// Pull the first row of the table whose group key has _type == "threshold"
// so that its _value (the count) can be used in a plain `if` expression.
record = count_statuses |> findRecord(fn: (key) => key._type == "threshold", idx: 0)

// Notification stream built from the most recent matching status row,
// re-stamped with the current time before it is passed to monitor.notify().
// (Fixed: `r with_time` was missing the space after `with`, which is a
// syntax error.)
final_notify =
    all_statuses
        |> last()
        |> map(fn: (r) => ({r with _time: now()}))
        |> monitor.notify(endpoint: endpoint, data: notification_data)

// Placeholder for the "threshold not met" case. It spells out the same
// columns as the notify output because both branches of an `if` expression
// must have the same type.
noneTable =
    array.from(
        rows: [
            {
                _time: now(),
                _field: "myCount",
                _value: 0,
                _level: "",
                _notification_rule_name: "",
                _check_name: "",
                _sent: "",
                _message: "",
                _measurement: "",
                _status_timestamp: 0,
            },
        ],
    )

// Produce the notification stream only when more than 3 rows were counted.
if int(v: record._value) > 3 then final_notify else noneTable