Hi,
My Telegraf container stops sending all metrics to the InfluxDB database after a period of time; how long it keeps working depends on the resolution and volume of the data passing through it.
It consistently breaks after the same amount of time has passed since the Telegraf container was last restarted (it is restarted by a cron job at 8:00 am each day), and that interval shrinks as the amount of data passing through grows.
I have the following relevant Docker instances for my server setup:
- MQTT broker (mosquitto)
- Telegraf
- Influxdb
# Telegraf Configuration
# Global tags can be specified here in key="value" format.
[global_tags]
# Configuration for telegraf agent
[agent]
# Collect every second. At this rate the metric buffer below can fill in
# well under a minute if an output stalls.
interval = "1s"
round_interval = true
# Up to 1000 metrics per write; at most 10000 metrics buffered across all
# outputs. Once the buffer is full, the oldest metrics are dropped — check
# the telegraf log for "metric buffer overflow" messages when metrics stop.
metric_batch_size = 1000
metric_buffer_limit = 10000
# NOTE(review): "collection_interval" is not an [agent] option — the collect
# period is "interval" above. Harmless while commented out; consider removing.
#collection_interval = "10s"
collection_jitter = "1s"
flush_interval = "3s"
flush_jitter = "1s"
# Empty precision: timestamps are kept at the resolution the inputs produce.
precision = ""
hostname = "telegraf-MQTT"
omit_hostname = false
# test1
[[outputs.influxdb]]
# Route only the "device1"/"device2" measurements to this database.
namepass = ["device1", "device2"]
database = "test1"
urls = ["http://influxdb:8086"]
username = "user-tel"
password = "pwd2"
# NOTE(review): if writes to this output start failing or timing out, metrics
# back up in the agent buffer (metric_buffer_limit) and the MQTT consumers'
# in-flight delivery tracking — a plausible cause of the periodic stall.
# Check telegraf logs for write errors at the time metrics stop. TODO confirm
timeout = "5s"
# test3_test
[[outputs.influxdb]]
# Route only the "device1_test3" measurement to this database.
namepass = ["device1_test3"]
database = "test3_test"
urls = ["http://influxdb:8086"]
username = "user-tel"
password = "pwd2"
# NOTE(review): a stalled or slow write here blocks acknowledgement of the
# QoS-1 CSV consumer's messages as well — see the test3 input below.
timeout = "5s"
# === test1_device1 ===
[[inputs.mqtt_consumer]]
name_override = "device1"
# NOTE(review): port 8883 is conventionally MQTT over TLS, but the "tcp://"
# scheme is plain TCP and no tls_* options are set — confirm the broker
# really serves unencrypted MQTT on 8883.
servers = ["tcp://123.123.123.123:8883"]
# QoS 0: fire-and-forget; no delivery tracking on this consumer.
qos = 0
persistent_session = false
## If unset, a random client ID will be generated.
# client_id must be unique per consumer on the broker — a duplicate causes
# the broker to repeatedly disconnect both clients.
client_id = "1002"
## Topics that will be subscribed to.
topics = [
"test1/processed/device1"
]
username = "user-name"
password = "pass-word"
data_format = "json"
# Measurement name is taken from the "site" JSON key (overridden above),
# timestamp from "unix_time" interpreted as a unix epoch value.
json_name_key = "site"
tag_keys = ["device"]
json_time_key = "unix_time"
json_time_format = "unix"
# === test1_device2 ===
[[inputs.mqtt_consumer]]
name_override = "device2"
servers = ["tcp://123.123.123.123:8883"]
# NOTE(review): with qos = 1 the plugin tracks in-flight messages and stops
# reading from the broker once max_undelivered_messages (default 1000) are
# awaiting delivery to the outputs. If an output stalls, this consumer
# silently pauses — consistent with the reported symptom. Consider raising
# max_undelivered_messages or fixing the output stall. TODO confirm in logs
qos = 1
persistent_session = false
## If unset, a random client ID will be generated.
client_id = "1003"
## Topics that will be subscribed to.
topics = [
"test1/processed/device2"
]
username = "user-name"
password = "pass-word"
data_format = "json"
json_name_key = "site"
tag_keys = ["device"]
json_time_key = "unix_time"
json_time_format = "unix"
# === test2 ===
[[inputs.mqtt_consumer]]
# NOTE(review): measurement "test2_tel" matches neither output's namepass
# (["device1", "device2"] and ["device1_test3"]), so metrics collected here
# are never written anywhere — confirm this is intentional.
name_override = "test2_tel"
servers = ["tcp://123.123.123.123:8883"]
qos = 0
persistent_session = false
## If unset, a random client ID will be generated.
client_id = "1005"
## Topics that will be subscribed to.
# Wildcard: every topic under test2/processed/.
topics = [
"test2/processed/#"
]
username = "user-name"
password = "pass-word"
data_format = "json"
json_name_key = "site"
tag_keys = ["device"]
json_time_key = "unix_time"
json_time_format = "unix"
# === test3_test ===
[[inputs.mqtt_consumer]]
name_override = "device1_test3"
servers = ["tcp://123.123.123.123:8883"]
# NOTE(review): qos = 1 + persistent_session = true means the broker queues
# messages for client_id 1030 while telegraf is down, and the plugin pauses
# reading once max_undelivered_messages (default 1000) are in flight waiting
# on the outputs. A stalled influxdb write therefore freezes this consumer —
# a likely cause of the periodic stop. TODO confirm against telegraf logs.
qos = 1
persistent_session = true
## If unset, a random client ID will be generated.
client_id = "1030"
## Topics that will be subscribed to.
topics = [
"test3/raw/device1"
]
username = "user-name"
password = "pass-word"
data_format = "csv"
# No header row in the payload; every column is named explicitly below.
csv_header_row_count = 0
# renamed columns names to something shorter for posted question
# FIX: the original column list contained a literal newline inside the
# "temp428" string (the array was broken mid-string across two lines), which
# is invalid TOML — basic strings cannot span lines. The list is rejoined
# here, wrapped only between elements, which TOML permits inside an array.
# NOTE(review): several names repeat (temp11–temp19, temp21–temp29,
# temp31–temp39 and temp41–temp49 each appear twice); with the CSV parser,
# duplicate column names collapse into one field, so later columns overwrite
# earlier ones. Presumably an artifact of renaming for the post — verify
# against the real config.
csv_column_names = [
"TimeStamp","temp1","temp2","temp3","temp4","temp5","temp6","temp7","temp8","temp9",
"temp11","temp12","temp13","temp14","temp15","temp16","temp17","temp18","temp19",
"temp21","temp22","temp23","temp24","temp25","temp26","temp27","temp28","temp29",
"temp31","temp32","temp33","temp34","temp35","temp36","temp37","temp38","temp39",
"temp41","temp42","temp43","temp44","temp45","temp46","temp47","temp48","temp49",
"temp11","temp12","temp13","temp14","temp15","temp16","temp17","temp18","temp19",
"temp111","temp112","temp113","temp114","temp115","temp116","temp117","temp118","temp119",
"temp121","temp122","temp123","temp124","temp125","temp126","temp127","temp128","temp129",
"temp131","temp132","temp133","temp134","temp135","temp136","temp137","temp138","temp139",
"temp141","temp142","temp143","temp144","temp145","temp146","temp147","temp148","temp149",
"temp21","temp22","temp23","temp24","temp25","temp26","temp27","temp28","temp29",
"temp211","temp212","temp213","temp214","temp215","temp216","temp217","temp218","temp219",
"temp221","temp222","temp223","temp224","temp225","temp226","temp227","temp228","temp229",
"temp231","temp232","temp233","temp234","temp235","temp236","temp237","temp238","temp239",
"temp241","temp242","temp243","temp244","temp245","temp246","temp247","temp248","temp249",
"temp31","temp32","temp33","temp34","temp35","temp36","temp37","temp38","temp39",
"temp311","temp312","temp313","temp314","temp315","temp316","temp317","temp318","temp319",
"temp321","temp322","temp323","temp324","temp325","temp326","temp327","temp328","temp329",
"temp331","temp332","temp333","temp334","temp335","temp336","temp337","temp338","temp339",
"temp341","temp342","temp343","temp344","temp345","temp346","temp347","temp348","temp349",
"temp41","temp42","temp43","temp44","temp45","temp46","temp47","temp48","temp49",
"temp411","temp412","temp413","temp414","temp415","temp416","temp417","temp418","temp419",
"temp421","temp422","temp423","temp424","temp425","temp426","temp427","temp428","temp429",
"temp431","temp432","temp433","temp434","temp435","temp436","temp437","temp438","temp439",
"temp441","temp442","temp443","temp444","temp445","temp446","temp447","temp448","temp449",
]
csv_skip_rows = 0
csv_skip_columns = 0
csv_delimiter = ","
csv_comment = ""
csv_trim_space = false
csv_tag_columns = []
csv_measurement_column = ""
So far, the only temporary workaround has been scheduling regular cron jobs that restart the Telegraf container more frequently than it fails.
Any help resolving this is much appreciated. Thank you.