Hi all
I’m running InfluxDB 2.7.1 and Telegraf 1.25.3 with Docker Compose, and I store several metrics through the outputs.influxdb_v2 plugin in an InfluxDB bucket with a 90-day retention policy.
It works perfectly: my InfluxDB container has been up for 10 months, and I can restart Telegraf as many times as I want without any issues.
But when I try to migrate to Telegraf 1.29.5 (after making a few adaptations to the telegraf.conf file) and start Telegraf, a huge number of errors appear in the logs:
2024-04-18T08:14:12Z E! [outputs.influxdb_v2] Failed to write metric to telegraf (will be dropped: 422 Unprocessable Entity): unprocessable entity: failure writing points to database: partial write: points beyond retention policy dropped=2000
2024-04-18T08:14:12Z E! [outputs.influxdb_v2] Failed to write metric to telegraf (will be dropped: 422 Unprocessable Entity): unprocessable entity: failure writing points to database: partial write: points beyond retention policy dropped=2000
2024-04-18T08:14:12Z E! [outputs.influxdb_v2] Failed to write metric to telegraf (will be dropped: 422 Unprocessable Entity): unprocessable entity: failure writing points to database: partial write: points beyond retention policy dropped=2000
If I change the retention policy of my InfluxDB bucket to ‘never’ and start Telegraf 1.29.5, these errors do not appear.
I think this might be because Telegraf 1.29.5 is trying to store some metrics with timestamps that date back to when InfluxDB was started (10 months ago, which is beyond the 90 days of my initial retention policy), and that this is causing all the ‘Unprocessable Entity’ errors.
Has anyone encountered the same problem? Or is there a parameter that I can add to my telegraf.conf file to avoid this behaviour?
Many thanks for your help
telegraf.conf
# No global tags are set; the table is declared but left empty.
[global_tags]

# Agent-wide settings shared by every plugin below.
[agent]
# Collection
interval = "10s"
round_interval = true
precision = "1s"
omit_hostname = false

# Buffering and flushing.
# NOTE(review): the "dropped=2000" count in the error log equals
# metric_batch_size, i.e. whole batches appear to be rejected - confirm.
metric_batch_size = 2000
metric_buffer_limit = 500000
flush_interval = "10s"
flush_jitter = "5s"
###############################################################################
# OUTPUT PLUGINS #
###############################################################################
# Sends every collected metric to the "telegraf" bucket, gzip-compressed.
[[outputs.influxdb_v2]]
# "my_url", "my_token" and "my_org" look like redacted placeholders;
# substitute the real values before use.
urls = ["my_url"]
# String values must be quoted: a bare word such as my_token is not a valid
# TOML value and prevents the whole file from being parsed.
token = "my_token"
organization = "my_org"
bucket = "telegraf"
content_encoding = "gzip"
###############################################################################
# INPUT PLUGINS #
###############################################################################
# CPU metrics, both per core and as a total.
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
core_tags = false
# Fields removed from the emitted metrics (guest, irq, nice, softirq and
# steal counters, in both their time_* and usage_* forms).
fieldexclude = [
  "time_guest",
  "time_guest_nice",
  "time_irq",
  "time_nice",
  "time_softirq",
  "time_steal",
  "usage_guest",
  "usage_guest_nice",
  "usage_irq",
  "usage_nice",
  "usage_softirq",
  "usage_steal"
]
# Disk usage; the filesystem types listed in ignore_fs are skipped.
[[inputs.disk]]
ignore_fs = [
  "tmpfs",
  "devtmpfs",
  "devfs",
  "iso9660",
  "overlay",
  "aufs",
  "squashfs"
]

# The inputs below set no keys, so each one runs with its plugin defaults.
[[inputs.diskio]]

[[inputs.kernel]]

[[inputs.mem]]

[[inputs.processes]]

[[inputs.swap]]

[[inputs.system]]
# Container metrics read from the local Docker socket.
[[inputs.docker]]
endpoint = "unix:///var/run/docker.sock"
gather_services = false
# Empty include/exclude lists: no container-name filtering is configured.
container_name_include = []
container_name_exclude = []
timeout = "5s"
# NOTE(review): `perdevice` and `total` are presumably the legacy switches
# superseded by `perdevice_include` / `total_include`; newer Telegraf
# releases may log deprecation warnings for them - confirm against the
# docker input documentation before removing or changing either one.
perdevice = false
perdevice_include = ["cpu", "blkio", "network"]
total = false
# Empty include/exclude lists: no Docker-label filtering is configured.
docker_label_include = []
docker_label_exclude = []
# The inputs below set no keys, so each one runs with its plugin defaults.
# NOTE(review): filestat presumably needs a list of files to stat - with
# none given it likely reports nothing; confirm.
[[inputs.filestat]]
[[inputs.internal]]
[[inputs.interrupts]]
[[inputs.ipvs]]
[[inputs.kernel_vmstat]]
[[inputs.linux_sysctl_fs]]
# NOTE(review): multifile presumably needs file entries to read - with none
# given it likely reports nothing; confirm.
[[inputs.multifile]]
# Network interface metrics, with protocol statistics turned off.
[[inputs.net]]
ignore_protocol_stats = true

# TCP reachability check against the local port 80.
[[inputs.net_response]]
address = "localhost:80"
protocol = "tcp"
timeout = "1s"
read_timeout = "1s"

# TCP reachability check against the local port 443.
[[inputs.net_response]]
address = "localhost:443"
protocol = "tcp"
timeout = "1s"
read_timeout = "1s"
# netstat, nstat and zfs set no keys and run with their plugin defaults.
[[inputs.netstat]]
[[inputs.nstat]]
# sysstat is given only the path to the sadc binary.
# NOTE(review): the plugin may also need a list of activities to collect -
# confirm against the sysstat input documentation.
[[inputs.sysstat]]
sadc_path = "/usr/lib/sa/sadc" # required
[[inputs.zfs]]
# Container log lines read from the local Docker socket.
# NOTE(review): this plugin presumably stamps each metric with the time of
# the log line rather than the collection time. If old log lines are read
# again after an upgrade, their timestamps could fall outside the bucket's
# retention period and trigger "points beyond retention policy" rejections -
# confirm against the plugin's `from_beginning` documentation.
[[inputs.docker_log]]
endpoint = "unix:///var/run/docker.sock"
timeout = "5s"
# Empty include/exclude lists: no container-name filtering is configured.
container_name_include = []
container_name_exclude = []
# Hardware sensor readings from the bare-metal servers, gathered over IPMI.
[[inputs.ipmi_sensor]]
# This plugin sets its own interval, "30s", where the agent section uses "10s".
interval = "30s"
timeout = "20s"
metric_version = 2
privilege = "ADMINISTRATOR"
use_sudo = true