Telegraf causing high CPU spikes (40%)

I'm using Telegraf 1.20, but the issue is also present in other versions. I was previously running a version from early 2020 and updated to see if it would improve, but nothing changed.

Screen capture of the issue: Telegraf CPU Spikes - YouTube

###############################################################################
#                               GLOBAL CONFIG                                 #
###############################################################################

# Tags attached to every metric this agent emits, written as key = "value".
[global_tags]
system_type = "####################"

# Telegraf agent settings: collection cadence, buffering/flush, logging, host identity.
[agent]
# Collection cadence
interval = "10s"
round_interval = true
collection_jitter = "0s"
precision = ""

# Buffering and flush cadence
metric_batch_size = 1000
metric_buffer_limit = 100000
flush_interval = "10s"
flush_jitter = "0s"

# Logging
debug = false
quiet = false
logfile = "/Program Files/Telegraf/telegraf.log"

# Host identity; the $NAME placeholders are not TOML features and are left
# for Telegraf to substitute from the environment.
hostname = "$COMPUTERNAME.$DOMAINFQDN"
omit_hostname = false

###############################################################################
#                                  OUTPUTS                                    #
###############################################################################

# InfluxDB output: destination for the collected metrics.
[[outputs.influxdb]]
urls = ["http://#################:8086"]
timeout = "10s"
database = "telegraf"
retention_policy = ""
write_consistency = "any"

###############################################################################
#                                  INPUTS                                     #
###############################################################################

# System input plugin, enabled with no options set.
[[inputs.system]]

# Exec input: runs one PowerShell command every 120s and records its output
# as a single string value in the "softwareVersion" measurement, tagged
# type = "application".
[[inputs.exec]]
	# The command reads DisplayVersion from the Wow6432Node uninstall registry
	# entry whose DisplayName matches the (redacted) pattern, reads a build
	# number from the second ":"-separated field of a BuildInfo text file, and
	# prints them as "V<a> R<b>.<c>.<d> BUILD <n>".
	# NOTE(review): every run starts a fresh PowerShell.exe process; that is
	# presumably not free in CPU terms — confirm whether the spikes line up
	# with this 120s interval.
	commands = ['''PowerShell.exe -Command "$version = (Get-ItemProperty HKLM:\Software\Wow6432Node\Microsoft\Windows\CurrentVersion\Uninstall\* | Where-Object DisplayName -like "################").DisplayVersion; $buildNumber = (Get-Content -Path \"${ENV:PROGRAMFILES(x86)}\######\#########\BuildInfo#######.txt\").Split(\":\")[1].Trim(); Write-Host \"V\"$version.Split(\".\")[0]\" R\"$version.Split(\".\")[1]\".\"$version.Split(\".\")[2]\".\"$version.Split(\".\")[3]\" BUILD \"$buildNumber -Separator \"\""''']
	# Measurement name used instead of the plugin default.
	name_override = "softwareVersion"
	# Per-plugin interval; overrides the agent-wide 10s for this input only.
	interval = "120s"
	# Treat the whole command output as one value, kept as a string.
	data_format = "value"
	data_type = "string"
	[inputs.exec.tags]
		type = "application"

[[inputs.win_perf_counters]]
	# Processor time breakdown for every "Processor" instance (wildcard),
	# with the total instance included.
	[[inputs.win_perf_counters.object]]
		ObjectName = "Processor"
		Measurement = "win_cpu"
		Instances = ["*"]
		IncludeTotal = true
		Counters = [
			"% Idle Time",
			"% Interrupt Time",
			"% Privileged Time",
			"% User Time",
			"% Processor Time",
			"% DPC Time",
			"DPC Rate",
		]
	
	# Memory, handle, I/O and CPU counters for every "Process" instance
	# (wildcard), with the total instance included.
	[[inputs.win_perf_counters.object]]
		ObjectName = "Process"
		Measurement = "win_process"
		Instances = ["*"]
		IncludeTotal = true
		Counters = [
			"Working Set - Private",
			"IO Read Operations/sec",
			"IO Write Operations/sec",
			"IO Data Operations/sec",
			"IO Read Bytes/sec",
			"IO Write Bytes/sec",
			"Handle Count",
			"Private Bytes",
			"Working Set",
			"Working Set Peak",
			"% Processor Time",
		]

	# Logical disk busy/idle times, queue length and free space for every
	# instance (wildcard), with the total instance included.
	[[inputs.win_perf_counters.object]]
		ObjectName = "LogicalDisk"
		Measurement = "win_disk"
		Instances = ["*"]
		IncludeTotal = true
		Counters = [
			"% Idle Time",
			"% Disk Time",
			"% Disk Read Time",
			"% Disk Write Time",
			"% User Time",
			"Current Disk Queue Length",
			"% Free Space",
			"Free Megabytes",
		]

	# Physical disk throughput, latency, queue length and busy times for
	# every instance (wildcard).
	[[inputs.win_perf_counters.object]]
		ObjectName = "PhysicalDisk"
		Measurement = "win_physdisk"
		Instances = ["*"]
		Counters = [
			"Disk Read Bytes/sec",
			"Disk Write Bytes/sec",
			"Current Disk Queue Length",
			"Avg. Disk sec/Read",
			"Avg. Disk sec/Write",
			"Split IO/sec",
			"Disk Reads/sec",
			"Disk Writes/sec",
			"% Disk Time",
			"% Disk Read Time",
			"% Disk Write Time",
		]

	# Byte/packet rates plus discard and error counts for every network
	# interface instance (wildcard).
	[[inputs.win_perf_counters.object]]
		ObjectName = "Network Interface"
		Measurement = "win_net"
		Instances = ["*"]
		Counters = [
			"Bytes Received/sec",
			"Bytes Sent/sec",
			"Bytes Total/sec",
			"Packets Received/sec",
			"Packets Sent/sec",
			"Packets Received Discarded",
			"Packets Outbound Discarded",
			"Packets Received Errors",
			"Packets Outbound Errors",
		]

	# System-wide scheduler and uptime counters.
	[[inputs.win_perf_counters.object]]
		ObjectName = "System"
		Measurement = "win_system"
		# Six dashes: placeholder used for objects queried without an instance part.
		Instances = ["------"]
		IncludeTotal = true
		Counters = [
			"Context Switches/sec",
			"System Calls/sec",
			"Processor Queue Length",
			"System Up Time",
			"Processes",
			"Threads",
		]

	# Memory availability, paging/fault rates, pool and cache sizes.
	[[inputs.win_perf_counters.object]]
		ObjectName = "Memory"
		Measurement = "win_mem"
		# Use 6 x - to remove the Instance bit from the query.
		Instances = ["------"]
		IncludeTotal = true
		Counters = [
			"Available Bytes",
			"Cache Faults/sec",
			"Demand Zero Faults/sec",
			"Page Faults/sec",
			"Pages/sec",
			"Page Reads/sec",
			"Page Writes/sec",
			"Transition Faults/sec",
			"Pool Nonpaged Bytes",
			"Pool Paged Bytes",
			"Cache Bytes",
			"Standby Cache Reserve Bytes",
			"Standby Cache Normal Priority Bytes",
			"Standby Cache Core Bytes",
			"% Committed Bytes In Use",
		]

	# Paging file usage, read from the _Total instance only.
	[[inputs.win_perf_counters.object]]
		ObjectName = "Paging File"
		Measurement = "win_swap"
		Instances = ["_Total"]
		Counters = ["% Usage", "% Usage Peak"]

	# State, wait reason, IDs and priorities for every "Thread" instance
	# (wildcard).
	# NOTE(review): with Instances = ["*"] this object yields one set of six
	# counters per thread on the host, so its cost presumably grows with the
	# thread count (e.g. while a heavily threaded server is running). It is a
	# likely candidate for the reported CPU spikes — confirm by disabling this
	# object or narrowing Instances to the processes of interest.
	[[inputs.win_perf_counters.object]]
		ObjectName = "Thread"
		Measurement = "win_thread"
		Instances = ["*"]
		# The last two names were misspelled "Prority ..." and could not match
		# the Windows counter names "Priority Base" / "Priority Current".
		Counters = [
			"Thread State",
			"Thread Wait Reason",
			"ID Process",
			"ID Thread",
			"Priority Base",
			"Priority Current",
		]
	
	# TCP check against localhost:443; the two listed fields are dropped
	# from the emitted metric.
	[[inputs.net_response]]
		address = "localhost:443"
		protocol = "tcp"
		fielddrop = ["result_type", "string_found"]
		# timeout = "1s"
		# read_timeout = "1s"

	# Jolokia agent on the local port 8080; every measurement name gets the
	# "tomcat_" prefix.
	[[inputs.jolokia2_agent]]
		name_prefix = "tomcat_"
		urls = ["http://localhost:8080/jolokia"]

	# --- Generic JVM MBeans (java.lang domain) ---

	# OS-level CPU load figures.
	[[inputs.jolokia2_agent.metric]]
		name = "OperatingSystem"
		mbean = "java.lang:type=OperatingSystem"
		paths = ["ProcessCpuLoad", "SystemLoadAverage", "SystemCpuLoad"]

	# JVM uptime.
	[[inputs.jolokia2_agent.metric]]
		name = "jvm_runtime"
		mbean = "java.lang:type=Runtime"
		paths = ["Uptime"]

	# Heap / non-heap usage and pending finalization count.
	[[inputs.jolokia2_agent.metric]]
		name = "jvm_memory"
		mbean = "java.lang:type=Memory"
		paths = ["HeapMemoryUsage", "NonHeapMemoryUsage", "ObjectPendingFinalizationCount"]

	# One series per garbage collector, tagged by collector name.
	[[inputs.jolokia2_agent.metric]]
		name = "jvm_garbage_collector"
		mbean = "java.lang:name=*,type=GarbageCollector"
		tag_keys = ["name"]
		paths = ["CollectionTime", "CollectionCount"]

	# One series per memory pool; the name tag is emitted with a "pool_" prefix.
	[[inputs.jolokia2_agent.metric]]
		name = "jvm_memory_pool"
		mbean = "java.lang:name=*,type=MemoryPool"
		tag_keys = ["name"]
		tag_prefix = "pool_"
		paths = ["Usage", "PeakUsage", "CollectionUsage"]

	# --- Tomcat MBeans (Catalina domain) ---

	# Request/byte/error totals per request processor.
	[[inputs.jolokia2_agent.metric]]
		name = "GlobalRequestProcessor"
		mbean = "Catalina:name=*,type=GlobalRequestProcessor"
		tag_keys = ["name"]
		paths = ["requestCount", "bytesReceived", "bytesSent", "processingTime", "errorCount"]

	# JSP load/reload/unload counts per web module.
	[[inputs.jolokia2_agent.metric]]
		name = "JspMonitor"
		mbean = "Catalina:J2EEApplication=*,J2EEServer=*,WebModule=*,name=jsp,type=JspMonitor"
		tag_keys = ["J2EEApplication", "J2EEServer", "WebModule"]
		paths = ["jspReloadCount", "jspCount", "jspUnloadCount"]

	# Thread pool sizing and busy-thread counts per pool.
	[[inputs.jolokia2_agent.metric]]
		name = "ThreadPool"
		mbean = "Catalina:name=*,type=ThreadPool"
		tag_keys = ["name"]
		paths = ["maxThreads", "currentThreadCount", "currentThreadsBusy"]

	# Per-servlet timing, error and request counts.
	[[inputs.jolokia2_agent.metric]]
		name = "Servlet"
		mbean = "Catalina:J2EEApplication=*,J2EEServer=*,WebModule=*,j2eeType=Servlet,name=*"
		tag_keys = ["name", "J2EEApplication", "J2EEServer", "WebModule"]
		paths = ["processingTime", "errorCount", "requestCount"]

	# Web resource cache hits and lookups per context/host.
	[[inputs.jolokia2_agent.metric]]
		name = "Cache"
		mbean = "Catalina:context=*,host=*,name=Cache,type=WebResourceRoot"
		tag_keys = ["context", "host"]
		paths = ["hitCount", "lookupCount"]

	# Server identification strings.
	[[inputs.jolokia2_agent.metric]]
		name = "Server"
		mbean = "Catalina:type=Server"
		paths = ["serverInfo", "serverBuilt"]

Hi,

Your configuration has a number of inputs, and when Telegraf runs it needs to consume some processing power to gather all of that data. That said, have you tried removing some of your inputs to see if you can pinpoint which input is causing the majority of the spike?

I run this same configuration on several machines and have no problem on most of them.

I forgot to mention that the problem only happens when Apache is running. And even if I disable Jolokia, the problem still happens.

But I'll try testing the inputs one by one.