# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply surround
# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})

# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"

# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "30s"

## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true

## Telegraf will send metrics to outputs in batches of at most
## metric_batch_size metrics.
## This controls the size of writes that Telegraf sends to output plugins.
metric_batch_size = 1000

## Maximum number of unwritten metrics per output. Increasing this value
## allows for longer periods of output downtime without dropping metrics at the
## cost of higher maximum memory usage.
metric_buffer_limit = 10000

## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"

## Default flushing interval for all outputs. Maximum flush_interval will be
## flush_interval + flush_jitter
flush_interval = "10s"

## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"

## By default or when set to "0s", precision will be set to the same
## timestamp order as the collection interval, with the maximum being 1s.
##   ie, when interval = "10s", precision will be "1s"
##       when interval = "250ms", precision will be "1ms"
## Precision will NOT be used for service inputs. It is up to each individual
## service input to set the timestamp at the appropriate precision.
## Valid time units are "ns", "us" (or "µs"), "ms", "s".
precision = ""

## Log at debug level.
debug = false

## Log only error level messages.
# quiet = false

## Log target controls the destination for logs and can be one of "file",
## "stderr" or, on Windows, "eventlog". When set to "file", the output file
## is determined by the "logfile" setting.
logtarget = "file"

## Name of the file to be logged to when using the "file" logtarget. If set to
## the empty string then logs are written to stderr.
# NOTE(review): logtarget is "file" but logfile is empty, so per the comment
# above logs go to stderr; presumably the rotation settings below then have
# no effect. Confirm whether a log file path was intended.
logfile = ""

## The logfile will be rotated after the time interval specified. When set
## to 0 no time based rotation is performed. Logs are rotated only when
## written to, if there is no log activity rotation may be delayed.
logfile_rotation_interval = "1d"

## The logfile will be rotated when it becomes larger than the specified
## size. When set to 0 no size based rotation is performed.
logfile_rotation_max_size = "100MB"

## Maximum number of rotated archives to keep, any older logs are deleted.
## If set to -1, no archives are removed.
logfile_rotation_max_archives = 5

## Override default hostname, if empty use os.Hostname()
hostname = ""

## If set to true, do not set the "host" tag in the telegraf agent.
omit_hostname = false

###############################################################################
#                            OUTPUT PLUGINS                                   #
###############################################################################

# Configuration for sending metrics to InfluxDB
[[outputs.influxdb_v2]]
## The URLs of the InfluxDB cluster nodes.
##
## Multiple URLs can be specified for a single cluster, only ONE of the
## urls will be written to each interval.
##   ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
urls = ["http://xx.xx.xx.xx:8086"]

## Token for authentication.
# NOTE(review): this is a credential stored in plain text. Prefer supplying it
# through an environment variable (e.g. token = "${INFLUX_TOKEN}", using the
# ${} substitution described at the top of this file) and rotate this value
# if the file has been shared or committed.
token = "kcOzAHLR_xEyxG5WJoKkL41-2muNR1-ZacM2BBZ67-sK4MG1oFjpGbtpKQyCicf73_61s_O0X76fkeIiGrdC0A=="

## Organization is the name of the organization you wish to write to; must exist.
organization = "UKAS"

## Destination bucket to write into.
bucket = "telegraf"

## The value of this tag will be used to determine the bucket. If this
## tag is not set the 'bucket' option is used as the default.
# bucket_tag = ""

## If true, the bucket tag will not be added to the metric.
# exclude_bucket_tag = false

## Timeout for HTTP messages.
# timeout = "5s"

## Additional HTTP headers
# http_headers = {"X-Special-Header" = "Special-Value"}

## HTTP Proxy override; if unset, the standard proxy environment
## variables are consulted to determine which proxy, if any, should be used.
# http_proxy = "http://corporate.proxy:3128"

## HTTP User-Agent
# user_agent = "telegraf"

## Content-Encoding for write request body, can be set to "gzip" to
## compress body or "identity" to apply no encoding.
# content_encoding = "gzip"

## Enable or disable uint support for writing uints influxdb 2.0.
# influx_uint_support = false

## Optional TLS Config for use on HTTP connections.
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false

# Publish all metrics to /metrics for Prometheus to scrape
# NOTE(review): the heading above has no plugin beneath it in this part of the
# file; it looks like a leftover from a removed Prometheus output section.

###############################################################################
#                            INPUT PLUGINS                                    #
###############################################################################

[[inputs.system]]

[[inputs.mem]]
# no configuration

# Windows Performance Counters plugin.
# These are the recommended method of monitoring system metrics on windows,
# as the regular system plugins (inputs.cpu, inputs.mem, etc.) rely on WMI,
# which utilize more system resources.
#
# See more configuration examples at:
#   https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters
[[inputs.win_perf_counters]]

[[inputs.win_perf_counters.object]]
# Processor usage, alternative to native, reports on a per core.
ObjectName = "Processor"
Instances = ["*"]
Counters = [
  "% Idle Time",
  "% Interrupt Time",
  "% Privileged Time",
  "% User Time",
  "% Processor Time",
  "% DPC Time",
]
Measurement = "win_cpu"
# Set to true to include _Total instance when querying for all (*).
IncludeTotal = true

[[inputs.win_perf_counters.object]]
# Disk times and queues
ObjectName = "LogicalDisk"
Instances = ["*"]
Counters = [
  "% Idle Time",
  "% Disk Time",
  "% Disk Read Time",
  "% Disk Write Time",
  "% Free Space",
  "Current Disk Queue Length",
  "Free Megabytes",
]
Measurement = "win_disk"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
ObjectName = "PhysicalDisk"
Instances = ["*"]
Counters = [
  "Disk Read Bytes/sec",
  "Disk Write Bytes/sec",
  "Current Disk Queue Length",
  "Disk Reads/sec",
  "Disk Writes/sec",
  "% Disk Time",
  "% Disk Read Time",
  "% Disk Write Time",
]
Measurement = "win_diskio"

[[inputs.win_perf_counters.object]]
ObjectName = "Network Interface"
Instances = ["*"]
Counters = [
  "Bytes Received/sec",
  "Bytes Sent/sec",
  "Packets Received/sec",
  "Packets Sent/sec",
  "Packets Received Discarded",
  "Packets Outbound Discarded",
  "Packets Received Errors",
  "Packets Outbound Errors",
]
Measurement = "win_net"

[[inputs.win_perf_counters.object]]
ObjectName = "System"
Counters = [
  "Context Switches/sec",
  "System Calls/sec",
  "Processor Queue Length",
  "System Up Time",
]
Instances = ["------"]
Measurement = "win_system"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# Example query where the Instance portion must be removed to get data back,
# such as from the Memory object.
ObjectName = "Memory"
Counters = [
  "Available Bytes",
  "Available MBytes",
  "% Committed Bytes In Use",
  "Committed Bytes In Use",
  "Cache Faults/sec",
  "Demand Zero Faults/sec",
  "Page Faults/sec",
  "Pages/sec",
  "Transition Faults/sec",
  "Pool Nonpaged Bytes",
  "Pool Paged Bytes",
  "Standby Cache Reserve Bytes",
  "Standby Cache Normal Priority Bytes",
  "Standby Cache Core Bytes",
]
# Use 6 x - to remove the Instance bit from the query.
Instances = ["------"]
Measurement = "win_mem"
# Set to true to include _Total instance when querying for all (*).
IncludeTotal = true

[[inputs.win_perf_counters.object]]
# Paging file usage, queried through the "_Total" instance (unlike the Memory
# object above, the Instance portion is kept here).
ObjectName = "Paging File"
Counters = [
  "% Usage",
]
Instances = ["_Total"]
Measurement = "win_swap"

# Windows system plugins using WMI (disabled by default, using
# win_perf_counters over WMI is recommended)

[[inputs.win_eventlog]]
## Telegraf should have Administrator permissions to subscribe for some Windows Events channels
## (System log, for example)

## LCID (Locale ID) for event rendering
## 1033 to force English language
## 0 to use default Windows locale
# locale = 0

## Name of eventlog, used only if xpath_query is empty
## Example: "Application"
# NOTE(review): the query below is a short-form XPath with no channel named,
# and eventlog_name is left unset. The event IDs look like Security-log
# events; confirm whether eventlog_name = "Security" (or an XML query that
# names the channel) is required for the subscription to match anything.
# eventlog_name = ""

## xpath_query can be in defined short form like "Event/System[EventID=999]"
## or you can form a XML Query. Refer to the Consuming Events article:
## https://docs.microsoft.com/en-us/windows/win32/wes/consuming-events
## XML query is the recommended form, because it is most flexible
## You can create or debug XML Query by creating Custom View in Windows Event Viewer
## and then copying resulting XML here
xpath_query = ''' *[System[( (EventID >= 5152 and EventID <= 5158) or EventID=5379 or EventID=4672)]] '''

## System field names:
##   "Source", "EventID", "Version", "Level", "Task", "Opcode", "Keywords", "TimeCreated",
##   "EventRecordID", "ActivityID", "RelatedActivityID", "ProcessID", "ThreadID", "ProcessName",
##   "Channel", "Computer", "UserID", "UserName", "Message", "LevelText", "TaskText", "OpcodeText"

## In addition to System, Data fields can be unrolled from additional XML nodes in event.
## Human-readable representation of those nodes is formatted into event Message field,
## but XML is more machine-parsable

# Process UserData XML to fields, if this node exists in Event XML
process_userdata = true

# Process EventData XML to fields, if this node exists in Event XML
process_eventdata = true

## Separator character to use for unrolled XML Data field names
separator = "_"

## Get only first line of Message field. For most events first line is usually more than enough
only_first_line_of_message = false

## Parse timestamp from TimeCreated.SystemTime event field.
## Will default to current time of telegraf processing on parsing error or if set to false
timestamp_from_event = true

## Fields to include as tags. Globbing supported ("Level*" for both "Level" and "LevelText")
event_tags = [
  "Source",
  "EventID",
  "Level",
  "LevelText",
  "Task",
  "TaskText",
  "Opcode",
  "OpcodeText",
  "Keywords",
  "Channel",
  "Computer",
]

## Default list of fields to send. All fields are sent by default. Globbing supported
event_fields = ["*"]

## Fields to exclude. Also applied to data fields. Globbing supported
exclude_fields = ["TimeCreated", "Binary", "Data_Address*"]

## Skip those tags or fields if their value is empty or equals to zero. Globbing supported
exclude_empty = ["*ActivityID", "UserID"]

[[inputs.win_services]]
## Names of the services to monitor. Leave empty to monitor all the available services on the host
## Example:
# service_names = ["LanmanServer", "TermService", "telegraf", "Zabbix Agent", "bthserv"]
service_names = []

[[inputs.win_perf_counters]]

[[inputs.win_perf_counters.object]]
# HTTP Service request queues in the Kernel before being handed over to User Mode.
ObjectName = "HTTP Service Request Queues"
Instances = ["*"]
Counters = ["CurrentQueueSize", "RejectedRequests"]
Measurement = "win_http_queues"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false
[[inputs.win_perf_counters.object]]
# IIS, ASP.NET Applications
ObjectName = "ASP.NET Applications"
Counters = [
  "Cache Total Entries",
  "Cache Total Hit Ratio",
  "Cache Total Turnover Rate",
  "Output Cache Entries",
  "Output Cache Hits",
  "Output Cache Hit Ratio",
  "Output Cache Turnover Rate",
  "Compilations Total",
  "Errors Total/Sec",
  "Pipeline Instance Count",
  "Requests Executing",
  "Requests in Application Queue",
  "Requests/Sec",
]
Instances = ["*"]
Measurement = "win_aspnet_app"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# IIS, ASP.NET
ObjectName = "ASP.NET"
Counters = [
  "Application Restarts",
  "Request Wait Time",
  "Requests Current",
  "Requests Queued",
  "Requests Rejected",
]
Instances = ["*"]
Measurement = "win_aspnet"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# IIS, Web Service
ObjectName = "Web Service"
Counters = [
  "Get Requests/sec",
  "Post Requests/sec",
  "Connection Attempts/sec",
  "Current Connections",
  "ISAPI Extension Requests/sec",
]
Instances = ["*"]
Measurement = "win_websvc"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# Web Service Cache / IIS
ObjectName = "Web Service Cache"
Counters = [
  "URI Cache Hits %",
  "Kernel: URI Cache Hits %",
  "File Cache Hits %",
]
Instances = ["*"]
Measurement = "win_websvc_cache"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

# Separate plugin instance: per-process counters, restricted to the IIS
# worker process instance name ("w3wp").
[[inputs.win_perf_counters]]

[[inputs.win_perf_counters.object]]
# Process metrics, in this case for IIS only
ObjectName = "Process"
Counters = [
  "% Processor Time",
  "Handle Count",
  "Private Bytes",
  "Thread Count",
  "Virtual Bytes",
  "Working Set",
]
Instances = ["w3wp"]
Measurement = "win_proc"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false
# Separate plugin instance: .NET CLR counters, each object restricted to the
# IIS worker process instance name ("w3wp").
[[inputs.win_perf_counters]]

[[inputs.win_perf_counters.object]]
# .NET CLR Exceptions, in this case for IIS only
ObjectName = ".NET CLR Exceptions"
Counters = ["# of Exceps Thrown / sec"]
Instances = ["w3wp"]
Measurement = "win_dotnet_exceptions"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# .NET CLR Jit, in this case for IIS only
ObjectName = ".NET CLR Jit"
Counters = ["% Time in Jit", "IL Bytes Jitted / sec"]
Instances = ["w3wp"]
Measurement = "win_dotnet_jit"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# .NET CLR Loading, in this case for IIS only
ObjectName = ".NET CLR Loading"
Counters = ["% Time Loading"]
Instances = ["w3wp"]
Measurement = "win_dotnet_loading"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# .NET CLR LocksAndThreads, in this case for IIS only
ObjectName = ".NET CLR LocksAndThreads"
Counters = [
  "# of current logical Threads",
  "# of current physical Threads",
  "# of current recognized threads",
  "# of total recognized threads",
  "Queue Length / sec",
  "Total # of Contentions",
  "Current Queue Length",
]
Instances = ["w3wp"]
Measurement = "win_dotnet_locks"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# .NET CLR Memory, in this case for IIS only
ObjectName = ".NET CLR Memory"
Counters = [
  "% Time in GC",
  "# Bytes in all Heaps",
  "# Gen 0 Collections",
  "# Gen 1 Collections",
  "# Gen 2 Collections",
  "# Induced GC",
  "Allocated Bytes/sec",
  "Finalization Survivors",
  "Gen 0 heap size",
  "Gen 1 heap size",
  "Gen 2 heap size",
  "Large Object Heap size",
  "# of Pinned Objects",
]
Instances = ["w3wp"]
Measurement = "win_dotnet_mem"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false

[[inputs.win_perf_counters.object]]
# .NET CLR Security, in this case for IIS only
ObjectName = ".NET CLR Security"
Counters = [
  "% Time in RT checks",
  "Stack Walk Depth",
  "Total Runtime Checks",
]
Instances = ["w3wp"]
Measurement = "win_dotnet_security"
# Set to true to include _Total instance when querying for all (*).
# IncludeTotal = false