1
0
Fork 0
home-stack-kustomize/kustomize/overlays/prod/configurations/grafana-agent/agent.yaml

89 lines
15 KiB
YAML

metrics:
wal_directory: /var/lib/agent/wal
global:
scrape_interval: 60s
external_labels:
cluster: cloud
# configs:
# - name: integrations
# remote_write:
# - url: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
# basic_auth:
# username: 443422
# password: ${ssm:/k3s/prod/monitoring/grafana-cloud/password}
# scrape_configs:
# - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# job_name: integrations/kubernetes/cadvisor
# kubernetes_sd_configs:
# - role: node
# metric_relabel_configs:
# - source_labels: [__name__]
# regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
# action: keep
# relabel_configs:
# - replacement: kubernetes.default.svc.cluster.local:443
# target_label: __address__
# - regex: (.+)
# replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# source_labels:
# - __meta_kubernetes_node_name
# target_label: __metrics_path__
# scheme: https
# tls_config:
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# insecure_skip_verify: false
# server_name: kubernetes
# - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# job_name: integrations/kubernetes/kubelet
# kubernetes_sd_configs:
# - role: node
# metric_relabel_configs:
# - source_labels: [__name__]
# regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
# action: keep
# relabel_configs:
# - replacement: kubernetes.default.svc.cluster.local:443
# target_label: __address__
# - regex: (.+)
# replacement: /api/v1/nodes/${1}/proxy/metrics
# source_labels:
# - __meta_kubernetes_node_name
# target_label: __metrics_path__
# scheme: https
# tls_config:
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# insecure_skip_verify: false
# server_name: kubernetes
# - job_name: integrations/kubernetes/kube-state-metrics
# kubernetes_sd_configs:
# - role: pod
# metric_relabel_configs:
# - source_labels: [__name__]
# regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
# action: keep
# relabel_configs:
# - action: keep
# regex: kube-state-metrics
# source_labels:
# - __meta_kubernetes_pod_label_app_kubernetes_io_name
integrations:
eventhandler:
cache_path: /var/lib/agent/eventhandler.cache
logs_instance: integrations
logs:
configs:
- name: integrations
clients:
- url: https://logs-prod3.grafana.net/loki/api/v1/push
basic_auth:
username: 220681
password: ${ssm:/k3s/prod/monitoring/grafana-cloud/password}
external_labels:
cluster: cloud
job: integrations/kubernetes/eventhandler
positions:
filename: /tmp/positions.yaml
target_config:
sync_period: 10s