---
# Metrics pipeline: one instance ("integrations") that scrapes three
# Kubernetes jobs (cadvisor, kubelet, kube-state-metrics) and pushes the kept
# series to the remote_write endpoint.
metrics:
  wal_directory: /var/lib/agent/wal
  global:
    scrape_interval: 60s
    external_labels:
      cluster: cloud
  configs:
    - name: integrations
      remote_write:
        - url: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
          basic_auth:
            # Quoted so the all-digit ID is always loaded as a string.
            username: "443422"
            # Placeholder, not a literal secret.
            # NOTE(review): presumably resolved from SSM by deployment tooling
            # before the agent loads this file - confirm.
            password: "${ssm:/k3s/prod/monitoring/grafana-cloud/password}"
      scrape_configs:
        # cAdvisor metrics, fetched per node through the API server proxy path
        # built in relabel_configs below.
        - job_name: integrations/kubernetes/cadvisor
          scheme: https
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: false
            server_name: kubernetes
          kubernetes_sd_configs:
            - role: node
          relabel_configs:
            # Send every request to the in-cluster API server address.
            - replacement: kubernetes.default.svc.cluster.local:443
              target_label: __address__
            # Build the per-node proxy path from the discovered node name.
            # NOTE(review): if this file passes through variable expansion
            # before loading, `${1}` may need escaping - confirm.
            - source_labels:
                - __meta_kubernetes_node_name
              regex: (.+)
              replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
              target_label: __metrics_path__
          metric_relabel_configs:
            # Keep only series whose metric name matches the allowlist.
            # The regex must stay on ONE physical line: wrapping a plain scalar
            # folds the break into a space and corrupts the metric name at the
            # wrap point. The same allowlist is repeated in every job of this
            # instance.
            - source_labels: [__name__]
              regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
              action: keep
        # Kubelet metrics; same setup as the cadvisor job except for the
        # metrics path.
        - job_name: integrations/kubernetes/kubelet
          scheme: https
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: false
            server_name: kubernetes
          kubernetes_sd_configs:
            - role: node
          relabel_configs:
            # Send every request to the in-cluster API server address.
            - replacement: kubernetes.default.svc.cluster.local:443
              target_label: __address__
            # Build the per-node proxy path from the discovered node name.
            # NOTE(review): if this file passes through variable expansion
            # before loading, `${1}` may need escaping - confirm.
            - source_labels:
                - __meta_kubernetes_node_name
              regex: (.+)
              replacement: /api/v1/nodes/${1}/proxy/metrics
              target_label: __metrics_path__
          metric_relabel_configs:
            # Keep only allowlisted metric names; keep the regex on ONE line.
            - source_labels: [__name__]
              regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
              action: keep
        # kube-state-metrics, discovered as pods and filtered by pod label.
        - job_name: integrations/kubernetes/kube-state-metrics
          kubernetes_sd_configs:
            - role: pod
          relabel_configs:
            # Keep only targets whose app.kubernetes.io/name pod label is
            # kube-state-metrics.
            - source_labels:
                - __meta_kubernetes_pod_label_app_kubernetes_io_name
              regex: kube-state-metrics
              action: keep
          metric_relabel_configs:
            # Keep only allowlisted metric names; keep the regex on ONE line.
            - source_labels: [__name__]
              regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition
              action: keep
# Integrations: configures the eventhandler integration.
integrations:
  eventhandler:
    cache_path: /var/lib/agent/eventhandler.cache
    # Names the logs instance to use; an instance called `integrations` is
    # defined under logs.configs in this file.
    logs_instance: integrations
# Logs pipeline: one instance ("integrations") that pushes to the client URL
# below; it is the instance named by integrations.eventhandler.logs_instance.
logs:
  configs:
    - name: integrations
      clients:
        - url: https://logs-prod3.grafana.net/loki/api/v1/push
          basic_auth:
            # Quoted so the all-digit ID is always loaded as a string.
            username: "220681"
            # Placeholder, not a literal secret.
            # NOTE(review): presumably resolved from SSM by deployment tooling
            # before the agent loads this file - confirm.
            password: "${ssm:/k3s/prod/monitoring/grafana-cloud/password}"
          # Labels configured on this client.
          external_labels:
            cluster: cloud
            job: integrations/kubernetes/eventhandler
      positions:
        filename: /tmp/positions.yaml
      target_config:
        sync_period: 10s