diff --git a/kustomize/bases/grafana-agent/grafana-agent-volume.yaml b/kustomize/bases/grafana-agent/grafana-agent-volume.yaml index f22ff07..959d4fc 100644 --- a/kustomize/bases/grafana-agent/grafana-agent-volume.yaml +++ b/kustomize/bases/grafana-agent/grafana-agent-volume.yaml @@ -7,4 +7,4 @@ spec: - ReadWriteOnce resources: requests: - storage: 5Gi + storage: 1Gi diff --git a/kustomize/bases/grafana-agent/patches/grafana-agent-statefulset-patch.yaml b/kustomize/bases/grafana-agent/patches/grafana-agent-statefulset-patch.yaml index e3a303b..381ffce 100644 --- a/kustomize/bases/grafana-agent/patches/grafana-agent-statefulset-patch.yaml +++ b/kustomize/bases/grafana-agent/patches/grafana-agent-statefulset-patch.yaml @@ -1,13 +1,19 @@ -- op: remove +- op: replace path: /spec/volumeClaimTemplates -- op: add + value: + - metadata: + name: agent-wal + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +- op: replace path: /spec/template/spec/volumes value: - name: grafana-agent secret: secretName: grafana-agent - - name: agent-wal - persistentVolumeClaim: - claimName: grafana-agent-wal-pvc diff --git a/kustomize/overlays/prod/configurations/grafana-agent/agent.yaml b/kustomize/overlays/prod/configurations/grafana-agent/agent.yaml index 0f94595..1557f90 100644 --- a/kustomize/overlays/prod/configurations/grafana-agent/agent.yaml +++ b/kustomize/overlays/prod/configurations/grafana-agent/agent.yaml @@ -4,68 +4,68 @@ metrics: scrape_interval: 60s external_labels: cluster: cloud - configs: - - name: integrations - remote_write: - - url: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push - basic_auth: - username: 443422 - password: ${ssm:/k3s/prod/monitoring/grafana-cloud/password} - scrape_configs: - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: integrations/kubernetes/cadvisor - kubernetes_sd_configs: - - role: node - metric_relabel_configs: - - source_labels: [__name__] - regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition - action: keep - relabel_configs: - - replacement: kubernetes.default.svc.cluster.local:443 - target_label: __address__ - - regex: (.+) - replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor - source_labels: - - __meta_kubernetes_node_name - target_label: __metrics_path__ - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: false - server_name: kubernetes - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: integrations/kubernetes/kubelet - kubernetes_sd_configs: - - role: node - metric_relabel_configs: - - source_labels: [__name__] - regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition - action: keep - relabel_configs: - - replacement: kubernetes.default.svc.cluster.local:443 - target_label: __address__ - - regex: (.+) - replacement: /api/v1/nodes/${1}/proxy/metrics - source_labels: - - __meta_kubernetes_node_name - target_label: __metrics_path__ - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: false - server_name: kubernetes - - job_name: integrations/kubernetes/kube-state-metrics - kubernetes_sd_configs: - - role: pod - metric_relabel_configs: - - source_labels: [__name__] - regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition - action: keep - relabel_configs: - - action: keep - regex: kube-state-metrics - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_name + # configs: + # - name: integrations + # remote_write: + # - url: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push + # basic_auth: + # username: 443422 + # password: ${ssm:/k3s/prod/monitoring/grafana-cloud/password} + # scrape_configs: + # - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + # job_name: integrations/kubernetes/cadvisor + # kubernetes_sd_configs: + # - role: node + # metric_relabel_configs: + # - source_labels: [__name__] + # regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition + # action: keep + # relabel_configs: + # - replacement: kubernetes.default.svc.cluster.local:443 + # target_label: __address__ + # - regex: (.+) + # replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + # source_labels: + # - __meta_kubernetes_node_name + # target_label: __metrics_path__ + # scheme: https + # tls_config: + # ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # insecure_skip_verify: false + # server_name: kubernetes + # - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + # job_name: integrations/kubernetes/kubelet + # kubernetes_sd_configs: + # - role: node + # metric_relabel_configs: + # - source_labels: [__name__] + # regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition + # action: keep + # relabel_configs: + # - replacement: kubernetes.default.svc.cluster.local:443 + # target_label: __address__ + # - regex: (.+) + # replacement: /api/v1/nodes/${1}/proxy/metrics + # source_labels: + # - __meta_kubernetes_node_name + # target_label: __metrics_path__ + # scheme: https + # tls_config: + # ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # insecure_skip_verify: false + # server_name: kubernetes + # - job_name: integrations/kubernetes/kube-state-metrics + # kubernetes_sd_configs: + # - role: pod + # metric_relabel_configs: + # - source_labels: [__name__] + # regex: namespace_memory:kube_pod_container_resource_requests:sum|kubelet_running_containers|container_cpu_usage_seconds_total|kube_pod_container_info|container_network_receive_packets_dropped_total|kube_pod_status_phase|kubelet_pod_start_duration_seconds_count|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_status_desired_replicas|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|node_namespace_pod_container:container_memory_swap|kube_statefulset_status_replicas_ready|kube_horizontalpodautoscaler_spec_max_replicas|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|process_cpu_seconds_total|process_resident_memory_bytes|kubelet_server_expiration_renew_errors|kube_daemonset.*|container_fs_reads_total|machine_memory_bytes|kubelet_volume_stats_inodes_used|volume_manager_total_volumes|kube_statefulset_status_replicas|namespace_cpu:kube_pod_container_resource_limits:sum|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kubelet_pod_worker_duration_seconds_count|namespace_workload_pod:kube_pod_owner:relabel|kubelet_cgroup_manager_duration_seconds_count|container_cpu_cfs_throttled_periods_total|kube_node_spec_taint|container_fs_reads_bytes_total|kubelet_certificate_manager_client_ttl_seconds|container_network_receive_bytes_total|kubelet_running_container_count|kube_daemonset_status_number_available|kube_node_status_allocatable|container_fs_writes_total|kube_namespace_status_phase|kubelet_volume_stats_available_bytes|kubelet_pleg_relist_duration_seconds_bucket|kubelet_runtime_operations_errors_total|kube_pod_container_status_waiting_reason|kube_replicaset_owner|kube_resourcequota|kube_pod_info|kubelet_pleg_relist_duration_seconds_count|kube_deployment_status_replicas_available|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|kubelet_running_pods|kube_statefulset_status_replicas_updated|kube_deployment_status_replicas_updated|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kubelet_certificate_manager_server_ttl_seconds|container_network_transmit_bytes_total|container_memory_cache|kubelet_volume_stats_capacity_bytes|node_namespace_pod_container:container_memory_cache|container_memory_rss|container_memory_swap|storage_operation_duration_seconds_count|kube_replicaset.*|kube_pod_owner|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kubelet_volume_stats_inodes|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_updated_number_scheduled|kube_statefulset.*|kube_node_info|go_goroutines|kubelet_pod_worker_duration_seconds_bucket|kubelet_node_config_error|container_cpu_cfs_periods_total|kubelet_pleg_relist_interval_seconds_bucket|kube_job.*|container_network_receive_packets_total|container_fs_writes_bytes_total|kubelet_running_pod_count|kube_deployment_spec_replicas|up|kube_node_status_capacity|namespace_cpu:kube_pod_container_resource_requests:sum|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_working_set_bytes|kubelet_node_name|node_namespace_pod_container:container_memory_rss|storage_operation_errors_total|kube_statefulset_metadata_generation|container_network_transmit_packets_total|kubelet_runtime_operations_total|kube_statefulset_status_observed_generation|kube_horizontalpodautoscaler_status_current_replicas|kubernetes_build_info|kubelet_certificate_manager_client_expiration_renew_errors|kube_job_failed|namespace_workload_pod|node_namespace_pod_container:container_memory_working_set_bytes|kube_statefulset_replicas|kube_deployment_status_observed_generation|kube_pod_container_status_restarts_total|kube_daemonset_status_current_number_scheduled|kube_pod_start_time|namespace_memory:kube_pod_container_resource_limits:sum|container_network_transmit_packets_dropped_total|rest_client_requests_total|kube_deployment_metadata_generation|kube_statefulset_status_update_revision|kube_job_status_succeeded|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_current_revision|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_node_status_condition + # action: keep + # relabel_configs: + # - action: keep + # regex: kube-state-metrics + # source_labels: + # - __meta_kubernetes_pod_label_app_kubernetes_io_name integrations: eventhandler: diff --git a/kustomize/overlays/prod/kustomization.yaml b/kustomize/overlays/prod/kustomization.yaml index 3ed4d9c..d7c91c7 100644 --- a/kustomize/overlays/prod/kustomization.yaml +++ b/kustomize/overlays/prod/kustomization.yaml @@ -12,7 +12,7 @@ images: - name: gitea/gitea newTag: 1.15.7 - name: grafana/grafana - newTag: 8.3.4 + newTag: 8.5.3 - name: prom/node-exporter newTag: v0.18.1 - name: prom/blackbox-exporter