manifests: Regenerate
@@ -283,7 +283,7 @@ items:
 "tableColumn": "",
 "targets": [
 {
-"expr": "apiserver_request:availability7d{verb=\"read\"}",
+"expr": "apiserver_request:availability30d{verb=\"read\"}",
 "format": "time_series",
 "intervalFactor": 2,
 "legendFormat": "",
@@ -654,7 +654,7 @@ items:
 "tableColumn": "",
 "targets": [
 {
-"expr": "apiserver_request:availability7d{verb=\"write\"}",
+"expr": "apiserver_request:availability30d{verb=\"write\"}",
 "format": "time_series",
 "intervalFactor": 2,
 "legendFormat": "",
@@ -27084,10 +27084,10 @@ items:
 "steppedLine": false,
 "targets": [
 {
-"expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
+"expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27175,10 +27175,10 @@ items:
 "steppedLine": false,
 "targets": [
 {
-"expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
+"expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27279,10 +27279,10 @@ items:
 "steppedLine": false,
 "targets": [
 {
-"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
+"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27387,7 +27387,7 @@ items:
 "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27478,7 +27478,7 @@ items:
 "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27569,7 +27569,7 @@ items:
 "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27660,7 +27660,7 @@ items:
 "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27764,7 +27764,7 @@ items:
 "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -27855,7 +27855,7 @@ items:
 "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -28050,7 +28050,7 @@ items:
 "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{consumer}}",
 "refId": "A"
 }
 ],
@@ -28154,7 +28154,7 @@ items:
 "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -28245,7 +28245,7 @@ items:
 "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -28336,7 +28336,7 @@ items:
 "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -28427,7 +28427,7 @@ items:
 "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
 "format": "time_series",
 "intervalFactor": 2,
-"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
+"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
 "refId": "A"
 }
 ],
@@ -28580,11 +28580,11 @@ items:
 "includeAll": true,
 "label": null,
 "multi": false,
-"name": "queue",
+"name": "url",
 "options": [

 ],
-"query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, queue)",
+"query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)",
 "refresh": 2,
 "regex": "",
 "sort": 0,
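
Note on the dashboard hunks above: newer Prometheus releases drop the `queue` label from the prometheus_remote_storage_* series in favour of `remote_name` and `url`, so the ignoring(...) clauses, the legend formats, and the `queue` templating variable all move to the new labels. A minimal PromQL sketch of the new join, using only metric and label names that appear in the hunks ($instance is the dashboard's template variable):

# Seconds the remote-write queues lag behind the WAL: highest timestamp seen
# minus highest timestamp sent, one result per remote endpoint.
(
  prometheus_remote_storage_highest_timestamp_in_seconds{instance=~"$instance"}
  # The sent-side series carry remote_name and url; ignore them for the match
  # and let group_right keep the per-endpoint series as the many side.
  - ignoring(remote_name, url) group_right(instance)
  prometheus_remote_storage_queue_highest_sent_timestamp_seconds{instance=~"$instance"}
)
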
@@ -4,7 +4,7 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 namespace: monitoring
 spec:
@@ -19,4 +19,4 @@ spec:
 matchLabels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1

@@ -389,12 +389,15 @@ spec:
 verb: write
 record: apiserver_request:availability30d
 - expr: |
-sum by (code) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30d]))
+sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d]))
+record: code_verb:apiserver_request_total:increase30d
+- expr: |
+sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
 labels:
 verb: read
 record: code:apiserver_request_total:increase30d
 - expr: |
-sum by (code) (increase(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30d]))
+sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
 labels:
 verb: write
 record: code:apiserver_request_total:increase30d
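
The rule hunk above splits the 30d request accounting into two steps: a single expensive 30d increase recorded with both `code` and `verb`, plus cheap per-verb-class aggregations that reuse it. A sketch of the resulting expressions, taken from the hunk and annotated:

# Step 1: scan 30d of raw apiserver_request_total once, keeping code and verb.
# Recorded as: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d]))

# Step 2: re-aggregate the recorded series instead of re-querying 30d of raw data.
# Recorded as: code:apiserver_request_total:increase30d with verb="read" / verb="write" labels.
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
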
@@ -1311,29 +1314,6 @@ spec:
 for: 5m
 labels:
 severity: warning
-- alert: KubeAPILatencyHigh
-annotations:
-message: The API server has a 99th percentile latency of {{ $value }} seconds
-for {{ $labels.verb }} {{ $labels.resource }}.
-runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
-expr: |
-cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4
-for: 10m
-labels:
-severity: critical
-- alert: KubeAPIErrorsHigh
-annotations:
-message: API server is returning errors for {{ $value | humanizePercentage
-}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
-}}.
-runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
-expr: |
-sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
-/
-sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10
-for: 10m
-labels:
-severity: critical
 - alert: KubeAPIErrorsHigh
 annotations:
 message: API server is returning errors for {{ $value | humanizePercentage
@@ -1451,7 +1431,7 @@ spec:
 on node {{ $labels.node }}.
 runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
 expr: |
-histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60
+histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
 for: 15m
 labels:
 severity: warning
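
The single-line change above adds a {job="kubelet", metrics_path="/metrics"} selector to the right-hand side of the on(instance) group_left(node) join, presumably so the node-name lookup only matches series from the kubelet scrape itself rather than any other target sharing the same instance label. The same new expression, re-laid-out for readability:

# 99th percentile pod-worker (startup) latency per instance, joined to the
# node name exported by the kubelet, alerting above 60s.
histogram_quantile(0.99,
  sum by (instance, le) (
    rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])
  )
)
* on(instance) group_left(node)
  kubelet_node_name{job="kubelet", metrics_path="/metrics"}
> 60
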
@@ -1616,8 +1596,8 @@ spec:
 - alert: PrometheusRemoteStorageFailures
 annotations:
 description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
-{{ printf "%.1f" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue
-}}{{ else }}{{ $labels.url }}{{ end }}.
+{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
+$labels.url }}
 summary: Prometheus fails to send samples to remote storage.
 expr: |
 (
@@ -1637,8 +1617,8 @@ spec:
 - alert: PrometheusRemoteWriteBehind
 annotations:
 description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
-is {{ printf "%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue
-}}{{ else }}{{ $labels.url }}{{ end }}.
+is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
+}}.
 summary: Prometheus remote write is behind.
 expr: |
 # Without max_over_time, failed scrapes could create false negatives, see
@@ -1655,8 +1635,9 @@ spec:
 - alert: PrometheusRemoteWriteDesiredShards
 annotations:
 description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
-desired shards calculation wants to run {{ $value }} shards, which is more
-than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
+desired shards calculation wants to run {{ $value }} shards for queue {{
+$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
+printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
 $labels.instance | query | first | value }}.
 summary: Prometheus remote write desired shards calculation wants to run more
 than configured max shards.
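
For the PrometheusRemoteWriteBehind alert, the hunk only shows the leading comment of the expression ("Without max_over_time, failed scrapes could create false negatives"). As a hedged illustration of that pattern, not the exact rule from this commit: wrapping both sides in max_over_time keeps each side present across a failed scrape, so a momentary gap cannot silently resolve the alert. The selector and threshold below are assumptions:

# Sketch only: remote-write lag in seconds, tolerant of a single failed scrape.
(
  max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s"}[5m])
  - ignoring(remote_name, url) group_right
  max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s"}[5m])
)
> 120  # threshold assumed for illustration
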
@@ -612,6 +612,11 @@ spec:
 items:
 type: string
 type: array
+alertQueryUrl:
+description: The external Query URL the Thanos Ruler will set in the
+'Source' field of all alerts. Maps to the '--alert.query-url' CLI
+arg.
+type: string
 alertmanagersConfig:
 description: Define configuration for connecting to alertmanager. Only
 available with thanos v0.10.0 and higher. Maps to the `alertmanagers.config`

@@ -4,7 +4,7 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 rules:
 - apiGroups:

@@ -4,7 +4,7 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 roleRef:
 apiGroup: rbac.authorization.k8s.io

@@ -4,7 +4,7 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 namespace: monitoring
 spec:
@@ -18,15 +18,15 @@ spec:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 spec:
 containers:
 - args:
 - --kubelet-service=kube-system/kubelet
 - --logtostderr=true
 - --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
-- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.0
-image: quay.io/coreos/prometheus-operator:v0.38.0
+- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.1
+image: quay.io/coreos/prometheus-operator:v0.38.1
 name: prometheus-operator
 ports:
 - containerPort: 8080

@@ -4,7 +4,7 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 namespace: monitoring
 spec:

@@ -4,6 +4,6 @@ metadata:
 labels:
 app.kubernetes.io/component: controller
 app.kubernetes.io/name: prometheus-operator
-app.kubernetes.io/version: v0.38.0
+app.kubernetes.io/version: v0.38.1
 name: prometheus-operator
 namespace: monitoring