Merge pull request #1458 from paulfantom/grafana-mixin

This commit is contained in:
Paweł Krupa
2022-01-04 12:45:23 +01:00
committed by GitHub
8 changed files with 719 additions and 1 deletions

View File

@@ -24,6 +24,12 @@ local defaults = {
if !std.setMember(labelName, ['app.kubernetes.io/version'])
},
prometheusName:: error 'must provide prometheus name',
mixin: {
ruleLabels: {},
_config: {
runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/grafana/%s',
},
},
};
function(params)
@@ -40,6 +46,27 @@ function(params)
labels: g._config.commonLabels,
},
mixin::
(import 'github.com/grafana/grafana/grafana-mixin/mixin.libsonnet') +
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/lib/add-runbook-links.libsonnet') + {
_config+:: g._config.mixin._config,
},
prometheusRule: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: g._config.commonLabels + g._config.mixin.ruleLabels,
name: g._config.name + '-rules',
namespace: g._config.namespace,
},
spec: {
local r = if std.objectHasAll(g.mixin, 'prometheusRules') then g.mixin.prometheusRules.groups else [],
local a = if std.objectHasAll(g.mixin, 'prometheusAlerts') then g.mixin.prometheusAlerts.groups else [],
groups: a + r,
},
},
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',

View File

@@ -10,6 +10,16 @@
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana",
"subdir": "grafana-mixin"
}
},
"version": "main",
"name": "grafana-mixin"
},
{
"source": {
"git": {

View File

@@ -70,7 +70,12 @@ local utils = import './lib/utils.libsonnet';
image: $.values.common.images.grafana,
prometheusName: $.values.prometheus.name,
// TODO(paulfantom) This should be done by iterating over all objects and looking for object.mixin.grafanaDashboards
dashboards: $.nodeExporter.mixin.grafanaDashboards + $.prometheus.mixin.grafanaDashboards + $.kubernetesControlPlane.mixin.grafanaDashboards + $.alertmanager.mixin.grafanaDashboards,
dashboards: $.nodeExporter.mixin.grafanaDashboards +
$.prometheus.mixin.grafanaDashboards +
$.kubernetesControlPlane.mixin.grafanaDashboards +
$.alertmanager.mixin.grafanaDashboards +
$.grafana.mixin.grafanaDashboards,
mixin+: { ruleLabels: $.values.common.ruleLabels },
},
kubeStateMetrics: {
namespace: $.values.common.namespace,

View File

@@ -21,6 +21,16 @@
"version": "69279532f46cb7491bb60a588be534100fdc058d",
"sum": "cdKL5kPYfpWSpTCu4qctmh+gWQqL+4YWom6rw9qLYJU="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "88bc47441f75b5956023be0b1f79c371420cdcdd",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
"git": {

View File

@@ -20,6 +20,7 @@ resources:
- ./manifests/grafana-dashboardDefinitions.yaml
- ./manifests/grafana-dashboardSources.yaml
- ./manifests/grafana-deployment.yaml
- ./manifests/grafana-prometheusRule.yaml
- ./manifests/grafana-service.yaml
- ./manifests/grafana-serviceAccount.yaml
- ./manifests/grafana-serviceMonitor.yaml

View File

@@ -5410,6 +5410,631 @@ items:
app.kubernetes.io/version: 8.3.3
name: grafana-dashboard-controller-manager
namespace: monitoring
- apiVersion: v1
data:
grafana-overview.json: |-
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [
],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 3085,
"iteration": 1631554945276,
"links": [
],
"panels": [
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"mappings": [
],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
]
},
"gridPos": {
"h": 5,
"w": 6,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"text": {
},
"textMode": "auto"
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Firing Alerts",
"type": "stat"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"mappings": [
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
]
},
"gridPos": {
"h": 5,
"w": 6,
"x": 6,
"y": 0
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"text": {
},
"textMode": "auto"
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Dashboards",
"type": "stat"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"displayMode": "auto"
},
"mappings": [
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 0
},
"id": 10,
"options": {
"showHeader": true
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Build Info",
"transformations": [
{
"id": "labelsToFields",
"options": {
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Value": true,
"branch": true,
"container": true,
"goversion": true,
"namespace": true,
"pod": true,
"revision": true
},
"indexByName": {
"Time": 7,
"Value": 11,
"branch": 4,
"container": 8,
"edition": 2,
"goversion": 6,
"instance": 1,
"job": 0,
"namespace": 9,
"pod": 10,
"revision": 5,
"version": 3
},
"renameByName": {
}
}
}
],
"type": "table"
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"links": [
]
},
"overrides": [
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.1.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ",
"interval": "",
"legendFormat": "{{status_code}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
"title": "RPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"$$hashKey": "object:157",
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:158",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"links": [
]
},
"overrides": [
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.1.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
"interval": "",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
"interval": "",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
"title": "Request Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"$$hashKey": "object:210",
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:211",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"schemaVersion": 30,
"style": "dark",
"tags": [
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "dev-cortex",
"value": "dev-cortex"
},
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "datasource",
"options": [
],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": [
"default/grafana"
],
"value": [
"default/grafana"
]
},
"datasource": "$datasource",
"definition": "label_values(grafana_build_info, job)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "job",
"options": [
],
"query": {
"query": "label_values(grafana_build_info, job)",
"refId": "Billing Admin-job-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"definition": "label_values(grafana_build_info, instance)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [
],
"query": {
"query": "label_values(grafana_build_info, instance)",
"refId": "Billing Admin-instance-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Grafana Overview",
"uid": "6be0s85Mk",
"version": 2
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 8.3.3
name: grafana-dashboard-grafana-overview
namespace: monitoring
- apiVersion: v1
data:
k8s-resources-cluster.json: |-

View File

@@ -67,6 +67,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/grafana-overview
name: grafana-dashboard-grafana-overview
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
readOnly: false
@@ -155,6 +158,9 @@ spec:
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
name: grafana-dashboard-grafana-overview
name: grafana-dashboard-grafana-overview
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster

View File

@@ -0,0 +1,34 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 8.3.3
prometheus: k8s
role: alert-rules
name: grafana-rules
namespace: monitoring
spec:
groups:
- name: GrafanaAlerts
rules:
- alert: GrafanaRequestsFailing
annotations:
message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }}
is experiencing {{ $value | humanize }}% errors'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
expr: |
100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
/ ignoring (status_code)
sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
> 50
for: 5m
labels:
severity: warning
- name: grafana_rules
rules:
- expr: |
sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m