diff --git a/README.md b/README.md index 3948cdd..bb1ab61 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,12 @@ To import the dashboards stored in the `dashboards/` folder: To test the installation, you can import the `Examon Test - Random Sensor.json` dashboard. +> **Docker Compose users:** the dashboards directly under `dashboards/` target +> Grafana 10+/11+ and the React-based `arpnetworking-kairosdb-datasource` +> plugin used by the Kubernetes (v0.5.0+) deployment. For this legacy Docker +> Compose stack (Grafana 7.3.10 + `grafana-kairosdb-datasource`) import the +> snapshots from `dashboards/legacy/` instead. + ### Configure the plugins diff --git a/dashboards/Examon Test - Random Sensor.json b/dashboards/Examon Test - Random Sensor.json index cbd4f72..8d45315 100644 --- a/dashboards/Examon Test - Random Sensor.json +++ b/dashboards/Examon Test - Random Sensor.json @@ -3,7 +3,7 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -13,433 +13,111 @@ ] }, "editable": true, - "gnetId": null, "graphTooltip": 0, - "id": 1, "links": [], "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, + "datasource": { + "type": "arpnetworking-kairosdb-datasource", + "uid": "examon-kairosdb" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { "mode": "palette-classic" }, + "custom": { + "drawStyle": "line", + "lineWidth": 1, + "fillOpacity": 10, + "pointSize": 5, + "showPoints": "never" + } }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 14, - "w": 24, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, + "gridPos": { "h": 14, "w": 24, "x": 0, "y": 0 }, "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi" } }, - "percentage": false, - "pluginVersion": "7.3.10", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { + "refId": "A", "query": { - "aggregators": [ - { - "$$hashKey": "object:137", - "autoValueSwitch": { - "dependentParameters": [ - { - "name": "value", - "text": "every", - "type": "sampling", - "value": "1h" - } - ], - "enabled": true - }, - "name": "avg", - "parameters": [ - { - "$$hashKey": "object:839", - "allowedValues": { - "0": "NONE", - "1": "START_TIME", - "2": "SAMPLING" - }, - "name": "sampling", - "text": "align by", - "type": "alignment", - "value": "NONE" - }, - { - "name": "value", - "text": "every", - "type": "sampling", - "value": "1h" - } - ] - } - ], - "groupBy": { - "tags": [ - "id" - ], - "time": [], - "value": [] - }, "metricName": "random_sensor", + "alias": "$_tag_group_id", "tags": { - "chnl": [], - "id": [], - "org": [ - "examon" - ], - "plugin": [ - "random_pub" - ] - } - }, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ExaMon Test - Random Sensor", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:235", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:236", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": 
null, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 13, - "w": 12, - "x": 0, - "y": 14 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.3.10", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "query": { + "org": ["examon"], + "plugin": ["random_pub"] + }, + "groupBy": { + "tags": ["id"] + }, "aggregators": [ { - "$$hashKey": "object:137", - "autoValueSwitch": { - "dependentParameters": [ - { - "name": "value", - "text": "every", - "type": "sampling", - "value": "1h" - } - ], - "enabled": true - }, "name": "avg", - "parameters": [ - { - "$$hashKey": "object:664", - "allowedValues": { - "0": "NONE", - "1": "START_TIME", - "2": "SAMPLING" - }, - "name": "sampling", - "text": "align by", - "type": "alignment", - "value": "NONE" - }, - { - "name": "value", - "text": "every", - "type": "sampling", - "value": "1h" - } - ] + "parameters": [], + "visible": true } ], - "groupBy": { - "tags": [ - "id" - ], - "time": [], - "value": [] - }, - "metricName": "random_sensor", - "tags": { - "chnl": [], - "id": [], - "org": [ - "examon" - ], - "plugin": [ - "random_pub" - ] - } - }, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ExaMon Test - Random Sensor", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": 50, - "min": null, - "mode": "histogram", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:235", - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:236", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "overrideScalar": false + } } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Random Sensor - Time Series", + "type": "timeseries" }, { - "aliasColors": {}, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 + "datasource": { + "type": "arpnetworking-kairosdb-datasource", + "uid": "examon-kairosdb" }, - "datasource": null, "fieldConfig": { - "defaults": { - "custom": {} - }, + "defaults": { "color": { "mode": "palette-classic" } }, "overrides": [] }, - "fontSize": "70%", - "format": "short", - "gridPos": { - "h": 13, - "w": 12, - "x": 12, - "y": 14 - }, + "gridPos": { "h": 13, "w": 24, "x": 0, "y": 14 }, "id": 4, - "interval": null, - "legend": { - "percentage": false, - "show": true, - "values": true + "options": { + "legend": { "displayMode": "table", "placement": "right", "values": ["percent"] }, + "pieType": "donut", + "tooltip": { "mode": "single" } }, - "legendType": "On graph", - "links": [], - "nullPointMode": "connected", - "pieType": "donut", - "pluginVersion": "7.3.10", - "strokeWidth": 1, "targets": [ { + "refId": "A", "query": { + "metricName": "random_sensor", + "alias": "", + "tags": { + "org": ["examon"], + "plugin": ["random_pub"] + }, + "groupBy": { + "tags": ["id"] + }, "aggregators": [ { - "$$hashKey": "object:137", - "autoValueSwitch": { - "dependentParameters": [ - { - "name": "value", - "text": "every", - "type": "sampling", - "value": "1h" - } - ], - "enabled": true - }, "name": "avg", - "parameters": [ - { - "$$hashKey": "object:664", - "allowedValues": { - "0": "NONE", - "1": "START_TIME", - "2": "SAMPLING" - }, - "name": "sampling", - "text": "align by", - "type": "alignment", - "value": "NONE" - }, - { - "name": "value", - "text": 
"every", - "type": "sampling", - "value": "1h" - } - ] + "parameters": [], + "visible": true } ], - "groupBy": { - "tags": [ - "id" - ], - "time": [], - "value": [] - }, - "metricName": "random_sensor", - "tags": { - "chnl": [], - "id": [], - "org": [ - "examon" - ], - "plugin": [ - "random_pub" - ] - } - }, - "refId": "A" + "overrideScalar": false + } } ], - "timeFrom": null, - "timeShift": null, - "title": "ExaMon Test - Random Sensor", - "type": "grafana-piechart-panel", - "valueName": "avg" + "title": "Random Sensor - Distribution", + "type": "piechart" } ], "refresh": "1m", - "schemaVersion": 26, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-30m", - "to": "now" - }, + "schemaVersion": 39, + "tags": ["examon"], + "time": { "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Examon Test - Random Sensor", - "uid": "mnBblLFHk", - "version": 8 -} \ No newline at end of file + "uid": "examon-random-sensor", + "version": 1 +} diff --git a/dashboards/legacy/Examon Test - Random Sensor.json b/dashboards/legacy/Examon Test - Random Sensor.json new file mode 100644 index 0000000..cbd4f72 --- /dev/null +++ b/dashboards/legacy/Examon Test - Random Sensor.json @@ -0,0 +1,445 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 1, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 14, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": 
true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "query": { + "aggregators": [ + { + "$$hashKey": "object:137", + "autoValueSwitch": { + "dependentParameters": [ + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ], + "enabled": true + }, + "name": "avg", + "parameters": [ + { + "$$hashKey": "object:839", + "allowedValues": { + "0": "NONE", + "1": "START_TIME", + "2": "SAMPLING" + }, + "name": "sampling", + "text": "align by", + "type": "alignment", + "value": "NONE" + }, + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ] + } + ], + "groupBy": { + "tags": [ + "id" + ], + "time": [], + "value": [] + }, + "metricName": "random_sensor", + "tags": { + "chnl": [], + "id": [], + "org": [ + "examon" + ], + "plugin": [ + "random_pub" + ] + } + }, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ExaMon Test - Random Sensor", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:235", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:236", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": 
{} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "query": { + "aggregators": [ + { + "$$hashKey": "object:137", + "autoValueSwitch": { + "dependentParameters": [ + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ], + "enabled": true + }, + "name": "avg", + "parameters": [ + { + "$$hashKey": "object:664", + "allowedValues": { + "0": "NONE", + "1": "START_TIME", + "2": "SAMPLING" + }, + "name": "sampling", + "text": "align by", + "type": "alignment", + "value": "NONE" + }, + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ] + } + ], + "groupBy": { + "tags": [ + "id" + ], + "time": [], + "value": [] + }, + "metricName": "random_sensor", + "tags": { + "chnl": [], + "id": [], + "org": [ + "examon" + ], + "plugin": [ + "random_pub" + ] + } + }, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ExaMon Test - Random Sensor", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 50, + "min": null, + "mode": "histogram", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:235", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:236", + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "70%", + "format": "short", + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "On graph", + "links": [], + "nullPointMode": "connected", + "pieType": "donut", + "pluginVersion": "7.3.10", + "strokeWidth": 1, + "targets": [ + { + "query": { + "aggregators": [ + { + "$$hashKey": "object:137", + "autoValueSwitch": { + "dependentParameters": [ + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ], + "enabled": true + }, + "name": "avg", + "parameters": [ + { + "$$hashKey": "object:664", + "allowedValues": { + "0": "NONE", + "1": "START_TIME", + "2": "SAMPLING" + }, + "name": "sampling", + "text": "align by", + "type": "alignment", + "value": "NONE" + }, + { + "name": "value", + "text": "every", + "type": "sampling", + "value": "1h" + } + ] + } + ], + "groupBy": { + "tags": [ + "id" + ], + "time": [], + "value": [] + }, + "metricName": "random_sensor", + "tags": { + "chnl": [], + "id": [], + "org": [ + "examon" + ], + "plugin": [ + "random_pub" + ] + } + }, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ExaMon Test - Random Sensor", + "type": "grafana-piechart-panel", + "valueName": "avg" + } + ], + "refresh": "1m", + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Examon Test - Random Sensor", + "uid": "mnBblLFHk", + "version": 8 +} \ No newline at end of file diff 
--git a/dashboards/legacy/README.md b/dashboards/legacy/README.md new file mode 100644 index 0000000..e923091 --- /dev/null +++ b/dashboards/legacy/README.md @@ -0,0 +1,25 @@ +# Legacy dashboards (docker-compose v0.4.0) + +This folder holds dashboards that target the **legacy docker-compose v0.4.0** +ExaMon stack only. They are kept here for users who are still running that +deployment and have not migrated to the Kubernetes-based v0.5.0+ stack. + +## What's here + +- `Examon Test - Random Sensor.json`: random-sensor verification dashboard + compatible with Grafana 7.3.10 and the AngularJS-based + `grafana-kairosdb-datasource` plugin, both shipped by `docker-compose.yml`. + +## When to use these + +Use these files **only** with the legacy Docker Compose deployment described +in the top-level `README.md`. They are imported manually via the Grafana UI +or HTTP API on a running Grafana 7.x instance. + +## For Kubernetes (v0.5.0+) users + +Use the dashboards in the parent `dashboards/` folder. They are compatible +with Grafana 10+ / 11+ (the Kubernetes Helm chart deploys current Grafana) +and the React-based `arpnetworking-kairosdb-datasource` plugin. The bundled +"Examon Test - Random Sensor" dashboard is also auto-provisioned by the +Helm chart via the Grafana dashboard sidecar: no manual import needed. 
diff --git a/deploy/helm/examon/dashboards/Examon Test - Random Sensor.json b/deploy/helm/examon/dashboards/Examon Test - Random Sensor.json new file mode 100644 index 0000000..8d45315 --- /dev/null +++ b/deploy/helm/examon/dashboards/Examon Test - Random Sensor.json @@ -0,0 +1,123 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "arpnetworking-kairosdb-datasource", + "uid": "examon-kairosdb" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "drawStyle": "line", + "lineWidth": 1, + "fillOpacity": 10, + "pointSize": 5, + "showPoints": "never" + } + }, + "overrides": [] + }, + "gridPos": { "h": 14, "w": 24, "x": 0, "y": 0 }, + "id": 2, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi" } + }, + "targets": [ + { + "refId": "A", + "query": { + "metricName": "random_sensor", + "alias": "$_tag_group_id", + "tags": { + "org": ["examon"], + "plugin": ["random_pub"] + }, + "groupBy": { + "tags": ["id"] + }, + "aggregators": [ + { + "name": "avg", + "parameters": [], + "visible": true + } + ], + "overrideScalar": false + } + } + ], + "title": "Random Sensor - Time Series", + "type": "timeseries" + }, + { + "datasource": { + "type": "arpnetworking-kairosdb-datasource", + "uid": "examon-kairosdb" + }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" } }, + "overrides": [] + }, + "gridPos": { "h": 13, "w": 24, "x": 0, "y": 14 }, + "id": 4, + "options": { + "legend": { "displayMode": "table", "placement": "right", "values": ["percent"] }, + "pieType": "donut", + "tooltip": { "mode": "single" } + }, + "targets": [ + { + "refId": 
"A", + "query": { + "metricName": "random_sensor", + "alias": "", + "tags": { + "org": ["examon"], + "plugin": ["random_pub"] + }, + "groupBy": { + "tags": ["id"] + }, + "aggregators": [ + { + "name": "avg", + "parameters": [], + "visible": true + } + ], + "overrideScalar": false + } + } + ], + "title": "Random Sensor - Distribution", + "type": "piechart" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": ["examon"], + "time": { "from": "now-30m", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Examon Test - Random Sensor", + "uid": "examon-random-sensor", + "version": 1 +} diff --git a/deploy/helm/examon/dashboards/README.md b/deploy/helm/examon/dashboards/README.md new file mode 100644 index 0000000..2937a3f --- /dev/null +++ b/deploy/helm/examon/dashboards/README.md @@ -0,0 +1,24 @@ +# Chart-bundled Grafana dashboards + +> **Most users should NOT add files here.** To ship a custom dashboard, +> create a `ConfigMap` labeled `grafana_dashboard=1` in any namespace; +> the Grafana sidecar will load it automatically without a `helm +> upgrade`. See +> [Grafana Dashboards](../../../../docs/Deployment/kubernetes.md#grafana-dashboards) +> in the Kubernetes deployment guide for the recipe. +> +> This folder is for chart **forks and maintainers** who want a +> dashboard baked into the umbrella chart artifact itself. + +Dashboards in this folder are packaged into the umbrella chart and +auto-provisioned in Grafana via the dashboard sidecar (one ConfigMap per +file, labeled `grafana_dashboard: "1"`). + +The canonical user-facing copy lives at the repository root under +`dashboards/`. When updating a dashboard, edit both copies (or sync from +the root via `cp ../../../../dashboards/*.json .`). Helm's `.Files` API can +only read files inside the chart directory, so the chart needs its own +copy of any dashboard it ships. + +To disable bundling without removing the files, set +`bundledDashboards.enabled=false` in `values.yaml`. 
diff --git a/deploy/helm/examon/templates/grafana-dashboards.yaml b/deploy/helm/examon/templates/grafana-dashboards.yaml new file mode 100644 index 0000000..7e98bfd --- /dev/null +++ b/deploy/helm/examon/templates/grafana-dashboards.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.grafana.enabled .Values.bundledDashboards.enabled -}} +{{- range $path, $bytes := .Files.Glob "dashboards/*.json" }} +{{- $stem := trimSuffix ".json" (base $path) }} +{{- $sanitized := regexReplaceAll "[^A-Za-z0-9-]" $stem "-" }} +{{- $name := regexReplaceAll "-+" $sanitized "-" | trimAll "-" | lower }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ printf "examon-dashboard-%s" $name | trunc 63 | trimSuffix "-" }} + namespace: {{ $.Release.Namespace }} + labels: + grafana_dashboard: "1" + app.kubernetes.io/name: examon + app.kubernetes.io/instance: {{ $.Release.Name }} + app.kubernetes.io/managed-by: {{ $.Release.Service }} +data: + {{ printf "%s.json" $name | quote }}: |- +{{ $.Files.Get $path | indent 4 }} +{{- end }} +{{- end }} diff --git a/deploy/helm/examon/templates/k8ssandra-cluster.yaml b/deploy/helm/examon/templates/k8ssandra-cluster.yaml index f30c9ee..8fb125b 100644 --- a/deploy/helm/examon/templates/k8ssandra-cluster.yaml +++ b/deploy/helm/examon/templates/k8ssandra-cluster.yaml @@ -36,6 +36,15 @@ spec: racks: {{- toYaml .Values.cassandra.datacenters.dc1.racks | nindent 10 }} {{- end }} + {{- with .Values.cassandra.telemetry }} + telemetry: + prometheus: + enabled: {{ .prometheus.enabled | default false }} + {{- with .prometheus.commonLabels }} + commonLabels: + {{- toYaml . 
| nindent 14 }} + {{- end }} + {{- end }} {{- if .Values.cassandra.reaper }} reaper: autoScheduling: diff --git a/deploy/helm/examon/values-production.yaml b/deploy/helm/examon/values-production.yaml index 1b63752..12c1747 100644 --- a/deploy/helm/examon/values-production.yaml +++ b/deploy/helm/examon/values-production.yaml @@ -67,8 +67,12 @@ grafana: - marcusolsson-gantt-panel - flant-statusmap-panel - gapit-htmlgraphics-panel + - https://github.com/ArpNetworking/kairosdb-datasource/releases/download/v4.0.7/kairosdb-datasource.zip;arpnetworking-kairosdb-datasource env: GF_PANELS_DISABLE_SANITIZE_HTML: "true" + grafana.ini: + plugins: + allow_loading_unsigned_plugins: arpnetworking-kairosdb-datasource persistence: enabled: true size: 10Gi @@ -77,7 +81,8 @@ grafana: apiVersion: 1 datasources: - name: kairosdb - type: grafana-kairosdb-datasource + uid: examon-kairosdb + type: arpnetworking-kairosdb-datasource access: proxy url: http://examon-kairosdb:8083 isDefault: true diff --git a/deploy/helm/examon/values-staging.yaml b/deploy/helm/examon/values-staging.yaml index 438e50c..82a0cae 100644 --- a/deploy/helm/examon/values-staging.yaml +++ b/deploy/helm/examon/values-staging.yaml @@ -51,8 +51,12 @@ grafana: - marcusolsson-gantt-panel - flant-statusmap-panel - gapit-htmlgraphics-panel + - https://github.com/ArpNetworking/kairosdb-datasource/releases/download/v4.0.7/kairosdb-datasource.zip;arpnetworking-kairosdb-datasource env: GF_PANELS_DISABLE_SANITIZE_HTML: "true" + grafana.ini: + plugins: + allow_loading_unsigned_plugins: arpnetworking-kairosdb-datasource persistence: enabled: true size: 1Gi @@ -61,7 +65,8 @@ grafana: apiVersion: 1 datasources: - name: kairosdb - type: grafana-kairosdb-datasource + uid: examon-kairosdb + type: arpnetworking-kairosdb-datasource access: proxy url: http://examon-kairosdb:8083 isDefault: true diff --git a/deploy/helm/examon/values.yaml b/deploy/helm/examon/values.yaml index 064d29c..cea1387 100644 --- a/deploy/helm/examon/values.yaml 
+++ b/deploy/helm/examon/values.yaml @@ -31,6 +31,11 @@ random-pub: examon-server: enabled: true +# -- Bundled dashboards loaded via the Grafana sidecar (ConfigMaps labeled +# grafana_dashboard=1). Disable to skip shipping the test dashboard. +bundledDashboards: + enabled: true + # -- Cassandra configuration (via K8ssandra Operator) cassandra: enabled: true @@ -52,11 +57,29 @@ cassandra: racks: [] reaper: enabled: false + # -- Native K8ssandra Prometheus telemetry. + # When enabled, K8ssandra creates a ServiceMonitor for Cassandra metrics + # that an existing Prometheus operator can scrape directly. Requires a + # ServiceMonitor CRD installed in the cluster (kube-prometheus-stack or + # similar). Disabled by default so the chart installs on clusters + # without Prometheus. + telemetry: + prometheus: + enabled: false + # commonLabels are applied to the ServiceMonitor that K8ssandra creates, + # e.g. release: kube-prometheus-stack to satisfy a Prometheus + # serviceMonitorSelector. + commonLabels: {} # -- Grafana subchart overrides grafana: enabled: true adminPassword: "" # pass via --set grafana.adminPassword=... at deploy time + # Grafana plugins. + # The KairosDB datasource uses the React-based ArpNetworking fork + # (the original grafana-kairosdb-datasource is AngularJS-only and is + # blocked by Grafana 11+). It is shipped as an unsigned plugin and must + # be installed from a release URL with the GRAFANA_PLUGIN_ID appended. 
plugins: - ae3e-plotly-panel - grafana-piechart-panel @@ -64,17 +87,28 @@ grafana: - marcusolsson-gantt-panel - flant-statusmap-panel - gapit-htmlgraphics-panel + - https://github.com/ArpNetworking/kairosdb-datasource/releases/download/v4.0.7/kairosdb-datasource.zip;arpnetworking-kairosdb-datasource env: GF_PANELS_DISABLE_SANITIZE_HTML: "true" + grafana.ini: + plugins: + allow_loading_unsigned_plugins: arpnetworking-kairosdb-datasource sidecar: datasources: enabled: true + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + folder: /tmp/dashboards + searchNamespace: ALL datasources: datasources.yaml: apiVersion: 1 datasources: - name: kairosdb - type: grafana-kairosdb-datasource + uid: examon-kairosdb + type: arpnetworking-kairosdb-datasource access: proxy url: http://examon-kairosdb:8083 isDefault: true diff --git a/docs/Deployment/change-propagation.md b/docs/Deployment/change-propagation.md index 1a346e4..0af0629 100644 --- a/docs/Deployment/change-propagation.md +++ b/docs/Deployment/change-propagation.md @@ -72,7 +72,7 @@ umbrella file, which wins over the subchart default. all environments unless they override it. 3. If **environment-specific**: edit the relevant `values-.yaml`. 4. **Check all three environment files** to verify they are consistent. - Values files only override fields they explicitly set — if a file doesn't + Values files only override fields they explicitly set; if a file doesn't mention a field, it inherits from `values.yaml`. **Checklist for value changes:** @@ -110,7 +110,7 @@ deploy/helm/examon/subcharts//templates/ _helpers.tpl # Template helper functions ``` -**Critical step — subchart packaging:** +**Critical step: subchart packaging.** Helm does **not** read templates directly from `subcharts/`. Instead, it uses pre-packaged `.tgz` archives inside `deploy/helm/examon/charts/`. You must @@ -191,7 +191,7 @@ the same tag (e.g. 
`latest`), nodes with `imagePullPolicy: IfNotPresent` For production images on GHCR, use semantic versioning (e.g. `1.3.0`, `1.3.1`). For development, use descriptive suffixes (e.g. `1.3.0-fix3`). -**What to propagate — image tag changes:** +**What to propagate when changing image tags:** | File | Update | |------|--------| @@ -223,7 +223,7 @@ For production images on GHCR, use semantic versioning (e.g. `1.3.0`, This is the most complex scenario because it touches every layer: -**Step 1 — Subchart template:** +**Step 1: Subchart template.** Edit `subcharts/examon-server/templates/configmap.yaml` to render the new field: @@ -231,7 +231,7 @@ field: CASSANDRA_TIMEOUT = {{ .Values.config.cassandraTimeout }} ``` -**Step 2 — Subchart default:** +**Step 2: Subchart default.** Edit `subcharts/examon-server/values.yaml`: ```yaml @@ -239,7 +239,7 @@ config: cassandraTimeout: 30 ``` -**Step 3 — Umbrella default:** +**Step 3: Umbrella default.** Edit `values.yaml`: ```yaml @@ -248,7 +248,7 @@ examon-server: cassandraTimeout: 30 ``` -**Step 4 — Environment overrides (if needed):** +**Step 4: Environment overrides (if needed).** Edit `values-production.yaml`: ```yaml @@ -257,7 +257,7 @@ examon-server: cassandraTimeout: 120 ``` -**Step 5 — Rebuild and deploy:** +**Step 5: Rebuild and deploy.** ```bash cd deploy/helm/examon && helm dependency update && cd ../../.. @@ -265,13 +265,13 @@ helm upgrade examon ./deploy/helm/examon \ -f ./deploy/helm/examon/values-.yaml -n examon ``` -**Step 6 — Verify:** +**Step 6: Verify.** ```bash helm get manifest examon -n examon | grep CASSANDRA_TIMEOUT ``` -**Step 7 — Documentation:** +**Step 7: Documentation.** Update `docs/Deployment/configuration.md` with the new parameter. --- @@ -280,14 +280,14 @@ Update `docs/Deployment/configuration.md` with the new parameter. 
| What you changed | Rebuild image | `helm dependency update` | `helm upgrade` | Update values files | |------------------|:---:|:---:|:---:|:---:| -| Value in `values-.yaml` only | | | Yes | — | +| Value in `values-.yaml` only | | | Yes | N/A | | Value in umbrella `values.yaml` | | | Yes | Check env files | | Subchart template (`subcharts/*/templates/`) | | **Yes** | Yes | If new field | | Subchart `values.yaml` (defaults) | | **Yes** | Yes | Check umbrella + env files | | Dockerfile or container scripts | **Yes** | | Yes | Update image tags | -| K3d/K8s cluster config (`deploy/k3d/`) | — | — | Recreate cluster | — | -| External chart version (`Chart.yaml`) | | **Yes** | Yes | — | -| K8ssandra operator version | | | `helm upgrade k8ssandra-operator` | — | +| K3d/K8s cluster config (`deploy/k3d/`) | N/A | N/A | Recreate cluster | N/A | +| External chart version (`Chart.yaml`) | | **Yes** | Yes | N/A | +| K8ssandra operator version | | | `helm upgrade k8ssandra-operator` | N/A | !!! note "K8ssandra operator is a separate Helm release" The K8ssandra operator is **not** listed in `Chart.yaml`. It is installed @@ -389,7 +389,7 @@ The only secret that still requires `--set` is the **Grafana admin password**: `deploy/helm/examon/values-*.secret.yaml` and will never be committed: ```yaml - # values-local.secret.yaml — DO NOT COMMIT + # values-local.secret.yaml: DO NOT COMMIT grafana: adminPassword: "my-grafana-password" ``` diff --git a/docs/Deployment/configuration.md b/docs/Deployment/configuration.md index f0b293e..88ad9e9 100644 --- a/docs/Deployment/configuration.md +++ b/docs/Deployment/configuration.md @@ -13,6 +13,7 @@ All ExaMon Helm chart configuration is managed through values files. 
This docume | `mqtt2kairosdb.enabled` | Deploy MQTT-to-KairosDB bridge | `true` | | `random-pub.enabled` | Deploy random test publisher | `true` | | `examon-server.enabled` | Deploy ExaMon REST API server | `true` | +| `bundledDashboards.enabled` | Ship the chart's bundled Grafana dashboards (in `deploy/helm/examon/dashboards/`) as `grafana_dashboard=1` ConfigMaps for the Grafana sidecar to load. Set to `false` to skip the bundled test dashboard. | `true` | ## Cassandra (K8ssandra) @@ -30,6 +31,8 @@ All ExaMon Helm chart configuration is managed through values files. This docume | `cassandra.datacenters.dc1.podAntiAffinity` | Enable pod anti-affinity | `false` | | `cassandra.datacenters.dc1.racks` | Rack definitions with zone labels | `[]` | | `cassandra.reaper.enabled` | Enable Reaper for repairs | `false` | +| `cassandra.telemetry.prometheus.enabled` | Let K8ssandra emit a `ServiceMonitor` for Cassandra metrics. Requires the `ServiceMonitor` CRD (kube-prometheus-stack or equivalent) in the cluster. | `false` | +| `cassandra.telemetry.prometheus.commonLabels` | Labels applied to the `ServiceMonitor` that K8ssandra creates. Use to match the Prometheus `serviceMonitorSelector` (e.g. `release: kube-prometheus-stack`). | `{}` | ## KairosDB @@ -52,10 +55,19 @@ Grafana uses the [official Grafana Helm chart](https://github.com/grafana/helm-c |-----------|-------------|---------| | `grafana.adminPassword` | Admin password | `Password` | | `grafana.plugins` | Grafana plugins to install | See `values.yaml` | +| `grafana.datasources` | Datasource provisioning (KairosDB pre-configured with `uid: examon-kairosdb`, type `arpnetworking-kairosdb-datasource`) | See `values.yaml` | +| `grafana.sidecar.dashboards.enabled` | Auto-load dashboards from ConfigMaps labeled `grafana_dashboard=1` (cluster-wide; `searchNamespace: ALL`). See [Grafana Dashboards](kubernetes.md#grafana-dashboards) in the K8s guide for the recipe to add custom dashboards without editing the chart. 
| `true` | | `grafana.persistence.enabled` | Enable persistent storage | `false` | | `grafana.persistence.size` | PVC size | `10Gi` | | `grafana.ingress.enabled` | Enable ingress | `false` | +The KairosDB datasource ships as the React-based +[ArpNetworking fork](https://github.com/ArpNetworking/kairosdb-datasource) +(`arpnetworking-kairosdb-datasource`). The legacy AngularJS +`grafana-kairosdb-datasource` is not compatible with Grafana 11+ and is +not used by this chart. The plugin is installed from its GitHub release +URL and allowed via `grafana.grafana.ini.plugins.allow_loading_unsigned_plugins`. + ## Mosquitto | Parameter | Description | Default | diff --git a/docs/Deployment/docker-compose.md b/docs/Deployment/docker-compose.md index 96c098a..ed94055 100644 --- a/docs/Deployment/docker-compose.md +++ b/docs/Deployment/docker-compose.md @@ -42,6 +42,17 @@ This will build and start: - **URL:** `http://kairosdb:8083` - **Access:** Server +### Test Dashboard + +This Docker Compose stack runs Grafana 7.3.10 with the legacy AngularJS +`grafana-kairosdb-datasource` plugin. Import the v0.4.0-compatible +snapshot from `dashboards/legacy/Examon Test - Random Sensor.json`. + +The dashboards directly under `dashboards/` target the Kubernetes +(v0.5.0+) stack instead: they use the React-based +`arpnetworking-kairosdb-datasource` plugin and are not compatible with +Grafana 7.x. + ### Data Persistence Two Docker volumes are created: diff --git a/docs/Deployment/kubernetes-local.md b/docs/Deployment/kubernetes-local.md index 0311d47..a393b15 100644 --- a/docs/Deployment/kubernetes-local.md +++ b/docs/Deployment/kubernetes-local.md @@ -64,7 +64,7 @@ echo "127.0.0.1 examon-registry" | sudo tee -a /etc/hosts !!! note The automated setup script (`k8s-local-setup.sh`) performs this step - automatically. You only need to do this once per machine — the entry + automatically. You only need to do this once per machine; the entry persists across cluster recreations. 
### Step 3: Build and Push Images @@ -134,7 +134,7 @@ initial deployment, a superuser secret is automatically generated. Both **KairosDB** and **examon-server** read these credentials automatically from the K8ssandra secret via `secretKeyRef` environment variables. No -manual `--set` flags or second `helm upgrade` is needed — the pods pick up +manual `--set` flags or second `helm upgrade` is needed: the pods pick up credentials on startup once the secret exists. `examon-server` uses env var overrides (`CASSANDRA_USER`, `CASSANDRA_PASSWORD`) @@ -151,7 +151,7 @@ examon-server: !!! note "Bootstrap restarts" On a fresh install, `examon-server` and `kairosdb` may restart a few times while Cassandra initializes and creates the superuser secret. - This is expected — Kubernetes restarts them automatically and they + This is expected: Kubernetes restarts them automatically and they connect once Cassandra is ready. ### Step 8: Verify @@ -200,10 +200,33 @@ If the graph shows data, the entire pipeline is working end-to-end: is consuming it and writing to KairosDB, and KairosDB is persisting it in Cassandra. +**Verify via Grafana (auto-provisioned).** Unlike the v0.4.0 docker-compose +stack, no manual datasource or dashboard setup is required: + +1. Open [http://localhost:3000](http://localhost:3000) and log in as + `admin` with the password set via `--set grafana.adminPassword=...` + (default: `admin` for `values-local.yaml`). +2. Under **Connections → Data sources**, the `kairosdb` data source + (type `arpnetworking-kairosdb-datasource`, `uid: examon-kairosdb`) is + already configured. Clicking **Test** returns *"Data source is + working"*. +3. Under **Dashboards**, open **Examon Test - Random Sensor**. It is + loaded automatically by the Grafana dashboard sidecar from a + chart-bundled ConfigMap labeled `grafana_dashboard=1`. The dashboard + should render live data from `random_pub`. 
+ +If the dashboard is missing, give the sidecar ~30s to pick it up after +the initial install, then check: + +```bash +kubectl get configmap -n examon -l grafana_dashboard=1 +kubectl logs -l app.kubernetes.io/name=grafana -c grafana-sc-dashboard -n examon +``` + ## Accessing Services All user-facing services are exposed directly on the host via the K3d load -balancer and `NodePort` services — no `kubectl port-forward` or Kubernetes +balancer and `NodePort` services. No `kubectl port-forward` or Kubernetes knowledge required. External clients (e.g. `examon-client` on user laptops, admins accessing Grafana) connect to these addresses just like with Docker Compose: @@ -256,8 +279,8 @@ examon-server: KairosDB 1.3.0 loads two configuration files: -1. **`kairosdb.properties`** — legacy Java properties format -2. **`kairosdb.conf`** — HOCON format (takes precedence) +1. **`kairosdb.properties`**: legacy Java properties format +2. **`kairosdb.conf`**: HOCON format (takes precedence) The `config-kairos.sh` entrypoint script patches both files at startup using environment variables (`CASSANDRA_HOST_LIST`, `CASSANDRA_USER`, @@ -272,9 +295,9 @@ built on the `ExamonApp` framework from the `examon-common` library. They expect their configuration in `.conf` files (INI format) mounted in the working directory: -- `random_pub.conf` — mounted from ConfigMap via Helm -- `mqtt2kairosdb.conf` — mounted from ConfigMap via Helm -- `server.conf` — mounted from ConfigMap via Helm +- `random_pub.conf`: mounted from ConfigMap via Helm +- `mqtt2kairosdb.conf`: mounted from ConfigMap via Helm +- `server.conf`: mounted from ConfigMap via Helm These are generated from the Helm `values.yaml` settings by each subchart's `configmap.yaml` template. @@ -299,7 +322,7 @@ to pull the image based on the Kubernetes `imagePullPolicy`: `values-local.yaml` sets `pullPolicy: Always` for all custom ExaMon images. 
This means the standard build-push-restart cycle works reliably with the -`:latest` tag — no stale cache surprises. +`:latest` tag, with no stale cache surprises. ### Scenario 1: Application Code Change @@ -434,7 +457,7 @@ well with K3d and Helm charts: pipeline, supports file syncing, and integrates with CI/CD. Both tools work with ExaMon's Helm chart structure out of the box. They -are optional power-ups — the manual workflow above is sufficient for most +are optional power-ups; the manual workflow above is sufficient for most development tasks. ### Teardown diff --git a/docs/Deployment/kubernetes-production.md b/docs/Deployment/kubernetes-production.md index 3d492a6..806f7cb 100644 --- a/docs/Deployment/kubernetes-production.md +++ b/docs/Deployment/kubernetes-production.md @@ -1,6 +1,6 @@ # Production Deployment -Production targets a real Kubernetes cluster — on-premises (OpenStack, +Production targets a real Kubernetes cluster: on-premises (OpenStack, RKE2, kubeadm) or cloud-managed (EKS, GKE, AKS). ## Service Exposure Architecture @@ -265,7 +265,7 @@ helm install examon ./deploy/helm/examon \ -n examon --wait --timeout 20m ``` -Cassandra credentials are injected automatically — both KairosDB and +Cassandra credentials are injected automatically. Both KairosDB and examon-server read them from the K8ssandra-generated secret (`examon-cassandra-superuser`) via `secretKeyRef` environment variables. No second `helm upgrade` is needed. @@ -315,13 +315,30 @@ Create DNS records pointing to the services: ### Configure Grafana Data Source -The data source is auto-provisioned via `values-production.yaml`. If you -need to add it manually: - -- **Type:** KairosDB -- **Name:** kairosdb -- **URL:** `http://examon-kairosdb:8083` -- **Access:** Server +The KairosDB data source is **fully auto-provisioned** by the chart on +every deploy; no manual setup is required. The umbrella chart: + +1. 
Installs the React-based [ArpNetworking + KairosDB data source plugin](https://github.com/ArpNetworking/kairosdb-datasource) + from its GitHub release URL via `grafana.plugins`. +2. Whitelists the unsigned plugin via + `grafana.grafana.ini.plugins.allow_loading_unsigned_plugins`. +3. Provisions the data source as `type: arpnetworking-kairosdb-datasource` + with `uid: examon-kairosdb`, pointing at `http://examon-kairosdb:8083` + in `access: proxy` mode. +4. Auto-loads the bundled "Examon Test - Random Sensor" dashboard via the + Grafana dashboard sidecar (ConfigMaps labeled `grafana_dashboard=1`). + +After `helm upgrade`, open Grafana and you should already see the data +source listed (test connection returns OK) and the dashboard available +under Dashboards. The legacy `grafana-kairosdb-datasource` plugin is +AngularJS-only and is **not** compatible with Grafana 11+; do not +provision it manually. + +The relevant Helm values are documented in +[configuration.md](configuration.md): `grafana.plugins`, +`grafana.datasources`, `grafana.sidecar.dashboards.enabled`, and the +top-level `bundledDashboards.enabled` toggle. ### Backups @@ -330,8 +347,45 @@ backup storage (S3, GCS, Azure Blob, Ceph/S3) in the K8ssandraCluster CR. ### Monitoring -Install Prometheus and ServiceMonitors for all components to monitor the -ExaMon infrastructure itself. +Production deployments should run a cluster-side Prometheus operator +(typically [`kube-prometheus-stack`](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack)) +both to monitor ExaMon itself and to scrape Cassandra. + +**Cassandra metrics (K8ssandra-native):** the umbrella chart exposes +`cassandra.telemetry.prometheus.*`, which is wired straight into the +K8ssandra `CassandraDatacenter` CR. When enabled, K8ssandra creates a +`ServiceMonitor` for the Cassandra metric endpoint, with no extra manifest +to maintain. 
Requires the `ServiceMonitor` CRD (shipped by +`kube-prometheus-stack`): + +```yaml +# values-production.yaml +cassandra: + telemetry: + prometheus: + enabled: true + commonLabels: + # Match the kube-prometheus-stack default ServiceMonitor selector + release: kube-prometheus-stack +``` + +Or via `--set` at deploy time: + +```bash +helm upgrade examon ./deploy/helm/examon \ + -f ./deploy/helm/examon/values-production.yaml \ + --set cassandra.telemetry.prometheus.enabled=true \ + --set cassandra.telemetry.prometheus.commonLabels.release=kube-prometheus-stack \ + -n examon +``` + +If your Prometheus operator uses a different `ServiceMonitor` selector, +adjust `commonLabels` accordingly. See +[configuration.md](configuration.md) for the full key reference. + +**Other ExaMon components** (KairosDB, examon-server, mqtt2kairosdb, +Mosquitto) do not yet ship their own `ServiceMonitor` manifests; if you +need them, define your own pointing at the existing Services for now. ## Scaling @@ -343,7 +397,7 @@ kubectl scale deployment examon-kairosdb --replicas=3 -n examon kubectl scale deployment examon-examon-server --replicas=3 -n examon ``` -Cassandra scaling is managed via the K8ssandraCluster CR — update the +Cassandra scaling is managed via the K8ssandraCluster CR: update the datacenter `size` in `values-production.yaml` and run `helm upgrade`. ## Platform-Specific Notes diff --git a/docs/Deployment/kubernetes-staging.md b/docs/Deployment/kubernetes-staging.md index b4b829f..baba7c0 100644 --- a/docs/Deployment/kubernetes-staging.md +++ b/docs/Deployment/kubernetes-staging.md @@ -155,7 +155,7 @@ kubectl exec -it examon-cassandra-dc1-default-sts-0 -c cassandra -n examon \ All user-facing services are exposed directly on the host via the K3d load balancer and `NodePort` services. 
External clients connect to the VM's -IP/hostname — no Kubernetes knowledge required: +IP/hostname; no Kubernetes knowledge required: | Service | Address | Protocol | Users | |---------|---------|----------|-------| diff --git a/docs/Deployment/kubernetes.md b/docs/Deployment/kubernetes.md index 35e7af0..a54e4a1 100644 --- a/docs/Deployment/kubernetes.md +++ b/docs/Deployment/kubernetes.md @@ -37,8 +37,8 @@ For production (GitHub Container Registry): ### Iterative Development (Single Service) During development, you typically modify and rebuild a single service -rather than all images. The recommended inner-loop workflow — build, push -to the local registry, restart the pod — is documented in detail in the +rather than all images. The recommended inner-loop workflow (build, push +to the local registry, restart the pod) is documented in detail in the [Local Development Workflow](kubernetes-local.md#local-development-workflow) section. That section also covers K3d image caching behavior, the `pullPolicy: Always` setting, and alternatives like `k3d image import` @@ -151,7 +151,7 @@ kubectl get secret examon-cassandra-superuser -n examon \ ### Automatic credential injection Both services that connect to Cassandra read credentials **automatically** -from this secret — no manual `--set` flags are needed: +from this secret. No manual `--set` flags are needed: | Service | Mechanism | Values key | |---------|-----------|------------| @@ -171,13 +171,13 @@ examon-server: The `server.py` application checks environment variables `CASSANDRA_USER` and `CASSANDRA_PASSWORD` first, falling back to `server.conf` values if the env vars are not set. This means a simple `helm install` (or upgrade) is -sufficient — the pod will authenticate to Cassandra automatically on +sufficient: the pod will authenticate to Cassandra automatically on startup once the secret exists. !!! 
note "Bootstrap ordering" On a fresh `helm install`, `examon-server` and `kairosdb` may restart a few times while Cassandra initializes and the superuser secret is - created. This is expected — Kubernetes will restart them automatically + created. This is expected: Kubernetes will restart them automatically and they will connect once Cassandra is ready. ### Custom secret name @@ -227,7 +227,7 @@ Create a file named `values-.secret.yaml` (e.g. be committed: ```yaml -# values-local.secret.yaml — DO NOT COMMIT +# values-local.secret.yaml: DO NOT COMMIT grafana: adminPassword: "my-grafana-password" ``` @@ -245,12 +245,12 @@ helm upgrade examon ./deploy/helm/examon \ For production, use a secrets management solution such as: -- [External Secrets Operator](https://external-secrets.io/) — syncs +- [External Secrets Operator](https://external-secrets.io/): syncs secrets from AWS Secrets Manager, Vault, GCP Secret Manager, etc. -- [Sealed Secrets](https://sealed-secrets.netlify.app/) — encrypted +- [Sealed Secrets](https://sealed-secrets.netlify.app/): encrypted secrets that are safe to commit to git - [SOPS](https://github.com/getsops/sops) with - [helm-secrets](https://github.com/jkroepke/helm-secrets) — encrypts + [helm-secrets](https://github.com/jkroepke/helm-secrets): encrypts values files in-place ### Secret fields reference @@ -286,7 +286,7 @@ credentials. For GHCR, the password is a Personal Access Token (PAT) with ### Step 2: Reference the Secret in Values -**Option A — Global (recommended):** Set once, applies to all subcharts. +**Option A: Global (recommended).** Set once, applies to all subcharts. ```yaml global: @@ -304,7 +304,7 @@ helm upgrade examon ./deploy/helm/examon \ -n examon ``` -**Option B — Per-subchart:** Override for a specific component only. +**Option B: Per-subchart.** Override for a specific component only. ```yaml kairosdb: @@ -332,7 +332,7 @@ that subchart. !!! 
note "Local Development" For local K3d with a local registry (`examon-registry:5111`), image pull - secrets are not needed — K3d connects to the local registry without + secrets are not needed: K3d connects to the local registry without authentication. ## Service Name Reference @@ -412,6 +412,144 @@ This works on OpenStack (with Octavia), RKE2, cloud providers, and bare metal (with MetalLB). See the [Production guide](kubernetes-production.md) for platform-specific details. +## Grafana Dashboards + +### Bundled dashboard + +The chart auto-provisions a KairosDB datasource and bundles the test +dashboard `Examon Test - Random Sensor.json` (Grafana 10+/11+ compatible). +After install it appears automatically in Grafana: no manual import is +required to verify the data pipeline. + +The legacy v0.4.0 version of the same dashboard, kept for users of the +docker-compose stack, lives under `dashboards/legacy/` and is **not** +loaded by the chart. + +### How dashboard auto-provisioning works + +The chart enables the [Grafana dashboard sidecar](https://github.com/grafana/helm-charts/tree/main/charts/grafana#sidecar-for-dashboards) +with the following defaults (see [`deploy/helm/examon/values.yaml`](../../deploy/helm/examon/values.yaml)): + +```yaml +grafana: + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + searchNamespace: ALL +``` + +The sidecar watches **any namespace** for `ConfigMap`s carrying the label +`grafana_dashboard=1`. Every JSON entry under the ConfigMap's `data:` +field is loaded into Grafana as a dashboard, hot-reloaded within ~30s, +and removed when the ConfigMap is deleted. The umbrella chart uses the +same mechanism for its bundled dashboard via +[`templates/grafana-dashboards.yaml`](../../deploy/helm/examon/templates/grafana-dashboards.yaml), +which globs `dashboards/*.json` and emits one ConfigMap per file. 
+ +Two equivalent ways to ship custom dashboards are described next: + +- **Strategy A (recommended):** create labeled ConfigMaps from outside the + chart. No chart edits, no `helm upgrade` needed: works with plain + `kubectl`, Kustomize, ArgoCD, Flux, etc. +- **Strategy B:** drop extra `*.json` files into the chart's + `dashboards/` folder. Best for forks/maintainers who want dashboards + baked into the chart artifact itself. + +### Adding your own dashboards (Strategy A, recommended) + +**Single dashboard.** Create a ConfigMap from a JSON file and label it: + +```bash +kubectl -n examon create configmap my-dashboard \ + --from-file=my-dashboard.json=./my-dashboard.json +kubectl -n examon label configmap my-dashboard grafana_dashboard=1 +``` + +Within ~30s the dashboard appears in Grafana under **Dashboards**. To +remove it, delete the ConfigMap. + +**A whole directory of dashboards.** One ConfigMap per file: + +```bash +for f in ./dashboards/*.json; do + name="examon-dash-$(basename "$f" .json | tr '[:upper:] _' '[:lower:]--')" + kubectl -n examon create configmap "$name" --from-file="$(basename "$f")=$f" + kubectl -n examon label configmap "$name" grafana_dashboard=1 +done +``` + +Keep one ConfigMap per dashboard (rather than packing many JSONs into a +single ConfigMap) so you stay well below the 1 MiB ConfigMap size limit +and so adding/removing a single dashboard does not invalidate the rest. + +**GitOps / YAML manifest variant.** Useful with ArgoCD, Flux, or just +plain `kubectl apply -f`: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-dashboard + namespace: examon + labels: + grafana_dashboard: "1" +data: + my-dashboard.json: |- + { + "title": "My Dashboard", + "panels": [ /* ... */ ] + } +``` + +The ConfigMap does not have to live in the `examon` namespace: the +sidecar runs with `searchNamespace: ALL`, so any namespace works. Pick +the one that fits your RBAC / GitOps layout. 
+ +### Bundling extra dashboards into the chart (Strategy B) + +If you maintain a fork of the umbrella chart and want dashboards shipped +inside the chart artifact: + +1. Drop additional `*.json` files into + [`deploy/helm/examon/dashboards/`](../../deploy/helm/examon/dashboards/). +2. Run `helm upgrade examon ./deploy/helm/examon -n examon`. + +The existing template iterates every `*.json` in that folder and emits +one labeled ConfigMap per file automatically: no template changes are +required. Set `bundledDashboards.enabled=false` to skip them at install +time without removing the files. + +### Tips + +- **Datasource references.** Every panel and the dashboard root must + point at the chart-provisioned KairosDB datasource. Use: + + ```json + { "type": "arpnetworking-kairosdb-datasource", "uid": "examon-kairosdb" } + ``` + + Dashboards exported from Grafana 7.x (v0.4.0 docker-compose stack) + still reference the legacy `grafana-kairosdb-datasource` plugin and + must be rewritten before import. See the `jq` snippet in + [upgrading.md](upgrading.md#step-4-restore-data). +- **ConfigMap size limit.** Kubernetes caps each ConfigMap at 1 MiB. One + ConfigMap per dashboard keeps you safely under it. +- **Grafana folder grouping (optional).** The Grafana sidecar can place + dashboards into named Grafana UI folders via an annotation on the + ConfigMap (default annotation name: `grafana_folder`). This is not + enabled by chart default; see the + [Grafana sidecar docs](https://github.com/grafana/helm-charts/tree/main/charts/grafana#sidecar-for-dashboards) + if you want to use it. +- **Troubleshooting.** If a dashboard does not appear, give the sidecar + ~30s and then check: + + ```bash + kubectl get configmap -n examon -l grafana_dashboard=1 + kubectl logs -l app.kubernetes.io/name=grafana -c grafana-sc-dashboard -n examon + ``` + ## Managing Plugins In the Kubernetes deployment, plugins run as separate Deployments. 
To enable/disable: diff --git a/docs/Deployment/troubleshooting.md b/docs/Deployment/troubleshooting.md index 507dd64..8914eab 100644 --- a/docs/Deployment/troubleshooting.md +++ b/docs/Deployment/troubleshooting.md @@ -198,7 +198,7 @@ passed via the `JAVA_OPTS` environment variable from the Kubernetes Deployment spec. **Solution:** Modified `kairosdb-env.sh` to only set defaults when `JAVA_OPTS` -is not already defined (see fix in issue #2 above — the `if [ -z "$JAVA_OPTS" ]` +is not already defined (see fix in issue #2 above; the `if [ -z "$JAVA_OPTS" ]` guard serves both purposes). **Files changed:** `deploy/docker/kairosdb/kairosdb-env.sh` @@ -323,7 +323,7 @@ examon-server: ``` If the error persists on a fresh install, it's likely a bootstrap timing -issue — `examon-server` starts before the K8ssandra secret is created. +issue: `examon-server` starts before the K8ssandra secret is created. Wait for Kubernetes to restart the pod automatically (it will succeed once the secret exists). @@ -461,7 +461,7 @@ the new pod still runs the old image. **Root cause:** K3d nodes run containerd, which caches images independently from the host Docker daemon. With `imagePullPolicy: IfNotPresent` (the subchart default), containerd resolves the tag from its local cache and -never re-pulls from the registry — even if the registry has a newer image +never re-pulls from the registry, even if the registry has a newer image with the same tag. **Prevention:** `values-local.yaml` now sets `pullPolicy: Always` for all @@ -553,7 +553,7 @@ no endpoints available for service "k8ssandra-operator-webhook-service" **Root cause:** The `K8ssandraCluster` CR was submitted before the operator's webhook endpoint was ready. This happens when the operator and the CR are -deployed in the same Helm release — Helm cannot guarantee ordering between +deployed in the same Helm release: Helm cannot guarantee ordering between a subchart's Deployment and the parent chart's custom resource. 
**Solution:** @@ -588,7 +588,7 @@ examon-k8ssandra-operator-webhook-service.examon.svc, not k8ssandra-operator-webhook-service.examon.svc ``` -**Root cause:** Two K8ssandra operator installations exist — one standalone +**Root cause:** Two K8ssandra operator installations exist: one standalone and one from a previous umbrella chart dependency. They create webhook services with different names but the same CRD validators. @@ -661,6 +661,73 @@ start before Cassandra is ready and fail their initial connection attempts. --- +### 18. Grafana: KairosDB Data Source Fails or Dashboards Show "No Data" + +**Symptom:** On a freshly installed v0.5.0 chart, the Grafana KairosDB +data source either does not appear, fails the **Test** action, or panels +show "No data" / `Datasource not found` errors. The browser console +typically reports a plugin loading or AngularJS-related failure. + +**Root cause:** The original `grafana-kairosdb-datasource` plugin is +AngularJS-based and is no longer compatible with Grafana 11+ (AngularJS +support has been removed). The v0.5.0 chart switches to the React-based +[ArpNetworking +fork](https://github.com/ArpNetworking/kairosdb-datasource), which is +unsigned and must be explicitly whitelisted in the Grafana config. The +auto-provisioned data source must also reference the plugin by its new +`type`. This is tracked as [Issue #25](https://github.com/ExamonHPC/examon/issues/25). + +**Resolution.** Re-run `helm upgrade` with the chart's defaults; they +already encode all three pieces: + +1. Plugin install in `grafana.plugins`: + ```yaml + grafana: + plugins: + - https://github.com/ArpNetworking/kairosdb-datasource/releases/download/v1.4.0/arpnetworking-kairosdb-datasource-1.4.0.zip;arpnetworking-kairosdb-datasource + ``` +2. Unsigned-plugin allowlist in `grafana.grafana.ini`: + ```yaml + grafana: + grafana.ini: + plugins: + allow_loading_unsigned_plugins: arpnetworking-kairosdb-datasource + ``` +3. 
Data source provisioning in `grafana.datasources`: + ```yaml + grafana: + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: kairosdb + type: arpnetworking-kairosdb-datasource + uid: examon-kairosdb + url: http://examon-kairosdb:8083 + access: proxy + isDefault: true + ``` + +**Verification:** + +```bash +kubectl exec -n examon deploy/examon-grafana -c grafana -- \ + curl -s -u admin: http://localhost:3000/api/datasources \ + | jq '.[] | {name, type, uid}' +``` + +Expect `type: arpnetworking-kairosdb-datasource` and `uid: examon-kairosdb`. +If you see `grafana-kairosdb-datasource` instead, you are still on the +legacy plugin: re-run `helm upgrade` against the v0.5.0 chart and +restart the Grafana pod so the plugin install init container re-runs. + +**Note for legacy Docker Compose v0.4.0:** the Docker Compose stack still +runs Grafana 7.3.10 with the AngularJS plugin and keeps working. The +matching v0.4.0 snapshot of the test dashboard is preserved under +`dashboards/legacy/`. 
+ +--- + ## General Debugging Commands ```bash diff --git a/docs/Deployment/upgrading.md b/docs/Deployment/upgrading.md index 8a5dd6d..c501e20 100644 --- a/docs/Deployment/upgrading.md +++ b/docs/Deployment/upgrading.md @@ -13,6 +13,8 @@ This guide covers migrating from the Docker Compose deployment (v0.4.0) to the K | Cassandra | Docker container (3.0.19) | K8ssandra operator (4.0+) | | KairosDB | Docker container (1.2.2) | Deployment (1.3.0) | | Grafana | Docker container (7.3.10) | Helm subchart (latest) | +| Grafana KairosDB plugin | `grafana-kairosdb-datasource` (AngularJS) | `arpnetworking-kairosdb-datasource` (React fork, Grafana 11+ compatible) | +| Grafana dashboards | Manual import via UI/API | Auto-provisioned via Grafana sidecar (ConfigMaps labeled `grafana_dashboard=1`) | | Configuration | Environment variables + sed | ConfigMaps + Secrets | | Scaling | Manual (add more containers) | `kubectl scale` / HPA | | HA | Not supported | Built-in (anti-affinity, replicas) | @@ -61,7 +63,44 @@ Follow one of the deployment guides: For Cassandra data migration from 3.0.19 to 4.0+, consult the [Apache Cassandra upgrade documentation](https://cassandra.apache.org/doc/latest/cassandra/operating/upgrading.html). -Re-import Grafana dashboards through the Grafana UI or API. +**Grafana dashboards.** Behaviour differs between bundled and +user-created dashboards: + +- **Bundled "Examon Test - Random Sensor" dashboard:** no action + required. The Helm chart ships the Grafana 10+/11+ compatible version + inside the chart and auto-provisions it via the Grafana dashboard + sidecar. +- **User-created dashboards exported from v0.4.0:** these still need to + be imported manually, but the v0.5.0 Grafana uses a different KairosDB + data source plugin + (`arpnetworking-kairosdb-datasource`, the React fork required for + Grafana 11+). Every panel and the dashboard root reference the data + source by `type` and `uid`, so the exported JSON must be rewritten + before import. 
The chart provisions the data source as: + + ```json + { "type": "arpnetworking-kairosdb-datasource", "uid": "examon-kairosdb" } + ``` + + Quick rewrite with `jq` (point at each exported `*.json`): + + ```bash + jq ' + walk( + if type == "object" and .type == "grafana-kairosdb-datasource" + then .type = "arpnetworking-kairosdb-datasource" + | .uid = "examon-kairosdb" + else . end) + ' dashboard.json > dashboard-v0.5.0.json + ``` + + Then import the rewritten JSON via the Grafana UI or API. For a + reproducible, GitOps-friendly setup, wrap it in a `grafana_dashboard=1` + labeled ConfigMap and let the Grafana sidecar load it automatically. + The full recipe (single dashboard, directory of dashboards, YAML + manifest variant) lives in + [Grafana Dashboards](kubernetes.md#grafana-dashboards) in the K8s + guide; this guide only covers the v0.4.0 → v0.5.0 JSON rewrite. ### Step 5: Update Publishers diff --git a/scripts/k8s-smoke-test.sh b/scripts/k8s-smoke-test.sh index f9968a7..d7a8a72 100755 --- a/scripts/k8s-smoke-test.sh +++ b/scripts/k8s-smoke-test.sh @@ -67,6 +67,38 @@ else fail "Grafana did not return 302 redirect" fi +# --- Test 2b: Grafana KairosDB datasource provisioned with ArpNetworking type --- +echo "--- Test: Grafana KairosDB datasource provisioning ---" +GF_ADMIN_PASS="${GF_ADMIN_PASS:-admin}" +DS_JSON=$(curl -s -u "admin:${GF_ADMIN_PASS}" "http://localhost:${GF_PORT}/api/datasources/name/kairosdb" 2>/dev/null || echo "{}") +DS_TYPE=$(echo "$DS_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('type',''))" 2>/dev/null || echo "") +DS_UID=$(echo "$DS_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('uid',''))" 2>/dev/null || echo "") +if [[ "$DS_TYPE" == "arpnetworking-kairosdb-datasource" && "$DS_UID" == "examon-kairosdb" ]]; then + pass "KairosDB datasource is provisioned (type=$DS_TYPE, uid=$DS_UID)" +else + fail "KairosDB datasource not provisioned correctly (type=$DS_TYPE, uid=$DS_UID)" +fi + +# --- Test 2c: 
Bundled test dashboard auto-provisioned via sidecar --- +echo "--- Test: Bundled dashboard auto-provisioning ---" +if kubectl get configmap -n "$NAMESPACE" -l grafana_dashboard=1 -o name 2>/dev/null | grep -q "examon-dashboard"; then + pass "Dashboard ConfigMap labeled grafana_dashboard=1 is present" +else + fail "No dashboard ConfigMap with label grafana_dashboard=1 found" +fi +DASH_FOUND="" +for _ in $(seq 1 20); do + DASH_FOUND=$(curl -s -u "admin:${GF_ADMIN_PASS}" "http://localhost:${GF_PORT}/api/search?query=Random%20Sensor" 2>/dev/null \ + | python3 -c "import sys,json; r=json.load(sys.stdin); print(next((d['title'] for d in r if 'Random Sensor' in d.get('title','')), ''))" 2>/dev/null || echo "") + [[ -n "$DASH_FOUND" ]] && break + sleep 3 +done +if [[ -n "$DASH_FOUND" ]]; then + pass "Dashboard '$DASH_FOUND' is loaded in Grafana" +else + fail "Random Sensor dashboard not found in Grafana via API" +fi + # --- Test 3: ExaMon API server --- echo "--- Test: ExaMon API server ---" STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:${ES_PORT}/" 2>/dev/null || echo "000")