{ "general-alerting": [ { "name": "nebula-ci", "interval": "1m", "rules": [ { "expr": "", "for": "5m", "labels": { "issue": "pending_builds", "service": "ci" }, "annotations": { "__dashboardUid__": "Ha2VyrtGz", "__panelId__": "8", "description": "monitor the number of pending CI builds and alert if the number has been > 3 for the duration of 5 minutes", "summary": "more than 3 CI builds have been in a pending state for (at least) the duration of 5 minutes" }, "grafana_alert": { "id": 2, "orgId": 2, "title": "ci - pending builds", "condition": "E", "data": [ { "refId": "A", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_running_builds", "format": "time_series", "interval": "", "intervalFactor": 1, "intervalMs": 15000, "legendFormat": "running builds", "maxDataPoints": 43200, "refId": "A" } }, { "refId": "C", "queryType": "", "relativeTimeRange": { "from": 43200, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "editorMode": "code", "exemplar": true, "expr": "drone_running_jobs", "hide": false, "interval": "", "intervalFactor": 1, "intervalMs": 15000, "legendFormat": "running jobs", "maxDataPoints": 43200, "range": true, "refId": "C" } }, { "refId": "B", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "expr": "drone_pending_builds", "format": "time_series", "interval": "", "intervalMs": 15000, "legendFormat": "pending builds", "maxDataPoints": 43200, "refId": "B" } }, { "refId": "D", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_pending_jobs", "hide": false, "interval": "",
"intervalMs": 15000, "legendFormat": "pending jobs", "maxDataPoints": 43200, "refId": "D" } }, { "refId": "E", "queryType": "", "relativeTimeRange": { "from": 0, "to": 0 }, "datasourceUid": "-100", "model": { "conditions": [ { "evaluator": { "params": [ 3 ], "type": "gt" }, "operator": { "type": "when" }, "query": { "params": [ "B" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" } ], "datasource": { "type": "__expr__", "uid": "-100" }, "hide": false, "intervalMs": 1000, "maxDataPoints": 43200, "refId": "E", "type": "classic_conditions" } } ], "updated": "2022-08-16T11:01:40Z", "intervalSeconds": 60, "version": 8, "uid": "r6IWmzg4z", "namespace_uid": "cxtgDx37z", "namespace_id": 11, "rule_group": "nebula-ci", "no_data_state": "NoData", "exec_err_state": "Alerting" } }, { "expr": "", "for": "5m", "labels": { "issue": "pending_jobs", "service": "ci" }, "annotations": { "__dashboardUid__": "Ha2VyrtGz", "__panelId__": "8", "description": "monitor the number of pending CI jobs and alert if the number has been > 3 for the duration of 5 minutes", "summary": "more than 3 CI jobs have been in a pending state for (at least) the duration of 5 minutes" }, "grafana_alert": { "id": 3, "orgId": 2, "title": "ci - pending jobs", "condition": "E", "data": [ { "refId": "A", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_running_builds", "format": "time_series", "interval": "", "intervalFactor": 1, "intervalMs": 15000, "legendFormat": "running builds", "maxDataPoints": 43200, "refId": "A" } }, { "refId": "C", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_running_jobs", "hide": false, "interval": "", "intervalFactor": 1, "intervalMs": 15000,
"legendFormat": "running jobs", "maxDataPoints": 43200, "refId": "C" } }, { "refId": "B", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "expr": "drone_pending_builds", "format": "time_series", "interval": "", "intervalMs": 15000, "legendFormat": "pending builds", "maxDataPoints": 43200, "refId": "B" } }, { "refId": "D", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_pending_jobs", "hide": false, "interval": "", "intervalMs": 15000, "legendFormat": "pending jobs", "maxDataPoints": 43200, "refId": "D" } }, { "refId": "E", "queryType": "", "relativeTimeRange": { "from": 0, "to": 0 }, "datasourceUid": "-100", "model": { "conditions": [ { "evaluator": { "params": [ 3 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "D" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" } ], "datasource": { "type": "__expr__", "uid": "-100" }, "hide": false, "intervalMs": 1000, "maxDataPoints": 43200, "refId": "E", "type": "classic_conditions" } } ], "updated": "2022-07-14T12:34:50Z", "intervalSeconds": 60, "version": 3, "uid": "20B7ZzRVz", "namespace_uid": "cxtgDx37z", "namespace_id": 11, "rule_group": "nebula-ci", "no_data_state": "NoData", "exec_err_state": "Alerting" } }, { "expr": "", "for": "5m", "labels": { "alertname": "ci - missing build count data", "issue": "missing_build_count_data", "service": "ci" }, "annotations": { "__dashboardUid__": "Ha2VyrtGz", "__panelId__": "8", "description": "we are missing build count data, which likely means that Prometheus wasn't able to scrape CI (likely because CI is down).", "summary": "Prometheus wasn't able to scrape fresh build count data from CI."
}, "grafana_alert": { "id": 4, "orgId": 2, "title": "ci - missing build count data", "condition": "F", "data": [ { "refId": "A", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_running_builds", "format": "time_series", "interval": "", "intervalFactor": 1, "intervalMs": 15000, "legendFormat": "running builds", "maxDataPoints": 43200, "refId": "A" } }, { "refId": "B", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_running_jobs", "hide": false, "interval": "", "intervalFactor": 1, "intervalMs": 15000, "legendFormat": "running jobs", "maxDataPoints": 43200, "refId": "B" } }, { "refId": "C", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "expr": "drone_pending_builds", "format": "time_series", "interval": "", "intervalMs": 15000, "legendFormat": "pending builds", "maxDataPoints": 43200, "refId": "C" } }, { "refId": "D", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "exemplar": true, "expr": "drone_pending_jobs", "hide": false, "interval": "", "intervalMs": 15000, "legendFormat": "pending jobs", "maxDataPoints": 43200, "refId": "D" } }, { "refId": "E", "queryType": "", "relativeTimeRange": { "from": 86400, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "editorMode": "code", "exemplar": false, "expr": "absent(drone_build_count)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalMs": 1000, "legendFormat": "missing build count data", 
"maxDataPoints": 43200, "range": true, "refId": "E" } }, { "refId": "F", "queryType": "", "relativeTimeRange": { "from": 0, "to": 0 }, "datasourceUid": "-100", "model": { "conditions": [ { "evaluator": { "params": [ 0, 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "E" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" }, { "evaluator": { "params": [ 0, 0 ], "type": "no_value" }, "operator": { "type": "and" }, "query": { "params": [ "A" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" }, { "evaluator": { "params": [ 0, 0 ], "type": "no_value" }, "operator": { "type": "and" }, "query": { "params": [ "B" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" } ], "datasource": { "name": "Expression", "type": "__expr__", "uid": "__expr__" }, "hide": false, "intervalMs": 1000, "maxDataPoints": 43200, "refId": "F", "type": "classic_conditions" } } ], "updated": "2022-08-16T11:02:29Z", "intervalSeconds": 60, "version": 12, "uid": "Tw1EuLiVz", "namespace_uid": "cxtgDx37z", "namespace_id": 11, "rule_group": "nebula-ci", "no_data_state": "NoData", "exec_err_state": "Alerting" } } ] }, { "name": "nebula", "interval": "5m", "rules": [ { "expr": "", "for": "30m", "annotations": { "__dashboardUid__": "Ha2VyrtGz", "__panelId__": "4", "summary": "fs free bytes dropped below threshold (currently 35GB)" }, "grafana_alert": { "id": 1, "orgId": 2, "title": "node_rootfs-low_on_disk_space", "condition": "B", "data": [ { "refId": "A", "queryType": "", "relativeTimeRange": { "from": 3600, "to": 0 }, "datasourceUid": "wntsJ72Gz", "model": { "datasource": { "type": "prometheus", "uid": "wntsJ72Gz" }, "editorMode": "code", "exemplar": false, "expr": "node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / 1000000000", "hide": false, "instant": false, "interval": "", "intervalMs": 15000, "legendFormat": "{{mountpoint}}", "maxDataPoints": 43200, "range": true, "refId": "A" } }, { "refId": "B", "queryType": "", 
"relativeTimeRange": { "from": 0, "to": 0 }, "datasourceUid": "-100", "model": { "conditions": [ { "evaluator": { "params": [ 35, 0 ], "type": "lt" }, "operator": { "type": "when" }, "query": { "params": [ "A" ] }, "reducer": { "params": [], "type": "min" }, "type": "query" }, { "evaluator": { "params": [ 0, 0 ], "type": "no_value" }, "operator": { "type": "or" }, "query": { "params": [ "A" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" } ], "datasource": { "name": "Expression", "type": "__expr__", "uid": "__expr__" }, "hide": false, "intervalMs": 1000, "maxDataPoints": 43200, "refId": "B", "type": "classic_conditions" } } ], "updated": "2022-08-16T11:03:09Z", "intervalSeconds": 300, "version": 4, "uid": "LZoRhwenk", "namespace_uid": "cxtgDx37z", "namespace_id": 11, "rule_group": "nebula", "no_data_state": "NoData", "exec_err_state": "Error" } } ] } ] }