mirror of
https://gitlab.archlinux.org/archlinux/infrastructure.git
synced 2025-01-18 08:06:16 +01:00
It's actually quite difficult to check that a service has failed X times within a time frame, mostly because the service may finish within a few seconds and Zabbix only fetches the state at a single point in time, not the number of failures. To fix this, we go back to the old monitoring, but let systemd restart the service. Signed-off-by: Florian Pritz <bluewind@xinu.at>
23 lines
539 B
Python
23 lines
539 B
Python
#!/usr/bin/python
"""Print the name of every failed systemd unit, one per line.

The script asks the systemd manager (over the system D-Bus) for its unit
list and prints each unit whose state field equals "failed", except for
units named in ``ignore`` and units whose name starts with one of
``ignore_prefixes``. It prints nothing when no such unit exists.
"""

import dbus

# Units that are never reported, even when they are in the failed state.
ignore = {"syncrepo.service", "syncrepo_arch32.service"}

# Unit-name prefixes that are never reported.
# NOTE(review): presumably these are per-user template instances whose
# failures are not actionable for host monitoring -- confirm.
ignore_prefixes = ("user@", "user-runtime-dir@")

bus = dbus.SystemBus()
systemd1 = bus.get_object("org.freedesktop.systemd1", "/org/freedesktop/systemd1")
systemd1_manager = dbus.Interface(
    systemd1, dbus_interface="org.freedesktop.systemd1.Manager"
)

# Every entry returned by ListUnits() is indexed positionally below:
# field 0 is used as the unit name and field 3 as the state that is
# compared against "failed" (the active state, per the systemd D-Bus API).
units = systemd1_manager.ListUnits()

for unit in units:
    name = unit[0]
    if unit[3] != "failed":
        continue
    # str.startswith accepts a tuple, so one call covers every prefix.
    if name in ignore or name.startswith(ignore_prefixes):
        continue
    print(name)