Monit - Services automatisch überwachen und neu starten | Blog

Monit ist ein leichtgewichtiges Tool zur Überwachung und automatischen Wiederherstellung von Services. Es startet abgestürzte Prozesse neu und warnt bei Ressourcen-Problemen.

Installation

# Debian/Ubuntu
apt install monit

# RHEL/CentOS
dnf install monit

# Service aktivieren
systemctl enable monit
systemctl start monit

Grundkonfiguration

Haupt-Konfiguration

# /etc/monit/monitrc (Debian)
# /etc/monitrc (RHEL)

# Prüf-Intervall (Sekunden)
set daemon 60
    with start delay 120

# Log-Datei
set log /var/log/monit.log

# ID-Datei
set idfile /var/lib/monit/id

# Status-Datei
set statefile /var/lib/monit/state

# Event-Queue
set eventqueue
    basedir /var/lib/monit/events
    slots 100

# Mail-Konfiguration
set mailserver smtp.example.de port 587
    username "monit@example.de"
    password "secret"
    using tls

# Alert-Empfänger
set alert admin@example.de

# Web-Interface
set httpd port 2812
    use address localhost
    allow localhost
    allow admin:secret

Service-Konfigurationen

# /etc/monit/conf.d/ (Debian)
# /etc/monit.d/ (RHEL)

Prozess-Überwachung

Nginx

# /etc/monit/conf.d/nginx

check process nginx with pidfile /var/run/nginx.pid
    start program = "/bin/systemctl start nginx"
    stop program = "/bin/systemctl stop nginx"
    if failed host 127.0.0.1 port 80 protocol http
        and request "/" with timeout 10 seconds
        then restart
    if cpu > 80% for 5 cycles then alert
    if totalmem > 500 MB for 5 cycles then restart
    if 3 restarts within 5 cycles then timeout

Apache

# /etc/monit/conf.d/apache

check process apache with pidfile /var/run/apache2/apache2.pid
    start program = "/bin/systemctl start apache2"
    stop program = "/bin/systemctl stop apache2"
    if failed host 127.0.0.1 port 80 protocol http then restart
    if cpu > 90% for 3 cycles then restart
    if totalmem > 1024 MB for 3 cycles then restart
    if children > 250 then restart
    if 5 restarts within 5 cycles then timeout

MySQL

# /etc/monit/conf.d/mysql

check process mysql with pidfile /var/run/mysqld/mysqld.pid
    start program = "/bin/systemctl start mysql"
    stop program = "/bin/systemctl stop mysql"
    if failed host 127.0.0.1 port 3306 protocol mysql
        username "monit" password "secret"
        then restart
    if cpu > 80% for 5 cycles then alert
    if totalmem > 2048 MB for 5 cycles then alert
    if 3 restarts within 5 cycles then timeout

PostgreSQL

# /etc/monit/conf.d/postgresql

check process postgresql with pidfile /var/run/postgresql/14-main.pid
    start program = "/bin/systemctl start postgresql"
    stop program = "/bin/systemctl stop postgresql"
    if failed host 127.0.0.1 port 5432 protocol pgsql then restart
    if cpu > 80% for 5 cycles then alert
    if 3 restarts within 5 cycles then timeout

SSH

# /etc/monit/conf.d/sshd

check process sshd with pidfile /var/run/sshd.pid
    start program = "/bin/systemctl start sshd"
    stop program = "/bin/systemctl stop sshd"
    if failed port 22 protocol ssh then restart
    if 5 restarts within 5 cycles then timeout

Redis

# /etc/monit/conf.d/redis

check process redis with pidfile /var/run/redis/redis-server.pid
    start program = "/bin/systemctl start redis-server"
    stop program = "/bin/systemctl stop redis-server"
    if failed host 127.0.0.1 port 6379 then restart
    if cpu > 90% for 3 cycles then restart
    if totalmem > 2048 MB for 3 cycles then restart
    if 3 restarts within 5 cycles then timeout

Datei-Überwachung

Log-Dateien

# /etc/monit/conf.d/logs

check file syslog with path /var/log/syslog
    if size > 500 MB then exec "/usr/sbin/logrotate -f /etc/logrotate.d/rsyslog"

check file auth_log with path /var/log/auth.log
    if match "Failed password" then alert

Konfigurationsdateien

check file nginx_conf with path /etc/nginx/nginx.conf
    if changed checksum then exec "/bin/systemctl reload nginx"
    if changed timestamp then alert

Verzeichnisse

check directory www with path /var/www/html
    if changed timestamp then alert
    if failed permission 755 then alert

System-Überwachung

Ressourcen

# /etc/monit/conf.d/system

check system $HOST
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 for 10 cycles then alert
    if cpu usage > 95% for 10 cycles then alert
    if memory usage > 90% for 5 cycles then alert
    if swap usage > 25% then alert

Festplatten

check filesystem rootfs with path /
    if space usage > 80% then alert
    if space usage > 90% then exec "/usr/local/bin/cleanup.sh"
    if inode usage > 90% then alert

check filesystem data with path /data
    if space usage > 85% then alert

Netzwerk

check network eth0 with interface eth0
    if failed link then alert
    if changed link capacity then alert
    if saturation > 80% then alert
    if upload > 500 MB/s then alert
    if download > 1 GB/s then alert

Host-Überwachung

Externe Server

check host google with address google.com
    if failed ping count 3 with timeout 5 seconds then alert
    if failed port 443 protocol https then alert

check host database_server with address 192.168.1.10
    if failed ping then alert
    if failed port 3306 protocol mysql then alert

Programme überwachen

Cron-Jobs

check program backup with path "/usr/local/bin/backup.sh"
    with timeout 3600 seconds
    every "0 2 * * *"
    if status != 0 then alert

Benutzerdefinierte Checks

check program diskcheck with path "/usr/local/bin/check_disk.sh"
    if status != 0 then alert
    if status = 1 then exec "/usr/local/bin/cleanup.sh"

Web-Interface

Konfiguration

set httpd port 2812
    use address 0.0.0.0
    allow 192.168.1.0/24
    allow admin:secret

# Mit SSL
set httpd port 2812
    ssl enable
    pemfile /etc/monit/monit.pem
    allow admin:secret

Zugriff

# http://server:2812
# Login: admin / secret

Kommandozeile

Status

# Zusammenfassung
monit summary

# Details
monit status

# Einzelner Service
monit status nginx

Steuerung

# Service starten
monit start nginx

# Service stoppen
monit stop nginx

# Service neu starten
monit restart nginx

# Überwachung deaktivieren
monit unmonitor nginx

# Überwachung aktivieren
monit monitor nginx

# Konfiguration neu einlesen
monit reload

Konfiguration prüfen

# Syntax prüfen
monit -t

# Syntax prüfen mit ausführlicher Ausgabe
monit -t -v

Alerting

Email

set alert admin@example.de
set alert oncall@example.de only on { timeout, nonexist }

# Pro Service
check process nginx...
    alert webteam@example.de

Benutzerdefinierte Aktionen

check process nginx...
    if failed port 80 then exec "/usr/local/bin/notify.sh"

Slack-Integration

#!/bin/bash
# /usr/local/bin/slack-notify.sh

curl -X POST -H 'Content-type: application/json' \
    --data "{\"text\":\"Monit Alert: $MONIT_SERVICE - $MONIT_DESCRIPTION\"}" \
    https://hooks.slack.com/services/xxx/yyy/zzz

check process nginx...
    if failed port 80 then exec "/usr/local/bin/slack-notify.sh"

Zusammenfassung

Befehl	Funktion
monit summary	Übersicht
monit status	Details
monit start SERVICE	Starten
monit stop SERVICE	Stoppen
monit restart SERVICE	Neustart
monit reload	Config neu laden
monit -t	Syntax prüfen

Check-Typ	Syntax
Prozess	check process NAME with pidfile PATH
Datei	check file NAME with path PATH
Host	check host NAME with address HOST
Filesystem	check filesystem NAME with path PATH
System	check system $HOST
Programm	check program NAME with path PATH

Aktion	Beschreibung
alert	Email senden
restart	Service neu starten
start	Service starten
stop	Service stoppen
exec	Programm ausführen
timeout	Überwachung des Service beenden (bis zur manuellen Reaktivierung mit monit monitor)

Datei	Beschreibung
/etc/monit/monitrc	Hauptkonfiguration
/etc/monit/conf.d/	Service-Configs
/var/log/monit.log	Log-Datei

Fazit

Monit ist ideal für automatische Service-Recovery. Die Konfiguration ist einfach und lesbar. Das Web-Interface bietet schnellen Überblick. Für komplexere Setups sind Prometheus oder Zabbix besser geeignet. Monit ergänzt diese als lokaler "Watchdog" hervorragend.