Monit ist ein leichtgewichtiges Tool zur Überwachung und automatischen Wiederherstellung von Services. Es startet abgestürzte Prozesse neu und warnt bei Ressourcen-Problemen.

Installation

# Debian/Ubuntu
apt install monit

# RHEL/CentOS (Paket stammt aus dem EPEL-Repository, ggf. vorher: dnf install epel-release)
dnf install monit

# Service aktivieren
systemctl enable monit
systemctl start monit

Grundkonfiguration

Haupt-Konfiguration

# /etc/monit/monitrc (Debian)
# /etc/monitrc (RHEL)

# Prüf-Intervall (Sekunden)
set daemon 60
    with start delay 120

# Log-Datei
set log /var/log/monit.log

# ID-Datei
set idfile /var/lib/monit/id

# Status-Datei
set statefile /var/lib/monit/state

# Event-Queue
set eventqueue
    basedir /var/lib/monit/events
    slots 100

# Mail-Konfiguration
set mailserver smtp.example.de port 587
    username "monit@example.de"
    password "secret"
    using tls

# Alert-Empfänger
set alert admin@example.de

# Web-Interface
set httpd port 2812
    use address localhost
    allow localhost
    allow admin:secret

Service-Konfigurationen

# /etc/monit/conf.d/ (Debian)
# /etc/monit.d/ (RHEL)

Prozess-Überwachung

Nginx

# /etc/monit/conf.d/nginx

check process nginx with pidfile /var/run/nginx.pid
    start program = "/bin/systemctl start nginx"
    stop program = "/bin/systemctl stop nginx"
    if failed host 127.0.0.1 port 80 protocol http
        and request "/" with timeout 10 seconds
        then restart
    if cpu > 80% for 5 cycles then alert
    if totalmem > 500 MB for 5 cycles then restart
    if 3 restarts within 5 cycles then timeout

Apache

# /etc/monit/conf.d/apache

check process apache with pidfile /var/run/apache2/apache2.pid
    start program = "/bin/systemctl start apache2"
    stop program = "/bin/systemctl stop apache2"
    if failed host 127.0.0.1 port 80 protocol http then restart
    if cpu > 90% for 3 cycles then restart
    if totalmem > 1024 MB for 3 cycles then restart
    if children > 250 then restart
    if 5 restarts within 5 cycles then timeout

MySQL

# /etc/monit/conf.d/mysql

check process mysql with pidfile /var/run/mysqld/mysqld.pid
    start program = "/bin/systemctl start mysql"
    stop program = "/bin/systemctl stop mysql"
    if failed host 127.0.0.1 port 3306 protocol mysql
        username "monit" password "secret"
        then restart
    if cpu > 80% for 5 cycles then alert
    if totalmem > 2048 MB for 5 cycles then alert
    if 3 restarts within 5 cycles then timeout

PostgreSQL

# /etc/monit/conf.d/postgresql

check process postgresql with pidfile /var/run/postgresql/14-main.pid
    start program = "/bin/systemctl start postgresql"
    stop program = "/bin/systemctl stop postgresql"
    if failed host 127.0.0.1 port 5432 protocol pgsql then restart
    if cpu > 80% for 5 cycles then alert
    if 3 restarts within 5 cycles then timeout

SSH

# /etc/monit/conf.d/sshd

check process sshd with pidfile /var/run/sshd.pid
    start program = "/bin/systemctl start sshd"
    stop program = "/bin/systemctl stop sshd"
    if failed port 22 protocol ssh then restart
    if 5 restarts within 5 cycles then timeout

Redis

# /etc/monit/conf.d/redis

check process redis with pidfile /var/run/redis/redis-server.pid
    start program = "/bin/systemctl start redis-server"
    stop program = "/bin/systemctl stop redis-server"
    if failed host 127.0.0.1 port 6379 then restart
    if cpu > 90% for 3 cycles then restart
    if totalmem > 2048 MB for 3 cycles then restart
    if 3 restarts within 5 cycles then timeout

Datei-Überwachung

Log-Dateien

# /etc/monit/conf.d/logs

check file syslog with path /var/log/syslog
    if size > 500 MB then exec "/usr/sbin/logrotate -f /etc/logrotate.d/rsyslog"

check file auth_log with path /var/log/auth.log
    if match "Failed password" then alert

Konfigurationsdateien

check file nginx_conf with path /etc/nginx/nginx.conf
    if changed checksum then exec "/bin/systemctl reload nginx"
    if changed timestamp then alert

Verzeichnisse

check directory www with path /var/www/html
    if changed timestamp then alert
    if permission failed then alert

System-Überwachung

Ressourcen

# /etc/monit/conf.d/system

check system $HOST
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 for 10 cycles then alert
    if cpu usage > 95% for 10 cycles then alert
    if memory usage > 90% for 5 cycles then alert
    if swap usage > 25% then alert

Festplatten

check filesystem rootfs with path /
    if space usage > 80% then alert
    if space usage > 90% then exec "/usr/local/bin/cleanup.sh"
    if inode usage > 90% then alert

check filesystem data with path /data
    if space usage > 85% then alert

Netzwerk

check network eth0 with interface eth0
    if failed link then alert
    if changed link capacity then alert
    if saturation > 80% then alert
    if upload > 500 MB/s then alert
    if download > 1 GB/s then alert

Host-Überwachung

Externe Server

check host google with address google.com
    if failed ping count 3 with timeout 5 seconds then alert
    if failed port 443 protocol https then alert

check host database_server with address 192.168.1.10
    if failed ping then alert
    if failed port 3306 protocol mysql then alert

Programme überwachen

Cron-Jobs

check program backup with path "/usr/local/bin/backup.sh"
    with timeout 3600 seconds
    every "0 2 * * *"
    if status != 0 then alert

Benutzerdefinierte Checks

check program diskcheck with path "/usr/local/bin/check_disk.sh"
    if status != 0 then alert
    if status = 1 then exec "/usr/local/bin/cleanup.sh"

Web-Interface

Konfiguration

set httpd port 2812
    use address 0.0.0.0
    allow 192.168.1.0/24
    allow admin:secret

# Mit SSL
set httpd port 2812
    ssl enable
    pemfile /etc/monit/monit.pem
    allow admin:secret

Zugriff

# http://server:2812
# Login: admin / secret

Kommandozeile

Status

# Zusammenfassung
monit summary

# Details
monit status

# Einzelner Service
monit status nginx

Steuerung

# Service starten
monit start nginx

# Service stoppen
monit stop nginx

# Service neu starten
monit restart nginx

# Überwachung deaktivieren
monit unmonitor nginx

# Überwachung aktivieren
monit monitor nginx

# Monit-Konfiguration neu einlesen
monit reload

Konfiguration prüfen

# Syntax prüfen
monit -t

# Syntax prüfen mit ausführlicher Ausgabe
monit -t -v

Alerting

Email

set alert admin@example.de
set alert oncall@example.de only on { timeout, nonexist }

# Pro Service
check process nginx...
    alert webteam@example.de

Benutzerdefinierte Aktionen

check process nginx...
    if failed port 80 then exec "/usr/local/bin/notify.sh"

Slack-Integration

#!/bin/bash
# /usr/local/bin/slack-notify.sh

curl -X POST -H 'Content-type: application/json' \
    --data "{\"text\":\"Monit Alert: $MONIT_SERVICE - $MONIT_DESCRIPTION\"}" \
    https://hooks.slack.com/services/xxx/yyy/zzz

# Skript in der Monit-Konfiguration als Aktion einbinden
check process nginx...
    if failed port 80 then exec "/usr/local/bin/slack-notify.sh"

Zusammenfassung

| Befehl | Funktion |
|--------|----------|
| monit summary | Übersicht |
| monit status | Details |
| monit start SERVICE | Starten |
| monit stop SERVICE | Stoppen |
| monit restart SERVICE | Neustart |
| monit reload | Config neu laden |
| monit -t | Syntax prüfen |

| Check-Typ | Syntax |
|-----------|--------|
| Prozess | check process NAME with pidfile PATH |
| Datei | check file NAME with path PATH |
| Host | check host NAME with address HOST |
| Filesystem | check filesystem NAME with path PATH |
| System | check system $HOST |
| Programm | check program NAME with path PATH |

| Aktion | Beschreibung |
|--------|--------------|
| alert | Email senden |
| restart | Service neu starten |
| start | Service starten |
| stop | Service stoppen |
| exec | Programm ausführen |
| timeout | Überwachung pausieren |

| Datei | Beschreibung |
|-------|--------------|
| /etc/monit/monitrc | Hauptkonfiguration |
| /etc/monit/conf.d/ | Service-Configs |
| /var/log/monit.log | Log-Datei |

Fazit

Monit ist ideal für automatische Service-Recovery. Die Konfiguration ist einfach und lesbar. Das Web-Interface bietet schnellen Überblick. Für komplexere Setups sind Prometheus oder Zabbix besser geeignet. Monit ergänzt diese als lokaler "Watchdog" hervorragend.