Monit ist ein leichtgewichtiges Tool zur Überwachung und automatischen Wiederherstellung von Services. Es startet abgestürzte Prozesse neu und warnt bei Ressourcen-Problemen.
Installation
# Debian/Ubuntu
apt install monit
# RHEL/CentOS
dnf install monit
# Service aktivieren
systemctl enable monit
systemctl start monit
Grundkonfiguration
Haupt-Konfiguration
# /etc/monit/monitrc (Debian)
# /etc/monitrc (RHEL)
# Prüf-Intervall (Sekunden)
set daemon 60
with start delay 120
# Log-Datei
set log /var/log/monit.log
# ID-Datei
set idfile /var/lib/monit/id
# Status-Datei
set statefile /var/lib/monit/state
# Event-Queue
set eventqueue
basedir /var/lib/monit/events
slots 100
# Mail-Konfiguration
set mailserver smtp.example.de port 587
username "monit@example.de"
password "secret"
using tls
# Alert-Empfänger
set alert admin@example.de
# Web-Interface
set httpd port 2812
use address localhost
allow localhost
allow admin:secret
Service-Konfigurationen
# /etc/monit/conf.d/ (Debian)
# /etc/monit.d/ (RHEL)
Prozess-Überwachung
Nginx
# /etc/monit/conf.d/nginx
check process nginx with pidfile /var/run/nginx.pid
start program = "/bin/systemctl start nginx"
stop program = "/bin/systemctl stop nginx"
if failed host 127.0.0.1 port 80 protocol http
and request "/" with timeout 10 seconds
then restart
if cpu > 80% for 5 cycles then alert
if totalmem > 500 MB for 5 cycles then restart
if 3 restarts within 5 cycles then timeout
Apache
# /etc/monit/conf.d/apache
check process apache with pidfile /var/run/apache2/apache2.pid
start program = "/bin/systemctl start apache2"
stop program = "/bin/systemctl stop apache2"
if failed host 127.0.0.1 port 80 protocol http then restart
if cpu > 90% for 3 cycles then restart
if totalmem > 1024 MB for 3 cycles then restart
if children > 250 then restart
if 5 restarts within 5 cycles then timeout
MySQL
# /etc/monit/conf.d/mysql
check process mysql with pidfile /var/run/mysqld/mysqld.pid
start program = "/bin/systemctl start mysql"
stop program = "/bin/systemctl stop mysql"
if failed host 127.0.0.1 port 3306 protocol mysql
username "monit" password "secret"
then restart
if cpu > 80% for 5 cycles then alert
if totalmem > 2048 MB for 5 cycles then alert
if 3 restarts within 5 cycles then timeout
PostgreSQL
# /etc/monit/conf.d/postgresql
check process postgresql with pidfile /var/run/postgresql/14-main.pid
start program = "/bin/systemctl start postgresql"
stop program = "/bin/systemctl stop postgresql"
if failed host 127.0.0.1 port 5432 protocol pgsql then restart
if cpu > 80% for 5 cycles then alert
if 3 restarts within 5 cycles then timeout
SSH
# /etc/monit/conf.d/sshd
check process sshd with pidfile /var/run/sshd.pid
start program = "/bin/systemctl start sshd"
stop program = "/bin/systemctl stop sshd"
if failed port 22 protocol ssh then restart
if 5 restarts within 5 cycles then timeout
Redis
# /etc/monit/conf.d/redis
check process redis with pidfile /var/run/redis/redis-server.pid
start program = "/bin/systemctl start redis-server"
stop program = "/bin/systemctl stop redis-server"
if failed host 127.0.0.1 port 6379 then restart
if cpu > 90% for 3 cycles then restart
if totalmem > 2048 MB for 3 cycles then restart
if 3 restarts within 5 cycles then timeout
Datei-Überwachung
Log-Dateien
# /etc/monit/conf.d/logs
check file syslog with path /var/log/syslog
if size > 500 MB then exec "/usr/sbin/logrotate -f /etc/logrotate.d/rsyslog"
check file auth_log with path /var/log/auth.log
if match "Failed password" then alert
Konfigurationsdateien
check file nginx_conf with path /etc/nginx/nginx.conf
if changed checksum then exec "/bin/systemctl reload nginx"
if changed timestamp then alert
Verzeichnisse
check directory www with path /var/www/html
if changed timestamp then alert
if permission failed then alert
System-Überwachung
Ressourcen
# /etc/monit/conf.d/system
check system $HOST
if loadavg (1min) > 4 then alert
if loadavg (5min) > 2 for 10 cycles then alert
if cpu usage > 95% for 10 cycles then alert
if memory usage > 90% for 5 cycles then alert
if swap usage > 25% then alert
Festplatten
check filesystem rootfs with path /
if space usage > 80% then alert
if space usage > 90% then exec "/usr/local/bin/cleanup.sh"
if inode usage > 90% then alert
check filesystem data with path /data
if space usage > 85% then alert
Netzwerk
check network eth0 with interface eth0
if failed link then alert
if changed link capacity then alert
if saturation > 80% then alert
if upload > 500 MB/s then alert
if download > 1 GB/s then alert
Host-Überwachung
Externe Server
check host google with address google.com
if failed ping count 3 with timeout 5 seconds then alert
if failed port 443 protocol https then alert
check host database_server with address 192.168.1.10
if failed ping then alert
if failed port 3306 protocol mysql then alert
Programme überwachen
Cron-Jobs
check program backup with path "/usr/local/bin/backup.sh"
with timeout 3600 seconds
every "0 2 * * *"
if status != 0 then alert
Benutzerdefinierte Checks
check program diskcheck with path "/usr/local/bin/check_disk.sh"
if status != 0 then alert
if status = 1 then exec "/usr/local/bin/cleanup.sh"
Web-Interface
Konfiguration
set httpd port 2812
use address 0.0.0.0
allow 192.168.1.0/24
allow admin:secret
# Mit SSL
set httpd port 2812
ssl enable
pemfile /etc/monit/monit.pem
allow admin:secret
Zugriff
# http://server:2812
# Login: admin / secret
Kommandozeile
Status
# Zusammenfassung
monit summary
# Details
monit status
# Einzelner Service
monit status nginx
Steuerung
# Service starten
monit start nginx
# Service stoppen
monit stop nginx
# Service neu starten
monit restart nginx
# Überwachung deaktivieren
monit unmonitor nginx
# Überwachung aktivieren
monit monitor nginx
# Konfiguration neu einlesen (Monit-Daemon neu initialisieren)
monit reload
Konfiguration prüfen
# Syntax prüfen
monit -t
# Verbose prüfen
monit -t -v
Alerting
set alert admin@example.de
set alert oncall@example.de only on { timeout, nonexist }
# Pro Service
check process nginx...
alert webteam@example.de
Benutzerdefinierte Aktionen
check process nginx...
if failed port 80 then exec "/usr/local/bin/notify.sh"
Slack-Integration
#!/bin/bash
# /usr/local/bin/slack-notify.sh
curl -X POST -H 'Content-type: application/json' \
--data "{\"text\":\"Monit Alert: $MONIT_SERVICE - $MONIT_DESCRIPTION\"}" \
https://hooks.slack.com/services/xxx/yyy/zzz
check process nginx...
if failed port 80 then exec "/usr/local/bin/slack-notify.sh"
Zusammenfassung
| Befehl | Funktion |
|--------|----------|
| monit summary | Übersicht |
| monit status | Details |
| monit start SERVICE | Starten |
| monit stop SERVICE | Stoppen |
| monit restart SERVICE | Neustart |
| monit reload | Config neu laden |
| monit -t | Syntax prüfen |
| Check-Typ | Syntax |
|-----------|--------|
| Prozess | check process NAME with pidfile PATH |
| Datei | check file NAME with path PATH |
| Host | check host NAME with address HOST |
| Filesystem | check filesystem NAME with path PATH |
| System | check system $HOST |
| Programm | check program NAME with path PATH |
| Aktion | Beschreibung |
|--------|--------------|
| alert | Email senden |
| restart | Service neu starten |
| start | Service starten |
| stop | Service stoppen |
| exec | Programm ausführen |
| timeout | Überwachung pausieren |
| Datei | Beschreibung |
|-------|--------------|
| /etc/monit/monitrc | Hauptkonfiguration |
| /etc/monit/conf.d/ | Service-Configs |
| /var/log/monit.log | Log-Datei |
Fazit
Monit ist ideal für automatische Service-Recovery. Die Konfiguration ist einfach und lesbar. Das Web-Interface bietet schnellen Überblick. Für komplexere Setups sind Prometheus oder Zabbix besser geeignet. Monit ergänzt diese als lokaler "Watchdog" hervorragend.