scripts/xymon/xymon.files.alert.sh

103 lines
5.0 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Purpose {{{
## If Xymon server says that a file is in error on a remote host, try to restart the related service.
## 1. Create an SSH key pair for the xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure the ${REMOTE_SSH_USER} user exists on the remote hosts and is allowed to connect over SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
# }}}
# Vars {{{
# DEBUG uses shell truthiness: 0 enables debug output and keeps the temp
# directory after the run, any other value disables both.
DEBUG=1
# NOTE(review): LOCAL_SSH_USER is not referenced in the visible part of this script.
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"
# Stop right away when the scratch directory cannot be created: with an empty
# temp_dir every path below would resolve to the filesystem root
# (/debug.stdout, /debug.stderr, ...).
if ! temp_dir=$(mktemp -d -t xymon-files-alert-XXXXXX.tmp); then
  printf '%s\n' "xymon.files.alert: unable to create a temporary directory" >&2
  exit 1
fi
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
file_list="${temp_dir}/services.error.list"
# }}}
# Create log files
touch "${debug_stdout}" "${debug_stderr}"
# Manage only files probe {{{
# BBSVCNAME and BBHOSTNAME are read from the environment; any probe other than
# "files" makes the script stop here with a success status.
# The escape character is written \033 because POSIX printf does not define \e
# (dash, a common /bin/sh, would print it literally).
# NOTE(review): host and service names are printed back to back with no
# separator in the debug lines below; confirm this is intended.
if [ "${BBSVCNAME}" = "files" ]; then
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
else
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
  # Nothing to do for this probe: drop the temp directory unless DEBUG is enabled (0).
  [ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
  exit 0
fi
# }}}
# Check if a file exceeds its modification time {{{

# Append a magenta "DEBUG: ..." line to ${debug_stdout} when DEBUG is 0 (enabled).
# Arguments: $* - message text
debug_log() {
  if [ "${DEBUG:-1}" -eq 0 ]; then
    # \033 rather than \e: POSIX printf does not define \e.
    printf '\033[1;35m%-6s\033[m\n' "DEBUG: $*" >> "${debug_stdout}"
  fi
}

# Read a status message on stdin and print, one per line, the link text of
# every line starting with "&yellow <a ...>" or "&red <a ...>".
list_stale_files() {
  sed -n \
    -e 's;^&yellow <a.*>\(.*\)</a>;\1;p' \
    -e 's;^&red <a.*>\(.*\)</a>;\1;p'
}

# Print the service name matching a monitored file path, or NOT.MANAGED when
# the path has no known service.
# Arguments: $1 - file path
service_for_file() {
  case "${1}" in
    # Cron
    '/var/log/cron.log') printf '%s\n' "cron" ;;
    # default
    *) printf '%s\n' "NOT.MANAGED" ;;
  esac
}

# Restart <service>.service on ${BBHOSTNAME} through SSH + sudo and log the
# command output to the debug files.
# Arguments: $1 - label used as prefix of the debug line
#            $2 - service name, without the ".service" suffix
# The service name always comes from this script (never from the alert
# message), so nothing untrusted reaches the remote command line.
restart_remote_service() {
  debug_log "${1} — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${2}.service"
  # -n keeps ssh away from stdin, which matters inside the "while read" loop below.
  ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
    "${REMOTE_SSH_USER}@${BBHOSTNAME}" "sudo systemctl restart ${2}.service" \
    >> "${debug_stdout}" 2>> "${debug_stderr}"
}

# printf is used instead of echo so that backslashes in the message are passed
# through untouched whatever /bin/sh is.
if printf '%s\n' "${BBALPHAMSG}" | grep -qE "File was modified.*ago - should be.*"; then
  debug_log "Test file — Some files exceeds their modification time."
  # First restart Rsyslog service {{{
  debug_log "Test service — First restart rsyslog service."
  restart_remote_service "Test service" "rsyslog"
  # }}}
  ## Get the list of files path (yellow and red entries)
  printf '%s\n' "${BBALPHAMSG}" | list_stale_files > "${file_list}"
  # Space separated list of the services restarted by the loop, so that each
  # one is restarted at most once even when its files are not listed in a row.
  restarted_services=""
  while IFS= read -r file_path; do
    debug_log "while file_path loop — ${file_path} exceeds it's modification time."
    # Match files path and services name
    service_name=$(service_for_file "${file_path}")
    # Restart service if needed {{{
    if [ "${service_name}" = "NOT.MANAGED" ]; then
      debug_log "while file_path loop — service for ${file_path} is not managed."
      continue
    fi
    case " ${restarted_services} " in
      *" ${service_name} "*)
        debug_log "while file_path loop — ${service_name} was already restarted."
        ;;
      *)
        debug_log "while file_path loop — ${service_name} need to be restarted."
        restart_remote_service "while file_path loop" "${service_name}"
        restarted_services="${restarted_services} ${service_name}"
        ;;
    esac
    # }}}
  done < "${file_list}"
  # Also restart xymon-client service {{{
  debug_log "Test service — xymon-client also need to be restarted."
  restart_remote_service "Test service" "xymon-client"
  # }}}
else
  debug_log "Test file — All files seems up to date."
fi
# }}}
# Drop the error log when nothing was written to it
if [ ! -s "${debug_stderr}" ]; then
  rm -f "${debug_stderr}"
fi
# Keep temp_dir only when DEBUG is enabled (0); remove it in every other case
if ! [ "${DEBUG}" -eq "0" ]; then
  rm -rf -- "${temp_dir}"
fi
exit 0