#!/bin/bash
# Version: 1.2.2
# =============================================================================
# SAP HANA Monitoring Script
#
# Checks HANA processes, disk usage, log segment state, and the age of the
# last successful complete data backup.
# Sends ntfy.sh notifications if thresholds are exceeded.
#
# The following settings are referenced but not defined in this file; they are
# expected to be provided by monitor.conf (sourced from the script directory):
#   COMPANY_NAME, NTFY_TOKEN, NTFY_TOPIC_URL, SAPCONTROL_PATH,
#   HANA_INSTANCE_NR, DIRECTORIES_TO_MONITOR (array), DISK_USAGE_THRESHOLD,
#   HDBSQL_PATH, HANA_USER_KEY, TRUNCATED_PERCENTAGE_THRESHOLD,
#   FREE_PERCENTAGE_THRESHOLD, BACKUP_THRESHOLD_HOURS
# =============================================================================

# --- Lock File Implementation ---
LOCK_FILE="/tmp/hana_monitor.lock"
# Create the lock atomically: with noclobber set, the redirection fails when
# the file already exists, so two concurrent runs cannot both acquire it
# (a separate "test, then touch" leaves a window between the two steps).
if ! (set -o noclobber; : > "$LOCK_FILE") 2>/dev/null; then
  echo "▶️ Script is already running. Exiting."
  exit 1
fi
# Ensure lock file is removed on script exit (covers every exit path below).
trap 'rm -f "$LOCK_FILE"' EXIT

# --- Configuration and Setup ---
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/monitor.conf"

if [ ! -f "$CONFIG_FILE" ]; then
  echo "❌ Error: Configuration file not found at ${CONFIG_FILE}" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$CONFIG_FILE"

STATE_DIR="${SCRIPT_DIR}/monitor_state"
mkdir -p "${STATE_DIR}"

# Helper functions for state management

# Prints the stored value for a state key, or an empty line if none exists.
# Arguments: $1 - state key (used as the file name inside STATE_DIR)
get_state() {
  local key="$1"
  if [ -f "${STATE_DIR}/${key}.state" ]; then
    cat "${STATE_DIR}/${key}.state"
  else
    echo ""
  fi
}

# Stores a value for a state key, overwriting any previous value.
# Arguments: $1 - state key, $2 - value to store
set_state() {
  local key="$1"
  local value="$2"
  printf '%s\n' "$value" > "${STATE_DIR}/${key}.state"
}

HOSTNAME=$(hostname)
SQL_QUERY="SELECT b.host, b.service_name, a.state, count(*) FROM PUBLIC.M_LOG_SEGMENTS a JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) GROUP BY b.host, b.service_name, a.state;"

#######################################
# Sends a notification when the stored state for a check changes.
#
# - Value unchanged: nothing is sent.
# - Alert condition: an "<prefix> Alert" notification is sent.
# - Non-alert after a stored non-"OK" value: an "<prefix> Resolved"
#   notification is sent.
# - Non-alert with no prior alert: the state is updated silently.
#
# Globals:   STATE_DIR, COMPANY_NAME, HOSTNAME, NTFY_TOKEN, NTFY_TOPIC_URL
# Arguments: $1 - alert key (state file name)
#            $2 - title prefix, e.g. "HANA Process"
#            $3 - message text
#            $4 - "true" if this is an alert condition, anything else if not
#            $5 - value to store as state (e.g. "85%", "OK")
# Returns:   0 when nothing had to be sent or sending succeeded;
#            1 when the notification could not be delivered.
#######################################
send_notification_if_changed() {
  local alert_key="$1"
  local title_prefix="$2"
  local current_message="$3"
  local is_alert_condition="$4"
  local current_value="$5"

  # Declared separately so the assignment does not mask get_state's status.
  local previous_value
  previous_value=$(get_state "${alert_key}")

  # State unchanged, no notification needed.
  if [ "$current_value" == "$previous_value" ]; then
    return 0
  fi

  local full_title=""
  local full_message=""
  if [ "$is_alert_condition" == "true" ]; then
    full_title="${title_prefix} Alert"
    full_message="🚨 Critical: ${current_message}"
  elif [ -n "$previous_value" ] && [ "$previous_value" != "OK" ]; then
    # A previous alert value is on record, so this is a recovery.
    full_title="${title_prefix} Resolved"
    full_message="✅ Resolved: ${current_message}"
  else
    # No alert, and no previous alert to resolve, so just update state silently
    set_state "${alert_key}" "$current_value"
    return
  fi

  local final_message="[${COMPANY_NAME} | ${HOSTNAME}] ${full_message}"
  # --fail makes HTTP error responses count as failures. The state is only
  # updated after a successful delivery so that a lost notification is
  # retried on the next run instead of being silently dropped.
  if ! curl --fail -H "Authorization: Bearer ${NTFY_TOKEN}" -H "Title: ${full_title}" -d "${final_message}" "${NTFY_TOPIC_URL}" > /dev/null 2>&1; then
    echo "⚠️ Warning: Failed to send notification for ${alert_key}; will retry on next run." >&2
    return 1
  fi
  set_state "${alert_key}" "$current_value"
  echo "🔔 Notification sent for ${alert_key}: ${full_message}"
}

# --- HANA Process Status ---
echo "⚙️ Checking HANA process status..."
if [ ! -x "$SAPCONTROL_PATH" ]; then
  echo "❌ Error: sapcontrol not found or not executable at ${SAPCONTROL_PATH}" >&2
  send_notification_if_changed "hana_sapcontrol_path" "HANA Monitor Error" "sapcontrol not found or not executable at ${SAPCONTROL_PATH}" "true" "SAPCONTROL_ERROR"
  exit 1
fi

# Drop the first five lines of the output (presumably the sapcontrol banner
# and column header - confirm against real output) and keep every remaining
# line that does not mention GREEN.
non_green_processes=$("${SAPCONTROL_PATH}" -nr "${HANA_INSTANCE_NR}" -function GetProcessList | tail -n +6 | grep -v 'GREEN')

if [ -n "$non_green_processes" ]; then
  echo "🚨 Alert: One or more HANA processes are not running!" >&2
  echo "$non_green_processes" >&2
  send_notification_if_changed "hana_processes" "HANA Process" "One or more HANA processes are not GREEN. Problem processes: ${non_green_processes}" "true" "PROCESS_ALERT:${non_green_processes}"
  exit 1 # Exit early as other checks might fail
else
  send_notification_if_changed "hana_processes" "HANA Process" "All HANA processes are GREEN." "false" "OK"
  echo "✅ Success! All HANA processes are GREEN."
fi

# --- Disk Space Monitoring ---
echo "ℹ️ Checking disk usage..."
for dir in "${DIRECTORIES_TO_MONITOR[@]}"; do
  if [ ! -d "$dir" ]; then
    echo "⚠️ Warning: Directory '$dir' not found. Skipping." >&2
    send_notification_if_changed "disk_dir_not_found_${dir//\//_}" "HANA Disk Warning" "Directory '$dir' not found." "true" "DIR_NOT_FOUND"
    continue
  fi

  # df -P keeps each filesystem on a single line; the default format may wrap
  # long device names onto a second line, which would shift the columns.
  usage=$(df -P "$dir" | awk 'NR==2 {print $5}' | sed 's/%//')
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    # Without a numeric value the threshold comparison below would error out.
    echo "⚠️ Warning: Could not determine disk usage for '$dir'. Skipping." >&2
    continue
  fi
  echo " - ${dir} is at ${usage}%"

  if (( $(echo "$usage > $DISK_USAGE_THRESHOLD" | bc -l) )); then
    echo "🚨 Alert: ${dir} usage is at ${usage}% which is above the ${DISK_USAGE_THRESHOLD}% threshold." >&2
    send_notification_if_changed "disk_usage_${dir//\//_}" "HANA Disk" "Disk usage for ${dir} is at ${usage}%." "true" "${usage}%"
  else
    send_notification_if_changed "disk_usage_${dir//\//_}" "HANA Disk" "Disk usage for ${dir} is at ${usage}% (below threshold)." "false" "OK"
  fi
done

# --- HANA Log Segment Monitoring ---
echo "⚙️ Executing HANA SQL query..."
if [ ! -x "$HDBSQL_PATH" ]; then
  echo "❌ Error: hdbsql not found or not executable at ${HDBSQL_PATH}" >&2
  send_notification_if_changed "hana_hdbsql_path" "HANA Monitor Error" "hdbsql not found or not executable at ${HDBSQL_PATH}" "true" "HDBSQL_ERROR"
  exit 1
fi

# Capture output and exit status via command substitution. Reading from a
# process substitution would leave $? holding readarray's status, not
# hdbsql's, so a failing query would go unnoticed.
sql_raw=$("$HDBSQL_PATH" -U "$HANA_USER_KEY" -c ";" "$SQL_QUERY" 2>&1)
sql_status=$?
if [ "$sql_status" -ne 0 ]; then
  echo "❌ Failure! The hdbsql command failed. Please check logs." >&2
  error_message="$sql_raw"
  send_notification_if_changed "hana_hdbsql_command" "HANA Monitor Error" "The hdbsql command failed. Details: ${error_message}" "true" "HDBSQL_COMMAND_FAILED"
  exit 1
fi
readarray -t sql_output <<< "$sql_raw"

total_segments=0
truncated_segments=0
free_segments=0

for line in "${sql_output[@]}"; do
  # Skip blank lines and the column header line.
  if [[ -z "$line" || "$line" == *"STATE"* ]]; then continue; fi

  cleaned_line=${line//\"/}
  # Fields are comma-separated: host, service_name, state, count.
  IFS=',' read -r _ _ state count _ <<< "$cleaned_line"

  # stderr is merged into the output, so non-data lines may appear. A
  # non-numeric count would raise an arithmetic error that aborts the loop.
  if [[ ! "$count" =~ ^[0-9]+$ ]]; then continue; fi

  total_segments=$((total_segments + count))
  if [[ "$state" == "Truncated" ]]; then
    truncated_segments=$((truncated_segments + count))
  elif [[ "$state" == "Free" ]]; then
    free_segments=$((free_segments + count))
  fi
done

echo "ℹ️ Total Segments: ${total_segments}"
echo "ℹ️ Truncated Segments: ${truncated_segments}"
echo "ℹ️ Free Segments: ${free_segments}"

if [ "$total_segments" -eq 0 ]; then
  echo "⚠️ Warning: No log segments found. Skipping percentage checks." >&2
  send_notification_if_changed "hana_log_segments_total" "HANA Log Segment Warning" "No log segments found. Skipping percentage checks." "true" "NO_LOG_SEGMENTS"
  exit 0
else
  send_notification_if_changed "hana_log_segments_total" "HANA Log Segment" "Log segments found." "false" "OK"
fi

# Integer percentages (truncating division); total_segments is non-zero here.
truncated_percentage=$((truncated_segments * 100 / total_segments))
if (( $(echo "$truncated_percentage > $TRUNCATED_PERCENTAGE_THRESHOLD" | bc -l) )); then
  echo "🚨 Alert: ${truncated_percentage}% of log segments are 'Truncated'." >&2
  send_notification_if_changed "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state." "true" "${truncated_percentage}%"
else
  send_notification_if_changed "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state (below threshold)." "false" "OK"
fi

free_percentage=$((free_segments * 100 / total_segments))
if (( $(echo "$free_percentage < $FREE_PERCENTAGE_THRESHOLD" | bc -l) )); then
  echo "🚨 Alert: Only ${free_percentage}% of log segments are 'Free'." >&2
  send_notification_if_changed "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state." "true" "${free_percentage}%"
else
  send_notification_if_changed "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state (above threshold)." "false" "OK"
fi

# --- HANA Backup Monitoring ---
echo "ℹ️ Checking last successful data backup status..."

# Query to get the start time of the most recent successful complete data backup
last_backup_date=$("$HDBSQL_PATH" -U "$HANA_USER_KEY" -j -a -x \
  "SELECT TOP 1 SYS_START_TIME FROM M_BACKUP_CATALOG WHERE ENTRY_TYPE_NAME = 'complete data backup' AND STATE_NAME = 'successful' ORDER BY SYS_START_TIME DESC" 2>/dev/null | tr -d "\"" | sed 's/\..*//') # sed removes fractional seconds

if [[ -z "$last_backup_date" ]]; then
  # No successful backup found at all
  message="No successful complete data backup found for ${COMPANY_NAME} HANA."
  echo "🚨 Critical: ${message}"
  send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "true" "NO_BACKUP"
  # This is top-level code, so 'return' is invalid here and execution would
  # fall through to the age check with an empty date. Stop explicitly.
  exit 1
fi

# Convert dates to epoch seconds for comparison
last_backup_epoch=$(date -d "$last_backup_date" +%s)
current_epoch=$(date +%s)
threshold_seconds=$((BACKUP_THRESHOLD_HOURS * 3600))

age_seconds=$((current_epoch - last_backup_epoch))
age_hours=$((age_seconds / 3600))

if (( age_seconds > threshold_seconds )); then
  message="Last successful HANA backup for ${COMPANY_NAME} is ${age_hours} hours old, which exceeds the threshold of ${BACKUP_THRESHOLD_HOURS} hours. Last backup was on: ${last_backup_date}."
  echo "🚨 Critical: ${message}"
  send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "true" "${age_hours}h"
else
  message="Last successful backup is ${age_hours} hours old (Threshold: ${BACKUP_THRESHOLD_HOURS} hours)."
  echo "✅ Success! ${message}"
  send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "false" "OK"
fi

echo "✅ Success! HANA monitoring check complete."