feat(monitor): Add HANA statement queue monitoring

This commit introduces a new feature to monitor the HANA statement queue.

Added STATEMENT_QUEUE_THRESHOLD and STATEMENT_QUEUE_CONSECUTIVE_RUNS to monitor/monitor.conf.
Implemented logic in monitor/monitor.sh to query the statement queue length, track consecutive breaches of the defined threshold, and send notifications.
Updated the script version to 1.3.0.
Refactored log segment checks to only run when segments are found.
This commit is contained in:
2025-10-01 13:10:57 +02:00
parent 92a2b963c4
commit bb0531aeea
2 changed files with 70 additions and 42 deletions

View File

@@ -29,8 +29,11 @@ TRUNCATED_PERCENTAGE_THRESHOLD=50
FREE_PERCENTAGE_THRESHOLD=25 FREE_PERCENTAGE_THRESHOLD=25
# Maximum age of the last successful full data backup in hours. # Maximum age of the last successful full data backup in hours.
BACKUP_THRESHOLD_HOURS=25 BACKUP_THRESHOLD_HOURS=25
# Statement queue length above which a monitoring run counts as a threshold breach
STATEMENT_QUEUE_THRESHOLD=100
# Number of consecutive runs the queue must be over threshold to trigger an alert
STATEMENT_QUEUE_CONSECUTIVE_RUNS=3
# --- Monitored Directories --- # --- Monitored Directories ---
# List of directories to check for disk usage (space-separated) # List of directories to check for disk usage (space-separated)
DIRECTORIES_TO_MONITOR=("/hana/log" "/hana/shared" "/hana/data" "/usr/sap") DIRECTORIES_TO_MONITOR=("/hana/log" "/hana/shared" "/hana/data" "/usr/sap")

View File

@@ -1,9 +1,9 @@
#!/bin/bash #!/bin/bash
# Version: 1.2.3 # Version: 1.3.0
# ============================================================================= # =============================================================================
# SAP HANA Monitoring Script # SAP HANA Monitoring Script
# #
# Checks HANA processes, disk usage, and log segment state. # Checks HANA processes, disk usage, log segments, and statement queue.
# Sends ntfy.sh notifications if thresholds are exceeded. # Sends ntfy.sh notifications if thresholds are exceeded.
# ============================================================================= # =============================================================================
@@ -161,11 +161,8 @@ echo " Free Segments: ${free_segments}"
if [ $total_segments -eq 0 ]; then if [ $total_segments -eq 0 ]; then
echo "⚠️ Warning: No log segments found. Skipping percentage checks." >&2 echo "⚠️ Warning: No log segments found. Skipping percentage checks." >&2
send_notification_if_changed "hana_log_segments_total" "HANA Log Segment Warning" "No log segments found. Skipping percentage checks." "true" "NO_LOG_SEGMENTS" send_notification_if_changed "hana_log_segments_total" "HANA Log Segment Warning" "No log segments found. Skipping percentage checks." "true" "NO_LOG_SEGMENTS"
exit 0
else else
send_notification_if_changed "hana_log_segments_total" "HANA Log Segment" "Log segments found." "false" "OK" send_notification_if_changed "hana_log_segments_total" "HANA Log Segment" "Log segments found." "false" "OK"
fi
truncated_percentage=$((truncated_segments * 100 / total_segments)) truncated_percentage=$((truncated_segments * 100 / total_segments))
if (( $(echo "$truncated_percentage > $TRUNCATED_PERCENTAGE_THRESHOLD" | bc -l) )); then if (( $(echo "$truncated_percentage > $TRUNCATED_PERCENTAGE_THRESHOLD" | bc -l) )); then
echo "🚨 Alert: ${truncated_percentage}% of log segments are 'Truncated'." >&2 echo "🚨 Alert: ${truncated_percentage}% of log segments are 'Truncated'." >&2
@@ -181,26 +178,54 @@ if (( $(echo "$free_percentage < $FREE_PERCENTAGE_THRESHOLD" | bc -l) )); then
else else
send_notification_if_changed "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state (above threshold)." "false" "OK" send_notification_if_changed "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state (above threshold)." "false" "OK"
fi fi
fi
# --- HANA Statement Queue Monitoring ---
# Counts SqlExecutor threads in the 'Queueing' state and raises an alert once
# that count has been above STATEMENT_QUEUE_THRESHOLD for at least
# STATEMENT_QUEUE_CONSECUTIVE_RUNS runs in a row. The running breach counter is
# carried between runs through get_state / set_state.
echo "⚙️ Checking HANA statement queue..."

# Fall back to the defaults from monitor.conf (100 / 3) when the loaded config
# predates these settings. An unset variable evaluates to 0 in arithmetic
# context, which would make the ">= consecutive runs" test below true on every
# single run and raise a permanent false alert.
: "${STATEMENT_QUEUE_THRESHOLD:=100}"
: "${STATEMENT_QUEUE_CONSECUTIVE_RUNS:=3}"

STATEMENT_QUEUE_SQL="SELECT COUNT(*) FROM M_SERVICE_THREADS WHERE THREAD_TYPE = 'SqlExecutor' AND THREAD_STATE = 'Queueing';"
# stderr is discarded on purpose: any failure surfaces as a non-numeric result
# and is reported through the QUEUE_CHECK_FAIL notification below.
queue_count=$("$HDBSQL_PATH" -U "$HANA_USER_KEY" -j -a -x "$STATEMENT_QUEUE_SQL" 2>/dev/null | tr -d '"')

if ! [[ "$queue_count" =~ ^[0-9]+$ ]]; then
  echo "⚠️ Warning: Could not retrieve HANA statement queue count. Skipping check." >&2
  send_notification_if_changed "hana_statement_queue_check_fail" "HANA Monitor Warning" "Could not retrieve statement queue count." "true" "QUEUE_CHECK_FAIL"
else
  send_notification_if_changed "hana_statement_queue_check_fail" "HANA Monitor Warning" "Statement queue check is working." "false" "OK"
  echo " Current statement queue length: ${queue_count}"

  breach_count=$(get_state "statement_queue_breach_count")
  # A missing or corrupt persisted value is treated as "no breaches so far".
  # Without this guard a non-numeric value would break (or be evaluated by)
  # the arithmetic below and then be written straight back to the state store.
  if ! [[ "$breach_count" =~ ^[0-9]+$ ]]; then
    breach_count=0
  fi

  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  if (( 10#$queue_count > STATEMENT_QUEUE_THRESHOLD )); then
    breach_count=$(( 10#$breach_count + 1 ))
    echo "📈 Statement queue is above threshold. Consecutive breach count: ${breach_count}/${STATEMENT_QUEUE_CONSECUTIVE_RUNS}."
  else
    # Any run at or below the threshold ends the streak.
    breach_count=0
  fi
  set_state "statement_queue_breach_count" "$breach_count"

  if (( breach_count >= STATEMENT_QUEUE_CONSECUTIVE_RUNS )); then
    message="Statement queue has been over ${STATEMENT_QUEUE_THRESHOLD} for ${breach_count} checks. Current count: ${queue_count}."
    # NOTE(review): the state value embeds the current count, so presumably a
    # fresh notification goes out whenever the count changes while alerting —
    # confirm against send_notification_if_changed.
    send_notification_if_changed "hana_statement_queue_status" "HANA Statement Queue" "${message}" "true" "ALERT:${queue_count}"
  else
    message="Statement queue is normal. Current count: ${queue_count}."
    send_notification_if_changed "hana_statement_queue_status" "HANA Statement Queue" "${message}" "false" "OK"
  fi
fi
# --- HANA Backup Status Monitoring ---
echo " Checking last successful data backup status..." echo " Checking last successful data backup status..."
# Query to get the start time of the most recent successful complete data backup
last_backup_date=$("$HDBSQL_PATH" -U "$HANA_USER_KEY" -j -a -x \ last_backup_date=$("$HDBSQL_PATH" -U "$HANA_USER_KEY" -j -a -x \
"SELECT TOP 1 SYS_START_TIME FROM M_BACKUP_CATALOG WHERE ENTRY_TYPE_NAME = 'complete data backup' AND STATE_NAME = 'successful' ORDER BY SYS_START_TIME DESC" 2>/dev/null | tr -d "\"" | sed 's/\..*//') # sed removes fractional seconds "SELECT TOP 1 SYS_START_TIME FROM M_BACKUP_CATALOG WHERE ENTRY_TYPE_NAME = 'complete data backup' AND STATE_NAME = 'successful' ORDER BY SYS_START_TIME DESC" 2>/dev/null | tr -d "\"" | sed 's/\..*//')
if [[ -z "$last_backup_date" ]]; then if [[ -z "$last_backup_date" ]]; then
# No successful backup found at all
message="No successful complete data backup found for ${COMPANY_NAME} HANA." message="No successful complete data backup found for ${COMPANY_NAME} HANA."
echo "🚨 Critical: ${message}" echo "🚨 Critical: ${message}"
send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "true" "NO_BACKUP" send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "true" "NO_BACKUP"
return else
fi
# Convert dates to epoch seconds for comparison
last_backup_epoch=$(date -d "$last_backup_date" +%s) last_backup_epoch=$(date -d "$last_backup_date" +%s)
current_epoch=$(date +%s) current_epoch=$(date +%s)
threshold_seconds=$((BACKUP_THRESHOLD_HOURS * 3600)) threshold_seconds=$((BACKUP_THRESHOLD_HOURS * 3600))
age_seconds=$((current_epoch - last_backup_epoch)) age_seconds=$((current_epoch - last_backup_epoch))
age_hours=$((age_seconds / 3600)) age_hours=$((age_seconds / 3600))
@@ -213,6 +238,6 @@ else
echo "✅ Success! ${message}" echo "✅ Success! ${message}"
send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "false" "OK" send_notification_if_changed "hana_backup_status" "HANA Backup" "${message}" "false" "OK"
fi fi
fi
echo "✅ Success! HANA monitoring check complete." echo "✅ Success! HANA monitoring check complete."