refactor(monitoring): simplify notification system and remove auto-cleanup

- Replace state-based notifications with direct alert functions
- Remove auto-cleanup functionality from disk monitoring and configuration
- Simplify lock acquisition/release across all monitoring scripts
- Add execute_hana_sql helper functions for consistent SQL execution
- Remove state file tracking in favor of direct file operations
- Standardize error handling with exit codes on critical failures
- Clean up hana.conf by removing unused auto-delete directory settings

This commit is contained in:
2026-03-12 21:52:49 +01:00
parent 5a92bc4e93
commit cf5b81889d
8 changed files with 183 additions and 391 deletions

View File

@@ -12,32 +12,30 @@ source "${SCRIPT_DIR}/hana.conf"
source "${SCRIPT_DIR}/hana_lib.sh"
# Acquire lock
LOCK_FILE=$(acquire_lock "$SCRIPT_NAME")
if [ $? -ne 0 ]; then
if ! acquire_lock "$SCRIPT_NAME"; then
exit 1
fi
trap 'release_lock "$LOCK_FILE"' EXIT
trap 'release_lock "$SCRIPT_NAME"' EXIT
log_message "$SCRIPT_NAME" "Starting log segment check..."
# SQL Query for log segments
SQL_QUERY="SELECT b.host, b.service_name, a.state, count(*) FROM PUBLIC.M_LOG_SEGMENTS a JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) GROUP BY b.host, b.service_name, a.state;"
# Check if hdbsql is available
if [ ! -x "$HDBSQL_PATH" ]; then
log_message "$SCRIPT_NAME" "ERROR: hdbsql not found or not executable at ${HDBSQL_PATH}"
send_notification_if_changed "$SCRIPT_NAME" "hana_hdbsql_path" "HANA Monitor Error" "hdbsql not found or not executable at ${HDBSQL_PATH}" "true" "HDBSQL_ERROR"
send_alert "$SCRIPT_NAME" "HANA Monitor Error" "hdbsql not found or not executable at ${HDBSQL_PATH}"
exit 1
fi
# Execute SQL query as HANA user with improved error handling
readarray -t sql_output < <(su - "$HANA_USER" -c "$HDBSQL_PATH -U $HANA_USER_KEY -c \";\" \"$SQL_QUERY\"" 2>&1)
# SQL Query for log segments
SQL_QUERY="SELECT b.host, b.service_name, a.state, count(*) FROM PUBLIC.M_LOG_SEGMENTS a JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) GROUP BY b.host, b.service_name, a.state;"
# Execute SQL query
sql_output=$(execute_hana_sql "$SQL_QUERY")
sql_status=$?
if [ $sql_status -ne 0 ]; then
error_message=$(printf '%s\n' "${sql_output[@]}")
log_message "$SCRIPT_NAME" "ERROR: The hdbsql command failed. Details: ${error_message}"
send_notification_if_changed "$SCRIPT_NAME" "hana_hdbsql_command" "HANA Monitor Error" "The hdbsql command failed. Details: ${error_message}" "true" "HDBSQL_COMMAND_FAILED"
log_message "$SCRIPT_NAME" "ERROR: The hdbsql command failed."
send_alert "$SCRIPT_NAME" "HANA Monitor Error" "The hdbsql command failed."
exit 1
fi
@@ -46,15 +44,15 @@ total_segments=0
truncated_segments=0
free_segments=0
for line in "${sql_output[@]}"; do
while IFS= read -r line; do
# Skip empty lines and header
if [[ -z "$line" || "$line" == *"STATE"* || "$line" == *"host"* ]]; then
continue
fi
cleaned_line=$(echo "$line" | tr -d '"')
state=$(echo "$cleaned_line" | awk -F',' '{print $3}' | xargs) # Trim whitespace
count=$(echo "$cleaned_line" | awk -F',' '{print $4}' | xargs) # Trim whitespace
state=$(echo "$cleaned_line" | awk -F',' '{print $3}' | xargs)
count=$(echo "$cleaned_line" | awk -F',' '{print $4}' | xargs)
# Validate count is a number
if ! [[ "$count" =~ ^[0-9]+$ ]]; then
@@ -67,7 +65,7 @@ for line in "${sql_output[@]}"; do
elif [[ "$state" == "Free" ]]; then
free_segments=$((free_segments + count))
fi
done
done <<< "$sql_output"
log_message "$SCRIPT_NAME" "Total Segments: ${total_segments}"
log_message "$SCRIPT_NAME" "Truncated Segments: ${truncated_segments}"
@@ -75,37 +73,24 @@ log_message "$SCRIPT_NAME" "Free Segments: ${free_segments}"
if [ $total_segments -eq 0 ]; then
log_message "$SCRIPT_NAME" "WARNING: No log segments found. Skipping percentage checks."
send_notification_if_changed "$SCRIPT_NAME" "hana_log_segments_total" "HANA Log Segment Warning" "No log segments found. Skipping percentage checks." "true" "NO_LOG_SEGMENTS"
else
send_notification_if_changed "$SCRIPT_NAME" "hana_log_segments_total" "HANA Log Segment" "Log segments found." "false" "OK"
# Calculate truncated percentage with integer arithmetic
if [ $total_segments -gt 0 ]; then
truncated_percentage=$((truncated_segments * 100 / total_segments))
else
truncated_percentage=0
fi
if [ $truncated_percentage -gt $TRUNCATED_PERCENTAGE_THRESHOLD ]; then
log_message "$SCRIPT_NAME" "ALERT: ${truncated_percentage}% of log segments are 'Truncated'."
send_notification_if_changed "$SCRIPT_NAME" "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state." "true" "${truncated_percentage}%"
else
send_notification_if_changed "$SCRIPT_NAME" "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state (below threshold)." "false" "OK"
fi
# Calculate free percentage with integer arithmetic
if [ $total_segments -gt 0 ]; then
free_percentage=$((free_segments * 100 / total_segments))
else
free_percentage=0
fi
if [ $free_percentage -lt $FREE_PERCENTAGE_THRESHOLD ]; then
log_message "$SCRIPT_NAME" "ALERT: Only ${free_percentage}% of log segments are 'Free'."
send_notification_if_changed "$SCRIPT_NAME" "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state." "true" "${free_percentage}%"
else
send_notification_if_changed "$SCRIPT_NAME" "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state (above threshold)." "false" "OK"
fi
send_alert "$SCRIPT_NAME" "HANA Log Segment Warning" "No log segments found."
exit 1
fi
# Calculate truncated percentage with integer arithmetic
truncated_percentage=$((truncated_segments * 100 / total_segments))
if [ $truncated_percentage -gt $TRUNCATED_PERCENTAGE_THRESHOLD ]; then
log_message "$SCRIPT_NAME" "ALERT: ${truncated_percentage}% of log segments are 'Truncated'."
send_alert "$SCRIPT_NAME" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state."
fi
# Calculate free percentage with integer arithmetic
free_percentage=$((free_segments * 100 / total_segments))
if [ $free_percentage -lt $FREE_PERCENTAGE_THRESHOLD ]; then
log_message "$SCRIPT_NAME" "ALERT: Only ${free_percentage}% of log segments are 'Free'."
send_alert "$SCRIPT_NAME" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state."
fi
log_message "$SCRIPT_NAME" "Log segment check complete."