refactor(monitoring): simplify notification system and remove auto-cleanup
- Replace state-based notifications with direct alert functions
- Remove auto-cleanup functionality from disk monitoring and configuration
- Simplify lock acquisition/release across all monitoring scripts
- Add execute_hana_sql helper functions for consistent SQL execution
- Remove state file tracking in favor of direct file operations
- Standardize error handling with exit codes on critical failures
- Clean up hana.conf by removing unused auto-delete directory settings
This commit is contained in:
@@ -12,32 +12,30 @@ source "${SCRIPT_DIR}/hana.conf"
|
||||
source "${SCRIPT_DIR}/hana_lib.sh"
|
||||
|
||||
# Acquire lock
|
||||
LOCK_FILE=$(acquire_lock "$SCRIPT_NAME")
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! acquire_lock "$SCRIPT_NAME"; then
|
||||
exit 1
|
||||
fi
|
||||
trap 'release_lock "$LOCK_FILE"' EXIT
|
||||
trap 'release_lock "$SCRIPT_NAME"' EXIT
|
||||
|
||||
log_message "$SCRIPT_NAME" "Starting log segment check..."
|
||||
|
||||
# SQL Query for log segments
|
||||
SQL_QUERY="SELECT b.host, b.service_name, a.state, count(*) FROM PUBLIC.M_LOG_SEGMENTS a JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) GROUP BY b.host, b.service_name, a.state;"
|
||||
|
||||
# Check if hdbsql is available
|
||||
if [ ! -x "$HDBSQL_PATH" ]; then
|
||||
log_message "$SCRIPT_NAME" "ERROR: hdbsql not found or not executable at ${HDBSQL_PATH}"
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_hdbsql_path" "HANA Monitor Error" "hdbsql not found or not executable at ${HDBSQL_PATH}" "true" "HDBSQL_ERROR"
|
||||
send_alert "$SCRIPT_NAME" "HANA Monitor Error" "hdbsql not found or not executable at ${HDBSQL_PATH}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Execute SQL query as HANA user with improved error handling
|
||||
readarray -t sql_output < <(su - "$HANA_USER" -c "$HDBSQL_PATH -U $HANA_USER_KEY -c \";\" \"$SQL_QUERY\"" 2>&1)
|
||||
# SQL Query for log segments
|
||||
SQL_QUERY="SELECT b.host, b.service_name, a.state, count(*) FROM PUBLIC.M_LOG_SEGMENTS a JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) GROUP BY b.host, b.service_name, a.state;"
|
||||
|
||||
# Execute SQL query
|
||||
sql_output=$(execute_hana_sql "$SQL_QUERY")
|
||||
sql_status=$?
|
||||
|
||||
if [ $sql_status -ne 0 ]; then
|
||||
error_message=$(printf '%s\n' "${sql_output[@]}")
|
||||
log_message "$SCRIPT_NAME" "ERROR: The hdbsql command failed. Details: ${error_message}"
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_hdbsql_command" "HANA Monitor Error" "The hdbsql command failed. Details: ${error_message}" "true" "HDBSQL_COMMAND_FAILED"
|
||||
log_message "$SCRIPT_NAME" "ERROR: The hdbsql command failed."
|
||||
send_alert "$SCRIPT_NAME" "HANA Monitor Error" "The hdbsql command failed."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -46,15 +44,15 @@ total_segments=0
|
||||
truncated_segments=0
|
||||
free_segments=0
|
||||
|
||||
for line in "${sql_output[@]}"; do
|
||||
while IFS= read -r line; do
|
||||
# Skip empty lines and header
|
||||
if [[ -z "$line" || "$line" == *"STATE"* || "$line" == *"host"* ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
cleaned_line=$(echo "$line" | tr -d '"')
|
||||
state=$(echo "$cleaned_line" | awk -F',' '{print $3}' | xargs) # Trim whitespace
|
||||
count=$(echo "$cleaned_line" | awk -F',' '{print $4}' | xargs) # Trim whitespace
|
||||
state=$(echo "$cleaned_line" | awk -F',' '{print $3}' | xargs)
|
||||
count=$(echo "$cleaned_line" | awk -F',' '{print $4}' | xargs)
|
||||
|
||||
# Validate count is a number
|
||||
if ! [[ "$count" =~ ^[0-9]+$ ]]; then
|
||||
@@ -67,7 +65,7 @@ for line in "${sql_output[@]}"; do
|
||||
elif [[ "$state" == "Free" ]]; then
|
||||
free_segments=$((free_segments + count))
|
||||
fi
|
||||
done
|
||||
done <<< "$sql_output"
|
||||
|
||||
log_message "$SCRIPT_NAME" "Total Segments: ${total_segments}"
|
||||
log_message "$SCRIPT_NAME" "Truncated Segments: ${truncated_segments}"
|
||||
@@ -75,37 +73,24 @@ log_message "$SCRIPT_NAME" "Free Segments: ${free_segments}"
|
||||
|
||||
if [ $total_segments -eq 0 ]; then
|
||||
log_message "$SCRIPT_NAME" "WARNING: No log segments found. Skipping percentage checks."
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_segments_total" "HANA Log Segment Warning" "No log segments found. Skipping percentage checks." "true" "NO_LOG_SEGMENTS"
|
||||
else
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_segments_total" "HANA Log Segment" "Log segments found." "false" "OK"
|
||||
|
||||
# Calculate truncated percentage with integer arithmetic
|
||||
if [ $total_segments -gt 0 ]; then
|
||||
truncated_percentage=$((truncated_segments * 100 / total_segments))
|
||||
else
|
||||
truncated_percentage=0
|
||||
fi
|
||||
|
||||
if [ $truncated_percentage -gt $TRUNCATED_PERCENTAGE_THRESHOLD ]; then
|
||||
log_message "$SCRIPT_NAME" "ALERT: ${truncated_percentage}% of log segments are 'Truncated'."
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state." "true" "${truncated_percentage}%"
|
||||
else
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_truncated" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state (below threshold)." "false" "OK"
|
||||
fi
|
||||
|
||||
# Calculate free percentage with integer arithmetic
|
||||
if [ $total_segments -gt 0 ]; then
|
||||
free_percentage=$((free_segments * 100 / total_segments))
|
||||
else
|
||||
free_percentage=0
|
||||
fi
|
||||
|
||||
if [ $free_percentage -lt $FREE_PERCENTAGE_THRESHOLD ]; then
|
||||
log_message "$SCRIPT_NAME" "ALERT: Only ${free_percentage}% of log segments are 'Free'."
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state." "true" "${free_percentage}%"
|
||||
else
|
||||
send_notification_if_changed "$SCRIPT_NAME" "hana_log_free" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state (above threshold)." "false" "OK"
|
||||
fi
|
||||
send_alert "$SCRIPT_NAME" "HANA Log Segment Warning" "No log segments found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Calculate truncated percentage with integer arithmetic
|
||||
truncated_percentage=$((truncated_segments * 100 / total_segments))
|
||||
|
||||
if [ $truncated_percentage -gt $TRUNCATED_PERCENTAGE_THRESHOLD ]; then
|
||||
log_message "$SCRIPT_NAME" "ALERT: ${truncated_percentage}% of log segments are 'Truncated'."
|
||||
send_alert "$SCRIPT_NAME" "HANA Log Segment" "${truncated_percentage}% of HANA log segments are in 'Truncated' state."
|
||||
fi
|
||||
|
||||
# Calculate free percentage with integer arithmetic
|
||||
free_percentage=$((free_segments * 100 / total_segments))
|
||||
|
||||
if [ $free_percentage -lt $FREE_PERCENTAGE_THRESHOLD ]; then
|
||||
log_message "$SCRIPT_NAME" "ALERT: Only ${free_percentage}% of log segments are 'Free'."
|
||||
send_alert "$SCRIPT_NAME" "HANA Log Segment" "Only ${free_percentage}% of HANA log segments are in 'Free' state."
|
||||
fi
|
||||
|
||||
log_message "$SCRIPT_NAME" "Log segment check complete."
|
||||
|
||||
Reference in New Issue
Block a user