refactor(monitoring): simplify monitoring scripts and remove state tracking
- Remove consecutive breach tracking for statement queue (immediate alerts)
- Consolidate script initialization into init_script() function
- Remove unused helper functions (send_ok, run_as_hana_user, get_mount_point)
- Flatten sld_watchdog.sh structure by removing main() wrapper
- Remove state directory and lock directory configuration from hana.conf
- Simplify alert messages to include threshold values

This continues the simplification effort from previous commits by removing stateful tracking mechanisms and streamlining the monitoring logic for easier maintenance.
This commit is contained in:
@@ -30,7 +30,7 @@ fi
|
||||
STATEMENT_QUEUE_SQL="SELECT COUNT(*) FROM M_SERVICE_THREADS WHERE THREAD_TYPE = 'SqlExecutor' AND THREAD_STATE = 'Queueing';"
|
||||
|
||||
# Execute SQL query
|
||||
queue_result=$(execute_hana_sql_query "$STATEMENT_QUEUE_SQL")
|
||||
queue_count=$(execute_hana_sql_query "$STATEMENT_QUEUE_SQL")
|
||||
sql_status=$?
|
||||
|
||||
if [ $sql_status -ne 0 ]; then
|
||||
@@ -48,30 +48,11 @@ fi
|
||||
|
||||
log_message "$SCRIPT_NAME" "Current statement queue length: ${queue_count}"
|
||||
|
||||
# Get breach count from state file
|
||||
breach_count_file="${STATE_DIR}/statement_queue_breach_count"
|
||||
breach_count=0
|
||||
if [ -f "$breach_count_file" ]; then
|
||||
breach_count=$(cat "$breach_count_file")
|
||||
fi
|
||||
|
||||
# Alert immediately if queue exceeds threshold
|
||||
if [ "$queue_count" -gt "$STATEMENT_QUEUE_THRESHOLD" ]; then
|
||||
breach_count=$((breach_count + 1))
|
||||
log_message "$SCRIPT_NAME" "Statement queue is above threshold (${queue_count} > ${STATEMENT_QUEUE_THRESHOLD}). Consecutive breach count: ${breach_count}/${STATEMENT_QUEUE_CONSECUTIVE_RUNS}."
|
||||
else
|
||||
if [ "$breach_count" -gt 0 ]; then
|
||||
log_message "$SCRIPT_NAME" "Statement queue returned to normal. Resetting breach count from ${breach_count} to 0."
|
||||
fi
|
||||
breach_count=0
|
||||
fi
|
||||
echo "$breach_count" > "$breach_count_file"
|
||||
|
||||
if [ "$breach_count" -ge "$STATEMENT_QUEUE_CONSECUTIVE_RUNS" ]; then
|
||||
message="Statement queue has been over ${STATEMENT_QUEUE_THRESHOLD} for ${breach_count} checks. Current count: ${queue_count}."
|
||||
send_alert "$SCRIPT_NAME" "HANA Statement Queue" "$message"
|
||||
send_alert "$SCRIPT_NAME" "HANA Statement Queue" "Statement queue count is ${queue_count}, which exceeds threshold of ${STATEMENT_QUEUE_THRESHOLD}."
|
||||
exit 1
|
||||
else
|
||||
log_message "$SCRIPT_NAME" "Statement queue is normal. Current count: ${queue_count}."
|
||||
fi
|
||||
|
||||
log_message "$SCRIPT_NAME" "Statement queue is normal. Current count: ${queue_count}."
|
||||
log_message "$SCRIPT_NAME" "Statement queue check complete."
|
||||
|
||||
Reference in New Issue
Block a user