refactor(monitoring): simplify monitoring scripts and remove state tracking

- Remove consecutive-breach tracking for the statement queue check (alerts now fire immediately on the first threshold breach)
- Consolidate script initialization into init_script() function
- Remove unused helper functions (send_ok, run_as_hana_user, get_mount_point)
- Flatten sld_watchdog.sh structure by removing main() wrapper
- Remove state directory and lock directory configuration from hana.conf
- Simplify alert messages to include threshold values

This continues the simplification effort from previous commits by removing stateful tracking mechanisms and streamlining the monitoring logic for easier maintenance.
This commit is contained in:
2026-03-12 22:18:29 +01:00
parent cf5b81889d
commit 0beef6fa48
6 changed files with 83 additions and 148 deletions

View File

@@ -30,7 +30,7 @@ fi
STATEMENT_QUEUE_SQL="SELECT COUNT(*) FROM M_SERVICE_THREADS WHERE THREAD_TYPE = 'SqlExecutor' AND THREAD_STATE = 'Queueing';"
# Execute SQL query
queue_result=$(execute_hana_sql_query "$STATEMENT_QUEUE_SQL")
queue_count=$(execute_hana_sql_query "$STATEMENT_QUEUE_SQL")
sql_status=$?
if [ $sql_status -ne 0 ]; then
@@ -48,30 +48,11 @@ fi
log_message "$SCRIPT_NAME" "Current statement queue length: ${queue_count}"
# Get breach count from state file
breach_count_file="${STATE_DIR}/statement_queue_breach_count"
breach_count=0
if [ -f "$breach_count_file" ]; then
breach_count=$(cat "$breach_count_file")
fi
# Alert immediately if queue exceeds threshold
if [ "$queue_count" -gt "$STATEMENT_QUEUE_THRESHOLD" ]; then
breach_count=$((breach_count + 1))
log_message "$SCRIPT_NAME" "Statement queue is above threshold (${queue_count} > ${STATEMENT_QUEUE_THRESHOLD}). Consecutive breach count: ${breach_count}/${STATEMENT_QUEUE_CONSECUTIVE_RUNS}."
else
if [ "$breach_count" -gt 0 ]; then
log_message "$SCRIPT_NAME" "Statement queue returned to normal. Resetting breach count from ${breach_count} to 0."
fi
breach_count=0
fi
echo "$breach_count" > "$breach_count_file"
if [ "$breach_count" -ge "$STATEMENT_QUEUE_CONSECUTIVE_RUNS" ]; then
message="Statement queue has been over ${STATEMENT_QUEUE_THRESHOLD} for ${breach_count} checks. Current count: ${queue_count}."
send_alert "$SCRIPT_NAME" "HANA Statement Queue" "$message"
send_alert "$SCRIPT_NAME" "HANA Statement Queue" "Statement queue count is ${queue_count}, which exceeds threshold of ${STATEMENT_QUEUE_THRESHOLD}."
exit 1
else
log_message "$SCRIPT_NAME" "Statement queue is normal. Current count: ${queue_count}."
fi
log_message "$SCRIPT_NAME" "Statement queue is normal. Current count: ${queue_count}."
log_message "$SCRIPT_NAME" "Statement queue check complete."