refactor(monitoring): simplify notification system and remove auto-cleanup
- Replace state-based notifications with direct alert functions
- Remove auto-cleanup functionality from disk monitoring and configuration
- Simplify lock acquisition/release across all monitoring scripts
- Add execute_hana_sql helper functions for consistent SQL execution
- Remove state file tracking in favor of direct file operations
- Standardize error handling with exit codes on critical failures
- Clean up hana.conf by removing unused auto-delete directory settings
This commit is contained in:
271
hana_lib.sh
271
hana_lib.sh
@@ -25,83 +25,55 @@ acquire_lock() {
|
||||
fi
|
||||
|
||||
touch "$lock_file"
|
||||
echo "$lock_file"
|
||||
return 0
|
||||
}
|
||||
|
||||
# Release lock
# Usage: release_lock "SCRIPT_NAME"
#        release_lock "LOCK_FILE"    (legacy form: full path to the lock file)
#
# Removes the lock file if it exists. An argument containing a "/" is treated
# as a lock-file path; anything else is treated as a script name and mapped to
# ${LOCK_DIR}/hana_<SCRIPT_NAME>.lock. A missing lock file or an empty
# argument is a no-op.
# Returns: 0 when there was nothing to remove, otherwise the status of rm.
release_lock() {
    local lock_ref="$1"
    local lock_file

    # Called without an argument: nothing to release.
    [ -n "$lock_ref" ] || return 0

    case "$lock_ref" in
        # Only values that look like a path are used verbatim, so a bare
        # script name can never match an unrelated file in the working dir.
        */*) lock_file="$lock_ref" ;;
        *)   lock_file="${LOCK_DIR}/hana_${lock_ref}.lock" ;;
    esac

    if [ -f "$lock_file" ]; then
        rm -f -- "$lock_file"
    fi
}
|
||||
|
||||
# Get state value
# Usage: get_state "KEY"
# Prints the content of ${STATE_DIR}/KEY.state, or an empty line when that
# file does not exist.
get_state() {
    local state_path="${STATE_DIR}/${1}.state"

    # No recorded state yet: report it as an empty value.
    [ -f "$state_path" ] || { echo ""; return; }

    cat "$state_path"
}
|
||||
|
||||
# Set state value
# Usage: set_state "KEY" "VALUE"
# Writes VALUE followed by a newline to ${STATE_DIR}/KEY.state, replacing any
# previous content.
# Returns: 0 on success, non-zero when the state file could not be written.
set_state() {
    local key="$1"
    local value="$2"

    # printf rather than echo: echo would treat values such as "-n" or "-e"
    # as options and store an empty/garbled state instead of the value.
    printf '%s\n' "$value" > "${STATE_DIR}/${key}.state"
}
|
||||
|
||||
# Post one message to the configured ntfy topic (internal helper).
# Usage: _ntfy_post "LOG_TAG" "TITLE" "BODY"
# Delivery is best-effort: when NTFY_TOKEN or NTFY_TOPIC_URL is empty the
# message is skipped, and a failed request is only logged. Always returns 0 so
# a notification outage never aborts the calling monitoring script.
_ntfy_post() {
    local log_tag="$1"
    local title="$2"
    local body="$3"

    if [ -z "${NTFY_TOKEN:-}" ] || [ -z "${NTFY_TOPIC_URL:-}" ]; then
        log_message "$log_tag" "Ntfy not configured, skipping notification"
        return 0
    fi

    # -f makes curl exit non-zero on HTTP errors (e.g. 401/404), so a rejected
    # request is not reported as "sent".
    if curl -f -H "Authorization: Bearer ${NTFY_TOKEN}" -H "Title: ${title}" -d "${body}" "${NTFY_TOPIC_URL}" > /dev/null 2>&1; then
        log_message "$log_tag" "Notification sent: ${title}"
    else
        log_message "$log_tag" "ERROR: Failed to send notification: ${title}"
    fi
    return 0
}

# Send notification via ntfy
# Usage: send_notification "TITLE" "MESSAGE"
# The message is prefixed with "[COMPANY_NAME | hostname]" before it is posted.
# Log lines are written under the "NOTIFY" tag.
send_notification() {
    local title="$1"
    local message="$2"
    local host

    host=$(hostname)
    _ntfy_post "NOTIFY" "$title" "[${COMPANY_NAME} | ${host}] ${message}"
}

# Send notification if state changed
# Usage: send_notification_if_changed "SCRIPT_NAME" "ALERT_KEY" "TITLE_PREFIX" "MESSAGE" "IS_ALERT" "CURRENT_VALUE"
# Compares CURRENT_VALUE with the value stored under ALERT_KEY (get_state):
#   - unchanged value                         -> nothing happens
#   - IS_ALERT is "true"                      -> "<TITLE_PREFIX> Alert" is sent
#   - not an alert, previous value set and
#     different from "OK"                     -> "<TITLE_PREFIX> Resolved" is sent
#   - otherwise                               -> state is recorded silently
# The new value is stored (set_state) whenever it differs from the previous one.
send_notification_if_changed() {
    local script_name="$1"
    local alert_key="$2"
    local title_prefix="$3"
    local current_message="$4"
    local is_alert_condition="$5"
    local current_value="$6"
    local previous_value host full_title full_message

    previous_value=$(get_state "$alert_key")

    # Same state as last time: no notification, no state write.
    [ "$current_value" != "$previous_value" ] || return 0

    if [ "$is_alert_condition" = "true" ]; then
        full_title="${title_prefix} Alert"
        full_message="🚨 Critical: ${current_message}"
        log_message "$script_name" "ALERT: ${full_message}"
    elif [ -n "$previous_value" ] && [ "$previous_value" != "OK" ]; then
        full_title="${title_prefix} Resolved"
        full_message="✅ Resolved: ${current_message}"
        log_message "$script_name" "RESOLVED: ${full_message}"
    else
        # First observation or OK -> OK-like change: remember it, stay quiet.
        set_state "$alert_key" "$current_value"
        return
    fi

    host=$(hostname)
    _ntfy_post "$script_name" "$full_title" "[${COMPANY_NAME} | ${host}] ${full_message}"

    set_state "$alert_key" "$current_value"
}
|
||||
|
||||
# Send alert notification
# Usage: send_alert "SCRIPT_NAME" "TITLE_PREFIX" "MESSAGE"
# Sends "<TITLE_PREFIX> Alert" through send_notification, then records the
# alert in the log under SCRIPT_NAME.
send_alert() {
    local caller="$1" prefix="$2" detail="$3"
    local alert_title="${prefix} Alert"
    local alert_body="🚨 Critical: ${detail}"

    send_notification "$alert_title" "$alert_body"
    log_message "$caller" "ALERT: ${detail}"
}
|
||||
|
||||
# Send OK notification (state change from alert to normal)
# Usage: send_ok "SCRIPT_NAME" "TITLE_PREFIX" "MESSAGE"
# Sends "<TITLE_PREFIX> Resolved" through send_notification, then records the
# recovery in the log under SCRIPT_NAME.
send_ok() {
    local caller="$1" prefix="$2" detail="$3"
    local ok_title="${prefix} Resolved"
    local ok_body="✅ Resolved: ${detail}"

    send_notification "$ok_title" "$ok_body"
    log_message "$caller" "RESOLVED: ${detail}"
}
|
||||
|
||||
# Run command as HANA user using su
|
||||
# Usage: run_as_hana_user "COMMAND"
|
||||
run_as_hana_user() {
|
||||
@@ -109,6 +81,45 @@ run_as_hana_user() {
|
||||
su - "$HANA_USER" -c "$command"
|
||||
}
|
||||
|
||||
# Execute SQL query as HANA user
# Usage: execute_hana_sql "SQL_QUERY"
# Runs "$HDBSQL_PATH -U $HANA_USER_KEY -j -a -x" with the query through
# "su - $HANA_USER". stderr of the command is merged into the captured output.
# Returns: SQL output on stdout, returns 0 on success, 1 on failure
#          (on failure the captured output is written to stderr instead).
# NOTE(review): the query is embedded between double quotes in the su -c
# command string, so '"', '$' and backticks inside SQL_QUERY are interpreted by
# the HANA user's shell. Callers must only pass trusted, pre-escaped SQL.
execute_hana_sql() {
    local sql_query="$1"
    local output
    local sql_status

    # Refuse obviously unusable calls before spawning su/hdbsql.
    if [ -z "$sql_query" ]; then
        log_message "SQL" "ERROR: Failed to execute SQL query. Empty query supplied"
        return 1
    fi
    if [ -z "${HANA_USER:-}" ] || [ -z "${HDBSQL_PATH:-}" ] || [ -z "${HANA_USER_KEY:-}" ]; then
        log_message "SQL" "ERROR: Failed to execute SQL query. HANA_USER, HDBSQL_PATH and HANA_USER_KEY must be set"
        return 1
    fi

    output=$(su - "$HANA_USER" -c "$HDBSQL_PATH -U $HANA_USER_KEY -j -a -x \"$sql_query\"" 2>&1)
    sql_status=$?

    if [ "$sql_status" -ne 0 ]; then
        log_message "SQL" "ERROR: Failed to execute SQL query. Exit code: ${sql_status}"
        printf '%s\n' "$output" >&2
        return 1
    fi

    # printf, not echo: result text may start with "-" or contain backslashes.
    printf '%s\n' "$output"
    return 0
}
|
||||
|
||||
# Execute SQL query and return result (for single-value queries)
# Usage: execute_hana_sql_query "SQL_QUERY"
# Runs the query through execute_hana_sql, strips every double quote from the
# result and collapses all whitespace (including newlines) to single spaces,
# with leading/trailing whitespace removed.
# Returns: Query result on stdout, returns 0 on success, 1 on failure
execute_hana_sql_query() {
    local sql_query="$1"
    local output
    local -a words=()
    # Pin IFS so the splitting/joining below does not depend on the caller.
    local IFS=$' \t\n'

    if ! output=$(execute_hana_sql "$sql_query"); then
        return 1
    fi

    # Clean output: remove quotes and whitespace.
    # Done with shell word-splitting instead of xargs, which aborts on results
    # containing an apostrophe and silently drops backslashes.
    output=${output//\"/}
    read -r -d '' -a words <<< "$output" || true
    printf '%s\n' "${words[*]:-}"
    return 0
}
|
||||
|
||||
# Get disk usage percentage for a directory
|
||||
# Usage: get_disk_usage_percentage "/path/to/dir"
|
||||
# Returns: Usage percentage as integer (without % sign)
|
||||
@@ -136,137 +147,3 @@ get_available_space_kb() {
|
||||
local dir="$1"
|
||||
df -k "$dir" 2>/dev/null | awk 'NR==2 {print $4}'
|
||||
}
|
||||
|
||||
# Find directories for auto-delete on the same mount point
# Usage: find_autodelete_dirs_on_mount "mount_point"
# Reads the DIRS_FOR_AUTODELETE array, whose entries have the form
# "<mount_point>:<directory>". An entry is selected when its mount point
# equals the argument and its directory exists.
# Returns: Space-separated list of directories to clean (empty line if none)
find_autodelete_dirs_on_mount() {
    local mount_point="$1"
    local result=""
    # Declared local so the loop does not overwrite same-named caller variables.
    local entry entry_mount cleanup_dir

    for entry in "${DIRS_FOR_AUTODELETE[@]}"; do
        entry_mount="${entry%%:*}"   # text before the first ":"
        cleanup_dir="${entry#*:}"    # text after the first ":"

        if [ "$entry_mount" = "$mount_point" ] && [ -d "$cleanup_dir" ]; then
            # Append with a separating space only when the list is non-empty.
            result="${result:+$result }$cleanup_dir"
        fi
    done

    echo "$result"
}
|
||||
|
||||
# Clean old files in a directory
# Usage: clean_directory_files "/path/to/dir" "max_age_days"
# Deletes regular files below the directory that match find's
# "-mtime +max_age_days" (default 7), then removes sub-directories left empty.
# The directory passed in is never removed itself.
# Returns: "<files_deleted>:<bytes_freed>" on stdout ("0:0" when the
#          directory does not exist)
clean_directory_files() {
    local cleanup_dir="$1"
    local max_age_days="${2:-7}"
    local files_deleted=0
    local space_freed=0
    local file file_size

    if [ ! -d "$cleanup_dir" ]; then
        log_message "CLEANUP" "Directory '$cleanup_dir' not found. Skipping."
        echo "0:0"
        return
    fi

    # Find and delete old files.
    # NUL-delimited and fed via process substitution so odd file names are
    # handled and the counters stay in this shell.
    while IFS= read -r -d '' file; do
        [ -f "$file" ] || continue
        # Size is read before deletion; an unreadable size counts as 0 bytes.
        file_size=$(stat -c%s -- "$file" 2>/dev/null) || file_size=0
        if rm -f -- "$file" 2>/dev/null; then
            files_deleted=$((files_deleted + 1))
            space_freed=$((space_freed + file_size))
        fi
    done < <(find "$cleanup_dir" -type f -mtime +"$max_age_days" -print0 2>/dev/null)

    # Also clean empty directories.
    # -mindepth 1 keeps find from deleting the cleanup directory itself once
    # everything inside it is gone.
    find "$cleanup_dir" -mindepth 1 -type d -empty -delete 2>/dev/null

    log_message "CLEANUP" "Deleted $files_deleted files from '$cleanup_dir', freed $((space_freed / 1024)) KB"
    echo "${files_deleted}:${space_freed}"
}
|
||||
|
||||
# Automatic disk cleanup function
# Usage: auto_cleanup "mount_point" "target_free_percentage"
# Runs clean_directory_files (with MAX_FILE_AGE_DAYS) on every directory that
# find_autodelete_dirs_on_mount reports for the mount point, and adds up the
# "<files>:<bytes>" results. Does nothing unless AUTO_CLEANUP_ENABLED is "true".
# The second argument is accepted for call compatibility but is not used.
# Returns: 0 if cleanup successful (at least one byte freed), 1 if failed or
#          not needed
auto_cleanup() {
    local mount_point="$1"
    local cleanup_dirs cleanup_dir result files freed
    local total_freed=0
    local total_files=0

    if [ "$AUTO_CLEANUP_ENABLED" != "true" ]; then
        log_message "CLEANUP" "Auto-cleanup is disabled. Skipping."
        return 1
    fi

    cleanup_dirs=$(find_autodelete_dirs_on_mount "$mount_point")

    if [ -z "$cleanup_dirs" ]; then
        log_message "CLEANUP" "No auto-delete directories configured for mount point '$mount_point'. Skipping cleanup."
        return 1
    fi

    log_message "CLEANUP" "Starting auto-cleanup for mount point '$mount_point'. Directories: $cleanup_dirs"

    # Intentionally unquoted: the helper returns a space-separated list.
    for cleanup_dir in $cleanup_dirs; do
        result=$(clean_directory_files "$cleanup_dir" "$MAX_FILE_AGE_DAYS")
        files="${result%%:*}"   # count before the first ":"
        freed="${result#*:}"    # bytes after the first ":"
        total_files=$((total_files + files))
        total_freed=$((total_freed + freed))
    done

    log_message "CLEANUP" "Cleanup complete. Total files deleted: $total_files, Total space freed: $((total_freed / 1024)) KB"

    # Only a cleanup that actually freed space counts as success.
    if [ "$total_freed" -gt 0 ]; then
        return 0
    else
        return 1
    fi
}
|
||||
|
||||
# Check disk space and perform auto-cleanup if needed
# Usage: check_and_cleanup_disk "directory" "threshold"
# Reads the usage percentage of the directory (get_disk_usage_percentage) and,
# when it is above the threshold (default 85), runs auto_cleanup on the
# directory's mount point (get_mount_point).
# Returns: 0 if OK or cleanup successful (even when usage is still above the
#          threshold afterwards), 1 if usage could not be determined or
#          cleanup failed
check_and_cleanup_disk() {
    local dir="$1"
    local threshold="${2:-85}"
    local usage mount_point new_usage

    usage=$(get_disk_usage_percentage "$dir")
    mount_point=$(get_mount_point "$dir")

    # Empty, non-numeric or 0 means the usage could not be read. Without the
    # numeric check a value like "n/a" would fall through and report "OK".
    if ! [[ "$usage" =~ ^[0-9]+$ ]] || [ "$usage" -eq 0 ]; then
        log_message "CLEANUP" "Could not determine disk usage for '$dir'. Skipping."
        return 1
    fi

    # Within limits: nothing to do.
    if [ "$usage" -le "$threshold" ]; then
        return 0
    fi

    log_message "CLEANUP" "Disk usage ${usage}% exceeds threshold ${threshold}% for '$dir'. Attempting cleanup..."

    if ! auto_cleanup "$mount_point" "$MIN_FREE_SPACE_AFTER_CLEANUP"; then
        log_message "CLEANUP" "Cleanup failed or no files to clean for '$dir'"
        return 1
    fi

    new_usage=$(get_disk_usage_percentage "$dir")
    log_message "CLEANUP" "After cleanup, disk usage is ${new_usage}%"

    # A cleanup that freed space is a success either way; only note when the
    # usage did not drop back under the threshold.
    if ! [ "$new_usage" -le "$threshold" ] 2>/dev/null; then
        log_message "CLEANUP" "Cleanup completed but usage ${new_usage}% still above threshold"
    fi
    return 0
}
|
||||
|
||||
Reference in New Issue
Block a user