first commit
This commit is contained in:
229
Services/MonitorService.cs
Normal file
229
Services/MonitorService.cs
Normal file
@@ -0,0 +1,229 @@
|
||||
using HanaToolbox.Config;
|
||||
using HanaToolbox.Logging;
|
||||
using HanaToolbox.Services.Interfaces;
|
||||
|
||||
namespace HanaToolbox.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors HANA health: process status, disk usage, log segments, statement queue, backup age.
|
||||
/// Sends state-change notifications via ntfy to avoid alert spam.
|
||||
/// </summary>
|
||||
public sealed class MonitorService(
|
||||
IProcessRunner runner,
|
||||
IUserSwitcher switcher,
|
||||
IHdbClientLocator locator,
|
||||
INotificationService ntfy,
|
||||
IMonitorStateService state,
|
||||
AppLogger logger) : IMonitorService
|
||||
{
|
||||
/// <summary>
/// Runs one monitoring pass: HANA processes, disk usage, log segments,
/// statement queue and backup age, in that order.
/// </summary>
/// <remarks>
/// When the process check raises an alert the pass stops there and the
/// remaining checks are skipped for this run.
/// </remarks>
/// <param name="config">Paths, thresholds and the user key used by the individual checks.</param>
/// <param name="hana">HANA settings used to locate the hdbsql executable.</param>
/// <param name="sid">System ID handed to the hdbsql locator and to the SQL runner.</param>
/// <param name="ct">Token used to cancel the pass.</param>
public async Task RunAsync(
    MonitorConfig config, HanaConfig hana, string sid,
    CancellationToken ct = default)
{
    var hdbsql = locator.LocateHdbsql(hana.HdbsqlPath, sid, hana.InstanceNumber);
    var host   = System.Net.Dns.GetHostName();
    var prefix = $"[{config.CompanyName} | {host}]";

    // 1. HANA processes (sapcontrol runs as root)
    if (!await CheckProcessesAsync(config, prefix, ct))
    {
        return; // Exit early — other checks may also fail
    }

    // 2. Disk usage
    await CheckDiskUsageAsync(config, prefix, ct);

    // 3. Log segments
    await CheckLogSegmentsAsync(config, hdbsql, sid, prefix, ct);

    // 4. Statement queue
    await CheckStatementQueueAsync(config, hdbsql, sid, prefix, ct);

    // 5. Backup age
    await CheckBackupAgeAsync(config, hdbsql, sid, prefix, ct);

    logger.Success("Monitor check complete.");
}

/// <summary>
/// Calls sapcontrol GetProcessList and alerts when any listed process line lacks "GREEN".
/// </summary>
/// <returns><c>true</c> when every process line contains "GREEN"; <c>false</c> when an alert was raised.</returns>
private async Task<bool> CheckProcessesAsync(
    MonitorConfig config, string prefix, CancellationToken ct)
{
    logger.Step("Checking HANA processes...");
    var sapResult = await runner.RunAsync(
        config.SapcontrolPath,
        ["-nr", config.HanaInstanceNumber, "-function", "GetProcessList"], ct);

    // NOTE(review): Skip(5) assumes five non-empty header lines precede the process rows.
    // Empty lines are already dropped by RemoveEmptyEntries — confirm against real sapcontrol output.
    var processLines = sapResult.StdOut
        .Split('\n', StringSplitOptions.RemoveEmptyEntries)
        .Skip(5)
        .ToList();

    // No rows at all means sapcontrol produced nothing usable; reporting that as
    // "all GREEN" would hide an outage (and could even send a "Resolved" notification).
    if (processLines.Count == 0)
    {
        logger.Warning("sapcontrol returned no process list.");
        await NotifyIfChanged("hana_processes", "HANA Process",
            $"{prefix} Could not read the HANA process list from sapcontrol.",
            isAlert: true, currentVal: "ALERT:NO_PROCESS_LIST", ct);
        return false;
    }

    var nonGreen = processLines.Where(l => !l.Contains("GREEN")).ToList();
    if (nonGreen.Count > 0)
    {
        var msg = string.Join(", ", nonGreen.Select(l => l.Trim()));
        await NotifyIfChanged("hana_processes", "HANA Process",
            $"{prefix} One or more HANA processes are not GREEN: {msg}",
            isAlert: true, currentVal: $"ALERT:{msg}", ct);
        return false;
    }

    await NotifyIfChanged("hana_processes", "HANA Process",
        $"{prefix} All HANA processes are GREEN.",
        isAlert: false, currentVal: "OK", ct);
    return true;
}

/// <summary>
/// Runs <c>df -h</c> for each configured directory and compares the usage percentage
/// with <c>config.DiskUsageThresholdPercent</c>.
/// </summary>
private async Task CheckDiskUsageAsync(
    MonitorConfig config, string prefix, CancellationToken ct)
{
    logger.Step("Checking disk usage...");
    foreach (var dir in config.DirectoriesToMonitor)
    {
        ct.ThrowIfCancellationRequested();
        var dfResult = await runner.RunAsync("/bin/df", ["-h", dir], ct);

        // Second output line, fifth whitespace-separated column, with the trailing '%' removed.
        var usageStr = dfResult.StdOut
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Skip(1).FirstOrDefault()
            ?.Split(' ', StringSplitOptions.RemoveEmptyEntries)
            .ElementAtOrDefault(4)
            ?.TrimEnd('%');

        if (!int.TryParse(usageStr, out var usage))
        {
            logger.Warning($"Could not parse disk usage for '{dir}'.");
            continue;
        }

        // One state key per directory so each mount point is tracked independently.
        var key = $"disk_{dir.Replace('/', '_')}";
        if (usage > config.DiskUsageThresholdPercent)
        {
            await NotifyIfChanged(key, "HANA Disk",
                $"{prefix} Disk usage for '{dir}' is at {usage}% (threshold: {config.DiskUsageThresholdPercent}%).",
                isAlert: true, currentVal: $"{usage}%", ct);
        }
        else
        {
            await NotifyIfChanged(key, "HANA Disk",
                $"{prefix} Disk '{dir}' is at {usage}% (OK).",
                isAlert: false, currentVal: "OK", ct);
        }
    }
}

/// <summary>
/// Counts log segments per state and alerts when the share of 'Truncated' segments is
/// above, or the share of 'Free' segments is below, the configured thresholds.
/// </summary>
private async Task CheckLogSegmentsAsync(
    MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
{
    logger.Step("Checking HANA log segments...");
    var segSql = "SELECT b.host, b.service_name, a.state, count(*) " +
                 "FROM PUBLIC.M_LOG_SEGMENTS a " +
                 "JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) " +
                 "GROUP BY b.host, b.service_name, a.state;";

    var segResult = await RunSql(hdbsql, config.HanaUserKey, segSql, sid, false, ct);

    int total = 0, truncated = 0, free = 0;
    foreach (var line in segResult.StdOut.Split('\n', StringSplitOptions.RemoveEmptyEntries))
    {
        var parts = line.Replace("\"", "").Split(',');
        if (parts.Length < 4)
        {
            continue;
        }

        // Rows whose fourth column is not an integer (such as the column-header row) are
        // skipped here. A substring test for "host" must not be used for that: the first
        // column holds the host name, so a machine called e.g. "hanahost01" would have
        // every data row discarded.
        if (!int.TryParse(parts[3].Trim(), out var cnt))
        {
            continue;
        }

        total += cnt;
        var seg = parts[2].Trim();
        if (seg == "Truncated")
        {
            truncated += cnt;
        }
        else if (seg == "Free")
        {
            free += cnt;
        }
    }

    // Nothing parsed: leave the stored state untouched.
    if (total <= 0)
    {
        return;
    }

    // Integer percentages, rounded down.
    var truncPct = truncated * 100 / total;
    var freePct  = free * 100 / total;

    if (truncPct > config.TruncatedSegmentThresholdPercent)
    {
        await NotifyIfChanged("hana_log_truncated", "HANA Log Segment",
            $"{prefix} {truncPct}% of log segments are 'Truncated' (threshold: {config.TruncatedSegmentThresholdPercent}%).",
            isAlert: true, currentVal: $"{truncPct}%", ct);
    }
    else
    {
        await NotifyIfChanged("hana_log_truncated", "HANA Log Segment",
            $"{prefix} Log segments OK ({truncPct}% truncated).",
            isAlert: false, currentVal: "OK", ct);
    }

    if (freePct < config.FreeSegmentThresholdPercent)
    {
        await NotifyIfChanged("hana_log_free", "HANA Log Segment",
            $"{prefix} Only {freePct}% of log segments are 'Free' (threshold: {config.FreeSegmentThresholdPercent}%).",
            isAlert: true, currentVal: $"{freePct}%", ct);
    }
    else
    {
        await NotifyIfChanged("hana_log_free", "HANA Log Segment",
            $"{prefix} Free log segments OK ({freePct}%).",
            isAlert: false, currentVal: "OK", ct);
    }
}

/// <summary>
/// Counts queueing SqlExecutor threads and alerts once the count has exceeded
/// <c>config.StatementQueueThreshold</c> for <c>config.StatementQueueConsecutiveRuns</c> runs in a row.
/// </summary>
private async Task CheckStatementQueueAsync(
    MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
{
    logger.Step("Checking HANA statement queue...");
    var queueSql = "SELECT COUNT(*) FROM M_SERVICE_THREADS " +
                   "WHERE THREAD_TYPE = 'SqlExecutor' AND THREAD_STATE = 'Queueing';";
    var queueResult = await RunSql(hdbsql, config.HanaUserKey, queueSql, sid, scalar: true, ct);

    // Unparseable output: skip this check without touching the stored state.
    if (!int.TryParse(queueResult.StdOut.Trim().Replace("\"", ""), out var queueCount))
    {
        return;
    }

    // The consecutive-breach counter is kept in the state store between runs.
    var breachStr   = state.GetState("statement_queue_breach_count") ?? "0";
    var breachCount = int.TryParse(breachStr, out var b) ? b : 0;

    if (queueCount > config.StatementQueueThreshold)
    {
        breachCount++;
    }
    else
    {
        breachCount = 0;
    }

    state.SetState("statement_queue_breach_count", breachCount.ToString());

    if (breachCount >= config.StatementQueueConsecutiveRuns)
    {
        await NotifyIfChanged("hana_statement_queue", "HANA Statement Queue",
            $"{prefix} Statement queue has been over {config.StatementQueueThreshold} for {breachCount} checks. Current: {queueCount}.",
            isAlert: true, currentVal: $"ALERT:{queueCount}", ct);
    }
    else
    {
        await NotifyIfChanged("hana_statement_queue", "HANA Statement Queue",
            $"{prefix} Statement queue is normal ({queueCount}).",
            isAlert: false, currentVal: "OK", ct);
    }
}

/// <summary>
/// Reads the start time of the newest successful complete data backup and alerts when
/// none is found or when it is older than <c>config.BackupThresholdHours</c>.
/// </summary>
private async Task CheckBackupAgeAsync(
    MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
{
    logger.Step("Checking last successful backup age...");
    var bakSql = "SELECT TOP 1 SYS_START_TIME FROM M_BACKUP_CATALOG " +
                 "WHERE ENTRY_TYPE_NAME = 'complete data backup' AND STATE_NAME = 'successful' " +
                 "ORDER BY SYS_START_TIME DESC;";
    var bakResult = await RunSql(hdbsql, config.HanaUserKey, bakSql, sid, scalar: true, ct);

    // Drop quotes and everything from the first '.' on (the fractional seconds).
    var bakDateStr = bakResult.StdOut.Trim().Replace("\"", "").Split('.')[0];

    // The timestamp is machine-generated, so parse it with the invariant culture
    // instead of whatever culture the process happens to run under.
    if (string.IsNullOrWhiteSpace(bakDateStr) ||
        !DateTime.TryParse(
            bakDateStr,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var lastBak))
    {
        await NotifyIfChanged("hana_backup_status", "HANA Backup",
            $"{prefix} No successful backup found.",
            isAlert: true, currentVal: "NO_BACKUP", ct);
    }
    else
    {
        // NOTE(review): the parsed value has no time-zone information, so ToUniversalTime
        // treats it as local time — confirm the HANA server and this host share a time zone.
        var ageHours = (int)(DateTime.UtcNow - lastBak.ToUniversalTime()).TotalHours;
        if (ageHours > config.BackupThresholdHours)
        {
            await NotifyIfChanged("hana_backup_status", "HANA Backup",
                $"{prefix} Last successful backup is {ageHours}h old (threshold: {config.BackupThresholdHours}h). Last backup: {lastBak:yyyy-MM-dd HH:mm}.",
                isAlert: true, currentVal: $"{ageHours}h", ct);
        }
        else
        {
            await NotifyIfChanged("hana_backup_status", "HANA Backup",
                $"{prefix} Backup age is {ageHours}h (OK).",
                isAlert: false, currentVal: "OK", ct);
        }
    }
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Writes <paramref name="sql"/> to a uniquely named temporary file under /tmp, runs it
/// through hdbsql via the user switcher, and returns the process result.
/// </summary>
/// <param name="hdbsql">Path of the hdbsql executable.</param>
/// <param name="userKey">Value passed to hdbsql's <c>-U</c> option.</param>
/// <param name="sql">SQL text to execute.</param>
/// <param name="sid">System ID handed to the user switcher.</param>
/// <param name="scalar">When <c>true</c>, adds the <c>-a -x</c> flags to the hdbsql command line.</param>
/// <param name="ct">Token used to cancel the file write and the command.</param>
/// <returns>The result of the hdbsql invocation; stderr is redirected into stdout.</returns>
private async Task<ProcessResult> RunSql(
    string hdbsql, string userKey, string sql, string sid,
    bool scalar, CancellationToken ct)
{
    var tmpFile = Path.Combine("/tmp", $"ht_{Guid.NewGuid():N}.sql");
    try
    {
        await File.WriteAllTextAsync(tmpFile, sql, ct);
        var flags = scalar ? "-a -x" : string.Empty;
        return await switcher.RunAsAsync(sid,
            $"\"{hdbsql}\" -U {userKey} {flags} -I \"{tmpFile}\" 2>&1", ct);
    }
    finally
    {
        // Always remove the script, even when the write or the command throws or is
        // cancelled; File.Delete does not throw when the file does not exist.
        File.Delete(tmpFile);
    }
}
|
||||
|
||||
/// <summary>
/// Sends a notification only when the value stored under <paramref name="key"/> differs
/// from <paramref name="currentVal"/>, then stores the new value.
/// </summary>
/// <remarks>
/// An alert is always sent with an "Alert" title. A non-alert is sent with a "Resolved"
/// title only when the previous value was non-empty and not "OK"; otherwise the new value
/// is stored without sending anything.
/// </remarks>
/// <param name="key">State-store key identifying the check.</param>
/// <param name="titlePrefix">Text placed before "Alert" / "Resolved" in the notification title.</param>
/// <param name="message">Notification body.</param>
/// <param name="isAlert">Whether the current result is an alert condition.</param>
/// <param name="currentVal">Value compared with, and then written to, the state store.</param>
/// <param name="ct">Token passed to the notification sender.</param>
private async Task NotifyIfChanged(
    string key, string titlePrefix, string message,
    bool isAlert, string currentVal, CancellationToken ct)
{
    var previous = state.GetState(key);
    if (previous == currentVal)
    {
        return; // Unchanged since the last run — stay quiet
    }

    var wasAlerting = !string.IsNullOrEmpty(previous) && previous != "OK";
    if (!isAlert && !wasAlerting)
    {
        // Healthy now and healthy (or unknown) before: record the value silently.
        state.SetState(key, currentVal);
        return;
    }

    var title = isAlert
        ? $"{titlePrefix} Alert"
        : $"{titlePrefix} Resolved";

    // The state is stored only after the send call has returned without throwing.
    await ntfy.SendAsync(title, message, ct);
    state.SetState(key, currentVal);
    logger.Info($"Notification sent: [{title}]");
}
|
||||
}
|
||||
Reference in New Issue
Block a user