first commit

This commit is contained in:
2026-03-02 20:53:28 +01:00
commit d27c205106
63 changed files with 4593 additions and 0 deletions

229
Services/MonitorService.cs Normal file
View File

@@ -0,0 +1,229 @@
using HanaToolbox.Config;
using HanaToolbox.Logging;
using HanaToolbox.Services.Interfaces;
namespace HanaToolbox.Services;
/// <summary>
/// Monitors HANA health: process status, disk usage, log segments, statement queue, backup age.
/// Sends state-change notifications via ntfy to avoid alert spam.
/// </summary>
public sealed class MonitorService(
    IProcessRunner runner,
    IUserSwitcher switcher,
    IHdbClientLocator locator,
    INotificationService ntfy,
    IMonitorStateService state,
    AppLogger logger) : IMonitorService
{
    /// <summary>
    /// Runs one monitoring pass: processes, disk usage, log segments, statement queue
    /// and backup age, in that order. If any HANA process is not GREEN the pass stops
    /// after the process check and the remaining checks are skipped.
    /// </summary>
    /// <param name="config">Thresholds, paths and the hdbsql user key used by the checks.</param>
    /// <param name="hana">Supplies the hdbsql path and instance number used to locate hdbsql.</param>
    /// <param name="sid">SID passed to the hdbsql locator and to the user switcher.</param>
    /// <param name="ct">Token forwarded to every external call.</param>
    public async Task RunAsync(
        MonitorConfig config, HanaConfig hana, string sid,
        CancellationToken ct = default)
    {
        var hdbsql = locator.LocateHdbsql(hana.HdbsqlPath, sid, hana.InstanceNumber);
        var host = System.Net.Dns.GetHostName();
        var prefix = $"[{config.CompanyName} | {host}]";

        if (!await CheckProcessesAsync(config, prefix, ct))
        {
            return; // Exit early — other checks may also fail
        }

        await CheckDiskUsageAsync(config, prefix, ct);
        await CheckLogSegmentsAsync(config, hdbsql, sid, prefix, ct);
        await CheckStatementQueueAsync(config, hdbsql, sid, prefix, ct);
        await CheckBackupAgeAsync(config, hdbsql, sid, prefix, ct);

        logger.Success("Monitor check complete.");
    }

    // ── Checks ────────────────────────────────────────────────────────────────

    /// <summary>
    /// Check 1: calls sapcontrol GetProcessList and alerts when any line after the
    /// five header lines does not contain "GREEN".
    /// </summary>
    /// <returns><c>true</c> when no non-GREEN line was found; otherwise <c>false</c>.</returns>
    private async Task<bool> CheckProcessesAsync(
        MonitorConfig config, string prefix, CancellationToken ct)
    {
        logger.Step("Checking HANA processes...");

        // sapcontrol is started through the plain process runner, not the user switcher.
        var sapResult = await runner.RunAsync(
            config.SapcontrolPath,
            ["-nr", config.HanaInstanceNumber, "-function", "GetProcessList"], ct);

        // NOTE(review): when sapcontrol prints five lines or fewer (e.g. it failed to
        // start), nothing is left after Skip(5) and the check reports "all GREEN".
        // Confirm whether that case should raise an alert instead.
        var nonGreen = sapResult.StdOut
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Skip(5) // skip header lines
            .Where(l => !l.Contains("GREEN", StringComparison.Ordinal))
            .ToList();

        if (nonGreen.Count > 0)
        {
            var msg = string.Join(", ", nonGreen.Select(l => l.Trim()));
            await NotifyIfChanged("hana_processes", "HANA Process",
                $"{prefix} One or more HANA processes are not GREEN: {msg}",
                isAlert: true, currentVal: $"ALERT:{msg}", ct);
            return false;
        }

        await NotifyIfChanged("hana_processes", "HANA Process",
            $"{prefix} All HANA processes are GREEN.",
            isAlert: false, currentVal: "OK", ct);
        return true;
    }

    /// <summary>
    /// Check 2: runs <c>df -h</c> for every configured directory and alerts when the
    /// fifth column of the second output line exceeds the configured percentage.
    /// </summary>
    private async Task CheckDiskUsageAsync(
        MonitorConfig config, string prefix, CancellationToken ct)
    {
        logger.Step("Checking disk usage...");

        foreach (var dir in config.DirectoriesToMonitor)
        {
            ct.ThrowIfCancellationRequested();

            var dfResult = await runner.RunAsync("/bin/df", ["-h", dir], ct);

            // Second line, fifth whitespace-separated column, without the trailing '%'.
            var usageStr = dfResult.StdOut
                .Split('\n', StringSplitOptions.RemoveEmptyEntries)
                .Skip(1).FirstOrDefault()
                ?.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .ElementAtOrDefault(4)
                ?.TrimEnd('%');

            // Tool output is machine-formatted, so parse it culture-independently.
            if (!int.TryParse(
                    usageStr,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var usage))
            {
                logger.Warning($"Could not parse disk usage for '{dir}'.");
                continue;
            }

            var key = $"disk_{dir.Replace('/', '_')}";
            if (usage > config.DiskUsageThresholdPercent)
            {
                await NotifyIfChanged(key, "HANA Disk",
                    $"{prefix} Disk usage for '{dir}' is at {usage}% (threshold: {config.DiskUsageThresholdPercent}%).",
                    isAlert: true, currentVal: $"{usage}%", ct);
            }
            else
            {
                await NotifyIfChanged(key, "HANA Disk",
                    $"{prefix} Disk '{dir}' is at {usage}% (OK).",
                    isAlert: false, currentVal: "OK", ct);
            }
        }
    }

    /// <summary>
    /// Check 3: counts log segments per state and alerts when the share of
    /// 'Truncated' segments is above, or the share of 'Free' segments is below,
    /// the configured percentages.
    /// </summary>
    private async Task CheckLogSegmentsAsync(
        MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
    {
        logger.Step("Checking HANA log segments...");

        var segSql = "SELECT b.host, b.service_name, a.state, count(*) " +
                     "FROM PUBLIC.M_LOG_SEGMENTS a " +
                     "JOIN PUBLIC.M_SERVICES b ON (a.host = b.host AND a.port = b.port) " +
                     "GROUP BY b.host, b.service_name, a.state;";
        var segResult = await RunSql(hdbsql, config.HanaUserKey, segSql, sid, false, ct);

        int total = 0, truncated = 0, free = 0;
        foreach (var line in segResult.StdOut.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var parts = line.Replace("\"", "").Split(',');
            if (parts.Length < 4)
            {
                continue;
            }

            // Header and footer lines are rejected here because their fourth column
            // is not an integer. Do not filter on the substring "host": that would
            // also discard data rows whose host or service name contains it.
            if (!int.TryParse(
                    parts[3].Trim(),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var cnt))
            {
                continue;
            }

            total += cnt;
            var seg = parts[2].Trim();
            if (seg == "Truncated")
            {
                truncated += cnt;
            }
            else if (seg == "Free")
            {
                free += cnt;
            }
        }

        if (total <= 0)
        {
            // No countable rows: leave the stored state untouched, but say so.
            logger.Warning("Could not determine log segment counts.");
            return;
        }

        // Integer division: percentages are rounded down.
        var truncPct = truncated * 100 / total;
        var freePct = free * 100 / total;

        if (truncPct > config.TruncatedSegmentThresholdPercent)
        {
            await NotifyIfChanged("hana_log_truncated", "HANA Log Segment",
                $"{prefix} {truncPct}% of log segments are 'Truncated' (threshold: {config.TruncatedSegmentThresholdPercent}%).",
                isAlert: true, currentVal: $"{truncPct}%", ct);
        }
        else
        {
            await NotifyIfChanged("hana_log_truncated", "HANA Log Segment",
                $"{prefix} Log segments OK ({truncPct}% truncated).",
                isAlert: false, currentVal: "OK", ct);
        }

        if (freePct < config.FreeSegmentThresholdPercent)
        {
            await NotifyIfChanged("hana_log_free", "HANA Log Segment",
                $"{prefix} Only {freePct}% of log segments are 'Free' (threshold: {config.FreeSegmentThresholdPercent}%).",
                isAlert: true, currentVal: $"{freePct}%", ct);
        }
        else
        {
            await NotifyIfChanged("hana_log_free", "HANA Log Segment",
                $"{prefix} Free log segments OK ({freePct}%).",
                isAlert: false, currentVal: "OK", ct);
        }
    }

    /// <summary>
    /// Check 4: reads the number of queueing SqlExecutor threads and alerts once the
    /// configured threshold has been exceeded for the configured number of
    /// consecutive runs. The consecutive-breach counter is kept in the state service.
    /// </summary>
    private async Task CheckStatementQueueAsync(
        MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
    {
        logger.Step("Checking HANA statement queue...");

        var queueSql = "SELECT COUNT(*) FROM M_SERVICE_THREADS " +
                       "WHERE THREAD_TYPE = 'SqlExecutor' AND THREAD_STATE = 'Queueing';";
        var queueResult = await RunSql(hdbsql, config.HanaUserKey, queueSql, sid, scalar: true, ct);

        if (!int.TryParse(
                queueResult.StdOut.Trim().Replace("\"", ""),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var queueCount))
        {
            // Counter and stored state stay as they were; only the log changes.
            logger.Warning("Could not parse statement queue count.");
            return;
        }

        var breachStr = state.GetState("statement_queue_breach_count") ?? "0";
        var breachCount = int.TryParse(
            breachStr,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var b) ? b : 0;

        // A single run at or below the threshold resets the streak.
        breachCount = queueCount > config.StatementQueueThreshold ? breachCount + 1 : 0;
        state.SetState(
            "statement_queue_breach_count",
            breachCount.ToString(System.Globalization.CultureInfo.InvariantCulture));

        if (breachCount >= config.StatementQueueConsecutiveRuns)
        {
            await NotifyIfChanged("hana_statement_queue", "HANA Statement Queue",
                $"{prefix} Statement queue has been over {config.StatementQueueThreshold} for {breachCount} checks. Current: {queueCount}.",
                isAlert: true, currentVal: $"ALERT:{queueCount}", ct);
        }
        else
        {
            await NotifyIfChanged("hana_statement_queue", "HANA Statement Queue",
                $"{prefix} Statement queue is normal ({queueCount}).",
                isAlert: false, currentVal: "OK", ct);
        }
    }

    /// <summary>
    /// Check 5: reads the start time of the newest successful complete data backup
    /// and alerts when none is found or when it is older than the configured number
    /// of hours.
    /// </summary>
    private async Task CheckBackupAgeAsync(
        MonitorConfig config, string hdbsql, string sid, string prefix, CancellationToken ct)
    {
        logger.Step("Checking last successful backup age...");

        var bakSql = "SELECT TOP 1 SYS_START_TIME FROM M_BACKUP_CATALOG " +
                     "WHERE ENTRY_TYPE_NAME = 'complete data backup' AND STATE_NAME = 'successful' " +
                     "ORDER BY SYS_START_TIME DESC;";
        var bakResult = await RunSql(hdbsql, config.HanaUserKey, bakSql, sid, scalar: true, ct);

        // Keep only the part before the first '.', i.e. drop fractional seconds.
        var bakDateStr = bakResult.StdOut.Trim().Replace("\"", "").Split('.')[0];

        if (string.IsNullOrWhiteSpace(bakDateStr) ||
            !DateTime.TryParse(
                bakDateStr,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var lastBak))
        {
            await NotifyIfChanged("hana_backup_status", "HANA Backup",
                $"{prefix} No successful backup found.",
                isAlert: true, currentVal: "NO_BACKUP", ct);
            return;
        }

        // NOTE(review): the parsed value carries no time-zone information, so
        // ToUniversalTime() interprets it as local time of this machine — confirm
        // that matches the time zone HANA reports SYS_START_TIME in.
        var ageHours = (int)(DateTime.UtcNow - lastBak.ToUniversalTime()).TotalHours;

        if (ageHours > config.BackupThresholdHours)
        {
            await NotifyIfChanged("hana_backup_status", "HANA Backup",
                $"{prefix} Last successful backup is {ageHours}h old (threshold: {config.BackupThresholdHours}h). Last backup: {lastBak:yyyy-MM-dd HH:mm}.",
                isAlert: true, currentVal: $"{ageHours}h", ct);
        }
        else
        {
            await NotifyIfChanged("hana_backup_status", "HANA Backup",
                $"{prefix} Backup age is {ageHours}h (OK).",
                isAlert: false, currentVal: "OK", ct);
        }
    }

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// <summary>
    /// Writes <paramref name="sql"/> to a uniquely named file under /tmp, runs hdbsql
    /// on that file through the user switcher, and returns the process result.
    /// The temporary file is removed even when the write or the command fails.
    /// </summary>
    /// <param name="hdbsql">Path of the hdbsql executable.</param>
    /// <param name="userKey">Value passed to hdbsql's <c>-U</c> option.</param>
    /// <param name="sql">Statement text written to the temporary file.</param>
    /// <param name="sid">SID handed to the user switcher.</param>
    /// <param name="scalar">
    /// When <c>true</c>, adds <c>-a -x</c> to the command line (presumably to suppress
    /// headers and extra output so a single value is returned — confirm against the
    /// hdbsql documentation).
    /// </param>
    /// <param name="ct">Token forwarded to the file write and the command.</param>
    private async Task<ProcessResult> RunSql(
        string hdbsql, string userKey, string sql, string sid,
        bool scalar, CancellationToken ct)
    {
        var tmpFile = Path.Combine("/tmp", $"ht_{Guid.NewGuid():N}.sql");
        var flags = scalar ? "-a -x" : string.Empty;

        try
        {
            await File.WriteAllTextAsync(tmpFile, sql, ct);

            // stderr is merged into stdout so callers see error text in StdOut.
            return await switcher.RunAsAsync(sid,
                $"\"{hdbsql}\" -U {userKey} {flags} -I \"{tmpFile}\" 2>&1", ct);
        }
        finally
        {
            // File.Delete does not throw when the file is missing, so this is safe
            // even if the write never created it.
            File.Delete(tmpFile);
        }
    }

    /// <summary>
    /// Compares <paramref name="currentVal"/> with the value stored under
    /// <paramref name="key"/> and sends a notification only on a relevant change:
    /// an "Alert" whenever an alert value differs from the stored one, a "Resolved"
    /// when a non-alert value replaces a stored value other than "OK". A non-alert
    /// value with no previous state is stored without notifying.
    /// </summary>
    /// <param name="key">State key identifying the check.</param>
    /// <param name="titlePrefix">Text placed before "Alert" / "Resolved" in the title.</param>
    /// <param name="message">Notification body.</param>
    /// <param name="isAlert">Whether the current value represents a problem.</param>
    /// <param name="currentVal">Value to compare against and store as the new state.</param>
    /// <param name="ct">Token forwarded to the notification call.</param>
    private async Task NotifyIfChanged(
        string key, string titlePrefix, string message,
        bool isAlert, string currentVal, CancellationToken ct)
    {
        var prev = state.GetState(key);
        if (currentVal == prev)
        {
            return; // No change — don't spam
        }

        var isRecovery = !isAlert && !string.IsNullOrEmpty(prev) && prev != "OK";
        if (!isAlert && !isRecovery)
        {
            // Healthy value with no recorded problem before: remember it silently.
            state.SetState(key, currentVal);
            return;
        }

        var title = isAlert ? $"{titlePrefix} Alert" : $"{titlePrefix} Resolved";

        // State is stored only after a successful send, so a failed send is retried
        // on the next run.
        await ntfy.SendAsync(title, message, ct);
        state.SetState(key, currentVal);
        logger.Info($"Notification sent: [{title}]");
    }
}