using System.Globalization;
using System.Text.Json;
using DiunaBI.Domain.Entities;
using DiunaBI.Infrastructure.Data;
using DiunaBI.Infrastructure.Plugins;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Connectors.Ollama;

namespace DiunaBI.Infrastructure.Validators;

/// <summary>
/// Validator that sends the latest import layer of an import worker, together with
/// previous imports of the same worker, to an LLM and stores the reported anomalies
/// as records of a newly created validation layer.
/// </summary>
public class LlmAnomalyValidator : BaseDataValidator
{
    public override string ValidatorType => "LlmAnomalyValidator";

    // The prompt asks the model for camelCase keys while the response models expose
    // PascalCase properties; System.Text.Json matches names case-sensitively by default,
    // so without this option every property would silently keep its default value.
    private static readonly JsonSerializerOptions ResponseJsonOptions = new()
    {
        PropertyNameCaseInsensitive = true
    };

    // Cached so a new options instance is not allocated for every prompt.
    private static readonly JsonSerializerOptions IndentedJsonOptions = new()
    {
        WriteIndented = true
    };

    // Static tail of the prompt: analysis tasks and the expected response schema.
    private const string AnalysisInstructions = @"
**Analysis Tasks:**
1. **Record-level anomalies:** Identify unusual values for specific codes compared to historical patterns
2. **Structural issues:** Detect missing codes, new codes, or unexpected count changes
3. **Pattern breaks:** Find trend reversals, unexpected correlations, or statistical outliers

**Response Format (JSON):**
```json
{
  ""overallStatus"": ""pass|warning|critical"",
  ""recordAnomalies"": [
    {
      ""code"": ""string"",
      ""value1"": number,
      ""confidence"": 0.0-1.0,
      ""severity"": ""low|medium|high|critical"",
      ""reason"": ""brief explanation"",
      ""recommendation"": ""suggested action""
    }
  ],
  ""structuralIssues"": [
    {
      ""issueType"": ""missing_codes|new_codes|count_change"",
      ""description"": ""string"",
      ""codes"": [""code1"", ""code2""],
      ""severity"": ""low|medium|high|critical""
    }
  ],
  ""summary"": ""Brief overall assessment""
}
```

Analyze the data and respond ONLY with the JSON object. Do not include any markdown formatting or additional text.";

    private readonly AppDbContext _db;
    private readonly IConfiguration _config;
    private readonly ILogger _logger;
    private readonly Kernel _kernel;

    // Configuration loaded from appsettings.json
    private readonly string _provider;
    private readonly string _model;
    private readonly int _minHistoricalImports;
    private readonly int _recentImportsWindow;
    private readonly int _monthlyImportsWindow;

    // NOTE(review): loaded from configuration but not consulted anywhere in this class.
    // Presumably meant to filter low-confidence anomalies - confirm the intended use.
    private readonly double _confidenceThreshold;

    // Configuration loaded from ValidationWorker records
    private string? SourceLayerName { get; set; }
    private Layer? SourceImportWorker { get; set; }

    /// <summary>
    /// Reads the "AnomalyDetection" settings and builds the Semantic Kernel instance.
    /// </summary>
    /// <param name="db">Database context used to read layers and to store validation results.</param>
    /// <param name="config">Application configuration holding the "AnomalyDetection" section.</param>
    /// <param name="logger">Logger used for diagnostics.</param>
    /// <exception cref="InvalidOperationException">Required provider credentials are missing.</exception>
    /// <exception cref="NotSupportedException">The configured provider is not supported.</exception>
    /// <exception cref="FormatException">A numeric setting is not a valid number.</exception>
    public LlmAnomalyValidator(
        AppDbContext db,
        IConfiguration config,
        ILogger logger)
    {
        _db = db;
        _config = config;
        _logger = logger;

        // Load configuration from appsettings.json.
        // Numbers are parsed with the invariant culture: settings are machine-readable,
        // and e.g. "0.7" does not parse under cultures that use a decimal comma.
        _provider = config["AnomalyDetection:Provider"] ?? "OpenAI";
        _model = config["AnomalyDetection:Model"] ?? "gpt-4o-mini";
        _minHistoricalImports = int.Parse(
            config["AnomalyDetection:MinHistoricalImports"] ?? "5", CultureInfo.InvariantCulture);
        _recentImportsWindow = int.Parse(
            config["AnomalyDetection:RecentImportsWindow"] ?? "5", CultureInfo.InvariantCulture);
        _monthlyImportsWindow = int.Parse(
            config["AnomalyDetection:MonthlyImportsWindow"] ?? "5", CultureInfo.InvariantCulture);
        _confidenceThreshold = double.Parse(
            config["AnomalyDetection:ConfidenceThreshold"] ?? "0.7", CultureInfo.InvariantCulture);

        // Initialize Semantic Kernel based on provider
        _kernel = InitializeKernel();

        _logger.LogInformation("LlmAnomalyValidator initialized with provider: {Provider}, model: {Model}",
            _provider, _model);
    }

    /// <summary>
    /// Builds a kernel with the chat-completion service of the configured provider
    /// ("openai", "azureopenai" or "ollama", matched case-insensitively).
    /// </summary>
    private Kernel InitializeKernel()
    {
        var builder = Kernel.CreateBuilder();

        // Invariant lowering: culture-sensitive lowering can change identifier letters
        // (e.g. 'I' under Turkish cultures) and break the match.
        switch (_provider.ToLowerInvariant())
        {
            case "openai":
                var openAiKey = _config["AnomalyDetection:ApiKey"];
                if (string.IsNullOrEmpty(openAiKey))
                {
                    throw new InvalidOperationException("OpenAI API key not configured");
                }
                builder.AddOpenAIChatCompletion(_model, openAiKey);
                break;

            case "azureopenai":
                var azureEndpoint = _config["AnomalyDetection:Endpoint"];
                var azureKey = _config["AnomalyDetection:ApiKey"];
                if (string.IsNullOrEmpty(azureEndpoint) || string.IsNullOrEmpty(azureKey))
                {
                    throw new InvalidOperationException("Azure OpenAI endpoint or API key not configured");
                }
                builder.AddAzureOpenAIChatCompletion(_model, azureEndpoint, azureKey);
                break;

            case "ollama":
                var ollamaEndpoint = _config["AnomalyDetection:Endpoint"] ?? "http://localhost:11434";
                builder.AddOllamaChatCompletion(_model, new Uri(ollamaEndpoint));
                break;

            default:
                throw new NotSupportedException($"LLM provider '{_provider}' is not supported");
        }

        return builder.Build();
    }

    /// <summary>
    /// Runs the anomaly validation described by <paramref name="validationWorker"/>.
    /// Validation is skipped (with a warning) when fewer than the configured minimum
    /// number of previous imports exist.
    /// </summary>
    /// <param name="validationWorker">Layer whose records carry the validator configuration ("SourceLayer").</param>
    /// <exception cref="InvalidOperationException">
    /// The configuration is invalid, no import exists, or the LLM reply is not valid JSON.
    /// </exception>
    public override void Validate(Layer validationWorker)
    {
        try
        {
            _logger.LogInformation("{ValidatorType}: Starting validation for {ValidationWorkerName} ({ValidationWorkerId})",
                ValidatorType, validationWorker.Name, validationWorker.Id);

            // Load configuration from layer records
            LoadConfiguration(validationWorker);

            // Validate configuration
            ValidateConfiguration();

            // Find latest import layer
            var latestImport = GetLatestImportLayer();

            // Get historical context. The import under test is excluded so that it is
            // not compared against itself.
            var historicalImports = GetHistoricalImports(latestImport.Id);

            // Check if enough historical data
            if (historicalImports.Count < _minHistoricalImports)
            {
                _logger.LogWarning("{ValidatorType}: Not enough historical imports: {Count} (need {Min}). Skipping validation.",
                    ValidatorType, historicalImports.Count, _minHistoricalImports);
                return;
            }

            // Perform validation
            PerformValidation(validationWorker, latestImport, historicalImports);

            _logger.LogInformation("{ValidatorType}: Successfully completed validation for {ValidationWorkerName}",
                ValidatorType, validationWorker.Name);
        }
        catch (Exception e)
        {
            _logger.LogError(e, "{ValidatorType}: Failed to validate {ValidationWorkerName} ({ValidationWorkerId})",
                ValidatorType, validationWorker.Name, validationWorker.Id);
            throw;
        }
    }

    /// <summary>
    /// Reads the "SourceLayer" record of the validation worker into <see cref="SourceLayerName"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The worker has no records or no "SourceLayer" value.</exception>
    private void LoadConfiguration(Layer validationWorker)
    {
        if (validationWorker.Records == null)
        {
            throw new InvalidOperationException("ValidationWorker has no records");
        }

        // Load source layer name (ImportWorker Administration Layer)
        SourceLayerName = GetRecordValue(validationWorker.Records, "SourceLayer");
        if (string.IsNullOrEmpty(SourceLayerName))
        {
            throw new InvalidOperationException("SourceLayer record not found");
        }

        _logger.LogDebug("{ValidatorType}: Configuration loaded - SourceLayer: {SourceLayer}",
            ValidatorType, SourceLayerName);
    }

    /// <summary>
    /// Resolves <see cref="SourceImportWorker"/> from <see cref="SourceLayerName"/> and
    /// throws when the configuration cannot be satisfied.
    /// </summary>
    /// <exception cref="InvalidOperationException">One or more configuration errors were found.</exception>
    private void ValidateConfiguration()
    {
        var errors = new List<string>();

        if (string.IsNullOrEmpty(SourceLayerName))
        {
            errors.Add("SourceLayer is required");
        }

        // Find source import worker (Administration Layer)
        SourceImportWorker = _db.Layers
            .SingleOrDefault(x => x.Name == SourceLayerName
                                  && x.Type == LayerType.Administration
                                  && !x.IsDeleted
                                  && !x.IsCancelled);

        if (SourceImportWorker == null)
        {
            errors.Add($"SourceImportWorker layer '{SourceLayerName}' not found");
        }

        if (errors.Any())
        {
            throw new InvalidOperationException($"Configuration validation failed: {string.Join(", ", errors)}");
        }

        _logger.LogDebug("{ValidatorType}: Configuration validation passed", ValidatorType);
    }

    /// <summary>
    /// Returns the resolved source import worker, or throws when
    /// <see cref="ValidateConfiguration"/> has not resolved one.
    /// </summary>
    private Layer RequireSourceImportWorker() =>
        SourceImportWorker
        ?? throw new InvalidOperationException("Source import worker has not been resolved");

    /// <summary>
    /// Base query: non-deleted, non-cancelled import layers (with records) whose parent
    /// is the given import worker.
    /// </summary>
    private IQueryable<Layer> QueryActiveImports(Guid importWorkerId) =>
        _db.Layers
            .Include(x => x.Records)
            .Where(x => x.ParentId == importWorkerId
                        && x.Type == LayerType.Import
                        && !x.IsDeleted
                        && !x.IsCancelled);

    /// <summary>
    /// Returns the most recently created import layer of the source import worker.
    /// </summary>
    /// <exception cref="InvalidOperationException">The worker has no import layers.</exception>
    private Layer GetLatestImportLayer()
    {
        var worker = RequireSourceImportWorker();

        // Find latest Import layer where ParentId = SourceImportWorker.Id
        var latestImport = QueryActiveImports(worker.Id)
            .OrderByDescending(x => x.CreatedAt)
            .FirstOrDefault();

        if (latestImport == null)
        {
            throw new InvalidOperationException(
                $"No import layers found for import worker '{worker.Name}'");
        }

        _logger.LogDebug("{ValidatorType}: Found latest import layer: {LayerName} ({LayerId})",
            ValidatorType, latestImport.Name, latestImport.Id);

        return latestImport;
    }

    /// <summary>
    /// Returns up to the configured recent-window number of import layers, newest first,
    /// excluding the layer identified by <paramref name="excludedLayerId"/>.
    /// </summary>
    /// <param name="excludedLayerId">Id of the import under validation; it is not part of its own history.</param>
    private List<Layer> GetHistoricalImports(Guid excludedLayerId)
    {
        var workerId = RequireSourceImportWorker().Id;

        // Get last N import layers (ordered by CreatedAt)
        var historicalImports = QueryActiveImports(workerId)
            .Where(x => x.Id != excludedLayerId)
            .OrderByDescending(x => x.CreatedAt)
            .Take(_recentImportsWindow)
            .AsNoTracking()
            .ToList();

        _logger.LogDebug("{ValidatorType}: Found {Count} historical imports for recent window",
            ValidatorType, historicalImports.Count);

        return historicalImports;
    }

    /// <summary>
    /// Returns up to the configured monthly-window number of import layers created on the
    /// first day of a month, newest first, excluding <paramref name="excludedLayerId"/>.
    /// </summary>
    /// <param name="excludedLayerId">Id of the import under validation.</param>
    private List<Layer> GetMonthlyBaselineImports(Guid excludedLayerId)
    {
        var workerId = RequireSourceImportWorker().Id;

        // Get last N "first-of-month" import layers
        var monthlyImports = QueryActiveImports(workerId)
            .Where(x => x.CreatedAt.Day == 1 && x.Id != excludedLayerId)
            .OrderByDescending(x => x.CreatedAt)
            .Take(_monthlyImportsWindow)
            .AsNoTracking()
            .ToList();

        _logger.LogDebug("{ValidatorType}: Found {Count} monthly baseline imports",
            ValidatorType, monthlyImports.Count);

        return monthlyImports;
    }

    /// <summary>
    /// Builds the prompt, calls the LLM and persists the result as a validation layer.
    /// </summary>
    private void PerformValidation(Layer validationWorker, Layer latestImport, List<Layer> historicalImports)
    {
        _logger.LogDebug("{ValidatorType}: Performing validation for import: {ImportName}",
            ValidatorType, latestImport.Name);

        // Get monthly baseline if available
        var monthlyBaseline = GetMonthlyBaselineImports(latestImport.Id);

        // Build prompt with all data
        var prompt = BuildPrompt(latestImport, historicalImports, monthlyBaseline);

        // Call LLM. Stopwatch is monotonic, unlike subtracting two wall-clock readings.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var llmResponse = CallLlm(prompt);
        stopwatch.Stop();
        var processingTime = stopwatch.Elapsed;

        // Create Validation Layer with results
        var validationLayer = CreateValidationLayer(latestImport);

        // Save to database. LINQ Count() is used because the concrete collection type of
        // Layer.Records is declared outside this file.
        var recordsChecked = latestImport.Records?.Count() ?? 0;
        SaveValidationLayer(validationLayer, llmResponse, recordsChecked);

        _logger.LogInformation("{ValidatorType}: Created validation layer {LayerName} ({LayerId}) in {ProcessingTime}ms",
            ValidatorType, validationLayer.Name, validationLayer.Id, processingTime.TotalMilliseconds);
    }

    /// <summary>
    /// Serializes the records of <paramref name="layer"/> (ordered by code) as a JSON array of
    /// <c>{ code, value1 }</c> objects; a layer without records yields an empty array.
    /// </summary>
    private static string SerializeRecords(Layer layer, JsonSerializerOptions? options = null)
    {
        var projected = layer.Records?
                            .OrderBy(r => r.Code)
                            .Select(r => (object)new { code = r.Code, value1 = r.Value1 })
                        ?? Enumerable.Empty<object>();

        return JsonSerializer.Serialize(projected, options);
    }

    /// <summary>
    /// Renders one line per import: "<paramref name="label"/> N (yyyy-MM-dd): [records JSON]".
    /// </summary>
    private static string DescribeImports(IEnumerable<Layer> imports, string label) =>
        string.Join("\n", imports.Select((import, index) =>
            $"{label} {index + 1} ({import.CreatedAt:yyyy-MM-dd}): {SerializeRecords(import)}"));

    /// <summary>
    /// Builds the full LLM prompt: current import data, recent history, the optional
    /// monthly baseline and the analysis instructions with the expected JSON schema.
    /// </summary>
    private string BuildPrompt(Layer currentImport, List<Layer> recentImports, List<Layer> monthlyBaseline)
    {
        var importType = SourceImportWorker?.Name ?? "Unknown";
        var currentRecordCount = currentImport.Records?.Count() ?? 0;
        var currentJson = SerializeRecords(currentImport, IndentedJsonOptions);
        var recentSection = DescribeImports(recentImports, "Import");

        var prompt = $@"You are a data quality analyst specializing in anomaly detection for business intelligence imports.

**Import Type:** {importType}
**Import Date:** {currentImport.CreatedAt:yyyy-MM-dd HH:mm:ss}
**Current Import:** {currentImport.Name}

**Current Import Data ({currentRecordCount} records):**
{currentJson}

**Historical Context - Last {recentImports.Count} Imports:**
{recentSection}
";

        if (monthlyBaseline.Any())
        {
            var monthlySection = DescribeImports(monthlyBaseline, "Monthly Import");
            prompt += $@"
**Monthly Baseline - Last {monthlyBaseline.Count} First-Day Imports:**
{monthlySection}
";
        }

        prompt += AnalysisInstructions;

        return prompt;
    }

    /// <summary>
    /// Removes a surrounding Markdown code fence (```json ... ```) from an LLM reply, if present.
    /// Models frequently add one even when told not to.
    /// </summary>
    private static string ExtractJsonPayload(string raw)
    {
        var text = raw.Trim();
        if (!text.StartsWith("```", StringComparison.Ordinal))
        {
            return text;
        }

        // Drop the opening fence line (which may carry a language tag such as "json").
        var firstLineEnd = text.IndexOf('\n');
        if (firstLineEnd < 0)
        {
            return text;
        }

        text = text[(firstLineEnd + 1)..];

        var closingFence = text.LastIndexOf("```", StringComparison.Ordinal);
        if (closingFence >= 0)
        {
            text = text[..closingFence];
        }

        return text.Trim();
    }

    /// <summary>
    /// Sends <paramref name="prompt"/> to the configured chat-completion service and parses
    /// the reply into an <see cref="AnomalyResponse"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The reply is not valid JSON or deserializes to null.</exception>
    private AnomalyResponse CallLlm(string prompt)
    {
        try
        {
            var chatService = _kernel.GetRequiredService<IChatCompletionService>();
            var chatHistory = new ChatHistory();
            chatHistory.AddUserMessage(prompt);

            var settings = new OpenAIPromptExecutionSettings
            {
                Temperature = _config.GetValue<double?>("AnomalyDetection:Temperature") ?? 0.1,
                MaxTokens = _config.GetValue<int?>("AnomalyDetection:MaxTokens") ?? 4000
            };

            // Validate() is a synchronous override, so the asynchronous call is awaited by blocking.
            var result = chatService
                .GetChatMessageContentAsync(chatHistory, settings)
                .GetAwaiter()
                .GetResult();

            var jsonResponse = ExtractJsonPayload(result.Content ?? "{}");

            // Try to parse JSON response
            try
            {
                return JsonSerializer.Deserialize<AnomalyResponse>(jsonResponse, ResponseJsonOptions)
                       ?? throw new InvalidOperationException("LLM returned null response");
            }
            catch (JsonException jsonError)
            {
                _logger.LogWarning("Failed to parse LLM response as JSON. Raw response: {Response}", jsonResponse);
                // Keep the parser error as inner exception so the failing position is not lost.
                throw new InvalidOperationException(
                    $"LLM did not return valid JSON. Response: {jsonResponse}", jsonError);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to call LLM for anomaly detection");
            throw;
        }
    }

    /// <summary>
    /// Creates (without saving) the validation layer linked to <paramref name="importLayer"/>.
    /// The layer number is the current layer count plus one.
    /// </summary>
    private Layer CreateValidationLayer(Layer importLayer)
    {
        var layerNumber = _db.Layers.Count() + 1;

        // One clock reading so the name, CreatedAt and ModifiedAt agree.
        var now = DateTime.UtcNow;
        var timestamp = now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);

        var validationLayer = new Layer
        {
            Id = Guid.NewGuid(),
            Type = LayerType.Validation,
            ParentId = importLayer.Id, // Links to the import that was validated
            Number = layerNumber,
            Name = $"L{layerNumber}-V-{timestamp}",
            CreatedById = User.AutoImportUserId,
            ModifiedById = User.AutoImportUserId,
            CreatedAt = now,
            ModifiedAt = now
        };

        _logger.LogDebug("{ValidatorType}: Created validation layer {LayerName}",
            ValidatorType, validationLayer.Name);

        return validationLayer;
    }

    /// <summary>
    /// Persists the validation layer together with its metadata, anomaly and
    /// structural-issue records, plus the serialized LLM response.
    /// </summary>
    /// <param name="validationLayer">Layer to insert.</param>
    /// <param name="response">Parsed LLM response.</param>
    /// <param name="recordsChecked">Number of records of the validated import (stored as "RecordsChecked").</param>
    private void SaveValidationLayer(Layer validationLayer, AnomalyResponse response, int recordsChecked)
    {
        // Add the validation layer
        _db.Layers.Add(validationLayer);

        var layerId = validationLayer.Id;
        var validatedAt = DateTime.UtcNow.ToString("yyyy-MM-dd HH:mm:ss", CultureInfo.InvariantCulture);

        // Metadata records
        var records = new List<Record>
        {
            CreateRecord(layerId, "ValidatedAt", validatedAt),
            CreateRecord(layerId, "OverallStatus", response.OverallStatus),
            CreateRecord(layerId, "RecordsChecked", value1: recordsChecked),
            CreateRecord(layerId, "AnomaliesDetected", value1: response.RecordAnomalies?.Count ?? 0),
            CreateRecord(layerId, "StructuralIssuesDetected", value1: response.StructuralIssues?.Count ?? 0),
            CreateRecord(layerId, "LlmProvider", _provider),
            CreateRecord(layerId, "LlmModel", _model),
            CreateRecord(layerId, "Summary", response.Summary)
        };

        // Add individual anomaly records
        if (response.RecordAnomalies != null)
        {
            foreach (var anomaly in response.RecordAnomalies)
            {
                records.Add(CreateRecord(
                    layerId,
                    $"ANOMALY_{anomaly.Code}",
                    $"[{anomaly.Severity}] {anomaly.Reason}. Recommendation: {anomaly.Recommendation}",
                    anomaly.Confidence));
            }
        }

        // Add structural issue records
        if (response.StructuralIssues != null)
        {
            foreach (var issue in response.StructuralIssues)
            {
                var codes = issue.Codes != null ? string.Join(", ", issue.Codes) : "";
                records.Add(CreateRecord(
                    layerId,
                    $"STRUCTURAL_{issue.IssueType?.ToUpperInvariant()}",
                    $"[{issue.Severity}] {issue.Description}. Codes: {codes}"));
            }
        }

        // Store full LLM response as JSON (for debugging)
        records.Add(CreateRecord(layerId, "LLM_RESPONSE_JSON", JsonSerializer.Serialize(response)));

        // Add all records to database
        _db.Records.AddRange(records);
        _db.SaveChanges();

        _logger.LogDebug("{ValidatorType}: Saved {RecordCount} records for validation layer {LayerId}",
            ValidatorType, records.Count, validationLayer.Id);
    }

    /// <summary>
    /// Creates a record for <paramref name="layerId"/> stamped with the auto-import user and the current UTC time.
    /// </summary>
    private Record CreateRecord(Guid layerId, string code, string? desc1 = null, double? value1 = null)
    {
        var now = DateTime.UtcNow;

        return new Record
        {
            Id = Guid.NewGuid(),
            LayerId = layerId,
            Code = code,
            Desc1 = desc1,
            Value1 = value1,
            CreatedById = User.AutoImportUserId,
            ModifiedById = User.AutoImportUserId,
            CreatedAt = now,
            ModifiedAt = now
        };
    }
}

// Response models for LLM

/// <summary>Top-level object the LLM is asked to return.</summary>
public class AnomalyResponse
{
    public string OverallStatus { get; set; } = "pass";
    public List<RecordAnomaly>? RecordAnomalies { get; set; }
    public List<StructuralIssue>? StructuralIssues { get; set; }
    public string Summary { get; set; } = "";
}

/// <summary>A single record-level anomaly reported by the LLM.</summary>
public class RecordAnomaly
{
    public string Code { get; set; } = "";
    public double? Value1 { get; set; }
    public double Confidence { get; set; }
    public string Severity { get; set; } = "low";
    public string Reason { get; set; } = "";
    public string Recommendation { get; set; } = "";
}

/// <summary>A structural issue (missing codes, new codes, count change) reported by the LLM.</summary>
public class StructuralIssue
{
    public string? IssueType { get; set; }
    public string Description { get; set; } = "";
    public List<string>? Codes { get; set; }
    public string Severity { get; set; } = "low";
}