WIP: AI Validator
This commit is contained in:
496
DiunaBI.Infrastructure/Validators/LlmAnomalyValidator.cs
Normal file
496
DiunaBI.Infrastructure/Validators/LlmAnomalyValidator.cs
Normal file
@@ -0,0 +1,496 @@
|
||||
using System.Text.Json;
|
||||
using DiunaBI.Domain.Entities;
|
||||
using DiunaBI.Infrastructure.Data;
|
||||
using DiunaBI.Infrastructure.Plugins;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.SemanticKernel;
|
||||
using Microsoft.SemanticKernel.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.OpenAI;
|
||||
using Microsoft.SemanticKernel.Connectors.Ollama;
|
||||
|
||||
namespace DiunaBI.Infrastructure.Validators;
|
||||
|
||||
public class LlmAnomalyValidator : BaseDataValidator
|
||||
{
|
||||
/// <summary>Identifier reported by this validator: <c>"LlmAnomalyValidator"</c>.</summary>
public override string ValidatorType => "LlmAnomalyValidator";

// Injected services, plus the Semantic Kernel instance assigned in the constructor.
private readonly AppDbContext _db;
private readonly IConfiguration _config;
private readonly ILogger<LlmAnomalyValidator> _logger;
private readonly Kernel _kernel;

// Settings read from the "AnomalyDetection" configuration section.
private readonly string _provider;
private readonly string _model;
private readonly int _minHistoricalImports;
private readonly int _recentImportsWindow;
private readonly int _monthlyImportsWindow;

// NOTE(review): assigned in the constructor but never read in this class as shown — confirm intended use.
private readonly double _confidenceThreshold;

// Per-run state populated from the validation worker's records.
private string? SourceLayerName { get; set; }
private Layer? SourceImportWorker { get; set; }
|
||||
|
||||
/// <summary>
/// Creates the validator, reads the <c>AnomalyDetection</c> settings and builds the Semantic Kernel.
/// </summary>
/// <param name="db">Database context used to query layers and store validation results.</param>
/// <param name="config">Application configuration holding the <c>AnomalyDetection:*</c> keys.</param>
/// <param name="logger">Logger for diagnostic output.</param>
/// <exception cref="FormatException">A numeric setting is present but cannot be parsed.</exception>
/// <exception cref="InvalidOperationException">Required provider settings are missing.</exception>
/// <exception cref="NotSupportedException">The configured provider is not supported.</exception>
public LlmAnomalyValidator(
    AppDbContext db,
    IConfiguration config,
    ILogger<LlmAnomalyValidator> logger)
{
    _db = db;
    _config = config;
    _logger = logger;

    // Configuration values are machine-readable, so they are parsed with the invariant culture.
    // With the current culture, "0.7" fails to parse on locales that use a decimal comma.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    _provider = config["AnomalyDetection:Provider"] ?? "OpenAI";
    _model = config["AnomalyDetection:Model"] ?? "gpt-4o-mini";
    _minHistoricalImports = int.Parse(config["AnomalyDetection:MinHistoricalImports"] ?? "5", invariant);
    _recentImportsWindow = int.Parse(config["AnomalyDetection:RecentImportsWindow"] ?? "5", invariant);
    _monthlyImportsWindow = int.Parse(config["AnomalyDetection:MonthlyImportsWindow"] ?? "5", invariant);
    _confidenceThreshold = double.Parse(config["AnomalyDetection:ConfidenceThreshold"] ?? "0.7", invariant);

    // The kernel depends on _provider, _model and _config, so it is built last.
    _kernel = InitializeKernel();

    _logger.LogInformation("LlmAnomalyValidator initialized with provider: {Provider}, model: {Model}",
        _provider, _model);
}
|
||||
|
||||
/// <summary>
/// Builds a Semantic Kernel with a chat-completion service for the configured provider.
/// </summary>
/// <returns>The built kernel.</returns>
/// <exception cref="InvalidOperationException">The API key or endpoint required by the provider is missing.</exception>
/// <exception cref="NotSupportedException">The provider name is not one of openai, azureopenai or ollama.</exception>
private Kernel InitializeKernel()
{
    var builder = Kernel.CreateBuilder();

    // Invariant lower-casing keeps the match culture-independent
    // (e.g. "OPENAI" lower-cases to "openaı" under a Turkish culture).
    switch (_provider.ToLowerInvariant())
    {
        case "openai":
            var openAiKey = _config["AnomalyDetection:ApiKey"];
            if (string.IsNullOrEmpty(openAiKey))
            {
                throw new InvalidOperationException("OpenAI API key not configured");
            }
            builder.AddOpenAIChatCompletion(_model, openAiKey);
            break;

        case "azureopenai":
            var azureEndpoint = _config["AnomalyDetection:Endpoint"];
            var azureKey = _config["AnomalyDetection:ApiKey"];
            if (string.IsNullOrEmpty(azureEndpoint) || string.IsNullOrEmpty(azureKey))
            {
                throw new InvalidOperationException("Azure OpenAI endpoint or API key not configured");
            }
            builder.AddAzureOpenAIChatCompletion(_model, azureEndpoint, azureKey);
            break;

        case "ollama":
            // Falls back to a local endpoint when none is configured.
            var ollamaEndpoint = _config["AnomalyDetection:Endpoint"] ?? "http://localhost:11434";
            builder.AddOllamaChatCompletion(_model, new Uri(ollamaEndpoint));
            break;

        default:
            throw new NotSupportedException($"LLM provider '{_provider}' is not supported");
    }

    return builder.Build();
}
|
||||
|
||||
/// <summary>
/// Runs one validation pass for the given validation worker: loads and checks its configuration,
/// fetches the latest import and the recent history, and performs the validation when enough
/// history exists.
/// </summary>
/// <param name="validationWorker">Layer whose records configure this run.</param>
/// <remarks>Any exception is logged and rethrown.</remarks>
public override void Validate(Layer validationWorker)
{
    try
    {
        _logger.LogInformation("{ValidatorType}: Starting validation for {ValidationWorkerName} ({ValidationWorkerId})",
            ValidatorType, validationWorker.Name, validationWorker.Id);

        LoadConfiguration(validationWorker);
        ValidateConfiguration();

        var newestImport = GetLatestImportLayer();
        var history = GetHistoricalImports();

        if (history.Count >= _minHistoricalImports)
        {
            PerformValidation(validationWorker, newestImport, history);

            _logger.LogInformation("{ValidatorType}: Successfully completed validation for {ValidationWorkerName}",
                ValidatorType, validationWorker.Name);
        }
        else
        {
            // Too little history to compare against: skip without failing.
            _logger.LogWarning("{ValidatorType}: Not enough historical imports: {Count} (need {Min}). Skipping validation.",
                ValidatorType, history.Count, _minHistoricalImports);
        }
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "{ValidatorType}: Failed to validate {ValidationWorkerName} ({ValidationWorkerId})",
            ValidatorType, validationWorker.Name, validationWorker.Id);
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the <c>SourceLayer</c> record from the validation worker into <see cref="SourceLayerName"/>.
/// </summary>
/// <param name="validationWorker">Layer carrying the configuration records.</param>
/// <exception cref="InvalidOperationException">
/// The worker has no records, or the <c>SourceLayer</c> value is missing or empty.
/// </exception>
private void LoadConfiguration(Layer validationWorker)
{
    var workerRecords = validationWorker.Records;
    if (workerRecords is null)
    {
        throw new InvalidOperationException("ValidationWorker has no records");
    }

    // Name of the import worker (Administration layer) whose imports are validated.
    var sourceLayer = GetRecordValue(workerRecords, "SourceLayer");
    SourceLayerName = sourceLayer;
    if (string.IsNullOrEmpty(sourceLayer))
    {
        throw new InvalidOperationException("SourceLayer record not found");
    }

    _logger.LogDebug("{ValidatorType}: Configuration loaded - SourceLayer: {SourceLayer}",
        ValidatorType, SourceLayerName);
}
|
||||
|
||||
/// <summary>
/// Checks the loaded configuration and resolves <see cref="SourceImportWorker"/> from the database.
/// </summary>
/// <exception cref="InvalidOperationException">
/// One or more problems were found; the message lists all of them. Also thrown by
/// <c>SingleOrDefault</c> when more than one layer matches.
/// </exception>
private void ValidateConfiguration()
{
    var problems = new List<string>();

    if (string.IsNullOrEmpty(SourceLayerName))
    {
        problems.Add("SourceLayer is required");
    }

    // The source import worker is the live Administration layer with the configured name.
    SourceImportWorker = _db.Layers
        .Where(x => x.Name == SourceLayerName)
        .Where(x => x.Type == LayerType.Administration)
        .Where(x => !x.IsDeleted && !x.IsCancelled)
        .SingleOrDefault();

    if (SourceImportWorker is null)
    {
        problems.Add($"SourceImportWorker layer '{SourceLayerName}' not found");
    }

    if (problems.Count > 0)
    {
        throw new InvalidOperationException($"Configuration validation failed: {string.Join(", ", problems)}");
    }

    _logger.LogDebug("{ValidatorType}: Configuration validation passed", ValidatorType);
}
|
||||
|
||||
/// <summary>
/// Loads the most recently created Import layer (with its records) whose parent is
/// <see cref="SourceImportWorker"/>, ignoring deleted and cancelled layers.
/// </summary>
/// <returns>The newest matching import layer.</returns>
/// <exception cref="InvalidOperationException">No matching import layer exists.</exception>
private Layer GetLatestImportLayer()
{
    var candidates =
        from layer in _db.Layers.Include(x => x.Records)
        where layer.ParentId == SourceImportWorker!.Id
              && layer.Type == LayerType.Import
              && !layer.IsDeleted
              && !layer.IsCancelled
        orderby layer.CreatedAt descending
        select layer;

    var newest = candidates.FirstOrDefault()
        ?? throw new InvalidOperationException(
            $"No import layers found for import worker '{SourceImportWorker!.Name}'");

    _logger.LogDebug("{ValidatorType}: Found latest import layer: {LayerName} ({LayerId})",
        ValidatorType, newest.Name, newest.Id);

    return newest;
}
|
||||
|
||||
/// <summary>
/// Loads up to <c>_recentImportsWindow</c> of the newest Import layers (with records, untracked)
/// under <see cref="SourceImportWorker"/>, ignoring deleted and cancelled layers.
/// </summary>
/// <returns>The matching layers, newest first.</returns>
/// <remarks>
/// NOTE(review): the filter and ordering match <c>GetLatestImportLayer</c>, so the newest import
/// appears to be part of this list as well — confirm that is intended.
/// </remarks>
private List<Layer> GetHistoricalImports()
{
    var newestFirst =
        from layer in _db.Layers.Include(x => x.Records)
        where layer.ParentId == SourceImportWorker!.Id
              && layer.Type == LayerType.Import
              && !layer.IsDeleted
              && !layer.IsCancelled
        orderby layer.CreatedAt descending
        select layer;

    var recent = newestFirst
        .Take(_recentImportsWindow)
        .AsNoTracking()
        .ToList();

    _logger.LogDebug("{ValidatorType}: Found {Count} historical imports for recent window",
        ValidatorType, recent.Count);

    return recent;
}
|
||||
|
||||
/// <summary>
/// Loads up to <c>_monthlyImportsWindow</c> of the newest Import layers (with records, untracked)
/// under <see cref="SourceImportWorker"/> whose <c>CreatedAt</c> falls on day 1 of a month,
/// ignoring deleted and cancelled layers.
/// </summary>
/// <returns>The matching layers, newest first.</returns>
private List<Layer> GetMonthlyBaselineImports()
{
    var firstOfMonthNewestFirst =
        from layer in _db.Layers.Include(x => x.Records)
        where layer.ParentId == SourceImportWorker!.Id
              && layer.Type == LayerType.Import
              && layer.CreatedAt.Day == 1
              && !layer.IsDeleted
              && !layer.IsCancelled
        orderby layer.CreatedAt descending
        select layer;

    var baseline = firstOfMonthNewestFirst
        .Take(_monthlyImportsWindow)
        .AsNoTracking()
        .ToList();

    _logger.LogDebug("{ValidatorType}: Found {Count} monthly baseline imports",
        ValidatorType, baseline.Count);

    return baseline;
}
|
||||
|
||||
/// <summary>
/// Builds the prompt, calls the LLM, and stores the result as a new Validation layer.
/// </summary>
/// <param name="validationWorker">Validation worker that triggered this run.</param>
/// <param name="latestImport">Import layer being validated.</param>
/// <param name="historicalImports">Recent imports used as historical context.</param>
private void PerformValidation(Layer validationWorker, Layer latestImport, List<Layer> historicalImports)
{
    _logger.LogDebug("{ValidatorType}: Performing validation for import: {ImportName}",
        ValidatorType, latestImport.Name);

    // The monthly baseline is optional; an empty list is handled by BuildPrompt.
    var monthlyBaseline = GetMonthlyBaselineImports();

    var prompt = BuildPrompt(latestImport, historicalImports, monthlyBaseline);

    // Stopwatch is monotonic; subtracting two DateTime.UtcNow readings is skewed by clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    var llmResponse = CallLlm(prompt);
    stopwatch.Stop();
    var processingTime = stopwatch.Elapsed;

    var validationLayer = CreateValidationLayer(validationWorker, latestImport, llmResponse, processingTime);

    SaveValidationLayer(validationLayer, llmResponse);

    _logger.LogInformation("{ValidatorType}: Created validation layer {LayerName} ({LayerId}) in {ProcessingTime}ms",
        ValidatorType, validationLayer.Name, validationLayer.Id, processingTime.TotalMilliseconds);
}
|
||||
|
||||
/// <summary>
/// Builds the anomaly-detection prompt: the current import's records, the recent imports,
/// an optional monthly baseline, and the analysis tasks with the expected JSON response format.
/// </summary>
/// <param name="currentImport">Import layer being validated.</param>
/// <param name="recentImports">Recent imports used as historical context.</param>
/// <param name="monthlyBaseline">First-of-month imports; the section is omitted when empty.</param>
/// <returns>The complete prompt text.</returns>
private string BuildPrompt(Layer currentImport, List<Layer> recentImports, List<Layer> monthlyBaseline)
{
    var currentRecords = currentImport.Records?.OrderBy(r => r.Code).ToList() ?? new List<Record>();
    var importType = SourceImportWorker?.Name ?? "Unknown";

    // The history queries use the same filter as the latest-import query, so the import under
    // validation can appear in these lists. Showing it as its own history would make every
    // value look "normal", so it is excluded from both context sections.
    var history = recentImports.Where(imp => imp.Id != currentImport.Id).ToList();
    var baseline = monthlyBaseline.Where(imp => imp.Id != currentImport.Id).ToList();

    var currentJson = JsonSerializer.Serialize(
        currentRecords.Select(r => new { code = r.Code, value1 = r.Value1 }),
        new JsonSerializerOptions { WriteIndented = true });

    var historyLines = string.Join("\n", history.Select((imp, idx) =>
        $"Import {idx + 1} ({imp.CreatedAt:yyyy-MM-dd}): {SerializeImportRecords(imp)}"));

    var prompt = $@"You are a data quality analyst specializing in anomaly detection for business intelligence imports.

**Import Type:** {importType}
**Import Date:** {currentImport.CreatedAt:yyyy-MM-dd HH:mm:ss}
**Current Import:** {currentImport.Name}

**Current Import Data ({currentRecords.Count} records):**
{currentJson}

**Historical Context - Last {history.Count} Imports:**
{historyLines}
";

    if (baseline.Any())
    {
        var baselineLines = string.Join("\n", baseline.Select((imp, idx) =>
            $"Monthly Import {idx + 1} ({imp.CreatedAt:yyyy-MM-dd}): {SerializeImportRecords(imp)}"));

        prompt += $@"
**Monthly Baseline - Last {baseline.Count} First-Day Imports:**
{baselineLines}
";
    }

    prompt += @"
**Analysis Tasks:**
1. **Record-level anomalies:** Identify unusual values for specific codes compared to historical patterns
2. **Structural issues:** Detect missing codes, new codes, or unexpected count changes
3. **Pattern breaks:** Find trend reversals, unexpected correlations, or statistical outliers

**Response Format (JSON):**
```json
{
""overallStatus"": ""pass|warning|critical"",
""recordAnomalies"": [
{
""code"": ""string"",
""value1"": number,
""confidence"": 0.0-1.0,
""severity"": ""low|medium|high|critical"",
""reason"": ""brief explanation"",
""recommendation"": ""suggested action""
}
],
""structuralIssues"": [
{
""issueType"": ""missing_codes|new_codes|count_change"",
""description"": ""string"",
""codes"": [""code1"", ""code2""],
""severity"": ""low|medium|high|critical""
}
],
""summary"": ""Brief overall assessment""
}
```

Analyze the data and respond ONLY with the JSON object. Do not include any markdown formatting or additional text.";

    return prompt;
}

// Serializes one import's records as a compact JSON array of { code, value1 }, ordered by code.
private static string SerializeImportRecords(Layer import)
{
    return JsonSerializer.Serialize(
        import.Records?.OrderBy(r => r.Code).Select(r => new { code = r.Code, value1 = r.Value1 })
        ?? Enumerable.Empty<object>());
}
|
||||
|
||||
// The prompt requests camelCase keys ("overallStatus", "recordAnomalies", ...) while the response
// models use PascalCase properties. System.Text.Json matches names case-sensitively by default,
// which would leave every property at its default value.
private static readonly JsonSerializerOptions LlmResponseJsonOptions = new JsonSerializerOptions
{
    PropertyNameCaseInsensitive = true
};

/// <summary>
/// Sends the prompt to the kernel's chat-completion service and parses the reply into an
/// <see cref="AnomalyResponse"/>.
/// </summary>
/// <param name="prompt">Full prompt text to send as a single user message.</param>
/// <returns>The parsed response.</returns>
/// <exception cref="InvalidOperationException">
/// The reply is not valid JSON (the <see cref="JsonException"/> is attached as inner exception)
/// or deserializes to <c>null</c>.
/// </exception>
private AnomalyResponse CallLlm(string prompt)
{
    try
    {
        var chatService = _kernel.GetRequiredService<IChatCompletionService>();

        var chatHistory = new ChatHistory();
        chatHistory.AddUserMessage(prompt);

        // The enclosing Validate override is synchronous, so the async call is blocked on here.
        var result = chatService.GetChatMessageContentAsync(
            chatHistory,
            new OpenAIPromptExecutionSettings
            {
                Temperature = _config.GetValue<double?>("AnomalyDetection:Temperature") ?? 0.1,
                MaxTokens = _config.GetValue<int?>("AnomalyDetection:MaxTokens") ?? 4000
            }).GetAwaiter().GetResult();

        // Models frequently wrap JSON in a ```json fence; unwrap it before parsing.
        var jsonResponse = StripMarkdownCodeFence(result.Content?.Trim() ?? "{}");

        try
        {
            return JsonSerializer.Deserialize<AnomalyResponse>(jsonResponse, LlmResponseJsonOptions)
                ?? throw new InvalidOperationException("LLM returned null response");
        }
        catch (JsonException jsonEx)
        {
            _logger.LogWarning("Failed to parse LLM response as JSON. Raw response: {Response}", jsonResponse);
            throw new InvalidOperationException($"LLM did not return valid JSON. Response: {jsonResponse}", jsonEx);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to call LLM for anomaly detection");
        throw;
    }
}

// Returns the text inside a surrounding markdown code fence, or the trimmed input when unfenced.
private static string StripMarkdownCodeFence(string text)
{
    var trimmed = text.Trim();
    if (!trimmed.StartsWith("```", StringComparison.Ordinal))
    {
        return trimmed;
    }

    // Drop the opening fence line (which may carry a language tag such as "json").
    var firstLineEnd = trimmed.IndexOf('\n');
    if (firstLineEnd < 0)
    {
        return trimmed;
    }

    var inner = trimmed.Substring(firstLineEnd + 1);
    var closingFence = inner.LastIndexOf("```", StringComparison.Ordinal);
    if (closingFence >= 0)
    {
        inner = inner.Substring(0, closingFence);
    }

    return inner.Trim();
}
|
||||
|
||||
/// <summary>
/// Creates (but does not persist) a new Validation layer whose parent is the validated import.
/// </summary>
/// <param name="validationWorker">Validation worker that triggered the run (not used here).</param>
/// <param name="importLayer">Import layer that was validated; becomes the parent.</param>
/// <param name="response">LLM response (not used here).</param>
/// <param name="processingTime">Duration of the LLM call (not used here).</param>
/// <returns>The new, unsaved validation layer.</returns>
private Layer CreateValidationLayer(Layer validationWorker, Layer importLayer, AnomalyResponse response, TimeSpan processingTime)
{
    var layerNumber = _db.Layers.Count() + 1;

    // One clock reading, so the name timestamp, CreatedAt and ModifiedAt always agree.
    var now = DateTime.UtcNow;
    var timestamp = now.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

    var validationLayer = new Layer
    {
        Id = Guid.NewGuid(),
        Type = LayerType.Validation,
        ParentId = importLayer.Id, // Links to the import that was validated
        Number = layerNumber,
        Name = $"L{layerNumber}-V-{timestamp}",
        CreatedById = User.AutoImportUserId,
        ModifiedById = User.AutoImportUserId,
        CreatedAt = now,
        ModifiedAt = now
    };

    _logger.LogDebug("{ValidatorType}: Created validation layer {LayerName}",
        ValidatorType, validationLayer.Name);

    return validationLayer;
}
|
||||
|
||||
/// <summary>
/// Persists the validation layer together with its result records: metadata, one record per
/// anomaly, one per structural issue, and the full response serialized as JSON.
/// </summary>
/// <param name="validationLayer">Layer to add to the database.</param>
/// <param name="response">Parsed LLM response to store.</param>
private void SaveValidationLayer(Layer validationLayer, AnomalyResponse response)
{
    // Stored values are machine-readable, so formatting and casing are culture-independent.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var layerId = validationLayer.Id;

    _db.Layers.Add(validationLayer);

    // Metadata records
    var records = new List<Record>
    {
        CreateRecord(layerId, "ValidatedAt", DateTime.UtcNow.ToString("yyyy-MM-dd HH:mm:ss", invariant)),
        CreateRecord(layerId, "OverallStatus", response.OverallStatus),
        // NOTE(review): RecordsChecked stores the anomaly count, same as AnomaliesDetected;
        // the number of records actually checked is not available in this method.
        CreateRecord(layerId, "RecordsChecked", value1: response.RecordAnomalies?.Count ?? 0),
        CreateRecord(layerId, "AnomaliesDetected", value1: response.RecordAnomalies?.Count ?? 0),
        CreateRecord(layerId, "StructuralIssuesDetected", value1: response.StructuralIssues?.Count ?? 0),
        CreateRecord(layerId, "LlmProvider", _provider),
        CreateRecord(layerId, "LlmModel", _model),
        CreateRecord(layerId, "Summary", response.Summary)
    };

    // One record per reported anomaly; the confidence goes into Value1.
    if (response.RecordAnomalies != null)
    {
        foreach (var anomaly in response.RecordAnomalies)
        {
            records.Add(CreateRecord(
                layerId,
                $"ANOMALY_{anomaly.Code}",
                $"[{anomaly.Severity}] {anomaly.Reason}. Recommendation: {anomaly.Recommendation}",
                anomaly.Confidence));
        }
    }

    // One record per structural issue.
    if (response.StructuralIssues != null)
    {
        foreach (var issue in response.StructuralIssues)
        {
            var codes = issue.Codes != null ? string.Join(", ", issue.Codes) : "";
            records.Add(CreateRecord(
                layerId,
                // Invariant upper-casing keeps the code stable across cultures (e.g. Turkish "i").
                $"STRUCTURAL_{issue.IssueType?.ToUpperInvariant()}",
                $"[{issue.Severity}] {issue.Description}. Codes: {codes}"));
        }
    }

    // Store full LLM response as JSON (for debugging)
    records.Add(CreateRecord(layerId, "LLM_RESPONSE_JSON", JsonSerializer.Serialize(response)));

    _db.Records.AddRange(records);
    _db.SaveChanges();

    _logger.LogDebug("{ValidatorType}: Saved {RecordCount} records for validation layer {LayerId}",
        ValidatorType, records.Count, validationLayer.Id);
}
|
||||
|
||||
/// <summary>
/// Creates a new, unsaved record for the given layer, attributed to the auto-import user.
/// </summary>
/// <param name="layerId">Id of the layer the record belongs to.</param>
/// <param name="code">Record code.</param>
/// <param name="desc1">Optional text value.</param>
/// <param name="value1">Optional numeric value.</param>
/// <returns>The new record.</returns>
private Record CreateRecord(Guid layerId, string code, string? desc1 = null, double? value1 = null)
{
    // One clock reading, so CreatedAt and ModifiedAt are identical on a fresh record.
    var now = DateTime.UtcNow;

    return new Record
    {
        Id = Guid.NewGuid(),
        LayerId = layerId,
        Code = code,
        Desc1 = desc1,
        Value1 = value1,
        CreatedById = User.AutoImportUserId,
        ModifiedById = User.AutoImportUserId,
        CreatedAt = now,
        ModifiedAt = now
    };
}
|
||||
}
|
||||
|
||||
// Response models for LLM
|
||||
/// <summary>Top-level result object the LLM is asked to return.</summary>
public class AnomalyResponse
{
    /// <summary>Overall verdict; defaults to <c>"pass"</c>.</summary>
    public string OverallStatus { get; set; } = "pass";

    /// <summary>Record-level anomalies, or <c>null</c> when none were supplied.</summary>
    public List<RecordAnomaly>? RecordAnomalies { get; set; }

    /// <summary>Structural issues, or <c>null</c> when none were supplied.</summary>
    public List<StructuralIssue>? StructuralIssues { get; set; }

    /// <summary>Free-text summary; defaults to an empty string.</summary>
    public string Summary { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>A single record-level anomaly reported by the LLM.</summary>
public class RecordAnomaly
{
    /// <summary>Code of the affected record; defaults to an empty string.</summary>
    public string Code { get; set; } = string.Empty;

    /// <summary>Value reported for the record, if any.</summary>
    public double? Value1 { get; set; }

    /// <summary>Confidence reported by the model.</summary>
    public double Confidence { get; set; }

    /// <summary>Severity label; defaults to <c>"low"</c>.</summary>
    public string Severity { get; set; } = "low";

    /// <summary>Explanation of the anomaly; defaults to an empty string.</summary>
    public string Reason { get; set; } = string.Empty;

    /// <summary>Suggested action; defaults to an empty string.</summary>
    public string Recommendation { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>A structural issue reported by the LLM.</summary>
public class StructuralIssue
{
    /// <summary>Issue type label, or <c>null</c> when not supplied.</summary>
    public string? IssueType { get; set; }

    /// <summary>Description of the issue; defaults to an empty string.</summary>
    public string Description { get; set; } = string.Empty;

    /// <summary>Codes involved in the issue, or <c>null</c> when not supplied.</summary>
    public List<string>? Codes { get; set; }

    /// <summary>Severity label; defaults to <c>"low"</c>.</summary>
    public string Severity { get; set; } = "low";
}
|
||||
Reference in New Issue
Block a user