diff --git a/README.md b/README.md index 2b85f2a90..8f6b15cc3 100644 --- a/README.md +++ b/README.md @@ -126,11 +126,12 @@ These health checks are integrated with Azure Container Apps' health probe syste ## Observability with OpenTelemetry -This project uses OpenTelemetry for distributed tracing and metrics collection. The setup includes: +This project uses OpenTelemetry for distributed tracing, metrics collection, and logging. The setup includes: ### Core Features - Distributed tracing across services - Runtime and application metrics +- Log aggregation and correlation - Integration with Azure Monitor/Application Insights - Support for both OTLP and Azure Monitor exporters - Automatic instrumentation for: @@ -157,15 +158,72 @@ OpenTelemetry is configured through environment variables that are automatically ### Local Development For local development, the project includes a docker-compose setup with: -- OpenTelemetry Collector -- Grafana -- Other supporting services +- OpenTelemetry Collector (ports 4317/4318 for OTLP receivers) +- Grafana (port 3000) +- Jaeger (port 16686) +- Loki (port 3100) +- Prometheus (port 9090) To run the local observability stack: ```bash podman compose -f docker-compose-otel.yml up ``` +### Accessing Observability Tools + +Once the local stack is running, you can access the following tools: + +#### Distributed Tracing with Jaeger +- URL: http://localhost:16686 +- Features: + - View distributed traces across services + - Search by service, operation, or trace ID + - Analyze timing and dependencies + - Debug request flows and errors + +#### Metrics with Prometheus +- URL: http://localhost:9090 +- Features: + - Query raw metrics data + - View metric targets and service discovery + - Debug metric collection + +#### Log Aggregation with Loki +- Direct URL: http://localhost:3100 +- Grafana Integration: http://localhost:3000 (preferred interface) +- Features: + - Search and filter logs across all services + - Correlate logs with traces using 
trace IDs + - Create log-based alerts and dashboards + - Use LogQL to query logs: + ```logql + # Example: Find all error logs + {container="web-api"} |= "error" + + # Example: Find logs with specific trace ID + {container=~"web-api|graphql"} |~ "trace_id=([a-f0-9]{32})" + ``` + +#### Metrics and Dashboards in Grafana +- URL: http://localhost:3000 +- Features: + - Pre-configured dashboards for: + - Application metrics + - Runtime metrics + - HTTP request metrics + - Data sources: + - Prometheus (metrics) + - Loki (logs) + - Jaeger (traces) + - Create custom dashboards + - Set up alerts + +#### OpenTelemetry Collector Endpoints +- OTLP gRPC receiver: localhost:4317 +- OTLP HTTP receiver: localhost:4318 +- Prometheus metrics: localhost:8888 +- Prometheus exporter metrics: localhost:8889 + ### Request Filtering The telemetry setup includes smart filtering to: diff --git a/docker-compose-otel.yml b/docker-compose-otel.yml index cb8f38aad..6106e8a90 100644 --- a/docker-compose-otel.yml +++ b/docker-compose-otel.yml @@ -22,6 +22,12 @@ services: - "14250:14250" # Model used by collector environment: - COLLECTOR_OTLP_ENABLED=true + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "localhost:16686"] + interval: 3s + timeout: 3s + retries: 10 + start_period: 10s # Prometheus for metrics prometheus: @@ -31,6 +37,21 @@ services: ports: - "9090:9090" + # Loki for log aggregation + loki: + image: grafana/loki:3.2.2 + ports: + - "3100:3100" + volumes: + - ./local-otel-configuration/loki-config.yaml:/etc/loki/local-config.yaml + command: -config.file=/etc/loki/local-config.yaml + healthcheck: + test: ["CMD-SHELL", "wget -q --tries=1 -O- http://localhost:3100/ready"] + interval: 3s + timeout: 3s + retries: 10 + start_period: 10s + # Grafana for metrics visualization grafana: image: grafana/grafana:11.4.0 @@ -43,3 +64,5 @@ services: - ./local-otel-configuration/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml - 
./local-otel-configuration/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml - ./local-otel-configuration/dashboards:/etc/grafana/provisioning/dashboards + depends_on: + - loki diff --git a/local-otel-configuration/grafana-datasources.yml b/local-otel-configuration/grafana-datasources.yml index 4139ccba6..0efe29c53 100644 --- a/local-otel-configuration/grafana-datasources.yml +++ b/local-otel-configuration/grafana-datasources.yml @@ -5,4 +5,11 @@ datasources: type: prometheus access: proxy url: http://prometheus:9090 - isDefault: true \ No newline at end of file + isDefault: true + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + jsonData: + maxLines: 1000 \ No newline at end of file diff --git a/local-otel-configuration/loki-config.yaml b/local-otel-configuration/loki-config.yaml new file mode 100644 index 000000000..a9f289d68 --- /dev/null +++ b/local-otel-configuration/loki-config.yaml @@ -0,0 +1,45 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /tmp/loki + +compactor: + working_directory: /tmp/loki/compactor + compaction_interval: 10m + +ingester: + lifecycler: + address: 127.0.0.1 + ring: + kvstore: + store: inmemory + replication_factor: 1 + final_sleep: 0s + chunk_idle_period: 5m + chunk_retain_period: 30s + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /tmp/loki/tsdb-index + cache_location: /tmp/loki/tsdb-cache + cache_ttl: 24h + filesystem: + directory: /tmp/loki/chunks + +limits_config: + reject_old_samples: true + reject_old_samples_max_age: 168h + allow_structured_metadata: true diff --git a/local-otel-configuration/otel-collector-config.yaml b/local-otel-configuration/otel-collector-config.yaml index 97bc0c2b6..d5cba28c3 100644 --- a/local-otel-configuration/otel-collector-config.yaml +++ 
b/local-otel-configuration/otel-collector-config.yaml @@ -25,6 +25,8 @@ exporters: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 + otlphttp: + endpoint: "http://loki:3100/otlp" extensions: health_check: @@ -49,4 +51,4 @@ service: logs: receivers: [otlp] processors: [batch] - exporters: [debug] + exporters: [otlphttp, debug] diff --git a/src/Digdir.Domain.Dialogporten.WebApi/Common/Middleware/RequestLoggingMiddleware.cs b/src/Digdir.Domain.Dialogporten.WebApi/Common/Middleware/RequestLoggingMiddleware.cs new file mode 100644 index 000000000..57029db46 --- /dev/null +++ b/src/Digdir.Domain.Dialogporten.WebApi/Common/Middleware/RequestLoggingMiddleware.cs @@ -0,0 +1,38 @@ +using Microsoft.AspNetCore.Http; +using Microsoft.Extensions.Logging; + +namespace Digdir.Domain.Dialogporten.WebApi.Common.Middleware; + +public sealed class RequestLoggingMiddleware +{ + private readonly RequestDelegate _next; + private readonly ILogger<RequestLoggingMiddleware> _logger; + + public RequestLoggingMiddleware(RequestDelegate next, ILogger<RequestLoggingMiddleware> logger) + { + _next = next; + _logger = logger; + } + + public async Task InvokeAsync(HttpContext context) + { + try + { + await _next(context); + } + finally + { + _logger.LogInformation( + "HTTP {RequestMethod} {RequestPath} responded {StatusCode}", + context.Request.Method, + context.Request.Path, + context.Response.StatusCode); + } + } +} + +public static class RequestLoggingMiddlewareExtensions +{ + public static IApplicationBuilder UseRequestLogging(this IApplicationBuilder app) + => app.UseMiddleware<RequestLoggingMiddleware>(); +} \ No newline at end of file diff --git a/src/Digdir.Domain.Dialogporten.WebApi/Program.cs b/src/Digdir.Domain.Dialogporten.WebApi/Program.cs index 6b58f6b5a..092b8a820 100644 --- a/src/Digdir.Domain.Dialogporten.WebApi/Program.cs +++ b/src/Digdir.Domain.Dialogporten.WebApi/Program.cs @@ -24,48 +24,17 @@ using NSwag; using Serilog; using Microsoft.Extensions.Options; +using Digdir.Domain.Dialogporten.WebApi.Common.Middleware; -// Using two-stage 
initialization to catch startup errors. -var telemetryConfiguration = TelemetryConfiguration.CreateDefault(); -Log.Logger = new LoggerConfiguration() - .MinimumLevel.Warning() - .Enrich.WithEnvironmentName() - .Enrich.FromLogContext() - .WriteTo.Console(formatProvider: CultureInfo.InvariantCulture) - .WriteTo.ApplicationInsights(telemetryConfiguration, TelemetryConverter.Traces) - .CreateBootstrapLogger(); +var builder = WebApplication.CreateBuilder(args); try { - BuildAndRun(args, telemetryConfiguration); -} -catch (Exception ex) when (ex is not OperationCanceledException) -{ - Log.Fatal(ex, "Application terminated unexpectedly"); - throw; -} -finally -{ - Log.CloseAndFlush(); -} - -static void BuildAndRun(string[] args, TelemetryConfiguration telemetryConfiguration) -{ - var builder = WebApplication.CreateBuilder(args); - builder.WebHost.ConfigureKestrel(kestrelOptions => { kestrelOptions.Limits.MaxRequestBodySize = Constants.MaxRequestBodySize; }); - builder.Host.UseSerilog((context, services, configuration) => configuration - .MinimumLevel.Warning() - .ReadFrom.Configuration(context.Configuration) - .ReadFrom.Services(services) - .Enrich.WithEnvironmentName() - .Enrich.FromLogContext() - .WriteTo.ApplicationInsights(telemetryConfiguration, TelemetryConverter.Traces)); - builder.Configuration .AddAzureConfiguration(builder.Environment.EnvironmentName) .AddLocalConfiguration(builder.Environment); @@ -154,11 +123,8 @@ static void BuildAndRun(string[] args, TelemetryConfiguration telemetryConfigura var app = builder.Build(); - app.MapAspNetHealthChecks() - .MapControllers(); - app.UseHttpsRedirection() - .UseSerilogRequestLogging() + .UseRequestLogging() .UseDefaultExceptionHandler() .UseJwtSchemeSelector() .UseAuthentication() @@ -222,6 +188,11 @@ static void BuildAndRun(string[] args, TelemetryConfiguration telemetryConfigura app.Run(); } +catch (Exception ex) when (ex is not OperationCanceledException) +{ + Console.WriteLine($"Application terminated 
unexpectedly: {ex}"); + throw; +} static void IgnoreEmptyCollections(JsonTypeInfo typeInfo) { diff --git a/src/Digdir.Library.Utils.AspNet/AspNetUtilitiesExtensions.cs b/src/Digdir.Library.Utils.AspNet/AspNetUtilitiesExtensions.cs index c1388ec0d..d8604f439 100644 --- a/src/Digdir.Library.Utils.AspNet/AspNetUtilitiesExtensions.cs +++ b/src/Digdir.Library.Utils.AspNet/AspNetUtilitiesExtensions.cs @@ -14,6 +14,8 @@ using OpenTelemetry.Exporter; using System.Diagnostics; using Azure.Monitor.OpenTelemetry.Exporter; +using OpenTelemetry.Logs; +using Microsoft.Extensions.Logging; namespace Digdir.Library.Utils.AspNet; @@ -156,6 +158,14 @@ public static WebApplicationBuilder ConfigureTelemetry( }); } }); + + if (!builder.Environment.IsDevelopment()) + { + // Clear existing logging providers. If development, we want to keep the console logging. + builder.Logging.ClearProviders(); + } + + telemetryBuilder.WithLogging(); } else { diff --git a/src/Digdir.Library.Utils.AspNet/Digdir.Library.Utils.AspNet.csproj b/src/Digdir.Library.Utils.AspNet/Digdir.Library.Utils.AspNet.csproj index 36a45f2a5..d494619c0 100644 --- a/src/Digdir.Library.Utils.AspNet/Digdir.Library.Utils.AspNet.csproj +++ b/src/Digdir.Library.Utils.AspNet/Digdir.Library.Utils.AspNet.csproj @@ -10,6 +10,7 @@ +