From 84e26ddd33bc17a38a50594cca25b05afb21dc1b Mon Sep 17 00:00:00 2001 From: Sander Saares Date: Sun, 22 Dec 2019 18:10:34 +0000 Subject: [PATCH] Implemented basic probing --- Constants.cs | 16 ++- ContainerTracker.cs | 223 +++++++++++++++++++++++++++++ ContainerTrackerResourceMetrics.cs | 77 ++++++++++ ContainerTrackerStateMetrics.cs | 51 +++++++ DockerExporter.csproj | 1 + DockerTracker.cs | 145 +++++++++++++++++++ DockerTrackerMetrics.cs | 13 ++ ExporterLogic.cs | 83 +++++++++-- ExporterLogicMetrics.cs | 9 ++ Program.cs | 18 +-- 10 files changed, 610 insertions(+), 26 deletions(-) create mode 100644 ContainerTracker.cs create mode 100644 ContainerTrackerResourceMetrics.cs create mode 100644 ContainerTrackerStateMetrics.cs create mode 100644 DockerTracker.cs create mode 100644 DockerTrackerMetrics.cs create mode 100644 ExporterLogicMetrics.cs diff --git a/Constants.cs b/Constants.cs index 9a93cb1..8e3935c 100644 --- a/Constants.cs +++ b/Constants.cs @@ -8,8 +8,22 @@ namespace DockerExporter public const string VersionString = "__VERSIONSTRING__"; /// - /// Docker can sometimes be slow to respond. If that is the case, we just give up and try again later. + /// Docker can sometimes be slow to respond. If that is the case, we just give up and try + /// again later. This limit is applied per individual API call, so does not reflect the + /// total possible duration of a scrape, which is handled by the timeout values below. /// public static readonly TimeSpan DockerCommandTimeout = TimeSpan.FromSeconds(30); + + /// + /// We are willing to delay a single scrape up to this long to wait for fresh data. + /// Beyond this point, the update can still continue but will be done in the background. + /// + public static readonly TimeSpan MaxInlineUpdateDuration = TimeSpan.FromSeconds(20); + + /// + /// Even if the update happens in the background, it will be cancelled if it takes + /// more time than this. The next scrape will try again from scratch. + /// + public static readonly TimeSpan MaxTotalUpdateDuration = TimeSpan.FromMinutes(2); } } diff --git a/ContainerTracker.cs b/ContainerTracker.cs new file mode 100644 index 0000000..142a484 --- /dev/null +++ b/ContainerTracker.cs @@ -0,0 +1,223 @@ +using Axinom.Toolkit; +using Prometheus; +using Docker.DotNet; +using Docker.DotNet.Models; +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using System.Diagnostics; +using System.Linq; + +namespace DockerExporter +{ + /// + /// Tracks the status of one container and exports metrics, updating the data when new scrapes are requested. + /// + /// + /// NOT thread-safe! No concurrent usage is expected. + /// DockerTracker performs the necessary synchronization logic. + /// + sealed class ContainerTracker : IDisposable + { + public string Id { get; } + + public ContainerTracker(string id) + { + Id = id; + } + + public void Dispose() + { + _resourceMetrics?.Dispose(); + _stateMetrics?.Dispose(); + } + + /// + /// Requests the tracker to update its data set. + /// + /// + /// May be called multiple times concurrently. + /// + /// Method does not throw exceptions on transient failures, merely logs and ignores them. + /// + public async Task TryUpdateAsync(DockerClient client, CancellationToken cancel) + { + ContainerInspectResponse container; + StatsRecorder resourceStatsRecorder = new StatsRecorder(); + + try + { + // First, inspect to get some basic information. 
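+                // Note: each Docker API call made below is also individually bounded by
+                // Constants.DockerCommandTimeout (applied per call through the client
+                // configuration created in DockerTracker), so a single hung call fails fast
+                // instead of consuming the entire MaxTotalUpdateDuration budget.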
+ container = await client.Containers.InspectContainerAsync(Id, cancel); + + // Then query for the latest resource usage stats (if container is running). + if (container.State.Running) + { + await client.Containers.GetContainerStatsAsync(Id, new ContainerStatsParameters + { + Stream = false // Only get latest, then stop. + }, resourceStatsRecorder, cancel); + } + } + catch (Exception ex) + { + // TODO: DockerTrackerMetrics.ListContainersErrorCount.Inc(); + _log.Error(Helpers.Debug.GetAllExceptionMessages(ex)); + _log.Debug(ex.ToString()); // Only to verbose output. + + // Errors are ignored - if we fail to get data, we just skip an update and log the failure. + // The next update will hopefully get past the error. + return; + } + + // If anything goes wrong below, it is a fatal error not to be ignored, so not in the try block. + + // Now that we have the data assembled, update the metrics. + if (_stateMetrics == null) + { + var displayName = GetDisplayNameOrId(container); + _log.Debug($"First update of state metrics for {displayName} ({Id})."); + _stateMetrics = new ContainerTrackerStateMetrics(Id, displayName); + } + + UpdateStateMetrics(_stateMetrics, container); + + if (resourceStatsRecorder.Response != null) + { + if (_resourceMetrics == null) + { + var displayName = GetDisplayNameOrId(container); + _log.Debug($"Initializing resource metrics for {displayName} ({Id})."); + _resourceMetrics = new ContainerTrackerResourceMetrics(Id, displayName); + } + + UpdateResourceMetrics(_resourceMetrics, container, resourceStatsRecorder.Response); + } + else + { + // TODO: It could be we already had resource metrics and now they should go away. + _resourceMetrics?.Dispose(); + _resourceMetrics = null; + } + } + + private void UpdateStateMetrics(ContainerTrackerStateMetrics metrics, ContainerInspectResponse container) + { + metrics.RestartCount.Set(container.RestartCount); + + if (container.State.Running) + metrics.RunningState.Set(1); + else if (container.State.Restarting) + metrics.RunningState.Set(0.5); + else + metrics.RunningState.Set(0); + + if (container.State.Running && !string.IsNullOrWhiteSpace(container.State.StartedAt)) + metrics.StartTime.SetToTimeUtc(DateTimeOffset.Parse(container.State.StartedAt)); + } + + private void UpdateResourceMetrics(ContainerTrackerResourceMetrics metrics, ContainerInspectResponse container, ContainerStatsResponse resources) + { + // The resource reporting is very different for different operating systems. + // This field is only used on Windows. We assume a container can't exist with 0 memory. + bool isWindowsContainer = resources.MemoryStats.Commit != 0; + + // CPU usage + // The mechanism of calculation is the rate of increase in container CPU time versus available ("system") CPU time. + // The idea here is that we build two series - one counting used CPU in whatever units + // the other counting potentially available CPU in whatever units. The % always comes right. + // Docker CPU usage on Windows counts 100ns ticks. + // Docker CPU usage on Linux counts unspecified ticks in relation to some other stats. + // See https://github.com/moby/moby/blob/eb131c5383db8cac633919f82abad86c99bffbe5/cli/command/container/stats_helpers.go#L175 + if (isWindowsContainer) + { + // To compensate for core count on Windows, we normalize the container usage to a single core. + // We also normalize the available CPU time to a single core. + // This way the Windows calculation is always per-core averaged. 
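+                // (For reference: consumers of these two series would typically divide their
+                // rates, e.g. rate(docker_container_cpu_used_total[1m]) /
+                // rate(docker_container_cpu_capacity_total[1m]), which yields a 0..1
+                // utilization fraction on both platforms because the units cancel out.)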
+ // A .NET DateTimeOffset tick is 100ns, exactly, so matches what Docker uses. + metrics.CpuCapacity.Set(CpuBaselineTimer.Elapsed.Ticks); + metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage / resources.NumProcs); + } + else + { + // This is counting all cores (right?). + metrics.CpuCapacity.Set(resources.CPUStats.SystemUsage); + metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage); + } + + // Memory usage + if (isWindowsContainer) + { + // Windows reports Private Working Set in Docker stats... but seems to use Commit Bytes to enforce limit! + // We want to report the same metric that is limited, so there we go. + metrics.MemoryUsage.Set(resources.MemoryStats.Commit); + } + else + { + metrics.MemoryUsage.Set(resources.MemoryStats.Usage); + } + + // Network I/O + if (resources.Networks == null) + { + metrics.TotalNetworkBytesIn.Set(0); + metrics.TotalNetworkBytesOut.Set(0); + } + else + { + metrics.TotalNetworkBytesIn.Set(resources.Networks.Values.Sum(n => (double)n.RxBytes)); + metrics.TotalNetworkBytesOut.Set(resources.Networks.Values.Sum(n => (double)n.TxBytes)); + } + + // Disk I/O + if (isWindowsContainer) + { + metrics.TotalDiskBytesRead.Set(resources.StorageStats.ReadSizeBytes); + metrics.TotalDiskBytesWrite.Set(resources.StorageStats.WriteSizeBytes); + } + else + { + var readEntries = resources.BlkioStats.IoServiceBytesRecursive + .Where(entry => entry.Op.Equals("read", StringComparison.InvariantCultureIgnoreCase)) + .ToArray(); + + var writeEntries = resources.BlkioStats.IoServiceBytesRecursive + .Where(entry => entry.Op.Equals("write", StringComparison.InvariantCultureIgnoreCase)) + .ToArray(); + + var totalRead = readEntries.Any() ? readEntries.Sum(entry => (long)entry.Value) : 0; + var totalWrite = writeEntries.Any() ? writeEntries.Sum(entry => (long)entry.Value) : 0; + + metrics.TotalDiskBytesRead.Set(totalRead); + metrics.TotalDiskBytesWrite.Set(totalWrite); + } + } + + private sealed class StatsRecorder : IProgress + { + public ContainerStatsResponse? Response { get; private set; } + public void Report(ContainerStatsResponse value) => Response = value; + } + + /// + /// If a display name can be determined, returns it. Otherwise returns the container ID. + /// + private static string GetDisplayNameOrId(ContainerInspectResponse container) + { + if (!string.IsNullOrWhiteSpace(container.Name)) + return container.Name.Trim('/'); + + return container.ID; + } + + // We just need a monotonically increasing timer that does not use excessively large numbers (no 1970 base). + private static readonly Stopwatch CpuBaselineTimer = Stopwatch.StartNew(); + + private ContainerTrackerStateMetrics? _stateMetrics; + private ContainerTrackerResourceMetrics? 
_resourceMetrics; + + private readonly LogSource _log = Log.Default; + } +} diff --git a/ContainerTrackerResourceMetrics.cs b/ContainerTrackerResourceMetrics.cs new file mode 100644 index 0000000..037f561 --- /dev/null +++ b/ContainerTrackerResourceMetrics.cs @@ -0,0 +1,77 @@ +using Prometheus; +using System; +using System.Linq; + +namespace DockerExporter +{ + sealed class ContainerTrackerResourceMetrics : IDisposable + { + public Gauge.Child CpuUsage { get; private set; } + public Gauge.Child CpuCapacity { get; private set; } + public Gauge.Child MemoryUsage { get; private set; } + public Gauge.Child TotalNetworkBytesIn { get; private set; } + public Gauge.Child TotalNetworkBytesOut { get; private set; } + public Gauge.Child TotalDiskBytesRead { get; private set; } + public Gauge.Child TotalDiskBytesWrite { get; private set; } + + public ContainerTrackerResourceMetrics(string id, string displayName) + { + _id = id; + _displayName = displayName; + + CpuUsage = BaseCpuUsage.WithLabels(id, displayName); + CpuCapacity = BaseCpuCapacity.WithLabels(id, displayName); + MemoryUsage = BaseMemoryUsage.WithLabels(id, displayName); + TotalNetworkBytesIn = BaseTotalNetworkBytesIn.WithLabels(id, displayName); + TotalNetworkBytesOut = BaseTotalNetworkBytesOut.WithLabels(id, displayName); + TotalDiskBytesRead = BaseTotalDiskBytesRead.WithLabels(id, displayName); + TotalDiskBytesWrite = BaseTotalDiskBytesWrite.WithLabels(id, displayName); + } + + private readonly string _id; + private readonly string _displayName; + + public void Dispose() + { + BaseCpuUsage.RemoveLabelled(_id, _displayName); + BaseCpuCapacity.RemoveLabelled(_id, _displayName); + BaseMemoryUsage.RemoveLabelled(_id, _displayName); + BaseTotalNetworkBytesIn.RemoveLabelled(_id, _displayName); + BaseTotalNetworkBytesOut.RemoveLabelled(_id, _displayName); + BaseTotalDiskBytesRead.RemoveLabelled(_id, _displayName); + BaseTotalDiskBytesWrite.RemoveLabelled(_id, _displayName); + } + + // While logically counters, all of these are gauges because we do not know when Docker might reset the values. + + private static readonly Gauge BaseCpuUsage = Metrics + .CreateGauge("docker_container_cpu_used_total", "Accumulated CPU usage of a container, in unspecified units, averaged for all logical CPUs usable by the container.", ConfigureGauge()); + + private static readonly Gauge BaseCpuCapacity = Metrics + .CreateGauge("docker_container_cpu_capacity_total", "All potential CPU usage available to a container, in unspecified units, averaged for all logical CPUs usable by the container. 
Start point of measurement is undefined - only relative values should be used in analytics.", ConfigureGauge()); + + private static readonly Gauge BaseMemoryUsage = Metrics + .CreateGauge("docker_container_memory_used_bytes", "Memory usage of a container.", ConfigureGauge()); + + private static readonly Gauge BaseTotalNetworkBytesIn = Metrics + .CreateGauge("docker_container_network_in_bytes", "Total bytes received by the container's network interfaces.", ConfigureGauge()); + + private static readonly Gauge BaseTotalNetworkBytesOut = Metrics + .CreateGauge("docker_container_network_out_bytes", "Total bytes sent by the container's network interfaces.", ConfigureGauge()); + + private static readonly Gauge BaseTotalDiskBytesRead = Metrics + .CreateGauge("docker_container_disk_read_bytes", "Total bytes read from disk by a container.", ConfigureGauge()); + + private static readonly Gauge BaseTotalDiskBytesWrite = Metrics + .CreateGauge("docker_container_disk_write_bytes", "Total bytes written to disk by a container.", ConfigureGauge()); + + private static string[] LabelNames(params string[] extra) => + new[] { "id", "display_name" }.Concat(extra).ToArray(); + + private static GaugeConfiguration ConfigureGauge() => new GaugeConfiguration + { + LabelNames = LabelNames(), + SuppressInitialValue = true + }; + } +} diff --git a/ContainerTrackerStateMetrics.cs b/ContainerTrackerStateMetrics.cs new file mode 100644 index 0000000..a098a39 --- /dev/null +++ b/ContainerTrackerStateMetrics.cs @@ -0,0 +1,51 @@ +using Prometheus; +using System; +using System.Linq; + +namespace DockerExporter +{ + sealed class ContainerTrackerStateMetrics : IDisposable + { + public Gauge.Child RestartCount { get; private set; } + public Gauge.Child RunningState { get; private set; } + public Gauge.Child StartTime { get; private set; } + + public ContainerTrackerStateMetrics(string id, string displayName) + { + _id = id; + _displayName = displayName; + + RestartCount = BaseRestartCount.WithLabels(id, displayName); + RunningState = BaseRunningState.WithLabels(id, displayName); + StartTime = BaseStartTime.WithLabels(id, displayName); + } + + private readonly string _id; + private readonly string _displayName; + + public void Dispose() + { + BaseRestartCount.RemoveLabelled(_id, _displayName); + BaseRunningState.RemoveLabelled(_id, _displayName); + BaseStartTime.RemoveLabelled(_id, _displayName); + } + + private static readonly Gauge BaseRestartCount = Metrics + .CreateGauge("docker_container_restart_count", "Number of times the runtime has restarted this container without explicit user action, since the container was last started.", ConfigureGauge()); + + private static readonly Gauge BaseRunningState = Metrics + .CreateGauge("docker_container_running_state", "Whether the container is running (value 1), restarting (value 0.5) or stopped (value 0).", ConfigureGauge()); + + private static readonly Gauge BaseStartTime = Metrics + .CreateGauge("docker_container_start_time", "Timestamp indicating when the container was started. 
Does not get reset by automatic restarts.", ConfigureGauge()); + + private static string[] LabelNames(params string[] extra) => + new[] { "id", "display_name" }.Concat(extra).ToArray(); + + private static GaugeConfiguration ConfigureGauge() => new GaugeConfiguration + { + LabelNames = LabelNames(), + SuppressInitialValue = true + }; + } +} diff --git a/DockerExporter.csproj b/DockerExporter.csproj index b110c6d..385899f 100644 --- a/DockerExporter.csproj +++ b/DockerExporter.csproj @@ -25,6 +25,7 @@ + diff --git a/DockerTracker.cs b/DockerTracker.cs new file mode 100644 index 0000000..a6c7510 --- /dev/null +++ b/DockerTracker.cs @@ -0,0 +1,145 @@ +using Axinom.Toolkit; +using Docker.DotNet; +using Docker.DotNet.Models; +using Prometheus; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace DockerExporter +{ + /// + /// Tracks the status of one instance of Docker and exports metrics, updating the data when new scrapes are requested. + /// + /// + /// Thread-safe. + /// + sealed class DockerTracker + { + public Uri DockerUrl { get; } + + private readonly DockerClientConfiguration _clientConfiguration; + + // If an execution can get the lock on first try, it will really perform the update. + // Otherwise, it will wait for the lock and then perform a no-op update to just leave + // the tracker with the same data the just-finished update generated. + // This acts as basic rate control. + private readonly SemaphoreSlim _updateLock = new SemaphoreSlim(1); + + public DockerTracker(Uri dockerUrl) + { + DockerUrl = dockerUrl; + + // TODO: Support mutual authentication via certificates. + _clientConfiguration = new DockerClientConfiguration(dockerUrl, null, Constants.DockerCommandTimeout); + } + + /// + /// Requests the tracker to update its data set. + /// + /// + /// May be called multiple times concurrently. + /// + /// The method returns to signal that the trackerss of all containers + /// when the method was called have attempted an update to their data. + /// It may be that some updates failed - all we can say is that we tried. + /// + /// Method does not throw exceptions on transient failures, merely logs and ignores them. + /// + public async Task TryUpdateAsync() + { + using var cts = new CancellationTokenSource(Constants.MaxTotalUpdateDuration); + + // If we get this lock, we will actually perform the update. + using var writeLock = await SemaphoreLock.TryTakeAsync(_updateLock, TimeSpan.Zero); + + if (writeLock == null) + { + // Otherwise, we just no-op once the one that came before has updated the data. + await WaitForPredecessorUpdateAsync(cts.Token); + return; + } + + using var client = _clientConfiguration.CreateClient(); + + IList allContainers; + + try + { + allContainers = await client.Containers.ListContainersAsync(new ContainersListParameters + { + All = true + }, cts.Token); + } + catch (Exception ex) + { + DockerTrackerMetrics.ListContainersErrorCount.Inc(); + _log.Error(Helpers.Debug.GetAllExceptionMessages(ex)); + _log.Debug(ex.ToString()); // Only to verbose output. + + // Errors are ignored - if we fail to get data, we just skip an update and log the failure. + // The next update will hopefully get past the error. + + // We won't even try update the trackers if we can't even list the containers. + // TODO: Is this wise? What if individual container data is still available? + // Then again, if listing containers already does not work, can you expect anything to work? 
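+                // Until a later update succeeds, any previously published per-container
+                // metrics simply keep reporting their last known values.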
+ return; + } + + DockerTrackerMetrics.ContainerCount.Set(allContainers.Count); + SynchronizeTrackerSet(allContainers); + + // Update each tracker. We do them in parallel to minimize the total time span spent on probing. + var updateTasks = new List(); + + foreach (var tracker in _containerTrackers.Values) + updateTasks.Add(tracker.TryUpdateAsync(client, cts.Token)); + + // Only exceptions from the update calls should be terminal exceptions, + // so it is fine not to catch anything that may be thrown here. + await Task.WhenAll(updateTasks); + } + + private async Task WaitForPredecessorUpdateAsync(CancellationToken cancel) + { + _log.Debug("Will not trigger new probe as it overlaps with existing probe."); + using var readLock = await SemaphoreLock.TakeAsync(_updateLock, cancel); + } + + /// + /// Ensures that we have a tracker for every listed container + /// and removes trackers for any containers not in the list. + /// + private void SynchronizeTrackerSet(IList allContainers) + { + var containerIds = allContainers.Select(c => c.ID).ToArray(); + var trackedIds = _containerTrackers.Keys.ToArray(); + + // Create a tracker for any new containers. + var newIds = containerIds.Except(trackedIds); + foreach (var id in newIds) + { + _log.Debug($"Encountered container for the first time: {id}"); + _containerTrackers[id] = new ContainerTracker(id); + } + + // Remove the trackers of any removed containers. + var removedIds = trackedIds.Except(containerIds); + foreach (var id in removedIds) + { + _log.Debug($"Tracked container no longer exists. Removing: {id}"); + var tracker = _containerTrackers[id]; + tracker.Dispose(); + _containerTrackers.Remove(id); + } + } + + // Synchronized - only single threaded access occurs. + private readonly Dictionary _containerTrackers = new Dictionary(); + + private readonly LogSource _log = Log.Default; + } +} diff --git a/DockerTrackerMetrics.cs b/DockerTrackerMetrics.cs new file mode 100644 index 0000000..a6a9513 --- /dev/null +++ b/DockerTrackerMetrics.cs @@ -0,0 +1,13 @@ +using Prometheus; + +namespace DockerExporter +{ + sealed class DockerTrackerMetrics + { + public static readonly Gauge ContainerCount = Metrics + .CreateGauge("docker_containers", "Number of containers that exist."); + + public static readonly Counter ListContainersErrorCount = Metrics + .CreateCounter("docker_list_containers_failed_total", "How many times the attempt to list all containers has failed."); + } +} diff --git a/ExporterLogic.cs b/ExporterLogic.cs index a942e01..ff2d146 100644 --- a/ExporterLogic.cs +++ b/ExporterLogic.cs @@ -1,8 +1,7 @@ using Axinom.Toolkit; -using Docker.DotNet; +using Prometheus; using System; -using System.Collections.Generic; -using System.Text; +using System.Diagnostics; using System.Threading; using System.Threading.Tasks; @@ -15,6 +14,7 @@ namespace DockerExporter public ExporterLogic() { // Default value only valid if not running as container. + // This is intended for development purposes only. 
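+            // When running as a container, the URL is expected to be configured explicitly
+            // rather than relying on this default.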
if (Helpers.Environment.IsMicrosoftOperatingSystem()) { DockerUrl = "npipe://./pipe/docker_engine"; @@ -27,18 +27,81 @@ namespace DockerExporter public async Task RunAsync(CancellationToken cancel) { - _log.Info($"Connecting to Docker via {DockerUrl}"); + _log.Info($"Configured to probe Docker on {DockerUrl}"); - var clientConfig = new DockerClientConfiguration(new Uri(DockerUrl), null, Constants.DockerCommandTimeout); + _tracker = new DockerTracker(new Uri(DockerUrl)); - using (var client = clientConfig.CreateClient()) + Metrics.DefaultRegistry.AddBeforeCollectCallback(UpdateMetrics); + +#if DEBUG + var server = new MetricServer("localhost", 3652); + _log.Info($"Open http://localhost:3652/metrics to initiate a probe."); +#else + var server = new MetricServer(80); +#endif + + server.Start(); + + while (!cancel.IsCancellationRequested) { - var allContainers = await client.Containers.ListContainersAsync(new Docker.DotNet.Models.ContainersListParameters + try { - All = true - }, cancel); + await Task.Delay(-1, cancel); + } + catch (TaskCanceledException) when (cancel.IsCancellationRequested) + { + // Totally normal - we are exiting. + break; + } + } - _log.Info(Helpers.Debug.ToDebugString(allContainers)); + await server.StopAsync(); + } + + private DockerTracker? _tracker; + + /// + /// Called before every Prometheus collection in order to update metrics. + /// + /// + /// The Docker API can be very slow at times, so there is a risk that the scrape will + /// just time out under load. To avoid that, we enforce a maximum update duration and + /// will give up on fetching new values if the update takes longer than that. If the + /// threshold is crossed, we simply allow the scrape to proceed with stale data, while + /// the update keeps running in the background, hopefully eventually succeeding. + /// + /// If multiple parallel scrapes are made, the results from the first one will be used + /// to satisfy all requests that come in while the data loading triggered by the first + /// scrape is still being performed (even if we give up with the scrape before loading finishes). + /// This acts as a primitive form of rate control to avoid overloading the fragile Docker API. + /// The implementation for this is in DockerTracker. + /// + private void UpdateMetrics() + { + _log.Debug("Probing Docker."); + + using var inlineCancellation = new CancellationTokenSource(Constants.MaxInlineUpdateDuration); + var updateTask = _tracker!.TryUpdateAsync() + .WithAbandonment(inlineCancellation.Token); + + try + { + updateTask.WaitAndUnwrapExceptions(); + } + catch (TaskCanceledException) when (inlineCancellation.IsCancellationRequested) + { + _log.Debug("Probe took too long - will return stale results and finish probe in background."); + + // This is expected if it goes above the inline threshold, and will be ignored. + // Other exceptions are caught, logged, and ignored in DockerState itself. + ExporterLogicMetrics.InlineTimeouts.Inc(); + } + catch (Exception ex) + { + // TODO: Now what? If we throw here prometheus-net will just reject the scrape... + // ... but what if this is a fatal error that we want to crash the app with? 
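+                // For now the error is only logged; the scrape itself still completes using
+                // whatever values are already in the registry, just as in the timeout case.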
+ _log.Error(Helpers.Debug.GetAllExceptionMessages(ex)); + Debugger.Break(); } } diff --git a/ExporterLogicMetrics.cs b/ExporterLogicMetrics.cs new file mode 100644 index 0000000..e4cf518 --- /dev/null +++ b/ExporterLogicMetrics.cs @@ -0,0 +1,9 @@ +using Prometheus; + +namespace DockerExporter +{ + static class ExporterLogicMetrics + { + public static readonly Counter InlineTimeouts = Metrics.CreateCounter("docker_probe_inline_timeouts_total", "Total number of times we have forced the scrape to happen in the background and returned outdated data because performing an update inline took too long."); + } +} diff --git a/Program.cs b/Program.cs index fb4f241..936a305 100644 --- a/Program.cs +++ b/Program.cs @@ -54,23 +54,9 @@ namespace DockerExporter Environment.ExitCode = -1; } - catch (AggregateException ex) - { - foreach (var innerException in ex.InnerExceptions) - { - _log.Error(innerException.Message); - _log.Error(innerException.GetType().Name); - } - - Environment.ExitCode = -1; - } catch (Exception ex) { - if (!string.IsNullOrWhiteSpace(ex.Message)) - { - _log.Error(ex.Message); - _log.Error(ex.GetType().Name); - } + _log.Error(Helpers.Debug.GetAllExceptionMessages(ex)); Environment.ExitCode = -1; } @@ -141,7 +127,9 @@ namespace DockerExporter // We default to displaying Info or higher but allow this to be reconfiured later, if the user wishes. _filteringLogListener = new FilteringLogListener(new ConsoleLogListener()) { +#if !DEBUG MinimumSeverity = LogEntrySeverity.Info +#endif }; Log.Default.RegisterListener(_filteringLogListener);