diff --git a/Constants.cs b/Constants.cs
index 8e3935c..64aa8c7 100644
--- a/Constants.cs
+++ b/Constants.cs
@@ -1,4 +1,5 @@
-using System;
+using Prometheus;
+using System;
namespace DockerExporter
{
@@ -25,5 +26,10 @@ namespace DockerExporter
/// more time than this. The next scrape will try again from scratch.
/// </summary>
public static readonly TimeSpan MaxTotalUpdateDuration = TimeSpan.FromMinutes(2);
+
+ /// <summary>
+ /// The default buckets used to measure Docker probe operation durations.
+ /// </summary>
+ public static readonly double[] DurationBuckets = Histogram.ExponentialBuckets(0.5, 1.5, 14);
}
}
diff --git a/ContainerTracker.cs b/ContainerTracker.cs
index 142a484..0b3dab3 100644
--- a/ContainerTracker.cs
+++ b/ContainerTracker.cs
@@ -1,14 +1,12 @@
using Axinom.Toolkit;
-using Prometheus;
using Docker.DotNet;
using Docker.DotNet.Models;
+using Prometheus;
using System;
-using System.Collections.Generic;
-using System.Text;
-using System.Threading;
-using System.Threading.Tasks;
using System.Diagnostics;
using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
namespace DockerExporter
{
@@ -22,10 +20,14 @@ namespace DockerExporter
sealed class ContainerTracker : IDisposable
{
public string Id { get; }
+ public string DisplayName { get; }
- public ContainerTracker(string id)
+ public ContainerTracker(string id, string displayName)
{
Id = id;
+ DisplayName = displayName;
+
+ _metrics = new ContainerTrackerMetrics(id, displayName);
}
public void Dispose()
@@ -34,27 +36,33 @@ namespace DockerExporter
_stateMetrics?.Dispose();
}
+ public void Unpublish()
+ {
+ _resourceMetrics?.Unpublish();
+ _stateMetrics?.Unpublish();
+ }
+
/// <summary>
/// Requests the tracker to update its data set.
/// </summary>
/// <remarks>
- /// May be called multiple times concurrently.
- ///
/// Method does not throw exceptions on transient failures, merely logs and ignores them.
/// </remarks>
public async Task TryUpdateAsync(DockerClient client, CancellationToken cancel)
{
ContainerInspectResponse container;
- StatsRecorder resourceStatsRecorder = new StatsRecorder();
+ var resourceStatsRecorder = new StatsRecorder();
try
{
// First, inspect to get some basic information.
- container = await client.Containers.InspectContainerAsync(Id, cancel);
+ using (_metrics.InspectContainerDuration.NewTimer())
+ container = await client.Containers.InspectContainerAsync(Id, cancel);
// Then query for the latest resource usage stats (if container is running).
if (container.State.Running)
{
+ using var statsTimer = _metrics.GetResourceStatsDuration.NewTimer();
await client.Containers.GetContainerStatsAsync(Id, new ContainerStatsParameters
{
Stream = false // Only get latest, then stop.
@@ -63,12 +71,13 @@ namespace DockerExporter
}
catch (Exception ex)
{
- // TODO: DockerTrackerMetrics.ListContainersErrorCount.Inc();
+ _metrics.FailedProbeCount.Inc();
_log.Error(Helpers.Debug.GetAllExceptionMessages(ex));
_log.Debug(ex.ToString()); // Only to verbose output.
// Errors are ignored - if we fail to get data, we just skip an update and log the failure.
- // The next update will hopefully get past the error.
+ // The next update will hopefully get past the error. For now, we just unpublish.
+ Unpublish();
return;
}
@@ -77,9 +86,8 @@ namespace DockerExporter
// Now that we have the data assembled, update the metrics.
if (_stateMetrics == null)
{
- var displayName = GetDisplayNameOrId(container);
- _log.Debug($"First update of state metrics for {displayName} ({Id}).");
- _stateMetrics = new ContainerTrackerStateMetrics(Id, displayName);
+ _log.Debug($"First update of state metrics for {DisplayName} ({Id}).");
+ _stateMetrics = new ContainerTrackerStateMetrics(Id, DisplayName);
}
UpdateStateMetrics(_stateMetrics, container);
@@ -88,16 +96,16 @@ namespace DockerExporter
{
if (_resourceMetrics == null)
{
- var displayName = GetDisplayNameOrId(container);
- _log.Debug($"Initializing resource metrics for {displayName} ({Id}).");
- _resourceMetrics = new ContainerTrackerResourceMetrics(Id, displayName);
+ _log.Debug($"Initializing resource metrics for {DisplayName} ({Id}).");
+ _resourceMetrics = new ContainerTrackerResourceMetrics(Id, DisplayName);
}
UpdateResourceMetrics(_resourceMetrics, container, resourceStatsRecorder.Response);
}
else
{
- // TODO: It could be we already had resource metrics and now they should go away.
+ // It could be we already had resource metrics and now they should go away.
+ // They'll be recreated the next time the container is observed in the running state (e.g. after it restarts).
_resourceMetrics?.Dispose();
_resourceMetrics = null;
}
@@ -201,20 +209,10 @@ namespace DockerExporter
public void Report(ContainerStatsResponse value) => Response = value;
}
- /// <summary>
- /// If a display name can be determined, returns it. Otherwise returns the container ID.
- /// </summary>
- private static string GetDisplayNameOrId(ContainerInspectResponse container)
- {
- if (!string.IsNullOrWhiteSpace(container.Name))
- return container.Name.Trim('/');
-
- return container.ID;
- }
-
// We just need a monotonically increasing timer that does not use excessively large numbers (no 1970 base).
private static readonly Stopwatch CpuBaselineTimer = Stopwatch.StartNew();
+ private ContainerTrackerMetrics _metrics;
private ContainerTrackerStateMetrics? _stateMetrics;
private ContainerTrackerResourceMetrics? _resourceMetrics;
diff --git a/ContainerTrackerMetrics.cs b/ContainerTrackerMetrics.cs
new file mode 100644
index 0000000..6ace054
--- /dev/null
+++ b/ContainerTrackerMetrics.cs
@@ -0,0 +1,42 @@
+using Prometheus;
+using System;
+
+namespace DockerExporter
+{
+ sealed class ContainerTrackerMetrics : IDisposable
+ {
+ public Counter.Child FailedProbeCount { get; }
+
+ // These two are NOT differentiated by container, just to avoid a large number of series for each container.
+ // Aggregate results seem useful, container scope less so. Can be expanded in the future if need be.
+ public Histogram InspectContainerDuration => BaseInspectContainerDuration;
+ public Histogram GetResourceStatsDuration => BaseGetResourceStatsDuration;
+
+ public ContainerTrackerMetrics(string id, string displayName)
+ {
+ FailedProbeCount = BaseFailedProbeCount.WithLabels(id, displayName);
+ }
+
+ public void Dispose()
+ {
+ FailedProbeCount.Remove();
+ }
+
+ private static readonly Counter BaseFailedProbeCount = Metrics.CreateCounter("docker_probe_container_failed_total", "Number of times the exporter failed to collect information about a specific container.", new CounterConfiguration
+ {
+ LabelNames = new[] { "id", "display_name" }
+ });
+
+ private static readonly Histogram BaseInspectContainerDuration = Metrics
+ .CreateHistogram("docker_probe_inspect_duration_seconds", "How long it takes to query Docker for the basic information about a single container. Includes failed requests.", new HistogramConfiguration
+ {
+ Buckets = Constants.DurationBuckets
+ });
+
+ private static readonly Histogram BaseGetResourceStatsDuration = Metrics
+ .CreateHistogram("docker_probe_stats_duration_seconds", "How long it takes to query Docker for the resource usage of a single container. Includes failed requests.", new HistogramConfiguration
+ {
+ Buckets = Constants.DurationBuckets
+ });
+ }
+}
diff --git a/ContainerTrackerResourceMetrics.cs b/ContainerTrackerResourceMetrics.cs
index 037f561..3586775 100644
--- a/ContainerTrackerResourceMetrics.cs
+++ b/ContainerTrackerResourceMetrics.cs
@@ -33,13 +33,24 @@ namespace DockerExporter
public void Dispose()
{
- BaseCpuUsage.RemoveLabelled(_id, _displayName);
- BaseCpuCapacity.RemoveLabelled(_id, _displayName);
- BaseMemoryUsage.RemoveLabelled(_id, _displayName);
- BaseTotalNetworkBytesIn.RemoveLabelled(_id, _displayName);
- BaseTotalNetworkBytesOut.RemoveLabelled(_id, _displayName);
- BaseTotalDiskBytesRead.RemoveLabelled(_id, _displayName);
- BaseTotalDiskBytesWrite.RemoveLabelled(_id, _displayName);
+ CpuUsage.Remove();
+ CpuCapacity.Remove();
+ MemoryUsage.Remove();
+ TotalNetworkBytesIn.Remove();
+ TotalNetworkBytesOut.Remove();
+ TotalDiskBytesRead.Remove();
+ TotalDiskBytesWrite.Remove();
+ }
+
+ public void Unpublish()
+ {
+ CpuUsage.Unpublish();
+ CpuCapacity.Unpublish();
+ MemoryUsage.Unpublish();
+ TotalNetworkBytesIn.Unpublish();
+ TotalNetworkBytesOut.Unpublish();
+ TotalDiskBytesRead.Unpublish();
+ TotalDiskBytesWrite.Unpublish();
}
// While logically counters, all of these are gauges because we do not know when Docker might reset the values.
diff --git a/ContainerTrackerStateMetrics.cs b/ContainerTrackerStateMetrics.cs
index a098a39..e71d13e 100644
--- a/ContainerTrackerStateMetrics.cs
+++ b/ContainerTrackerStateMetrics.cs
@@ -12,22 +12,23 @@ namespace DockerExporter
public ContainerTrackerStateMetrics(string id, string displayName)
{
- _id = id;
- _displayName = displayName;
-
RestartCount = BaseRestartCount.WithLabels(id, displayName);
RunningState = BaseRunningState.WithLabels(id, displayName);
StartTime = BaseStartTime.WithLabels(id, displayName);
}
- private readonly string _id;
- private readonly string _displayName;
-
public void Dispose()
{
- BaseRestartCount.RemoveLabelled(_id, _displayName);
- BaseRunningState.RemoveLabelled(_id, _displayName);
- BaseStartTime.RemoveLabelled(_id, _displayName);
+ RestartCount.Remove();
+ RunningState.Remove();
+ StartTime.Remove();
+ }
+
+ public void Unpublish()
+ {
+ RestartCount.Unpublish();
+ RunningState.Unpublish();
+ StartTime.Unpublish();
}
private static readonly Gauge BaseRestartCount = Metrics
diff --git a/DockerExporter.csproj b/DockerExporter.csproj
index 385899f..17b145b 100644
--- a/DockerExporter.csproj
+++ b/DockerExporter.csproj
@@ -25,7 +25,7 @@
-
+
diff --git a/DockerTracker.cs b/DockerTracker.cs
index a6c7510..f456bc7 100644
--- a/DockerTracker.cs
+++ b/DockerTracker.cs
@@ -58,17 +58,21 @@ namespace DockerExporter
if (writeLock == null)
{
- // Otherwise, we just no-op once the one that came before has updated the data.
+ // Otherwise, we just no-op once the earlier probe request has updated the data.
await WaitForPredecessorUpdateAsync(cts.Token);
return;
}
+ using var probeDurationTimer = DockerTrackerMetrics.ProbeDuration.NewTimer();
+
using var client = _clientConfiguration.CreateClient();
IList<ContainerListResponse> allContainers;
try
{
+ using var listDurationTimer = DockerTrackerMetrics.ListContainersDuration.NewTimer();
+
allContainers = await client.Containers.ListContainersAsync(new ContainersListParameters
{
All = true
@@ -83,9 +87,10 @@ namespace DockerExporter
// Errors are ignored - if we fail to get data, we just skip an update and log the failure.
// The next update will hopefully get past the error.
- // We won't even try update the trackers if we can't even list the containers.
- // TODO: Is this wise? What if individual container data is still available?
- // Then again, if listing containers already does not work, can you expect anything to work?
+ // We will not remove the trackers yet, but we will unpublish their metrics so stale data is not kept visible.
+ foreach (var tracker in _containerTrackers.Values)
+ tracker.Unpublish();
+
return;
}
@@ -122,21 +127,38 @@ namespace DockerExporter
var newIds = containerIds.Except(trackedIds);
foreach (var id in newIds)
{
- _log.Debug($"Encountered container for the first time: {id}");
- _containerTrackers[id] = new ContainerTracker(id);
+ var displayName = GetDisplayNameOrId(allContainers.Single(c => c.ID == id));
+ _log.Debug($"Encountered container for the first time: {displayName} ({id}).");
+
+ _containerTrackers[id] = new ContainerTracker(id, displayName);
}
// Remove the trackers of any removed containers.
var removedIds = trackedIds.Except(containerIds);
foreach (var id in removedIds)
{
- _log.Debug($"Tracked container no longer exists. Removing: {id}");
var tracker = _containerTrackers[id];
+
+ _log.Debug($"Tracked container no longer exists. Removing: {tracker.DisplayName} ({id}).");
+
tracker.Dispose();
_containerTrackers.Remove(id);
}
}
+ /// <summary>
+ /// If a display name can be determined, returns it. Otherwise returns the container ID.
+ /// </summary>
+ private static string GetDisplayNameOrId(ContainerListResponse container)
+ {
+ var name = container.Names.FirstOrDefault();
+
+ if (!string.IsNullOrWhiteSpace(name))
+ return name.Trim('/');
+
+ return container.ID;
+ }
+
// Synchronized - only single threaded access occurs.
private readonly Dictionary<string, ContainerTracker> _containerTrackers = new Dictionary<string, ContainerTracker>();
diff --git a/DockerTrackerMetrics.cs b/DockerTrackerMetrics.cs
index a6a9513..9abbb06 100644
--- a/DockerTrackerMetrics.cs
+++ b/DockerTrackerMetrics.cs
@@ -8,6 +8,18 @@ namespace DockerExporter
.CreateGauge("docker_containers", "Number of containers that exist.");
public static readonly Counter ListContainersErrorCount = Metrics
- .CreateCounter("docker_list_containers_failed_total", "How many times the attempt to list all containers has failed.");
+ .CreateCounter("docker_probe_list_containers_failed_total", "How many times the attempt to list all containers has failed.");
+
+ public static readonly Histogram ProbeDuration = Metrics
+ .CreateHistogram("docker_probe_duration_seconds", "How long it takes to query Docker for the complete data set. Includes failed requests.", new HistogramConfiguration
+ {
+ Buckets = Constants.DurationBuckets
+ });
+
+ public static readonly Histogram ListContainersDuration = Metrics
+ .CreateHistogram("docker_probe_list_containers_duration_seconds", "How long it takes to query Docker for the list of containers. Includes failed requests.", new HistogramConfiguration
+ {
+ Buckets = Constants.DurationBuckets
+ });
}
}
diff --git a/ExporterLogic.cs b/ExporterLogic.cs
index 1ae03c7..6314db9 100644
--- a/ExporterLogic.cs
+++ b/ExporterLogic.cs
@@ -31,7 +31,7 @@ namespace DockerExporter
_tracker = new DockerTracker(new Uri(DockerUrl));
- Metrics.DefaultRegistry.AddBeforeCollectCallback(UpdateMetrics);
+ Metrics.DefaultRegistry.AddBeforeCollectCallback(UpdateMetricsAsync);
var server = new MetricServer(9417);
#if DEBUG
@@ -74,17 +74,19 @@ namespace DockerExporter
/// This acts as a primitive form of rate control to avoid overloading the fragile Docker API.
/// The implementation for this is in DockerTracker.
///
- private void UpdateMetrics()
+ private async Task UpdateMetricsAsync(CancellationToken cancel)
{
_log.Debug("Probing Docker.");
using var inlineCancellation = new CancellationTokenSource(Constants.MaxInlineUpdateDuration);
+ using var combinedCancellation = CancellationTokenSource.CreateLinkedTokenSource(inlineCancellation.Token, cancel);
+
var updateTask = _tracker!.TryUpdateAsync()
- .WithAbandonment(inlineCancellation.Token);
+ .WithAbandonment(combinedCancellation.Token);
try
{
- updateTask.WaitAndUnwrapExceptions();
+ await updateTask;
}
catch (TaskCanceledException) when (inlineCancellation.IsCancellationRequested)
{