using Axinom.Toolkit;
using Docker.DotNet;
using Docker.DotNet.Models;
using Prometheus;
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace DockerExporter
{
/// <summary>
/// Tracks the status of one container and exports metrics, updating the data when new scrapes are requested.
/// </summary>
/// <remarks>
/// NOT thread-safe! No concurrent usage is expected.
/// DockerTracker performs the necessary synchronization logic.
/// </remarks>
sealed class ContainerTracker : IDisposable
{
public string Id { get; }
public string DisplayName { get; }
public ContainerTracker(string id, string displayName)
{
Id = id;
DisplayName = displayName;
_metrics = new ContainerTrackerMetrics(displayName);
}
public void Dispose()
{
_resourceMetrics?.Dispose();
_stateMetrics?.Dispose();
}
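/// <summary>
/// Removes this container's metrics from the exported data, e.g. when the latest data could not be obtained.
/// </summary>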
public void Unpublish()
{
_resourceMetrics?.Unpublish();
_stateMetrics?.Unpublish();
}
/// <summary>
/// Requests the tracker to update its data set.
/// </summary>
/// <remarks>
/// Does not throw exceptions on transient failures; it merely logs and ignores them.
/// </remarks>
public async Task TryUpdateAsync(DockerClient client, CancellationToken cancel)
{
ContainerInspectResponse container;
var resourceStatsRecorder = new StatsRecorder();
try
{
// First, inspect to get some basic information.
using (_metrics.InspectContainerDuration.NewTimer())
container = await client.Containers.InspectContainerAsync(Id, cancel);
// Then query for the latest resource usage stats (if container is running).
if (container.State.Running)
{
using var statsTimer = _metrics.GetResourceStatsDuration.NewTimer();
await client.Containers.GetContainerStatsAsync(Id, new ContainerStatsParameters
{
Stream = false // Only get latest, then stop.
}, resourceStatsRecorder, cancel);
}
}
catch (Exception ex)
{
_metrics.FailedProbeCount.Inc();
_log.Error(Helpers.Debug.GetAllExceptionMessages(ex));
_log.Debug(ex.ToString()); // Only to verbose output.
// Errors are ignored - if we fail to get data, we just skip an update and log the failure.
// The next update will hopefully get past the error. For now, we just unpublish.
Unpublish();
return;
}
// Anything that goes wrong below is a fatal error that must not be swallowed, which is why it is outside the try block.
// Now that we have the data assembled, update the metrics.
if (_stateMetrics == null)
{
_log.Debug($"First update of state metrics for {DisplayName} ({Id}).");
_stateMetrics = new ContainerTrackerStateMetrics(DisplayName);
}
UpdateStateMetrics(_stateMetrics, container);
if (resourceStatsRecorder.Response != null)
{
if (_resourceMetrics == null)
{
_log.Debug($"Initializing resource metrics for {DisplayName} ({Id}).");
_resourceMetrics = new ContainerTrackerResourceMetrics(DisplayName);
}
UpdateResourceMetrics(_resourceMetrics, container, resourceStatsRecorder.Response);
}
else
{
// We may already have had resource metrics, and now they should go away (e.g. the container stopped).
// They will be recreated once resource stats become available again (e.g. after the container restarts).
_resourceMetrics?.Dispose();
_resourceMetrics = null;
}
}
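/// <summary>
/// Publishes the state metrics (restart count, running state, start time) from the container inspect response.
/// </summary>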
private void UpdateStateMetrics(ContainerTrackerStateMetrics metrics, ContainerInspectResponse container)
{
metrics.RestartCount.Set(container.RestartCount);
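// Encode the container state as a gauge: 1 = running, 0.5 = restarting, 0 = stopped.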
if (container.State.Running)
metrics.RunningState.Set(1);
else if (container.State.Restarting)
metrics.RunningState.Set(0.5);
else
metrics.RunningState.Set(0);
if (container.State.Running && !string.IsNullOrWhiteSpace(container.State.StartedAt))
metrics.StartTime.SetToTimeUtc(DateTimeOffset.Parse(container.State.StartedAt));
}
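/// <summary>
/// Publishes the resource metrics (CPU, memory, network I/O and disk I/O) from the latest stats snapshot.
/// </summary>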
private void UpdateResourceMetrics(ContainerTrackerResourceMetrics metrics, ContainerInspectResponse container, ContainerStatsResponse resources)
{
// Resource reporting differs considerably between operating systems.
// MemoryStats.Commit is only populated on Windows, and we assume a container cannot exist with 0 memory,
// so a nonzero value identifies a Windows container.
bool isWindowsContainer = resources.MemoryStats.Commit != 0;
// CPU usage
// The calculation compares the rate of increase of container CPU time against the rate of increase of available ("system") CPU time.
// The idea is to publish two series - one counting CPU used, the other counting CPU potentially available -
// both in the same (arbitrary) units, so the ratio of their increases always yields the correct percentage.
// Docker CPU usage on Windows counts 100ns ticks.
// Docker CPU usage on Linux counts unspecified ticks in relation to some other stats.
// See https://github.com/moby/moby/blob/eb131c5383db8cac633919f82abad86c99bffbe5/cli/command/container/stats_helpers.go#L175
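// For example, if CpuUsage increases by 5 million units and CpuCapacity by 50 million units
// between two scrapes, the container consumed 10% of the tracked CPU capacity over that interval.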
if (isWindowsContainer)
{
// To compensate for the core count on Windows, we normalize the container usage to a single core
// and use elapsed wall-clock time (a single core's worth of CPU time) as the available capacity.
// This way the Windows calculation is always a per-core average.
// A .NET TimeSpan tick is exactly 100ns, so it matches the unit Docker uses on Windows.
metrics.CpuCapacity.Set(CpuBaselineTimer.Elapsed.Ticks);
metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage / resources.NumProcs);
}
else
{
// On Linux both counters cover all cores: SystemUsage is the host's total CPU time, TotalUsage the container's.
metrics.CpuCapacity.Set(resources.CPUStats.SystemUsage);
metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage);
}
// Memory usage
if (isWindowsContainer)
{
// Windows reports the Private Working Set in Docker stats... but appears to use Commit Bytes to enforce the limit!
// We want to report the same metric that the limit applies to, so we publish Commit Bytes.
metrics.MemoryUsage.Set(resources.MemoryStats.Commit);
}
else
{
metrics.MemoryUsage.Set(resources.MemoryStats.Usage);
}
// Network I/O
if (resources.Networks == null)
{
metrics.TotalNetworkBytesIn.Set(0);
metrics.TotalNetworkBytesOut.Set(0);
}
else
{
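// Sum traffic across every network the container is attached to.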
metrics.TotalNetworkBytesIn.Set(resources.Networks.Values.Sum(n => (double)n.RxBytes));
metrics.TotalNetworkBytesOut.Set(resources.Networks.Values.Sum(n => (double)n.TxBytes));
}
// Disk I/O
if (isWindowsContainer)
{
metrics.TotalDiskBytesRead.Set(resources.StorageStats.ReadSizeBytes);
metrics.TotalDiskBytesWrite.Set(resources.StorageStats.WriteSizeBytes);
}
else
{
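// On Linux, disk I/O comes from the blkio "io_service_bytes" counters, summed across all devices.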
var readEntries = resources.BlkioStats.IoServiceBytesRecursive
.Where(entry => entry.Op.Equals("read", StringComparison.InvariantCultureIgnoreCase))
.ToArray();
var writeEntries = resources.BlkioStats.IoServiceBytesRecursive
.Where(entry => entry.Op.Equals("write", StringComparison.InvariantCultureIgnoreCase))
.ToArray();
var totalRead = readEntries.Any() ? readEntries.Sum(entry => (long)entry.Value) : 0;
var totalWrite = writeEntries.Any() ? writeEntries.Sum(entry => (long)entry.Value) : 0;
metrics.TotalDiskBytesRead.Set(totalRead);
metrics.TotalDiskBytesWrite.Set(totalWrite);
}
}
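/// <summary>
/// Captures the single stats snapshot that the Docker client reports via IProgress when streaming is disabled.
/// </summary>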
private sealed class StatsRecorder : IProgress<ContainerStatsResponse>
{
public ContainerStatsResponse? Response { get; private set; }
public void Report(ContainerStatsResponse value) => Response = value;
}
// We just need a monotonically increasing timer that does not use excessively large numbers (no 1970 base).
private static readonly Stopwatch CpuBaselineTimer = Stopwatch.StartNew();
private readonly ContainerTrackerMetrics _metrics;
private ContainerTrackerStateMetrics? _stateMetrics;
private ContainerTrackerResourceMetrics? _resourceMetrics;
private readonly LogSource _log = Log.Default;
}
}