From 5afbf4e4287d891d5d964c9b135dc80b2d4350a0 Mon Sep 17 00:00:00 2001 From: Simon Gruber Date: Sun, 14 Dec 2025 19:54:57 +0100 Subject: [PATCH] Added container disk usage --- Readme.md | 53 +++++++++++++++++++++++++-- src/collectors/container_collector.py | 36 ++++++++++++++++++ 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/Readme.md b/Readme.md index eccfa49..8a1b3bc 100644 --- a/Readme.md +++ b/Readme.md @@ -10,6 +10,7 @@ A lightweight, modular Docker monitoring tool that collects comprehensive metric - CPU usage percentage (accurate per-container calculation) - Memory usage (bytes and percentage) +- Disk usage per container (filesystem size in bytes) - Network I/O (rx/tx bytes and packets) - Block I/O (read/write bytes) - Container state (running=2, paused=1, stopped=0) @@ -73,6 +74,7 @@ All metrics follow the pattern: `{prefix}.{category}.{name}.{metric}` docker-metrics.containers.{container_name}.cpu_percent docker-metrics.containers.{container_name}.memory_bytes docker-metrics.containers.{container_name}.memory_percent +docker-metrics.containers.{container_name}.disk_usage_bytes docker-metrics.containers.{container_name}.state docker-metrics.containers.{container_name}.health docker-metrics.containers.{container_name}.restart_count @@ -102,11 +104,54 @@ docker-metrics.aggregated.system.container_utilization_percent ## 📊 Grafana Queries -A few example queries for common Grafana selections (container, host, or aggregate views) +Powerful queries to visualize your Docker metrics: -- Top 10 CPU consumers: `aliasByNode(highestMax(docker-metrics.containers.*.cpu_percent, 10), 2)` -- Total network traffic: `sumSeries(docker-metrics.containers.*.network.rx_bytes)` -- Container health: `aliasByNode(docker-metrics.containers.*.health, 2)` +### Container Performance + +- **Top 10 CPU consumers**: `aliasByNode(highestMax(docker-metrics.containers.*.cpu_percent, 10), 2)` +- **Top 10 memory users**: 
`aliasByNode(highestMax(docker-metrics.containers.*.memory_bytes, 10), 2)` +- **Average CPU across all containers**: `averageSeries(docker-metrics.containers.*.cpu_percent)` +- **Total memory used by all containers**: `sumSeries(docker-metrics.containers.*.memory_bytes)` +- **Container health status**: `aliasByNode(docker-metrics.containers.*.health, 2)` (`2` = healthy, `1` = starting, `0` = unhealthy, `-1` = not available) + +### Network Monitoring + +- **Total network traffic (RX + TX)**: `sumSeries(docker-metrics.containers.*.network.{rx,tx}_bytes)` +- **Top 5 network receivers**: `aliasByNode(highestMax(docker-metrics.containers.*.network.rx_bytes, 5), 2)` +- **Top 5 network transmitters**: `aliasByNode(highestMax(docker-metrics.containers.*.network.tx_bytes, 5), 2)` +- **Network packets per interval (delta between samples)**: `derivative(sumSeries(docker-metrics.containers.*.network.{rx,tx}_packets))` + +### Storage & Disk I/O + +- **Total Docker storage usage**: `sumSeries(docker-metrics.system.{images,containers,volumes}.total_size_bytes)` +- **Storage by category**: `aliasByNode(docker-metrics.system.*.total_size_bytes, 2)` +- **Top 10 containers by disk usage**: `aliasByNode(highestMax(docker-metrics.containers.*.disk_usage_bytes, 10), 2)` +- **Total disk usage across all containers**: `sumSeries(docker-metrics.containers.*.disk_usage_bytes)` +- **Container disk usage over time**: `aliasByNode(docker-metrics.containers.*.disk_usage_bytes, 2)` +- **Top 5 disk readers**: `aliasByNode(highestMax(docker-metrics.containers.*.blkio.read_bytes, 5), 2)` +- **Top 5 disk writers**: `aliasByNode(highestMax(docker-metrics.containers.*.blkio.write_bytes, 5), 2)` +- **Total block I/O bytes per interval (read + write)**: `derivative(sumSeries(docker-metrics.containers.*.blkio.{read,write}_bytes))` + +### System Overview + +- **Container utilization %**: `docker-metrics.aggregated.system.container_utilization_percent` +- **Running vs total containers**: `aliasByNode(docker-metrics.system.containers.{running,total}, 3)` 
+- **Container states breakdown**: `aliasByNode(docker-metrics.system.containers.*, 3)` +- **Unused volumes**: `docker-metrics.aggregated.volumes.unused_count` +- **Volume usage ratio**: `divideSeries(docker-metrics.aggregated.volumes.in_use_count, docker-metrics.aggregated.volumes.total_count)` + +### Container Lifecycle + +- **Containers by state**: `aliasByNode(docker-metrics.containers.*.state, 2)` (`2` = running, `1` = paused, `0` = stopped) +- **Restart count trends**: `aliasByNode(docker-metrics.containers.*.restart_count, 2)` +- **Most-restarted containers (top 5 by current restart count)**: `aliasByNode(highestCurrent(docker-metrics.containers.*.restart_count, 5), 2)` + +### Advanced Queries + +- **Memory usage % across containers**: `aliasByNode(docker-metrics.containers.*.memory_percent, 2)` +- **Containers using most network bandwidth**: `aliasByNode(highestMax(sumSeriesWithWildcards(docker-metrics.containers.*.network.{rx,tx}_bytes, 3), 10), 2)` +- **I/O per container (read + write)**: `aliasByNode(sumSeriesWithWildcards(docker-metrics.containers.*.blkio.{read,write}_bytes, 3), 2)` +- **Active images ratio**: `divideSeries(docker-metrics.system.images.active_count, docker-metrics.system.images.total)` ## 🛠️ Development diff --git a/src/collectors/container_collector.py b/src/collectors/container_collector.py index 4ffc2e4..70ed1c8 100644 --- a/src/collectors/container_collector.py +++ b/src/collectors/container_collector.py @@ -59,6 +59,18 @@ class ContainerCollector(BaseCollector): 'timestamp': timestamp }) + # Disk usage metrics (available for all containers) + try: + disk_usage = self._get_container_disk_usage(container) + if disk_usage is not None: + metrics.append({ + 'name': f'containers.{container_name}.disk_usage_bytes', + 'value': disk_usage, + 'timestamp': timestamp + }) + except Exception as e: + print(f"Warning: Could not collect disk usage for {container_name}: {e}") + # Only collect resource metrics for running containers if state == 'running': try: @@ -226,3 +238,27 
@@ class ContainerCollector(BaseCollector): print(f"Warning: Error getting I/O metrics: {e}") return metrics + + def _get_container_disk_usage(self, container) -> int: + """ + Get the disk usage for a container in bytes. + This includes the writable layer size (SizeRw) and root filesystem size (SizeRootFs). + """ + try: + # Reload container to get latest attributes with size information + container.reload() + attrs = container.attrs + + # SizeRw: Size of files that have been created or changed + size_rw = attrs.get('SizeRw', 0) + + # SizeRootFs: Total size of container filesystem (read-only + writable layers) + size_rootfs = attrs.get('SizeRootFs', 0) + + # Return the total size (SizeRootFs includes SizeRw) + # If SizeRootFs is not available, fall back to SizeRw + return size_rootfs if size_rootfs > 0 else size_rw + + except Exception as e: + print(f"Warning: Error getting disk usage: {e}") + return None