root / plugins / docker / docker_ @ b1e3e601
Historique | Voir | Annoter | Télécharger (17,3 ko)
| 1 |
#!/usr/bin/env python3 |
|---|---|
| 2 |
""" |
| 3 |
=head1 NAME |
| 4 |
|
| 5 |
docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host. |
| 6 |
|
| 7 |
This wildcard plugin provides at the moment only the suffixes C<containers>, C<images>, C<status>, |
| 8 |
C<volumes>, C<cpu>, C<memory> and C<network>. |
| 9 |
|
| 10 |
=head1 INSTALLATION |
| 11 |
|
| 12 |
- Copy this plugin in your munin plugins directory |
| 13 |
- Install Python3 "docker" package |
| 14 |
|
| 15 |
=over 2 |
| 16 |
|
| 17 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers |
| 18 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu |
| 19 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images |
| 20 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory |
| 21 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network |
| 22 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status |
| 23 |
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes |
| 24 |
|
| 25 |
=back |
| 26 |
|
| 27 |
After the installation you need to restart your munin-node: |
| 28 |
|
| 29 |
=over 2 |
| 30 |
|
| 31 |
systemctl restart munin-node |
| 32 |
|
| 33 |
=back |
| 34 |
|
| 35 |
=head1 CONFIGURATION |
| 36 |
|
| 37 |
This plugin needs to run as root. You need to create a file named docker placed in the |
| 38 |
directory /etc/munin/plugin-conf.d/ with the following config (you can also use |
| 39 |
Docker environment variables here as described in |
| 40 |
https://docs.docker.com/compose/reference/envvars/): |
| 41 |
|
| 42 |
You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression |
| 43 |
which if matched will exclude the matching containers from the memory and cpu graphs. |
| 44 |
|
| 45 |
For example |
| 46 |
|
| 47 |
env.EXCLUDE_CONTAINER_NAME runner |
| 48 |
|
| 49 |
Would exclude all containers with the word "runner" in the name. |
| 50 |
|
| 51 |
|
| 52 |
=over 2 |
| 53 |
|
| 54 |
[docker_*] |
| 55 |
user root |
| 56 |
env.DOCKER_HOST unix://var/run/docker.sock |
| 57 |
env.EXCLUDE_CONTAINER_NAME regexp |
| 58 |
|
| 59 |
=back |
| 60 |
|
| 61 |
=head1 AUTHORS |
| 62 |
|
| 63 |
This section has been reverse-engineered from git logs |
| 64 |
|
| 65 |
* Codimp <contact@lithio.fr>: original rewrite |
| 66 |
* Rowan Wookey <admin@rwky.net>: performance improvement |
| 67 |
* Olivier Mehani <shtrom@ssji.net>: Network support, ClientWrapper, general |
| 68 |
cleanup |
| 69 |
|
| 70 |
=head1 MAGIC MARKERS |
| 71 |
|
| 72 |
#%# family=auto |
| 73 |
#%# capabilities=autoconf suggest |
| 74 |
|
| 75 |
""" |
| 76 |
|
| 77 |
import os |
| 78 |
import sys |
| 79 |
import re |
| 80 |
try: |
| 81 |
from functools import cached_property |
| 82 |
except ImportError: |
| 83 |
# If cached_property is not available, |
| 84 |
# just use the property decorator, without caching |
| 85 |
# This is for backward compatibility with Python<3.8 |
| 86 |
cached_property = property |
| 87 |
from multiprocessing import Process, Queue |
| 88 |
|
| 89 |
|
| 90 |
def sorted_by_creation_date(func):
    """
    Decorate `func` so that the collection it returns is sorted by creation date.

    Every item returned by `func` must expose an `attrs` mapping. The sort key
    is `attrs['CreatedAt']` when that key is present, and `attrs['Created']`
    otherwise.

    The wrapper keeps the name and docstring of `func`.
    """
    # Imported locally: the module itself only imports `cached_property`
    # from functools.
    from functools import wraps

    def creation_date(item):
        attrs = item.attrs
        return attrs['CreatedAt'] if 'CreatedAt' in attrs else attrs['Created']

    @wraps(func)
    def sorted_func(*args, **kwargs):
        return sorted(func(*args, **kwargs), key=creation_date)

    return sorted_func
| 101 |
|
| 102 |
|
| 103 |
class ClientWrapper:
    """
    A small wrapper for the docker client, to centralise some parsing logic,
    and support caching.

    In addition, when the exclude_re parameter is not None,
    any container whose name is matched by the RE will be excluded from reports.
    """
    client = None
    exclude = None

    def __init__(self, client, exclude_re=None):
        """
        Wrap `client`; `exclude_re`, when truthy, is compiled and used to
        filter containers by name.
        """
        self.client = client
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    def _is_reported(self, container):
        """Return True unless the container name matches the exclusion RE."""
        return not self.exclude or not self.exclude.search(container.name)

    @cached_property
    @sorted_by_creation_date
    def containers(self):
        """Containers returned by `containers.list()`, minus the excluded ones."""
        # Apply the same exclusion as `all_containers`, so that the values
        # fetched for a graph match the fields declared in its config.
        return [c for c in self.client.containers.list()
                if self._is_reported(c)]

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        """Containers returned by `containers.list(all=True)`, minus the excluded ones."""
        return [c for c in self.client.containers.list(all=True)
                if self._is_reported(c)]

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        """Images from `all_images` that are in neither `images` nor `dangling_images`."""
        # Subtract both sets: dangling images have their own series in the
        # stacked images graph and must not be counted here as well.
        return list(
            set(self.all_images)
            .difference(self.images)
            .difference(self.dangling_images)
        )

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        """Images returned by `images.list(all=True)`."""
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        """Images returned by `images.list()`, minus the dangling ones."""
        images = self.client.images.list()
        return list(
            set(images)
            .difference(self.dangling_images)
        )

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        """Images returned by `images.list()` with the `dangling` filter set."""
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        """Volumes returned by `volumes.list()`."""
        return self.client.volumes.list()
| 168 |
|
| 169 |
|
| 170 |
def container_summary(container):
    """
    Return the container name, followed by its attributes in parentheses
    when `container_attributes` yields a non-empty string.
    """
    name = container.name
    details = container_attributes(container)
    if not details:
        return name
    return name + f' ({details})'
| 176 |
|
| 177 |
|
| 178 |
def container_attributes(container):
    """
    Return a comma-separated string made of the tags of the container's image
    followed by the container's `Created` attribute.
    """
    # Build a new list rather than appending to the one returned by
    # `image.tags`, so the image object is never modified by this call.
    attributes = [*container.image.tags, container.attrs['Created']]
    return ', '.join(attributes)
| 182 |
|
| 183 |
|
| 184 |
def print_containers_status(client):
    """
    Print, for each known container status, the number of containers of
    `client.all_containers` in that status (`<status>.value`) and a summary
    of them (`<status>.extinfo`).

    Containers whose status is not one of the known ones are ignored.
    """
    # Output order of the fields.
    states = ('running', 'paused', 'created', 'restarting',
              'removing', 'exited', 'dead')
    by_state = {state: [] for state in states}

    for container in client.all_containers:
        bucket = by_state.get(container.status)
        if bucket is not None:
            bucket.append(container)

    for state in states:
        members = by_state[state]
        print(f'{state}.value', len(members))
        print(f'{state}.extinfo', ', '.join(container_summary(c) for c in members))
| 221 |
|
| 222 |
|
| 223 |
def image_summary(image):
    """
    Return `<short_id> (<tags>, <Created>, <size> MiB)` for `image`, where the
    size is `attrs['Size']` converted to MiB and rounded to two decimals.
    """
    size_mib = round(image.attrs['Size'] / 1024**2, 2)
    # Build a new list rather than appending to the one returned by
    # `image.tags`, so the image object is never modified by this call.
    attributes = [*image.tags, image.attrs['Created'], f"{size_mib} MiB"]
    return f"{image.short_id} ({', '.join(attributes)})"
| 228 |
|
| 229 |
|
| 230 |
def print_images_count(client):
    """
    Print the number (`.value`) and a summary (`.extinfo`) of the
    intermediate, regular and dangling images exposed by `client`.
    """
    tagged = client.images
    intermediate = client.intermediate_images
    dangling = client.dangling_images

    series = (
        ('intermediate_quantity', intermediate),
        ('images_quantity', tagged),
        ('dangling_quantity', dangling),
    )
    for field, group in series:
        print(f'{field}.value', len(group))
        print(f'{field}.extinfo', ', '.join(image_summary(i) for i in group))
| 241 |
|
| 242 |
|
| 243 |
def get_container_stats(container, q):
    """Request a non-streamed stats snapshot of `container` and put it on `q`."""
    snapshot = container.stats(stream=False)
    q.put(snapshot)
| 245 |
|
| 246 |
|
| 247 |
def parallel_container_stats(client):
    """
    Collect a stats snapshot for every container of `client.containers`,
    starting one worker process per container so the requests run in parallel.

    Returns the items view of a dict mapping each container to its snapshot.
    """
    workers = []
    for container in client.containers:
        q = Queue()
        p = Process(target=get_container_stats, args=(container, q))
        workers.append((container, p, q))
        p.start()

    stats = {}
    for container, p, q in workers:
        # Drain the queue *before* joining: a process that has put data on a
        # queue may not terminate until that data has been consumed, so
        # joining first can deadlock (see the multiprocessing guidelines).
        stats[container] = q.get()
        p.join()
    return stats.items()
| 259 |
|
| 260 |
|
| 261 |
def print_containers_cpu(client):
    """
    Print the CPU usage, in percent, of every container returned by
    `parallel_container_stats` (`<name>.value`), along with the container
    attributes (`<name>.extinfo`).

    The percentage is the container CPU time delta divided by the system CPU
    time delta between the two samples of the snapshot, scaled by the number
    of CPUs; it stays at 0.0 when the system delta is not positive.
    """
    for container, stats in parallel_container_stats(client):
        cpu_stats = stats["cpu_stats"]
        precpu_stats = stats["precpu_stats"]

        # `percpu_usage` is not always part of the snapshot (the Docker API
        # reference documents it as cgroup v1 only); fall back to
        # `online_cpus`, then to the CPU count of the host.
        percpu_usage = cpu_stats["cpu_usage"].get("percpu_usage")
        if percpu_usage:
            cpu_count = len(percpu_usage)
        else:
            cpu_count = cpu_stats.get("online_cpus") or os.cpu_count() or 1

        cpu_percent = 0.0
        cpu_delta = (float(cpu_stats["cpu_usage"]["total_usage"])
                     - float(precpu_stats["cpu_usage"]["total_usage"]))
        system_delta = (float(cpu_stats["system_cpu_usage"])
                        - float(precpu_stats["system_cpu_usage"]))
        if system_delta > 0.0:
            cpu_percent = cpu_delta / system_delta * 100.0 * cpu_count
        print(container.name + '.value', cpu_percent)
        print(container.name + '.extinfo', container_attributes(container))
| 273 |
|
| 274 |
|
| 275 |
def print_containers_memory(client):
    """
    Print the memory usage of every container returned by
    `parallel_container_stats` (`<name>.value`), along with the container
    attributes (`<name>.extinfo`).

    The value is `memory_stats.stats.total_rss` when the snapshot provides
    it, otherwise `memory_stats.usage`, otherwise `U` (munin's marker for an
    unknown value).
    """
    for container, stats in parallel_container_stats(client):
        memory_stats = stats['memory_stats']
        # NOTE(review): `total_rss` is reportedly absent on cgroup v2 hosts;
        # `usage` is used as the closest available figure there - confirm
        # this is the desired metric.
        memory = memory_stats.get('stats', {}).get('total_rss')
        if memory is None:
            memory = memory_stats.get('usage', 'U')
        print(container.name + '.value', memory)
        print(container.name + '.extinfo', container_attributes(container))
| 279 |
|
| 280 |
|
| 281 |
def print_containers_network(client):
    """
    Print the total number of bytes sent (`<name>_up.value`) and received
    (`<name>_down.value`) by every container returned by
    `parallel_container_stats`, summed over all its networks, along with the
    container attributes (`<name>_up.extinfo`).
    """
    for container, stats in parallel_container_stats(client):
        tx_bytes = 0
        rx_bytes = 0
        # A snapshot may come without a `networks` entry; report zero
        # traffic for such a container instead of failing for all of them.
        # NOTE(review): presumably the case for containers that use the host
        # network or no network at all - confirm.
        for data in (stats.get('networks') or {}).values():
            tx_bytes += data['tx_bytes']
            rx_bytes += data['rx_bytes']
        print(container.name + '_up.value', tx_bytes)
        print(container.name + '_down.value', rx_bytes)
        # The graph config only declares the `<name>_up` and `<name>_down`
        # fields, so the extinfo is attached to the visible `<name>_up` one.
        print(container.name + '_up.extinfo', container_attributes(container))
| 291 |
|
| 292 |
|
| 293 |
def volume_summary(volume):
    """
    Return the short id of `volume`, followed by its comma-separated labels
    in parentheses when `attrs['Labels']` is not empty.
    """
    labels = volume.attrs['Labels']
    if not labels:
        return f"{volume.short_id}"
    return f"{volume.short_id} ({', '.join(labels)})"
| 298 |
|
| 299 |
|
| 300 |
def main():
    """
    Entry point of the plugin.

    The munin mode is read from the first command-line argument (`autoconf`,
    `suggest`, `config`, or anything else to fetch values), and the graph to
    handle (the "wildcard") from the part of the plugin file name that
    follows `docker_`.

    - `autoconf`: print `yes` if the docker daemon answers a ping, `no (...)`
      otherwise; exit with status 0 in both cases.
    - `suggest`: print the suggested wildcards.
    - `config`: print the configuration of the graph selected by the wildcard.
    - otherwise: print the values of the graph selected by the wildcard.

    If the docker client cannot be set up outside of `autoconf` mode,
    `no (...)` is printed and the exit status is 1.
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else ""
    # Only look at the file name: a directory of the path may contain
    # "docker_" too. A name without "docker_" yields an empty wildcard
    # instead of an IndexError.
    plugin_name = os.path.basename(sys.argv[0])
    wildcard = plugin_name.partition("docker_")[2].split("_")[0]

    try:
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        print(f'no ({e})')
        if mode == "autoconf":
            sys.exit(0)
        sys.exit(1)

    if mode == "suggest":
        print("cpu")
        print("images")
        print("memory")
        print("network")
        print("status")
        print("volumes")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    if wildcard == "status":
        if mode == "config":
            print("graph_title Docker status")
            print("graph_vlabel containers")
            print("graph_category virtualization")
            print("graph_total All containers")
            print("running.label RUNNING")
            print("running.draw AREASTACK")
            print("running.info Running containers can be manipulated with "
                  "`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or "
                  "commands run in them with `docker container exec "
                  "[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`"
                  )
            print("paused.label PAUSED")
            print("paused.draw AREASTACK")
            print("paused.info Paused containers can be resumed with "
                  "`docker container unpause <NAME>`")
            print("created.label CREATED")
            print("created.draw AREASTACK")
            print("created.info New containers can be created with "
                  "`docker container create --name <NAME> <IMAGE_ID >` or "
                  "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
            print("restarting.label RESTARTING")
            print("restarting.draw AREASTACK")
            print("restarting.info Containers can be restarted with "
                  "`docker container restart <NAME>`")
            print("removing.label REMOVING")
            print("removing.draw AREASTACK")
            print("removing.info Containers can be removed with "
                  "`docker container rm <NAME>`")
            print("exited.label EXITED")
            print("exited.draw AREASTACK")
            print("exited.info Exited containers can be started with "
                  "`docker container start [--attach] <NAME>`")
            print("dead.label DEAD")
            print("dead.draw AREASTACK")
            print("dead.warning 1")
            print("dead.info Dead containers can be started with "
                  "`docker container start <NAME>`")
        else:
            print_containers_status(client)
    elif wildcard == "containers":
        if mode == "config":
            print("graph_title Docker containers")
            print("graph_vlabel containers")
            print("graph_category virtualization")
            print("containers_quantity.label Containers")
        else:
            print('containers_quantity.value', len(client.containers))
    elif wildcard == "images":
        if mode == "config":
            print("graph_title Docker images")
            print("graph_vlabel images")
            print("graph_category virtualization")
            print("graph_total All images")
            print("intermediate_quantity.label Intermediate images")
            print("intermediate_quantity.draw AREASTACK")
            print("intermediate_quantity.info All unused images can be deleted with "
                  "`docker image prune --all`")
            print("images_quantity.label Images")
            print("images_quantity.draw AREASTACK")
            print("images_quantity.info Images can be used in containers with "
                  "`docker container create --name <NAME> <IMAGE_ID >` or "
                  "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
            print("dangling_quantity.label Dangling images")
            print("dangling_quantity.draw AREASTACK")
            # A space was missing between the two halves of this message.
            print("dangling_quantity.info Dangling images can be deleted with "
                  "`docker image prune` "
                  "or tagged with `docker image tag <IMAGE_ID> <NAME>`")
            print("dangling_quantity.warning 10")
        else:
            print_images_count(client)
    elif wildcard == "volumes":
        if mode == "config":
            print("graph_title Docker volumes")
            print("graph_vlabel volumes")
            print("graph_category virtualization")
            print("volumes_quantity.label Volumes")
            print("volumes_quantity.draw AREASTACK")
            print("volumes_quantity.info Unused volumes can be deleted with "
                  "`docker volume prune`")
        else:
            print('volumes_quantity.value', len(client.volumes))
            print('volumes_quantity.extinfo', ', '.join(volume_summary(v) for v in client.volumes))
    elif wildcard == "cpu":
        if mode == "config":
            # os.cpu_count() returns None when the count cannot be determined.
            graphlimit = str((os.cpu_count() or 1) * 100)
            print("graph_title Docker containers CPU usage")
            print("graph_args --base 1000 -r --lower-limit 0 --upper-limit " + graphlimit)
            print("graph_scale no")
            print("graph_period second")
            print("graph_vlabel CPU usage (%)")
            print("graph_category virtualization")
            print("graph_info This graph shows docker container CPU usage.")
            print("graph_total Total CPU usage")
            for container in client.all_containers:
                print("{}.label {}".format(container.name, container.name))
                print("{}.draw AREASTACK".format(container.name))
                print("{}.info {}".format(container.name, container_attributes(container)))
        else:
            print_containers_cpu(client)
    elif wildcard == "memory":
        if mode == "config":
            print("graph_title Docker containers memory usage")
            print("graph_args --base 1024 -l 0")
            print("graph_vlabel Bytes")
            print("graph_category virtualization")
            print("graph_info This graph shows docker container memory usage.")
            print("graph_total Total memory usage")
            for container in client.all_containers:
                print("{}.label {}".format(container.name, container.name))
                print("{}.draw AREASTACK".format(container.name))
                print("{}.info {}".format(container.name, container_attributes(container)))
        else:
            print_containers_memory(client)
    elif wildcard == "network":
        if mode == "config":
            print("graph_title Docker containers network usage")
            print("graph_args --base 1024 -l 0")
            print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
            print("graph_category virtualization")
            print("graph_info This graph shows docker container network usage.")
            print("graph_total Total network usage")
            for container in client.all_containers:
                print("{}_down.label {}_received".format(container.name, container.name))
                print("{}_down.type DERIVE".format(container.name))
                print("{}_down.min 0".format(container.name))
                print("{}_down.graph no".format(container.name))
                print("{}_down.cdef {}_down,8,*".format(container.name, container.name))
                print("{}_up.label {}".format(container.name, container.name))
                print("{}_up.draw LINESTACK1".format(container.name))
                print("{}_up.type DERIVE".format(container.name))
                print("{}_up.min 0".format(container.name))
                print("{}_up.negative {}_down".format(container.name, container.name))
                print("{}_up.cdef {}_up,8,*".format(container.name, container.name))
                print("{}_up.info {}".format(container.name, container_attributes(container)))
        else:
            print_containers_network(client)


# Run the plugin only when the file is executed as a script.
if __name__ == '__main__':
    main()
