Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / docker / docker_ @ 7ac85085

Historique | Voir | Annoter | Télécharger (18,9 ko)

1
#!/usr/bin/env python3
2
"""
3
=head1 NAME
4

    
5
docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host.
6

    
7
This wildcard plugin provides series C<containers>, C<images>, C<status>,
8
C<volumes>, C<cpu>, C<memory> and C<network> as separate graphs. It also
9
supports a C<multi> suffix that provides all of those as a multigraph.
10

    
11
=head1 INSTALLATION
12

    
13
- Copy this plugin in your munin plugins directory
14
- Install Python3 "docker" package
15

    
16
=over 2
17

    
18
If you want all the graphs as a multigraph, create a single multi symlink.
19

    
20
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_multi
21

    
22
Or choose a subset of those you want.
23

    
24
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers
25
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu
26
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images
27
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory
28
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network
29
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status
30
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes
31

    
32
=back
33

    
34
After the installation you need to restart your munin-node:
35

    
36
=over 2
37

    
38
    systemctl restart munin-node
39

    
40
=back
41

    
42
=head1 CONFIGURATION
43

    
44
This plugin needs to run as root; you need to create a file named docker placed in the
45
directory /etc/munin/plugin-conf.d/ with the following config (you can also use
46
Docker environment variables here as described in
47
https://docs.docker.com/compose/reference/envvars/):
48

    
49
You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression
50
which if matched will exclude the matching containers from the memory and cpu graphs.
51

    
52
For example
53

    
54
 env.EXCLUDE_CONTAINER_NAME runner
55

    
56
Would exclude all containers with the word "runner" in the name.
57

    
58

    
59
=over 2
60

    
61
    [docker_*]
62
    group docker
63
    env.DOCKER_HOST unix://run/docker.sock
64
    env.EXCLUDE_CONTAINER_NAME regexp
65

    
66
=back
67

    
68
You may need to pick a different group depending on the name schema of your
69
distribution.  Or maybe use "user root", if nothing else works.
70

    
71
=head1 AUTHORS
72

    
73
This section has been reverse-engineered from git logs
74

    
75
* Codimp <contact@lithio.fr>: original rewrite
76

    
77
* Rowan Wookey <admin@rwky.net>: performance improvement
78

    
79
* Olivier Mehani <shtrom@ssji.net>: Network support, ClientWrapper, general cleanup, multigraph
80

    
81
=head1 MAGIC MARKERS
82

    
83
 #%# family=auto
84
 #%# capabilities=autoconf suggest multigraph
85

    
86
=cut
87
"""
88

    
89
import os
90
import sys
91
import re
92
try:
93
    from functools import cached_property
94
except ImportError:
95
    # If cached_property is not available,
96
    # just use the property decorator, without caching
97
    # This is for backward compatibility with Python<3.8
98
    cached_property = property
99
from multiprocessing import Process, Queue
100

    
101

    
102
def sorted_by_creation_date(func):
    """
    Decorator sorting the iterable returned by ``func`` by creation date.

    Every returned item must expose an ``attrs`` mapping. The sort key is
    ``attrs['CreatedAt']`` when that key exists, ``attrs['Created']`` otherwise.
    The wrapped callable returns a new, sorted list.
    """
    def creation_date(item):
        # Some objects carry 'CreatedAt', others only 'Created'.
        if 'CreatedAt' in item.attrs:
            return item.attrs['CreatedAt']
        return item.attrs['Created']

    def sorted_func(*args, **kwargs):
        unsorted = func(*args, **kwargs)
        return sorted(unsorted, key=creation_date)

    return sorted_func
113

    
114

    
115
class ClientWrapper:
    """
    A small wrapper for the docker client, to centralise some parsing logic,
    and support caching.

    In addition, when the exclude_re parameter is not None, any container
    whose name is matched by the RE is excluded from ``all_containers``.

    All listing attributes are sorted by creation date. They are cached per
    instance when ``functools.cached_property`` is available; otherwise they
    are plain properties and are re-evaluated on every access.
    """
    # Underlying docker client object, set by __init__
    client = None
    # Compiled exclusion regular expression, or None when no filter is set
    exclude = None

    def __init__(self, client, exclude_re=None):
        """
        :param client: the docker client to wrap
        :param exclude_re: optional regular expression (string); containers
            whose name matches it are left out of ``all_containers``
        """
        self.client = client
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    @property
    def api(self):
        """The low-level API object of the wrapped client."""
        return self.client.api

    @cached_property
    @sorted_by_creation_date
    def containers(self):
        """Containers returned by ``client.containers.list()`` with default arguments."""
        return self.client.containers.list()

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        """Containers returned by ``list(all=True)``, minus those matching ``exclude``."""
        return [c for c in self.client.containers.list(all=True)
                if not self.exclude
                or not self.exclude.search(c.name)]

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        """
        Images that are neither in ``images`` nor in ``dangling_images``.

        Dangling images are subtracted here as well: they are reported in
        their own series, and counting them as intermediate too would make
        them appear twice in the stacked total.
        """
        reported_elsewhere = set(self.images).union(self.dangling_images)
        return list(set(self.all_images).difference(reported_elsewhere))

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        """Images returned by ``client.images.list(all=True)``."""
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        """Images returned by ``client.images.list()``, minus the dangling ones."""
        images = self.client.images.list()
        return list(
            set(images)
            .difference(
                set(self.dangling_images))
        )

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        """Images returned by ``client.images.list`` with the ``dangling`` filter."""
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        """Volumes returned by ``client.volumes.list()``."""
        return self.client.volumes.list()
184

    
185

    
186
def container_summary(container, *args):
    """
    Return a one-line description of a container.

    The result is the container name, followed by its attributes (as built by
    ``container_attributes``) in parentheses when that string is not empty.
    Extra positional arguments are forwarded to ``container_attributes``.
    """
    name = container.name
    details = container_attributes(container, *args)
    if not details:
        return name
    return name + f' ({details})'
192

    
193

    
194
def container_attributes(container, *args):
    """
    Return a comma-separated string describing a container.

    The string lists the tags of the container's image, then the container's
    ``attrs['Created']`` value, then any extra positional arguments.

    :param container: object exposing ``image.tags`` and ``attrs['Created']``
    :param args: extra strings appended to the description
    """
    # Work on a copy: appending to the list returned by image.tags would
    # otherwise modify an object owned by the caller.
    attributes = list(container.image.tags)
    attributes.append(str(container.attrs['Created']))
    attributes.extend(args)
    return ', '.join(attributes)
198

    
199

    
200
def print_containers_status(client):
    """
    Print, for each container state, the number of containers in that state
    (``<state>.value``) and a summary of them (``<state>.extinfo``).

    Containers are taken from ``client.all_containers``. A container whose
    status is 'running' is inspected through ``client.api``; it is counted as
    'unhealthy' when its health status says so. Containers with any other,
    unlisted status are ignored.
    """
    # Series names, in output order
    series = ('running', 'unhealthy', 'paused', 'created',
              'restarting', 'removing', 'exited', 'dead')
    by_state = {name: [] for name in series}

    for container in client.all_containers:
        state_name = container.status
        if state_name == 'running':
            details = client.api.inspect_container(container.name)['State']
            if details.get('Health', {}).get('Status') == 'unhealthy':
                state_name = 'unhealthy'
        elif state_name == 'unhealthy' or state_name not in by_state:
            # 'unhealthy' is only derived from the health check, never taken
            # from the raw status; unknown statuses are not reported.
            continue
        by_state[state_name].append(container)

    for name in series:
        members = by_state[name]
        print(f'{name}.value', len(members))
        print(f'{name}.extinfo', ', '.join(container_summary(c) for c in members))
244

    
245

    
246
def image_summary(image):
    """
    Return a one-line description of an image.

    The format is ``<short_id> (<tags>, <created>, <size> MiB)``, where the
    size is ``attrs['Size']`` converted to MiB and rounded to two decimals.

    :param image: object exposing ``tags``, ``short_id``, ``attrs['Created']``
        and ``attrs['Size']``
    """
    # Copy the tag list so the image object is never modified.
    attributes = list(image.tags)
    # str() keeps join() working if the creation value is not a string.
    attributes.append(str(image.attrs['Created']))
    size_mib = round(image.attrs['Size'] / 1024**2, 2)
    attributes.append(f"{size_mib} MiB")
    return f"{image.short_id} ({', '.join(attributes)})"
251

    
252

    
253
def print_images_count(client):
    """
    Print the number (``.value``) and a summary (``.extinfo``) of the
    intermediate, regular and dangling images known to ``client``.
    """
    regular = client.images
    intermediate = client.intermediate_images
    dangling = client.dangling_images

    # (field name, images) pairs, in output order
    report = (
        ('intermediate_quantity', intermediate),
        ('images_quantity', regular),
        ('dangling_quantity', dangling),
    )
    for field, members in report:
        print(f'{field}.value', len(members))
        print(f'{field}.extinfo', ', '.join(image_summary(i) for i in members))
264

    
265

    
266
def get_container_stats(container, q):
    """
    Fetch one non-streaming stats snapshot of ``container`` and put it on the
    queue ``q``.
    """
    snapshot = container.stats(stream=False)
    q.put(snapshot)
268

    
269

    
270
def parallel_container_stats(client):
    """
    Collect the stats of every container in ``client.containers`` in parallel.

    One process per container runs ``get_container_stats`` and hands its
    result back through a dedicated queue. Containers whose name matches
    ``client.exclude`` (when set) are skipped, so that no stats are fetched
    for containers the configuration filters out.

    :return: a view of ``(container, stats)`` pairs
    """
    exclude = getattr(client, 'exclude', None)
    workers = []
    for container in client.containers:
        if exclude and exclude.search(container.name):
            continue
        q = Queue()
        p = Process(target=get_container_stats, args=(container, q))
        workers.append((container, p, q))
        p.start()

    stats = {}
    for container, p, q in workers:
        # Drain the queue before joining: a child that has put data on a
        # queue may not exit until that data has been consumed, so joining
        # first can deadlock.
        stats[container] = q.get()
        p.join()
    return stats.items()
282

    
283

    
284
def print_containers_cpu(client):
    """
    Print the CPU usage (in percent) of each container, with its attributes
    as ``extinfo``.

    The percentage is the container's CPU time delta divided by the system
    CPU time delta between the two samples of a stats snapshot, scaled by the
    number of CPUs. Missing counters are treated as 0, which yields a usage of
    0.0 instead of aborting the whole run.
    """
    for container, stats in parallel_container_stats(client):
        cpu_stats = stats.get("cpu_stats") or {}
        precpu_stats = stats.get("precpu_stats") or {}

        cpu_delta = (
            float((cpu_stats.get("cpu_usage") or {}).get("total_usage", 0))
            - float((precpu_stats.get("cpu_usage") or {}).get("total_usage", 0))
        )
        system_delta = (float(cpu_stats.get("system_cpu_usage", 0))
                        - float(precpu_stats.get("system_cpu_usage", 0)))

        # Prefer the CPU count reported with the stats; fall back to the local
        # count, which may itself be None when it cannot be determined.
        cpu_count = cpu_stats.get("online_cpus") or os.cpu_count() or 1

        cpu_percent = 0.0
        if system_delta > 0.0 and cpu_delta > 0.0:
            cpu_percent = cpu_delta / system_delta * 100.0 * cpu_count
        print(container.name + '.value', cpu_percent)
        print(container.name + '.extinfo', container_attributes(container))
295

    
296

    
297
def print_containers_memory(client):
    """
    Print the memory usage of each container, with its attributes as
    ``extinfo``.

    The ``total_rss`` counter is reported when the snapshot provides it;
    otherwise the overall ``usage`` figure is used. The ``extinfo`` says which
    of the two was reported.
    """
    for container, stats in parallel_container_stats(client):
        detailed = stats['memory_stats']['stats']
        if 'total_rss' in detailed:  # cgroupv1 only?
            amount, source = detailed['total_rss'], 'Resident Set Size'
        else:
            amount, source = stats['memory_stats']['usage'], 'Total memory usage'
        print(container.name + '.value', amount)
        print(container.name + '.extinfo', container_attributes(container, source))
307

    
308

    
309
def print_containers_network(client):
    """
    Print the bytes sent (``_up``) and received (``_down``) by each container,
    summed over all of its network interfaces, with the container attributes
    as ``extinfo`` of the ``_up`` field.

    A snapshot without a ``networks`` entry is reported as 0 bytes in both
    directions rather than raising ``KeyError``.
    """
    for container, stats in parallel_container_stats(client):
        # 'networks' can be missing or null in a stats snapshot.
        interfaces = list((stats.get('networks') or {}).values())
        tx_bytes = sum(data['tx_bytes'] for data in interfaces)
        rx_bytes = sum(data['rx_bytes'] for data in interfaces)
        print(container.name + '_up.value', tx_bytes)
        print(container.name + '_down.value', rx_bytes)
        print(container.name + '_up.extinfo', container_attributes(container))
319

    
320

    
321
def volume_summary(volume):
    """
    Return a one-line description of a volume: its short id, followed by its
    comma-separated labels in parentheses when it has any.
    """
    description = f"{volume.short_id}"
    if not volume.attrs['Labels']:
        return description
    labels = ', '.join(volume.attrs['Labels'])
    return description + f" ({labels})"
326

    
327

    
328
def status(client, mode):
    """
    Handle the ``status`` series.

    With ``mode == "config"`` the graph configuration is printed; with any
    other mode the per-state container counts are printed.
    """
    if mode != "config":
        print_containers_status(client)
        return

    print("graph_title Docker status")
    print("graph_vlabel containers")
    print("graph_category virtualization")
    print("graph_total All containers")

    # (field, warning threshold or None, info text), in output order
    fields = (
        ("running", None,
         "Running containers can be manipulated with "
         "`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or "
         "commands run in them with `docker container exec "
         "[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`"),
        ("unhealthy", 1,
         "Unhealthy containers can be restarted with "
         "`docker container restart <NAME>`"),
        ("paused", None,
         "Paused containers can be resumed with "
         "`docker container unpause <NAME>`"),
        ("created", None,
         "New containers can be created with "
         "`docker container create --name <NAME> <IMAGE_ID >` or "
         "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`"),
        ("restarting", None,
         "Containers can be restarted with "
         "`docker container restart <NAME>`"),
        ("removing", None,
         "Containers can be removed with "
         "`docker container rm <NAME>`"),
        ("exited", None,
         "Exited containers can be started with "
         "`docker container start [--attach] <NAME>`"),
        ("dead", 1,
         "Dead containers can be started with "
         "`docker container start <NAME>`"),
    )
    for field, warning, info in fields:
        print(f"{field}.label {field.upper()}")
        print(f"{field}.draw AREASTACK")
        if warning is not None:
            print(f"{field}.warning {warning}")
        print(f"{field}.info {info}")
374

    
375

    
376
def containers(client, mode):
    """
    Handle the ``containers`` series.

    With ``mode == "config"`` the graph configuration is printed; with any
    other mode the number of entries in ``client.containers`` is printed.
    """
    if mode != "config":
        print('containers_quantity.value', len(client.containers))
        return

    for line in (
        "graph_title Docker containers",
        "graph_vlabel containers",
        "graph_category virtualization",
        "containers_quantity.label Containers",
    ):
        print(line)
384

    
385

    
386
def images(client, mode):
    """
    Handle the ``images`` series.

    With ``mode == "config"`` the graph configuration is printed; with any
    other mode the image counts are printed.
    """
    if mode == "config":
        print("graph_title Docker images")
        print("graph_vlabel images")
        print("graph_category virtualization")
        print("graph_total All images")
        print("intermediate_quantity.label Intermediate images")
        print("intermediate_quantity.draw AREASTACK")
        print("intermediate_quantity.info All unused images can be deleted with "
              "`docker image prune --all`")
        print("images_quantity.label Images")
        print("images_quantity.draw AREASTACK")
        print("images_quantity.info Images can be used in containers with "
              "`docker container create --name <NAME> <IMAGE_ID >` or "
              "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
        print("dangling_quantity.label Dangling images")
        print("dangling_quantity.draw AREASTACK")
        # The trailing space after the first command keeps the two
        # concatenated sentences from running into each other.
        print("dangling_quantity.info Dangling images can be deleted with "
              "`docker image prune` "
              "or tagged with `docker image tag <IMAGE_ID> <NAME>`")
        print("dangling_quantity.warning 10")
    else:
        print_images_count(client)
409

    
410

    
411
def volumes(client, mode):
    """
    Handle the ``volumes`` series.

    With ``mode == "config"`` the graph configuration is printed; with any
    other mode the number of volumes and a summary of them are printed.
    """
    if mode != "config":
        print('volumes_quantity.value', len(client.volumes))
        summaries = ', '.join(volume_summary(v) for v in client.volumes)
        print('volumes_quantity.extinfo', summaries)
        return

    config_lines = [
        "graph_title Docker volumes",
        "graph_vlabel volumes",
        "graph_category virtualization",
        "volumes_quantity.label Volumes",
        "volumes_quantity.draw AREASTACK",
        "volumes_quantity.info Unused volumes can be deleted with "
        "`docker volume prune`",
    ]
    for line in config_lines:
        print(line)
423

    
424

    
425
def cpu(client, mode):
    """
    Handle the ``cpu`` series.

    With ``mode == "config"`` the graph configuration is printed, with one
    stacked field per container in ``client.all_containers`` and an upper
    limit of 100% per local CPU; with any other mode the per-container CPU
    usage is printed.
    """
    if mode == "config":
        # os.cpu_count() returns None when the count cannot be determined;
        # assume a single CPU in that case instead of raising TypeError.
        graphlimit = str((os.cpu_count() or 1) * 100)
        print("graph_title Docker containers CPU usage")
        print("graph_args --base 1000 -r --lower-limit 0 --upper-limit " + graphlimit)
        print("graph_scale no")
        print("graph_period second")
        print("graph_vlabel CPU usage (%)")
        print("graph_category virtualization")
        print("graph_info This graph shows docker container CPU usage.")
        print("graph_total Total CPU usage")
        for container in client.all_containers:
            print("{}.label {}".format(container.name, container.name))
            print("{}.draw AREASTACK".format(container.name))
            print("{}.info {}".format(container.name, container_attributes(container)))
    else:
        print_containers_cpu(client)
442

    
443

    
444
def network(client, mode):
    """
    Handle the ``network`` series.

    With ``mode == "config"`` the graph configuration is printed, with a
    ``_down``/``_up`` field pair per container in ``client.all_containers``;
    with any other mode the per-container byte counters are printed.
    """
    if mode != "config":
        print_containers_network(client)
        return

    print("graph_title Docker containers network usage")
    print("graph_args --base 1024 -l 0")
    print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container network usage.")
    print("graph_total Total network usage")

    # Per-container field settings; {0} is the container name
    templates = (
        "{0}_down.label {0}_received",
        "{0}_down.type DERIVE",
        "{0}_down.min 0",
        "{0}_down.graph no",
        "{0}_down.cdef {0}_down,8,*",
        "{0}_up.label {0}",
        "{0}_up.draw LINESTACK1",
        "{0}_up.type DERIVE",
        "{0}_up.min 0",
        "{0}_up.negative {0}_down",
        "{0}_up.cdef {0}_up,8,*",
    )
    for container in client.all_containers:
        for template in templates:
            print(template.format(container.name))
        print("{}_up.info {}".format(container.name, container_attributes(container)))
467

    
468

    
469
def memory(client, mode):
    """
    Handle the ``memory`` series.

    With ``mode == "config"`` the graph configuration is printed, with one
    stacked field per container in ``client.all_containers``; with any other
    mode the per-container memory usage is printed.
    """
    if mode != "config":
        print_containers_memory(client)
        return

    for line in (
        "graph_title Docker containers memory usage",
        "graph_args --base 1024 -l 0",
        "graph_vlabel Bytes",
        "graph_category virtualization",
        "graph_info This graph shows docker container memory usage.",
        "graph_total Total memory usage",
    ):
        print(line)

    for container in client.all_containers:
        print("{0}.label {0}".format(container.name))
        print("{0}.draw AREASTACK".format(container.name))
        print("{}.info {}".format(container.name, container_attributes(container)))
483

    
484

    
485
def main():
    """
    Entry point of the plugin.

    The first command-line argument is the mode ("autoconf", "suggest",
    "config", or anything else for fetching values). The series to report is
    taken from the name the plugin was invoked under: the part following
    ``docker_`` in the file name, up to the next underscore. ``multi`` reports
    every series as a multigraph.

    Exits with status 1 when the docker client cannot be set up (outside of
    autoconf) or when the series is unknown.
    """
    series = [
        'containers',
        'cpu',
        'images',
        'memory',
        'network',
        'status',
        'volumes',
    ]

    try:
        mode = sys.argv[1]
    except IndexError:
        mode = ""

    # Only look at the file name: a directory containing "docker_" must not
    # influence the result, and a name without "docker_" yields an empty
    # (unknown) series instead of an IndexError.
    plugin_name = os.path.basename(sys.argv[0])
    _, found, suffix = plugin_name.partition("docker_")
    wildcard = suffix.split("_")[0] if found else ""

    try:
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        if mode == "autoconf":
            # autoconf answers are read from stdout and always exit with 0
            print(f'no ({e})')
            sys.exit(0)
        # In every other mode stdout carries plugin data, so report the
        # failure on stderr.
        print(f'no ({e})', file=sys.stderr)
        sys.exit(1)

    if mode == "suggest":
        # The multigraph covers all other graphs,
        # so we only need to suggest one
        print("multi")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    if wildcard in series:
        # dereference the function name by looking in the globals()
        # this assumes that the function name matches the series name exactly
        # if this were to change, a different approach would be needed,
        # most likely using a Dict of series name string to callable
        globals()[wildcard](client, mode)
    elif wildcard == 'multi':
        for s in series:
            print(f'multigraph docker_{s}')
            # ditto
            globals()[s](client, mode)
    else:
        print(f'unknown series ({wildcard})', file=sys.stderr)
        sys.exit(1)
538

    
539

    
540
# Run the plugin only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()