Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / vmware / esx_ @ 72e4561a

Historique | Voir | Annoter | Télécharger (22,2 ko)

1 8ee4e27c Stefan Seidel
#!/usr/bin/perl -w
2
#
3
# -== Munin plugin for VMware ESXi/vSphere monitoring ==-
4
#
5
# Copyright (c) 2012 - Stefan Seidel <munin@stefanseidel.info>
6
#
7
#    This program is free software: you can redistribute it and/or modify
8
#    it under the terms of the GNU General Public License as published by
9
#    the Free Software Foundation, either version 3 of the License, or
10
#    (at your option) any later version.
11
#
12
#    This program is distributed in the hope that it will be useful,
13
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
14
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
#    GNU General Public License for more details.
16
#
17
#    You should have received a copy of the GNU General Public License
18
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
#
20
#
21
# This plugin uses the vSphere SDK for Perl available at
22
# http://www.vmware.com/support/developer/viperltoolkit/
23
# or included in the vSphere CLI available at
24
# http://www.vmware.com/support/developer/vcli/
25
# The use of the SDK is subject to the terms and conditions
26
# of VMware, Inc. to which you must agree upon installation.
27
#
28
29
30
#
31
# -== Usage ==-
32
# Put this file in /usr/share/munin/plugins, `chmod +x` it and
33
# `ln -s` it to /etc/munin/plugins/esx_<hostname of server to monitor>
34
#
35
36
use strict;
37
use sort 'stable'; # guarantee stability
38
no warnings; # don't want warnings in output
39
40
use VMware::VIRuntime; # need to install VIM SDK (vSphere CLI/SDK 4.1 or newer)
41
use VMware::VILib;
42
use VMware::VIExt;
43
use Data::Dumper;
44
use DateTime::Format::ISO8601; # may need to install "libdatetime-format-iso8601-perl" on Debian-based systems
45
use List::Util qw(sum max);
46
use List::MoreUtils qw(all);
47
use Munin::Plugin;
48
49
# Derive the monitored host from the name this plugin was invoked under
# (the symlink is expected to be called esx_<hostname>) and print it
# immediately, so that when something goes wrong later on, at least the
# plugin output is linked with the right host.
my ($host_name) = $0 =~ /esx_(.+)$/;

# Without a host name there is nothing to connect to; stop with a clear
# message instead of carrying an undefined name into the SDK URL.
die "Cannot determine host name: plugin must be invoked as esx_<hostname> (got '$0')\n"
    unless defined $host_name;

print "host_name $host_name\n";
55
56
# Connection settings: env.user and env.password need to be set in
# plugin-conf/munin-node; the SDK URL is derived from the host name.
my %connection_option = (
    username => $ENV{user}     || 'root',
    password => $ENV{password} || '',
    url      => "https://$host_name/sdk/webService",
);
Opts::set_option($_, $connection_option{$_}) for qw(username password url);

# plugin needs Munin 1.4 or later
need_multigraph();

# for datetime parsing later on
my $iso8601 = DateTime::Format::ISO8601->new;

# connect to vSphere host
Util::connect();

# central object host_view holds all relevant items (VMs, network, etc.)
my @host_properties = ('summary', 'network', 'datastore', 'vm', 'runtime', 'configManager.networkSystem');
my $host_view = VIExt::get_host_view(1, \@host_properties);
Opts::assert_usage(defined($host_view), "Invalid host.");

# fetch the view of a managed object addressed by its type and fixed id
my $fixed_view = sub {
    my ($type, $value) = @_;
    return Vim::get_view(mo_ref => ManagedObjectReference->new(type => $type, value => $value));
};

# Performance Manager for getting the actual values
my $perfMan = $fixed_view->('PerformanceManager', 'ha-perfmgr');
Opts::assert_usage(defined($perfMan), "No PerformanceManager.");

# may be needed later
#my $netsys = Vim::get_view(mo_ref => ManagedObjectReference->new(type => 'HostNetworkSystem', value => 'networkSystem'));
#Opts::assert_usage(defined($netsys), "No NetworkSystem.");

# used for getting the current vSphere server time and then
# defining the (now - 5minutes) interval
my $dtsys = $fixed_view->('HostDateTimeSystem', 'dateTimeSystem');
Opts::assert_usage(defined($dtsys), "No DateTimeSystem.");

# index all performance counter descriptions by their IDs
my %perfCounter;
$perfCounter{$_->key} = $_ for @{ $perfMan->perfCounter };

# holds all performance data
my @all_perf_data = ();
# store VM ids for iteration later on
my @all_vms = ();
# IDs/UUIDs to human readable names
my $resolveNames;

# retrieve performance counters for the host itself
push @all_perf_data, get_perf_data($host_view);
# the host's data points carry an empty vm id; give that a readable name
$resolveNames->{vm}->{""} = "Host System";
100
101
# Network ManagedObjects do not have performance counters; they are only
# visited to map each network id to a user-friendly "id (name)" label.
for my $network_refs ($host_view->network) {
    for my $network_ref (@$network_refs) {
        my $network    = Vim::get_view(mo_ref => $network_ref);
        my $network_id = $network_ref->{value};
        my $network_nm = $network->summary->name;
        $resolveNames->{net}->{$network_id} = "$network_id ($network_nm)";
    }
}

# Datastore ManagedObjects do not have performance counters either. For each
# datastore we record its user-friendly name and emit one data point per
# summary field listed here: [ summary accessor, label, description ].
my @datastore_fields = (
    [ capacity    => "Capacity",    "Maximum amount of storage space on this datastore" ],
    [ freeSpace   => "Free",        "Total amount of unused, available storage space on this datastore" ],
    [ uncommitted => "Uncommitted", "Total additional storage space, potentially used by all virtual machines on this datastore" ],
);

for my $datastore_refs ($host_view->datastore) {
    for my $datastore_ref (@$datastore_refs) {
        my $datastore = Vim::get_view(mo_ref => $datastore_ref);
        # update freeSpace values (doesn't work on free ESXi)
        eval { $datastore->RefreshDatastore(); };

        # the last path component of the datastore URL serves as its id
        (my $uuid = $datastore->summary->url) =~ s!.+/!!;
        $resolveNames->{datastore}->{$uuid} = $datastore->name;

        for my $field (@datastore_fields) {
            my ($name, $label, $summary) = @$field;
            push @all_perf_data, {
                rollup   => "latest",
                group    => "datastore",
                name     => $name,
                value    => $datastore->summary->$name,
                counter  => PerfCounterInfo->new(nameInfo => ElementDescription->new(label => $label, summary => $summary)),
                vm       => "",
                instance => $uuid,
                unit     => "Bytes",
            };
        }
    }
}
150
151
# Per-VM disk space usage fields, one data point each per datastore:
# [ usage accessor (also the data point name), label, description ].
my @vm_storage_fields = (
    [ committed   => "Comitted",   "Storage space, in bytes, on this datastore that is actually being used by the virtual machine.\n\nIt includes space actually occupied by disks, logs, snapshots, configuration files etc. Files of the virtual machine which are present on a different datastore (e.g. a virtual disk on another datastore) are not included here.\n\n" ],
    [ uncommitted => "Uncomitted", "Additional storage space, in bytes, potentially used by the virtual machine on this datastore.\n\nAdditional space may be needed for example when lazily allocated disks grow, or storage for swap is allocated when powering on the virtual machine.\n\nIf the virtual machine is running off delta disks (for example because a snapshot was taken), then only the potential growth of the currently used delta-disks is considered.\n\n" ],
    [ unshared    => "Unshared",   "Storage space, in bytes, occupied by the virtual machine on this datastore that is not shared with any other virtual machine.\n\n" ],
);

# iterate over all VMs of the host
for my $vm_refs ($host_view->vm) {
    for my $vm_ref (@$vm_refs) {
        my $vm = Vim::get_view(mo_ref => $vm_ref);

        # remember the VM id for the per-VM graphs and map it to the VM name
        my $vmId = $vm_ref->{value};
        push @all_vms, $vmId;
        $resolveNames->{vm}->{$vmId} = "VM ".$vm->summary->config->name;

        # disk space usage of this VM, per datastore
        for my $usage (@{ $vm->storage->perDatastoreUsage }) {
            # the last path component of the datastore URL serves as its id
            (my $uuid = Vim::get_view(mo_ref => $usage->datastore)->summary->url) =~ s!.+/!!;

            for my $field (@vm_storage_fields) {
                my ($name, $label, $summary) = @$field;
                push @all_perf_data, {
                    rollup   => "latest",
                    group    => "datastore",
                    name     => $name,
                    value    => $usage->$name,
                    counter  => PerfCounterInfo->new(nameInfo => ElementDescription->new(label => $label, summary => $summary)),
                    vm       => $vmId,
                    instance => $uuid,
                    unit     => "Bytes",
                };
            }
        }

        # retrieve performance counters for this VM
        push @all_perf_data, get_perf_data($vm_ref);
    }
}
196
197
# keep track of how many sensors are in which state
my %sensorCount = ( green => 0, red => 0, unknown => 0, yellow => 0 );

# The health properties on the way to the sensor list may be unset (e.g. on
# hosts that expose no hardware health data). Walk the chain inside eval so
# that a missing link yields "no sensors" instead of aborting the plugin
# with a method call on an undefined value.
my $sensor_infos = eval {
    $host_view->runtime->healthSystemRuntime->systemHealthInfo->numericSensorInfo;
} || [];

# iterate over all sensor data; $index numbers the emitted sensor entries
my $index = 0;
for my $sensor (@$sensor_infos) {
    # update the per-state counters
    $sensorCount{$sensor->healthState->key}++;
    # do not create entries for unmonitorable things like software components
    next unless ($sensor->baseUnits =~ /.+/);
    # create entry with sensor data
    push (@all_perf_data,
                { rollup => "latest",
                   group => "sensors",
                    name => "sensor_".($index++),
                   value => $sensor->currentReading,
                 counter => PerfCounterInfo->new(nameInfo => ElementDescription->new(label => $sensor->name, summary => "Sensor data for the ".$sensor->sensorType." sensor ".$sensor->name.". ".$sensor->healthState->summary." (".$sensor->healthState->label.")")),
                      vm => "",
                instance => "",
                # readings are scaled: real value = reading * 10^unitModifier
            unitModifier => $sensor->unitModifier,
                    unit => $sensor->baseUnits });
}
219
220
# all server queries are done, so the session can be closed now
Util::disconnect();

# one data point per health state (green/red/yellow/unknown) holding the
# number of sensors currently in that state
push @all_perf_data, map {
    +{
        rollup   => "latest",
        group    => "sensors",
        name     => $_."_sensors",
        value    => $sensorCount{$_},
        counter  => PerfCounterInfo->new(nameInfo => ElementDescription->new(label => ucfirst($_), summary => "Count of sensors in the $_ state")),
        vm       => "",
        instance => "",
        unit     => "Numbers",
    }
} keys %sensorCount;

# -> DEBUG
# dump every collected data point as a comment line, ordered by
# group, instance, name, rollup and finally vm
my $by_debug_order = sub {
       $a->{group}    cmp $b->{group}
    || $a->{instance} cmp $b->{instance}
    || $a->{name}     cmp $b->{name}
    || $a->{rollup}   cmp $b->{rollup}
    || $a->{vm}       cmp $b->{vm}
};
for my $dp (sort $by_debug_order @all_perf_data) {
   print "# $dp->{vm}\t$dp->{rollup}\t$dp->{group}\t$dp->{instance}\t$dp->{name}\t$dp->{value}\t$dp->{unit}\n";
}
# <- DEBUG
241
242
# Which graphs to draw. Each entry has
#   selector: data point field => regex; a data point belongs to the graph
#             when every listed field it carries matches
#   config:   groupBy   - data point field whose distinct values each start
#                         a separate multigraph
#             graphName / graphTitle - prefixes for the multigraph name/title
#             graphArgs - optional extra graph_args
my @all_graphs = ();

# host system
push @all_graphs, (
    {   selector => { group => qr/^cpu$/i, name => qr/^usagemhz$/i, instance => qr/^$/ },
          config => { groupBy => "group", graphName => "usage_", graphTitle => "CPU usage per " }
    },
    {   selector => { group => qr/^disk$/i, name => qr/^(read|usage|write)$/i, instance => qr/.+/ },
          config => { groupBy => "group", graphName => "transfer_", graphTitle => "Disk Transfer Rates per " }
    },
    {   selector => { group => qr/^disk$/i, name => qr/^.+Averaged$/i, instance => qr/.+/ },
          config => { groupBy => "group", graphName => "iops_", graphTitle => "Disk I/O operations per " }
    },
    {   selector => { group => qr/^disk$/i, name => qr/^.+Latency$/i, instance => qr/.+/, vm => qr/^$/ },
          config => { groupBy => "vm", graphName => "latency_disk", graphTitle => "Disk latency for " }
    },
    {   selector => { group => qr/^mem$/i, unit => qr/^KB$/i, rollup => qr/^none$/, vm => qr/^$/ },
          config => { groupBy => "vm", graphName => "mem_host", graphTitle => "Memory usage for " }
    },
    {   selector => { group => qr/^datastore$/i, unit => qr/^Bytes$/i, vm => qr/^$/ },
          config => { groupBy => "vm", graphName => "usage_datastore", graphTitle => "Disk space usage for ", graphArgs => "--lower-limit 10737418240 --logarithmic --alt-autoscale-min --units=si" }
    },
    {   selector => { group => qr/^net$/i, unit => qr/^KBps$/i, vm => qr/^$/ },
          config => { groupBy => "vm", graphName => "traffic_net", graphTitle => "Network traffic for " }
    },
    {   selector => { group => qr/^net$/i, unit => qr/^Number$/i, vm => qr/^$/ },
          config => { groupBy => "vm", graphName => "packets_net", graphTitle => "Network packets for " }
    },
    {   selector => { group => qr/^sys$/i, name => qr/^diskUsage$/i },
          config => { groupBy => "name", graphName => "host_", graphTitle => "Host System " }
    },
    {   selector => { group => qr/^sys$/i, name => qr/^uptime$/i },
          config => { groupBy => "name", graphName => "host_", graphTitle => "Host System and VM ", graphArgs => "--lower-limit 1000 --logarithmic --alt-autoscale-min" }
    }
);

# graphs per VM
foreach my $vm_id (@all_vms) {
    my $vmName = clean_fieldname($resolveNames->{vm}->{$vm_id});
    # match exactly this VM; the id is quoted so that it is always taken
    # literally, whatever characters it contains
    my $this_vm = qr/^\Q$vm_id\E$/;
    push @all_graphs, (
        {   selector => { group => qr/^cpu$/i, name => qr/^usagemhz$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.cpu_", graphTitle => "CPU usage for " }
        },
        {   selector => { group => qr/^mem$/i, unit => qr/^KB$/i, rollup => qr/^none$/, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.memory_", graphTitle => "Memory usage for " }
        },
        {   selector => { group => qr/^datastore$/i, unit => qr/^Bytes$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.datastore_", graphTitle => "Disk space usage for ", graphArgs => "--lower-limit 10485760 --logarithmic --alt-autoscale-min --units=si" }
        },
        {   selector => { group => qr/^virtualDisk$/i, unit => qr/^Millisecond$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.disklat_", graphTitle => "Disk latency for " }
        },
        {   selector => { group => qr/^virtualDisk$/i, unit => qr/^Number$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.diskiops_", graphTitle => "Disk I/O operations for " }
        },
        {   selector => { group => qr/^virtualDisk$/i, unit => qr/^KBps$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.disktrans_", graphTitle => "Disk transfer rates for " }
        },
        {   selector => { group => qr/^net$/i, unit => qr/^KBps$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.traffic_net_", graphTitle => "Network traffic for " }
        },
        {   selector => { group => qr/^net$/i, unit => qr/^Number$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.packets_net_", graphTitle => "Network packets for " }
        },
        {   selector => { group => qr/^sys$/i, name => qr/^uptime$/i, vm => $this_vm },
              config => { groupBy => "vm", graphName => "$vmName.uptime_", graphTitle => "VM uptime " }
        }
    );
}

# sensor graphs, one per unit of measurement
push @all_graphs, (
    {   selector => { group => qr/^sensors$/i },
          config => { groupBy => "unit", graphName => "sensor_", graphTitle => "Sensors " }
    });
318
319
320
# Actual processing: emit every graph. When called with "config" the graph
# configuration is printed, followed by the values only if the node
# advertises MUNIN_CAP_DIRTYCONFIG (this doesn't seem to work even on
# Munin 1.4.6); any other invocation prints just the values.
my $want_config = (defined $ARGV[0]) && ($ARGV[0] eq "config");

for my $graph (@all_graphs) {
    unless ($want_config) {
        munin_print("values", \@all_perf_data, $graph);
        next;
    }
    munin_print("config", \@all_perf_data, $graph);
    munin_print("values", \@all_perf_data, $graph) if $ENV{MUNIN_CAP_DIRTYCONFIG};
}

0;
331
332
####################################################################
333
334
# Reduce the samples of one performance counter to a single value.
#
# Parameters:
#   $arr - array ref with the raw samples
#   $pd  - counter description; read via ->unitInfo->key,
#          ->rollupType->val and ->nameInfo->key
#
# Returns the sum for "summation" counters, the maximum for counters whose
# name contains "max", and the arithmetic mean otherwise. Samples of
# counters with the "percent" unit are divided by 100 first. Returns undef
# when there are no samples.
sub process_value_array {
    my ($arr, $pd) = @_;

    # No samples: nothing to aggregate. Averaging would divide by zero
    # here. The undef is explicit (not a bare return) because callers use
    # the result inside a hash constructor, where an empty list would
    # shift the following key/value pairs.
    return undef unless $arr && @$arr;

    my @vs;
    if ($pd->unitInfo->key eq "percent") {
        @vs = map { $_ / 100 } @$arr;
    } else {
        @vs = @$arr;
    }

    return sum(@vs) if $pd->rollupType->val eq "summation";
    return max(@vs) if $pd->nameInfo->key =~ /max/i;
    return sum(@vs) / @vs;
}
348
349
# Query the performance data of one managed entity (host or VM) for the
# last five minutes of server time.
#
# Returns a list of hash refs, one per returned metric series whose counter
# id is known in %perfCounter, with the keys rollup, group, name, value,
# counter, vm (the VM id, or "" when the entity is not a VirtualMachine),
# instance and unit.
sub get_perf_data {
    my ($entity) = @_;
    my @data_points = ();

    # the window is [server time - 5 minutes, now]
    my $window_end   = $iso8601->parse_datetime($dtsys->QueryDateTime());
    my $window_start = $window_end->clone->add(minutes => -5);

    # actual query, intervalId is 20 because that's the default
    my $entity_metrics = $perfMan->QueryPerf(querySpec => PerfQuerySpec->new(entity => $entity, intervalId => 20, startTime => $window_start));

    # outer loop: PerfEntityMetric (nothing to do when the query gave no result)
    for my $entity_metric (defined $entity_metrics ? @$entity_metrics : ()) {
        my $entity_ref = $entity_metric->entity;
        my $vm = $entity_ref->type eq 'VirtualMachine' ? $entity_ref->value : "";

        # inner loop: PerfMetricIntSeries
        for my $series (@{ $entity_metric->{value} }) {
            my $perfDesc = $perfCounter{ $series->id->counterId };
            # skip series of counters we have no description for
            next unless defined $perfDesc;

            push @data_points, {
                rollup   => $perfDesc->rollupType->val,
                group    => $perfDesc->groupInfo->key,
                name     => $perfDesc->nameInfo->key,
                value    => process_value_array(\@{ $series->{value} }, $perfDesc),
                counter  => $perfDesc,
                vm       => $vm,
                instance => $series->id->instance,
                unit     => $perfDesc->unitInfo->label,
            };
        }
    }

    return @data_points;
}
380
381
# Build the field name of a data point: its name, VM id and instance are
# joined as "<name>v<vm>i<instance>" and passed through clean_fieldname.
sub gen_dp_name {
    my ($dp) = @_;
    return clean_fieldname(join '', $dp->{name}, 'v', $dp->{vm}, 'i', $dp->{instance});
}
385
386
# Return a copy of the given string without leading and trailing white space.
sub trim {
    my ($text) = @_;
    for ($text) {
        s/^\s+//;    # leading
        s/\s+$//;    # trailing
    }
    return $text;
}
393
394
# Print the Munin configuration or the values of one graph definition.
#
# Parameters:
#   $act - "config" to print graph/field configuration, anything else to
#          print the values
#   $arr - array ref of data points (hash refs); defaults to an empty list
#   $par - graph definition: { selector => { field => regex, ... },
#                              config   => { groupBy, graphName, graphTitle,
#                                            graphArgs (optional) } }
#
# A data point is selected when every selector field it carries matches the
# selector's regex. The selected points are sorted by the groupBy field and
# every distinct value of that field starts a new "multigraph" section.
sub munin_print {
    # action
    my $act = shift || "";
    # values; an array ref is needed as the default here, `shift || ()`
    # evaluates to undef in scalar context
    my $arr = shift || [];
    # parameters
    my $par = shift || {};
    my $cfg = $par->{config}   || {};
    my $sel = $par->{selector} || {};

    my $groupCrit = $cfg->{groupBy};
    my $oldGroup = "_-_";
    my $factor;

    # find values according to the criteria in the selector ...
    my @selected = grep {
        my $d = $_;
        all { (not exists $d->{$_}) || $d->{$_} =~ /$sel->{$_}/ } keys %$sel;
    } @$arr;

    # ... and walk them sorted by the grouping parameter
    foreach my $dp (sort { $a->{$groupCrit} cmp $b->{$groupCrit} } @selected) {
        my $curGroup = $dp->{$groupCrit};

        if ($curGroup ne $oldGroup) {
            # we're in a new group, meaning a new graph starts
            $factor = 0;
            # clean up group name for multigraph name
            (my $ccurGroup = $curGroup) =~ s/ |\./_/g;
            print "multigraph ",$cfg->{graphName},$ccurGroup,"\n";

            if ("config" eq $act) {
                # want configuration
                print "graph_title ",$cfg->{graphTitle},$resolveNames->{$groupCrit}->{$curGroup} || $curGroup,"\n";
                #print "graph_order xxx yyy\n";

                my $unit = $dp->{unit};
                my $base = 1000;

                # since the y-axis markers are going to be wrong with source units like
                # KB, MB, MHz etc., we define a correction factor via cdef later
                # this way, if 1024 MB is reported, the graph shows 1G and not 1k
                # (although 1k MB is technically also correct, but confusing)
                if ($unit =~ /^Bytes$/i) {
                    $base = 1024;
                } elsif ($unit =~ /^KBps$/i) {
                    $unit = "Bytes/s";
                    $factor = 1024;
                    $base = 1024;
                } elsif ($unit =~ /^KB$/i) {
                    $unit = "Bytes";
                    $factor = 1024;
                    $base = 1024;
                } elsif ($unit =~ /^MB$/i) {
                    $unit = "Bytes";
                    $factor = 1024*1024;
                    $base = 1024;
                } elsif ($unit =~ /^MHz$/i) {
                    $unit = "Hz";
                    $factor = 1000000;
                } elsif ($unit =~ /^Millisecond$/i) {
                    $unit = "Second";
                    $factor = 1/1000;
                }
                print "graph_vlabel $unit\n";
                print "graph_category $dp->{group}\n";
                print "graph_args --base=$base --alt-autoscale-max ",(defined $cfg->{graphArgs})?$cfg->{graphArgs}:"","\n";
            }
        }
        $oldGroup = $curGroup;

        my $dpName = gen_dp_name($dp);
        if ("config" eq $act) {
            # want configuration
            # get instance and VM names and UF names, if applicable
            my $iName = $resolveNames->{$dp->{group}}->{$dp->{instance}} || (("" eq $dp->{instance})?"":$dp->{group}." ".$dp->{instance});
            $iName = " $iName" if $iName;
            my $vmName = $resolveNames->{vm}->{$dp->{vm}};
            $vmName = " $vmName" if $vmName;
            # all values are drawn as lines for now
            print "$dpName.draw LINE2\n";
            # the VM name is left out of the label when the graph is per VM anyway
            print "$dpName.label ",$dp->{counter}->nameInfo->label,$iName,("vm" eq $groupCrit)?"":$vmName || "","\n";
            my $summary = $dp->{counter}->nameInfo->summary;
            # keep the field info on a single line
            $summary =~ s!\n!\\n!g;
            print "$dpName.info ",$summary,$iName?", instance$iName ($dp->{instance})":"",$vmName?",$vmName":"","\n";
            # declare CDEF if we want to apply a factor
            if ($factor > 1) {
                print "$dpName.cdef $dpName,$factor,*\n";
            } elsif ($factor <= 0) {
                # no unit correction for this graph
                if (defined $dp->{unitModifier}) {
                    # sensor values have a unit modifier M attached to them so that REALVAL=VAL*10^M
                    # y,x,LOG,*,EXP is x^y, just in case this is not obvious to the reader
                    print "$dpName.cdef $dpName,",$dp->{unitModifier},",10,LOG,*,EXP,*\n";
                }
            } elsif ($factor < 1) {
                # fractional factor: divide by its reciprocal
                print "$dpName.cdef $dpName,",1/$factor,",/\n";
            }
        } else {
            # just print value
            print $dpName, ".value $dp->{value}\n";
        }
    }
}