Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / gpu / nvidia_smi_ @ e5ce7492

Historique | Voir | Annoter | Télécharger (3,69 ko)

1 06dc4744 NVIDIA Corporation
#!/usr/bin/perl
2 36408755 Matthew Ritchie
3 06dc4744 NVIDIA Corporation
#
4
# Copyright and BSD license
5
#
6
# Copyright (c) 2011 NVIDIA Corporation
7
# All rights reserved.
8
#
9
# Redistribution and use in source and binary forms are permitted
10
# provided that the above copyright notice and this paragraph are
11
# duplicated in all such forms and that any documentation,
12
# advertising materials, and other materials related to such
13
# distribution and use acknowledge that the software was developed
14
# by NVIDIA Corporation.  The name of the NVIDIA Corporation may not be
15
# used to endorse or promote products derived from this software
16
# without specific prior written permission.
17
#
18
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
19
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
20
# WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21
#
22 36408755 Matthew Ritchie
23 06dc4744 NVIDIA Corporation
#
24
# This script collects GPU information for use as a munin plugin
25
# Inspired by Matthew Ritchie and Vadim Bulakh's nvidia_smi_ plugin
26
#
27 36408755 Matthew Ritchie
28 06dc4744 NVIDIA Corporation
#
29
# This requires the NVML bindings and NVIDIA driver >= R270
30
# $ sudo cpan install nvidia::ml
31
# http://search.cpan.org/~nvbinding/nvidia-ml-pl/lib/nvidia/ml.pm
32
#
33 938d4143 Vadim Bulakh
34 06dc4744 NVIDIA Corporation
use strict;
35
use nvidia::ml qw(:all);
36 938d4143 Vadim Bulakh
37 06dc4744 NVIDIA Corporation
# Work out which run mode the command line asks for.
#
# Parameters:
#   $validTypes - array reference listing the accepted mode names
#   @args       - the command-line arguments to inspect
#
# Returns the mode name, or undef when exactly one argument was given and
# it is not one of the accepted modes.  Any argument count other than one
# selects "normal", so a bare invocation fetches values.
sub _selectRunType
{
    my ($validTypes, @args) = @_;

    return "normal" if @args != 1;

    return $args[0] if grep { $_ eq $args[0] } @$validTypes;
    return;
}

# Modes this plugin can be run in; "normal" is used when none is given.
my @runTypes = qw( normal config autoconf );
my $runType = _selectRunType(\@runTypes, @ARGV);

if (!defined $runType)
{
    # Diagnostics go to STDERR so they are never mistaken for plugin output.
    print STDERR "Invalid argument: $ARGV[0].\n";
    print STDERR "Valid arguments: @runTypes.\n";
    exit(1);
}
52
53 06dc4744 NVIDIA Corporation
# Value reported to munin when a reading cannot be obtained.  Munin's
# plugin protocol uses "U" for an unknown value.
my $unknownValue = "U";

# Start NVML and count the GPUs, remembering the first failure instead of
# exiting at once so that autoconf can report it.
my $ret = nvmlInit();
my $probeError;
$probeError = "NVML initialization failed" if $ret != $NVML_SUCCESS;

my $gpuCount = 0;
if (!defined $probeError)
{
    ($ret, $gpuCount) = nvmlDeviceGetCount();
    $probeError = "could not query the GPU count" if $ret != $NVML_SUCCESS;
}

# autoconf is answered here, before any per-GPU work, so it always prints
# "yes" or "no (reason)" and exits 0, including when NVML is unusable or
# no GPU is present.
if ($runType eq "autoconf")
{
    if (!defined $probeError && $gpuCount < 1)
    {
        $probeError = "no NVIDIA GPUs found";
    }

    print(defined($probeError) ? "no ($probeError)\n" : "yes\n");
    exit(0);
}

# For config and normal runs a failed start-up is fatal.
if (defined $probeError)
{
    print STDERR "$probeError\n";
    exit(1);
}

($ret, my $driverVersion) = nvmlSystemGetDriverVersion();
$driverVersion = "Unknown" if $ret != $NVML_SUCCESS;

# The graph-level attributes are printed once, ahead of the first GPU that
# yields a handle, so they are not lost when GPU 0 cannot be opened.
my $graphHeaderPrinted = 0;

for my $i (0 .. $gpuCount - 1)
{
    ($ret, my $handle) = nvmlDeviceGetHandleByIndex($i);
    next if $ret != $NVML_SUCCESS;

    if ($runType eq "config")
    {
        if (!$graphHeaderPrinted)
        {
            print "graph_title GPU\n";
            print "graph_args --upper-limit 120 -l 0\n";
            print "graph_vlabel Percent or Degrees C\n";
            print "graph_category GPU Metrics\n";
            print "graph_info Information for NVIDIA GPUs using driver version $driverVersion\n";
            $graphHeaderPrinted = 1;
        }

        # The PCI bus id names the GPU in the labels; fall back to a
        # placeholder when it cannot be read.
        ($ret, my $pciInfo) = nvmlDeviceGetPciInfo($handle);
        my $gpuName = ($ret == $NVML_SUCCESS) ? $pciInfo->{'busId'} : "Unknown";

        # metrics are collected for all the GPUs to a single graph
        print "GPU_UTIL_$i.label GPU$i - $gpuName : GPU utilization\n";
        print "GPU_FANSPEED_$i.label GPU$i - $gpuName : fan speed\n";
        print "GPU_MEM_UTIL_$i.label GPU$i - $gpuName : GPU memory utilization\n";
        print "GPU_TEMP_$i.label GPU$i - $gpuName : GPU temperature\n";
        next;
    }

    # Normal run: report the current readings for this GPU.
    ($ret, my $gpuTemp) = nvmlDeviceGetTemperature($handle,
                                                   $NVML_TEMPERATURE_GPU);
    $gpuTemp = $unknownValue if $ret != $NVML_SUCCESS;

    ($ret, my $gpuFanSpeed) = nvmlDeviceGetFanSpeed($handle);
    $gpuFanSpeed = $unknownValue if $ret != $NVML_SUCCESS;

    ($ret, my $utilRates) = nvmlDeviceGetUtilizationRates($handle);
    my $gpuUtil = $unknownValue;
    my $memUtil = $unknownValue;
    if ($ret == $NVML_SUCCESS)
    {
        $gpuUtil = $utilRates->{'gpu'};
        $memUtil = $utilRates->{'memory'};
    }

    print "GPU_TEMP_$i.value $gpuTemp\n";
    print "GPU_FANSPEED_$i.value $gpuFanSpeed\n";
    print "GPU_UTIL_$i.value $gpuUtil\n";
    print "GPU_MEM_UTIL_$i.value $memUtil\n";
}