root / plugins / gpu / nvidia_smi_ @ e5ce7492
Historique | Voir | Annoter | Télécharger (3,69 ko)
| 1 | 06dc4744 | NVIDIA Corporation | #!/usr/bin/perl |
|---|---|---|---|
| 2 | 36408755 | Matthew Ritchie | |
| 3 | 06dc4744 | NVIDIA Corporation | # |
| 4 | # Copyright and BSD license |
||
| 5 | # |
||
| 6 | # Copyright (c) 2011 NVIDIA Corporation |
||
| 7 | # All rights reserved. |
||
| 8 | # |
||
| 9 | # Redistribution and use in source and binary forms are permitted |
||
| 10 | # provided that the above copyright notice and this paragraph are |
||
| 11 | # duplicated in all such forms and that any documentation, |
||
| 12 | # advertising materials, and other materials related to such |
||
| 13 | # distribution and use acknowledge that the software was developed |
||
| 14 | # by NVIDIA Corporation. The name of the NVIDIA Corporation may not be |
||
| 15 | # used to endorse or promote products derived from this software |
||
| 16 | # without specific prior written permission. |
||
| 17 | # |
||
| 18 | # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR |
||
| 19 | # IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED |
||
| 20 | # WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
||
| 21 | # |
||
| 22 | 36408755 | Matthew Ritchie | |
| 23 | 06dc4744 | NVIDIA Corporation | # |
| 24 | # This script collects GPU information for use as a munin plugin |
||
| 25 | # Inspired by Matthew Ritchie and Vadim Bulakh's nvidia_smi_ plugin |
||
| 26 | # |
||
| 27 | 36408755 | Matthew Ritchie | |
| 28 | 06dc4744 | NVIDIA Corporation | # |
| 29 | # This requires the NVML bindings and NVIDIA driver >= R270 |
||
| 30 | # $ sudo cpan install nvidia::ml |
||
| 31 | # http://search.cpan.org/~nvbinding/nvidia-ml-pl/lib/nvidia/ml.pm |
||
| 32 | # |
||
| 33 | 938d4143 | Vadim Bulakh | |
use strict;
use warnings;
use nvidia::ml qw(:all);

# Munin plugin body.
#
# Usage: nvidia_smi_ [normal|config|autoconf]
#
#   normal   (default) print one "<field>.value <number>" line per metric
#            and per GPU: temperature, fan speed, GPU and memory utilization.
#   config   print the graph definition followed by one label per field.
#   autoconf print "yes" when NVML initialises and reports at least one GPU,
#            otherwise "no (<reason>)"; always exits 0 in this mode.
#
# Exit status: 1 on an invalid argument or, outside autoconf mode, when NVML
# cannot be initialised or the GPU count cannot be read.

# Value munin expects for a reading that could not be obtained.
my $UNKNOWN_VALUE = "U";

my $runType  = "normal";
my @runTypes = qw( normal config autoconf );

# Exactly one argument selects the run type; any other argument count keeps
# the default "normal" mode.
if (@ARGV == 1)
{
    if (grep { $_ eq $ARGV[0] } @runTypes)
    {
        $runType = $ARGV[0];
    }
    else
    {
        # Diagnostics go to STDERR so they are never mistaken for plugin output.
        print STDERR "Invalid argument: $ARGV[0].\n";
        print STDERR "Valid arguments: @runTypes.\n";
        exit(1);
    }
}

# Bring up NVML and count the devices, remembering why it failed (if it did)
# so that autoconf can report the reason.
my $gpuCount = 0;
my $failure;

my $ret = nvmlInit();
if ($ret != $NVML_SUCCESS)
{
    $failure = "could not initialise NVML";
}
else
{
    ($ret, $gpuCount) = nvmlDeviceGetCount();
    if ($ret != $NVML_SUCCESS)
    {
        $failure = "could not query the GPU count";
    }
}
$gpuCount = 0 unless defined $gpuCount;

# autoconf is answered here, before any per-device work, so that the answer
# does not depend on the loop below being entered at all.
if ($runType eq "autoconf")
{
    if (defined $failure)
    {
        print "no ($failure)\n";
    }
    elsif ($gpuCount < 1)
    {
        print "no (no NVIDIA GPU found)\n";
    }
    else
    {
        print "yes\n";
    }
    exit(0);
}

exit(1) if defined $failure;

($ret, my $driverVersion) = nvmlSystemGetDriverVersion();
$driverVersion = "Unknown" if $ret != $NVML_SUCCESS || !defined $driverVersion;

# The graph definition is printed once, independently of whether the handle
# of the first GPU can be obtained.
if ($runType eq "config" && $gpuCount > 0)
{
    print "graph_title GPU\n";
    print "graph_args --upper-limit 120 -l 0\n";
    print "graph_vlabel Percent or Degrees C\n";
    print "graph_category GPU Metrics\n";
    print "graph_info Information for NVIDIA GPUs using driver version $driverVersion\n";
}

for my $i (0 .. $gpuCount - 1)
{
    # A GPU whose handle cannot be obtained is skipped entirely.
    ($ret, my $handle) = nvmlDeviceGetHandleByIndex($i);
    next if $ret != $NVML_SUCCESS;

    if ($runType eq "config")
    {
        # The PCI bus id is used as the human-readable name of the GPU.
        ($ret, my $pciInfo) = nvmlDeviceGetPciInfo($handle);
        my $gpuName = "Unknown";
        if ($ret == $NVML_SUCCESS && defined $pciInfo->{'busId'})
        {
            $gpuName = $pciInfo->{'busId'};
        }

        # metrics are collected for all the GPUs to a single graph
        print "GPU_UTIL_$i.label GPU$i - $gpuName : GPU utilization\n";
        print "GPU_FANSPEED_$i.label GPU$i - $gpuName : fan speed\n";
        print "GPU_MEM_UTIL_$i.label GPU$i - $gpuName : GPU memory utilization\n";
        print "GPU_TEMP_$i.label GPU$i - $gpuName : GPU temperature\n";
    }
    else
    {
        ($ret, my $gpuTemp) = nvmlDeviceGetTemperature($handle,
                                                       $NVML_TEMPERATURE_GPU);
        $gpuTemp = $UNKNOWN_VALUE
            if $ret != $NVML_SUCCESS || !defined $gpuTemp;

        ($ret, my $gpuFanSpeed) = nvmlDeviceGetFanSpeed($handle);
        $gpuFanSpeed = $UNKNOWN_VALUE
            if $ret != $NVML_SUCCESS || !defined $gpuFanSpeed;

        # Both utilization figures come from a single NVML query.
        my $gpuUtil = $UNKNOWN_VALUE;
        my $memUtil = $UNKNOWN_VALUE;
        ($ret, my $utilRates) = nvmlDeviceGetUtilizationRates($handle);
        if ($ret == $NVML_SUCCESS)
        {
            $gpuUtil = $utilRates->{'gpu'}    if defined $utilRates->{'gpu'};
            $memUtil = $utilRates->{'memory'} if defined $utilRates->{'memory'};
        }

        print "GPU_TEMP_$i.value $gpuTemp\n";
        print "GPU_FANSPEED_$i.value $gpuFanSpeed\n";
        print "GPU_UTIL_$i.value $gpuUtil\n";
        print "GPU_MEM_UTIL_$i.value $memUtil\n";
    }
}
