root / plugins / gpu / amd_gpu_ @ 10b1de81
Historique | Voir | Annoter | Télécharger (6,93 ko)
| 1 |
#!/bin/bash |
|---|---|
| 2 |
# -*- bash -*- |
| 3 |
|
| 4 |
: << =cut |
| 5 |
|
| 6 |
=head1 NAME |
| 7 |
|
| 8 |
amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility, |
| 9 |
usually bundled with AMD GPU driver, to obtain information. To use this |
| 10 |
plugin you have to make sure aticonfig will run without an active X |
| 11 |
server (i.e. without anyone being logged in via the GUI). For more |
| 12 |
information about this issue visit the link below: |
| 13 |
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/ |
| 14 |
|
| 15 |
=head1 CONFIGURATION |
| 16 |
|
| 17 |
This is a wildcard plugin. The part of the symlink name after the |
| 18 |
amd_gpu_ prefix selects the value to monitor. |
| 19 |
|
| 20 |
This plugin uses the following configuration variables: |
| 21 |
|
| 22 |
[amd_gpu_*] |
| 23 |
user root |
| 24 |
env.aticonfexec - Location of aticonfig executable. |
| 25 |
env.warning - Warning temperature |
| 26 |
env.critical - Critical temperature |
| 27 |
|
| 28 |
=head2 DEFAULT CONFIGURATION |
| 29 |
|
| 30 |
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig. |
| 31 |
|
| 32 |
=head2 EXAMPLE WILDCARD USAGE |
| 33 |
|
| 34 |
C<ln -s /usr/share/munin/plugins/amd_gpu_ /etc/munin/plugins/amd_gpu_temp> |
| 35 |
|
| 36 |
...will monitor the temperature of available AMD GPUs. |
| 37 |
|
| 38 |
=head1 AUTHOR |
| 39 |
|
| 40 |
Nuno Fachada |
| 41 |
faken@fakenmc.com |
| 42 |
|
| 43 |
=head1 LICENSE |
| 44 |
|
| 45 |
GNU General Public License, version 2 |
| 46 |
http://www.gnu.org/licenses/gpl-2.0.html |
| 47 |
|
| 48 |
=head1 MAGIC MARKERS |
| 49 |
|
| 50 |
#%# family=auto |
| 51 |
#%# capabilities=autoconf suggest |
| 52 |
|
| 53 |
=cut |
| 54 |
|
| 55 |
# Determine name of parameter to monitor: the name this script was invoked
# under (normally a symlink such as amd_gpu_temp) with its directory part
# and the "amd_gpu_" prefix removed. Parameter expansion is used instead of
# basename/sed so that a path containing spaces is handled correctly.
name=${0##*/}
name=${name#amd_gpu_}

# Get location of aticonfig executable or use default
atiConfigExec=${aticonfexec:-/usr/bin/aticonfig}
|
| 60 |
|
| 61 |
# Print the autoconf answer on stdout: "yes" when the path stored in
# atiConfigExec exists and is executable, otherwise "no (<reason>)".
# The path is quoted so that an empty value or a path containing spaces is
# tested as a single operand instead of confusing the test builtin.
amd_gpu_autoconf() {
  if [ -x "$atiConfigExec" ]; then
    echo yes
  else
    echo "no (aticonfig executable not found)"
  fi
}

# Check if autoconf was requested
if [ "$1" = "autoconf" ]; then
  # The exit status is 0 for both answers; the verdict is in the output.
  amd_gpu_autoconf
  exit 0
fi
| 72 |
|
| 73 |
# Check if suggest was requested: print one supported wildcard suffix per
# line, then stop.
case "$1" in
  suggest)
    printf '%s\n' temp clocks fan load vcore
    exit 0
    ;;
esac
| 82 |
|
| 83 |
# Get number of GPUs
nGpusOutput=$("$atiConfigExec" --list-adapters)

nGpus=$(echo "$nGpusOutput" | wc -l)
nGpus=$((nGpus - 2)) # Last two lines don't matter
# Use <= 0 rather than == 0: when aticonfig prints nothing (or a single
# line), the subtraction above yields a negative count, which must also be
# treated as "no GPUs".
if [ "$nGpus" -le 0 ]; then
  # Exit if no GPUs found
  echo "No AMD GPUs detected. Exiting."
  exit 1
fi
| 94 |
|
| 95 |
# Print the display name of one adapter, taken from its line in the
# "--list-adapters" output (global nGpusOutput) with the leading "*" marker
# and the second column removed. For a line "* 0. <col2> <model>" this
# gives "0. <model>".
#   $1 - zero-based adapter index
# NOTE(review): assumes adapter N is listed on line N+1 of the output, the
# same layout the GPU count (line count minus two) already relies on, and
# that the second column is an identifier not wanted in labels - confirm
# against real aticonfig output.
amd_gpu_name() {
  local line idx rest
  line=$(printf '%s\n' "$nGpusOutput" | sed -n "$(( $1 + 1 ))p")
  # Strip the leading run of "*" and spaces.
  line=${line#"${line%%[!* ]*}"}
  # First word is kept, second word is dropped, the remainder is kept.
  read -r idx _ rest <<< "$line"
  printf '%s\n' "${idx}${rest:+ $rest}"
}

# Print the ".info" and ".label" config lines of one data field.
#   $1 - field name (e.g. "fan0")
#   $2 - text used in front of "information for <gpu>"
#   $3 - text used in front of "(<gpu>)" in the label
#   $4 - GPU display name
amd_gpu_field_config() {
  echo "$1.info $2 information for $4"
  echo "$1.label $3 ($4)"
}

# Print the complete munin "config" output for one graph.
#   $1 - graph name: temp, clocks, fan, load or vcore
# Reads the globals nGpus, nGpusOutput, atiConfigExec, warning, critical.
# Returns 1, printing nothing, when the graph name is not supported.
amd_gpu_print_config() {
  local graph=$1 idx gpuName element maxclock

  # Graph-wide attributes.
  case "$graph" in
    temp)
      echo 'graph_title GPU temperature'
      echo 'graph_args -l 0 -u 120'
      echo 'graph_vlabel Degrees (C)'
      echo 'graph_category gpu'
      echo "graph_info Temperature information for AMD GPUs"
      ;;
    clocks)
      # The upper graph limit is the largest number on the "Peak Range"
      # line. The numbers arrive one per line, so they are read in a loop;
      # a single "read -a" from a here-string only sees the first line on
      # bash versions that do not word-split here-strings.
      maxclock=0
      while read -r element; do
        if [ "$element" -gt "$maxclock" ]; then
          maxclock=$element
        fi
      done < <("$atiConfigExec" --odgc | grep "Peak Range" | grep -o "[0-9]*")
      echo 'graph_title GPU clock'
      echo "graph_args -l 0 -u $maxclock"
      echo 'graph_vlabel MHz'
      echo 'graph_category gpu'
      echo "graph_info Core and memory clock info for AMD GPUs"
      ;;
    fan)
      echo 'graph_title GPU fan speed'
      echo 'graph_args -l 0 -u 100'
      echo 'graph_vlabel Percentage'
      echo 'graph_category gpu'
      echo "graph_info Fan speed of AMD GPUs"
      ;;
    load)
      echo 'graph_title GPU load'
      echo 'graph_args -l 0 -u 100'
      echo 'graph_vlabel Percentage'
      echo 'graph_category gpu'
      echo "graph_info GPU load"
      ;;
    vcore)
      echo 'graph_title GPU core voltage'
      echo 'graph_vlabel mV'
      echo 'graph_category gpu'
      echo "graph_info GPU core voltage"
      ;;
    *)
      return 1
      ;;
  esac

  # Per-GPU data fields; field names carry the adapter index as a suffix.
  idx=0
  while [ "$idx" -lt "$nGpus" ]; do
    gpuName=$(amd_gpu_name "$idx")
    case "$graph" in
      temp)
        echo "temp${idx}.warning ${warning:-75}"
        echo "temp${idx}.critical ${critical:-95}"
        amd_gpu_field_config "temp${idx}" "Temperature" "Temperature" "$gpuName"
        ;;
      clocks)
        amd_gpu_field_config "memclock${idx}" "Memory clock" "Memory clock" "$gpuName"
        amd_gpu_field_config "coreclock${idx}" "Core clock" "Core clock" "$gpuName"
        ;;
      fan)
        amd_gpu_field_config "fan${idx}" "Fan speed" "Fan speed" "$gpuName"
        ;;
      load)
        amd_gpu_field_config "load${idx}" "Load" "Load" "$gpuName"
        ;;
      vcore)
        amd_gpu_field_config "vcore${idx}" "Vcore" "Core voltage" "$gpuName"
        ;;
    esac
    idx=$((idx + 1))
  done
  return 0
}

# Check if config was requested
if [ "$1" = "config" ]; then
  # Configure graph depending on which quantity will be plotted
  if ! amd_gpu_print_config "$name"; then
    echo "Can't run without a proper symlink. Exiting."
    echo "Try running munin-node-configure --suggest."
    exit 1
  fi
  exit 0
fi
| 197 |
|
| 198 |
# Print the munin "<field>.value" line(s) of one adapter for one graph.
#   $1 - graph name: temp, clocks, fan, load or vcore
#   $2 - zero-based adapter index, passed to aticonfig as --adapter=<index>
# Reads the global atiConfigExec.
# When nothing can be parsed from the aticonfig output the value is
# reported as "U" (munin's marker for an unknown value) instead of an
# empty string.
# Returns 1, printing nothing, when the graph name is not supported.
amd_gpu_print_values() {
  local graph=$1 idx=$2 raw coreClock memClock

  case "$graph" in
    temp)
      raw=$("$atiConfigExec" --adapter="$idx" --odgt | grep "Sensor 0: Temperature" | grep -o "[0-9]*\.[0-9]*")
      echo "temp${idx}.value ${raw:-U}"
      ;;
    clocks)
      # The first number on the "Current Clocks" line is reported as the
      # core clock, the second one as the memory clock.
      raw=$("$atiConfigExec" --adapter="$idx" --odgc | grep "Current Clocks" | grep -o "[0-9]*")
      coreClock=$(echo "$raw" | sed -n 1p)
      echo "coreclock${idx}.value ${coreClock:-U}"
      memClock=$(echo "$raw" | sed -n 2p)
      echo "memclock${idx}.value ${memClock:-U}"
      ;;
    fan)
      raw=$("$atiConfigExec" --adapter="$idx" --pplib-cmd "get fanspeed 0" | grep "Fan Speed" | grep -o "[0-9]*")
      echo "fan${idx}.value ${raw:-U}"
      ;;
    load)
      raw=$("$atiConfigExec" --adapter="$idx" --odgc | grep "GPU load" | grep -o "[0-9]*")
      echo "load${idx}.value ${raw:-U}"
      ;;
    vcore)
      raw=$("$atiConfigExec" --adapter="$idx" --pplib-cmd "get activity" | grep "VDDC" | grep -o "[0-9]*")
      echo "vcore${idx}.value ${raw:-U}"
      ;;
    *)
      return 1
      ;;
  esac
  return 0
}

# Get and print requested value for all available GPUs
export DISPLAY=:0
nGpusCounter=0
while [ "$nGpusCounter" -lt "${nGpus:-0}" ]; do
  if ! amd_gpu_print_values "$name" "$nGpusCounter"; then
    echo "Can't run without a proper symlink. Exiting."
    echo "Try running munin-node-configure --suggest."
    exit 1
  fi
  nGpusCounter=$((nGpusCounter + 1))
done
| 235 |
|
| 236 |
# TODO Follow multigraph suggestion from Flameeyes to look into multigraph plugins http://munin-monitoring.org/wiki/MultigraphSampleOutput, in order to reduce the amount of round trips to get the data. |
| 237 |
|
| 238 |
|
| 239 |
|
| 240 |
|
