root / plugins / gpu / amd_gpu_ @ 17f78427
Historique | Voir | Annoter | Télécharger (7 ko)
| 1 |
#!/bin/bash |
|---|---|
| 2 |
# -*- bash -*- |
| 3 |
|
| 4 |
: << =cut |
| 5 |
|
| 6 |
=head1 NAME |
| 7 |
|
| 8 |
amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility, |
| 9 |
usually bundled with AMD GPU driver, to obtain information. To use this |
| 10 |
plugin you have to make sure aticonfig will run without an active X |
| 11 |
server (i.e. without anyone being logged in via the GUI). For more |
| 12 |
information about this issue visit the link below: |
| 13 |
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/ |
| 14 |
|
| 15 |
=head1 CONFIGURATION |
| 16 |
|
| 17 |
This is a wildcard plugin. The part of the symlink name that follows the |
| 18 |
"amd_gpu_" prefix selects the value to monitor (temp, clocks, fan, load or vcore). |
| 19 |
|
| 20 |
This plugin uses the following configuration variables: |
| 21 |
|
| 22 |
[amd_gpu_*] |
| 23 |
user root |
| 24 |
env.aticonfexec - Location of aticonfig executable. |
| 25 |
env.warning - Warning temperature |
| 26 |
env.critical - Critical temperature |
| 27 |
|
| 28 |
=head2 DEFAULT CONFIGURATION |
| 29 |
|
| 30 |
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig and |
| 31 |
assume warning and critical temperatures of 75 and 95 degrees Celsius, respectively. |
| 32 |
|
| 33 |
=head2 EXAMPLE WILDCARD USAGE |
| 34 |
|
| 35 |
C<ln -s /usr/share/munin/plugins/amd_gpu_ /etc/munin/plugins/amd_gpu_temp> |
| 36 |
|
| 37 |
...will monitor the temperature of available AMD GPUs. |
| 38 |
|
| 39 |
=head1 TODO |
| 40 |
|
| 41 |
=over 4 |
| 42 |
|
| 43 |
=item * |
| 44 |
|
| 45 |
Use multigraphs for multiple GPUs (http://munin-monitoring.org/wiki/MultigraphSampleOutput). |
| 46 |
|
| 47 |
=back |
| 48 |
|
| 49 |
=head1 AUTHOR |
| 50 |
|
| 51 |
Nuno Fachada |
| 52 |
faken@fakenmc.com |
| 53 |
|
| 54 |
=head1 LICENSE |
| 55 |
|
| 56 |
GNU General Public License, version 2 |
| 57 |
http://www.gnu.org/licenses/gpl-2.0.html |
| 58 |
|
| 59 |
=head1 MAGIC MARKERS |
| 60 |
|
| 61 |
#%# family=auto |
| 62 |
#%# capabilities=autoconf suggest |
| 63 |
|
| 64 |
=cut |
| 65 |
|
#######################################
# Derive the monitored parameter from the name the plugin was invoked under.
# Arguments: $1 - path (or bare name) of the invoked plugin / symlink
# Outputs:   $1 with any leading directories and a leading "amd_gpu_" removed
#######################################
plugin_param_name() {
  # Parameter expansion instead of `basename $0 | sed`: no subprocesses, and
  # paths containing whitespace are handled correctly.
  local invoked_as=${1##*/}
  printf '%s\n' "${invoked_as#amd_gpu_}"
}

# Determine name of parameter to monitor
name=$(plugin_param_name "$0")

# Get location of aticonfig executable (variable "aticonfexec") or use default
atiConfigExec=${aticonfexec:-/usr/bin/aticonfig}
| 71 |
|
#######################################
# Print the autoconf answer for a given aticonfig location.
# Arguments: $1 - path of the aticonfig executable to check
# Outputs:   "yes" if $1 exists and is executable, otherwise
#            "no (aticonfig executable not found)"
#######################################
autoconf_answer() {
  # The path is quoted so that a location containing whitespace is tested as
  # a single word instead of breaking the test expression.
  if [[ -x "$1" ]]; then
    echo yes
  else
    echo "no (aticonfig executable not found)"
  fi
}

# Check if autoconf was requested
if [[ "${1:-}" == "autoconf" ]]; then
  # Autoconf only returns yes if aticonfig exists and is executable;
  # the exit status is 0 for both answers.
  autoconf_answer "$atiConfigExec"
  exit 0
fi
| 83 |
|
#######################################
# List the wildcard suffixes this plugin can be linked as.
# Outputs: one parameter name per line
#######################################
print_suggestions() {
  printf '%s\n' temp clocks fan load vcore
}

# Check if suggest was requested
if [[ "${1:-}" == "suggest" ]]; then
  print_suggestions
  exit 0
fi
| 93 |
|
# Get number of GPUs: capture the adapter listing once (it is reused later to
# look up adapter names) and count its lines.
nGpusOutput=$("$atiConfigExec" --list-adapters)

nGpus=$(printf '%s\n' "$nGpusOutput" | wc -l)
nGpus=$((nGpus - 2)) # Last two lines don't matter

# "-le" rather than "-eq": when aticonfig prints nothing (missing binary,
# failed query) the count above comes out negative, and the plugin must stop
# in that case as well instead of carrying on with no adapters.
if [ "$nGpus" -le 0 ]; then
  # Exit if no GPUs found
  echo "No AMD GPUs detected. Exiting."
  exit 1
fi
| 104 |
|
#######################################
# Extract the name of one adapter from the adapter listing.
# Arguments: $1 - adapter listing text (as stored in nGpusOutput)
#            $2 - adapter index
# Outputs:   the third "."-separated field of the line containing
#            " <index>. ", with a leading "<digits> " removed
#######################################
amd_gpu_adapter_name() {
  # Plain spaces in the patterns: the backslash-escaped spaces used before are
  # unnecessary and newer GNU grep versions warn about them.
  printf '%s\n' "$1" \
    | grep " $2\. " \
    | cut -f 3 -d "." \
    | sed -E 's/^[0-9]+ //'
}

#######################################
# Print the munin graph configuration for one monitored quantity.
# Globals:   nGpus, nGpusOutput, atiConfigExec, warning, critical (all read)
# Arguments: $1 - quantity to plot (temp, clocks, fan, load or vcore)
# Outputs:   graph_* attributes followed by per-adapter field attributes
# Returns:   0 on success, 1 (after printing a hint) for an unknown quantity
#######################################
print_graph_config() {
  local metric=$1
  local gpu gpuName peak maxclock

  # Configure graph depending on which quantity will be plotted
  case "$metric" in
    temp)
      echo 'graph_title GPU temperature'
      echo 'graph_args -l 20 -u 120'
      echo 'graph_vlabel Degrees (C)'
      echo 'graph_category sensors'
      echo "graph_info Temperature information for AMD GPUs"
      for (( gpu = 0; gpu < nGpus; gpu++ )); do
        gpuName=$(amd_gpu_adapter_name "$nGpusOutput" "$gpu")
        echo "temp${gpu}.warning ${warning:-75}"
        echo "temp${gpu}.critical ${critical:-95}"
        echo "temp${gpu}.info Temperature information for $gpuName"
        echo "temp${gpu}.label Temperature ($gpuName)"
      done
      ;;
    clocks)
      # First determine the largest number on the "Peak Range" line(s)...
      # The numbers arrive one per line, so they are consumed line by line; a
      # single `read -a` from a here-string only sees the first line on
      # bash >= 4.4, which made the upper graph limit too small.
      maxclock=0
      while read -r peak; do
        if [ "$peak" -gt "$maxclock" ]; then
          maxclock=$peak
        fi
      done < <("$atiConfigExec" --odgc | grep "Peak Range" | grep -oE "[0-9]+")
      # ...then output config data.
      echo 'graph_title GPU clock'
      echo "graph_args -l 0 -u $maxclock"
      echo 'graph_vlabel MHz'
      echo 'graph_category htc'
      echo "graph_info Core and memory clock info for AMD GPUs"
      for (( gpu = 0; gpu < nGpus; gpu++ )); do
        gpuName=$(amd_gpu_adapter_name "$nGpusOutput" "$gpu")
        echo "memclock${gpu}.info Memory clock information for $gpuName"
        echo "memclock${gpu}.label Memory clock ($gpuName)"
        echo "coreclock${gpu}.info Core clock information for $gpuName"
        echo "coreclock${gpu}.label Core clock ($gpuName)"
      done
      ;;
    fan)
      echo 'graph_title GPU fan speed'
      echo 'graph_args -l 0 -u 100'
      echo 'graph_vlabel Percentage'
      echo 'graph_category sensors'
      echo "graph_info Fan speed of AMD GPUs"
      for (( gpu = 0; gpu < nGpus; gpu++ )); do
        gpuName=$(amd_gpu_adapter_name "$nGpusOutput" "$gpu")
        echo "fan${gpu}.info Fan speed information for $gpuName"
        echo "fan${gpu}.label Fan speed ($gpuName)"
      done
      ;;
    load)
      echo 'graph_title GPU load'
      echo 'graph_args -l 0 -u 100'
      echo 'graph_vlabel Percentage'
      echo 'graph_category htc'
      echo "graph_info GPU load"
      for (( gpu = 0; gpu < nGpus; gpu++ )); do
        gpuName=$(amd_gpu_adapter_name "$nGpusOutput" "$gpu")
        echo "load${gpu}.info Load information for $gpuName"
        echo "load${gpu}.label Load ($gpuName)"
      done
      ;;
    vcore)
      echo 'graph_title GPU core voltage'
      echo 'graph_vlabel mV'
      echo 'graph_category sensors'
      echo "graph_info GPU core voltage"
      for (( gpu = 0; gpu < nGpus; gpu++ )); do
        gpuName=$(amd_gpu_adapter_name "$nGpusOutput" "$gpu")
        echo "vcore${gpu}.info Vcore information for $gpuName"
        echo "vcore${gpu}.label Core voltage ($gpuName)"
      done
      ;;
    *)
      echo "Can't run without a proper symlink. Exiting."
      echo "Try running munin-node-configure --suggest."
      return 1
      ;;
  esac

  return 0
}

# Check if config was requested
if [[ "${1:-}" == "config" ]]; then
  print_graph_config "$name" || exit 1
  exit 0
fi
| 207 |
|
#######################################
# Query aticonfig for one quantity of one adapter and print it in munin's
# "<field>.value <number>" form.
# Globals:   atiConfigExec (read)
# Arguments: $1 - quantity (temp, clocks, fan, load or vcore)
#            $2 - adapter index passed to aticonfig via --adapter
# Outputs:   one value line (two for clocks: core first, then memory); a
#            reading that could not be parsed is reported as "U", munin's
#            marker for an unknown value, instead of an empty value
# Returns:   0 on success, 1 (after printing a hint) for an unknown quantity
#######################################
print_gpu_values() {
  local metric=$1 gpu=$2
  local value coreClock memClock

  case "$metric" in
    temp)
      value=$("$atiConfigExec" --adapter="$gpu" --odgt \
        | grep "Sensor 0: Temperature" | grep -o "[0-9]*\.[0-9]*")
      echo "temp${gpu}.value ${value:-U}"
      ;;
    clocks)
      value=$("$atiConfigExec" --adapter="$gpu" --odgc \
        | grep "Current Clocks" | grep -o "[0-9]*")
      # The first number found is the core clock, the second the memory clock.
      coreClock=$(sed -n 1p <<< "$value")
      memClock=$(sed -n 2p <<< "$value")
      echo "coreclock${gpu}.value ${coreClock:-U}"
      echo "memclock${gpu}.value ${memClock:-U}"
      ;;
    fan)
      value=$("$atiConfigExec" --adapter="$gpu" --pplib-cmd "get fanspeed 0" \
        | grep "Fan Speed" | grep -o "[0-9]*")
      echo "fan${gpu}.value ${value:-U}"
      ;;
    load)
      value=$("$atiConfigExec" --adapter="$gpu" --odgc \
        | grep "GPU load" | grep -o "[0-9]*")
      echo "load${gpu}.value ${value:-U}"
      ;;
    vcore)
      value=$("$atiConfigExec" --adapter="$gpu" --pplib-cmd "get activity" \
        | grep "VDDC" | grep -o "[0-9]*")
      echo "vcore${gpu}.value ${value:-U}"
      ;;
    *)
      echo "Can't run without a proper symlink. Exiting."
      echo "Try running munin-node-configure --suggest."
      return 1
      ;;
  esac

  return 0
}

# Get and print requested value for all available GPUs.
# DISPLAY is exported so the aticonfig child processes started below see it.
export DISPLAY=:0
for (( nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++ )); do
  print_gpu_values "$name" "$nGpusCounter" || exit 1
done
| 245 |
|
| 246 |
|
