Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / gpu / amd_gpu_ @ 10b1de81

Historique | Voir | Annoter | Télécharger (6,93 ko)

1 426bba44 Nuno Fachada
#!/bin/bash
2
# -*- bash -*-
3
4
: << =cut
5
6
=head1 NAME
7
8
amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility,
9
usually bundled with AMD GPU driver, to obtain information. To use this
10
plugin you have to make sure aticonfig will run without an active X
11
server (i.e. without anyone being logged in via the GUI). For more 
12 10b1de81 Nuno Fachada
information about this issue visit the link below: 
13 426bba44 Nuno Fachada
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/
14
15
=head1 CONFIGURATION
16
17
This is a wildcard plugin. The suffix of the symlink name (the part after
17
"amd_gpu_") selects the value to monitor: temp, clocks, fan, load or vcore.
19
20
This plugin uses the following configuration variables:
21
22
 [amd_gpu_*]
23
  user root
24 10b1de81 Nuno Fachada
  env.aticonfexec - Location of aticonfig executable.
25
  env.warning - Warning temperature
26
  env.critical - Critical temperature
27 426bba44 Nuno Fachada
28
=head2 DEFAULT CONFIGURATION
29
30
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig.
31
32
=head2 EXAMPLE WILDCARD USAGE
33
34
C<ln -s /usr/share/munin/plugins/amd_gpu_ /etc/munin/plugins/amd_gpu_temp>
35
36
...will monitor the temperature of available AMD GPUs.
37
38
=head1 AUTHOR
39
40
Nuno Fachada
41
faken@fakenmc.com
42
43
=head1 LICENSE
44
45
 GNU General Public License, version 2
46
 http://www.gnu.org/licenses/gpl-2.0.html 
47
48
=head1 MAGIC MARKERS
49
50
 #%# family=auto
51
 #%# capabilities=autoconf suggest
52
53
=cut
54
55
# Determine name of parameter to monitor: the part of the link name that
# follows the "amd_gpu_" prefix (e.g. "temp" for a link named amd_gpu_temp).
# Parameter expansion replaces the former unquoted `basename $0 | sed ...`
# pipeline, which word-split any path containing whitespace.
name=${0##*/}
name=${name#amd_gpu_}

# Get location of aticonfig executable (env.aticonfexec) or use default
atiConfigExec=${aticonfexec:-/usr/bin/aticonfig}
60
61
# Check if autoconf was requested
if [ "$1" = "autoconf" ]; then
	# Autoconf only returns yes if aticonfig exists and is executable.
	# The path is quoted so that a location containing whitespace is tested
	# as a single file name instead of making the test itself fail.
	# The exit status is 0 for both answers; the answer is the printed text.
	if [ -x "$atiConfigExec" ]; then
		echo yes
	else
		echo "no (aticonfig executable not found)"
	fi
	exit 0
fi
72
73
# Handle the "suggest" request: list every link-name suffix this wildcard
# plugin can monitor, one per line, then stop successfully.
if [ "$1" = "suggest" ]; then
	printf '%s\n' temp clocks fan load vcore
	exit 0
fi
82
83
# Get number of GPUs. The count is the number of lines printed by
# "aticonfig --list-adapters" minus two trailing lines that are not adapters.
nGpusOutput=$("$atiConfigExec" --list-adapters)

nGpus=$(echo "$nGpusOutput" | wc -l)
nGpus=$((nGpus - 2)) # Last two lines don't matter

# If aticonfig is missing, fails or prints nothing, the count above is
# negative (1 - 2 = -1), so anything at or below zero means "no GPUs".
if [ "$nGpus" -le 0 ]; then
	# Exit if no GPUs found
	echo "No AMD GPUs detected. Exiting."
	exit 1
fi
94
95
# Check if config was requested
if [ "$1" = "config" ]; then

	# gpu_name INDEX
	# Print "<index>. <adapter name>" for the adapter with the given index,
	# taken from the adapter listing stored in $nGpusOutput. Listing lines are
	# assumed to look like "* 0. 01:00.0 Name" / "  1. 02:00.0 Name" (optional
	# default-adapter marker, index, bus id, name) -- TODO confirm against the
	# installed aticonfig. Falls back to "GPU <index>" when no line matches,
	# so a label is never empty.
	gpu_name() {
		local desc
		desc=$(echo "$nGpusOutput" |
			sed -n "s/^[* ]*$1\.[[:space:]]*[^[:space:]]*[[:space:]]*/$1. /p" |
			head -n 1)
		echo "${desc:-GPU $1}"
	}

	# field_description FIELD INFO LABEL GPUNAME
	# Print the .info and .label lines of one data series.
	field_description() {
		echo "$1.info $2 for $4"
		echo "$1.label $3 ($4)"
	}

	# Configure graph depending on which quantity will be plotted
	case "$name" in
		temp)
			echo 'graph_title GPU temperature'
			echo 'graph_args -l 0 -u 120'
			echo 'graph_vlabel Degrees (C)'
			echo 'graph_category gpu'
			echo "graph_info Temperature information for AMD GPUs"
			for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
				gpuName=$(gpu_name "$nGpusCounter")
				# Thresholds come from env.warning / env.critical when set
				echo "temp${nGpusCounter}.warning ${warning:-75}"
				echo "temp${nGpusCounter}.critical ${critical:-95}"
				field_description "temp${nGpusCounter}" "Temperature information" "Temperature" "$gpuName"
			done
			;;
		clocks)
			# First determine max clock: the largest number found on the
			# "Peak Range" lines. The command substitution is deliberately
			# left unquoted so every number becomes its own loop item; the
			# former `read -a ... <<<` only saw the first line on bash
			# versions that do not word-split here-strings.
			maxclock=0
			for element in $("$atiConfigExec" --odgc | grep "Peak Range" | grep -o "[0-9]*"); do
				if [ "$element" -gt "$maxclock" ]; then
					maxclock=$element
				fi
			done
			# ...then output config data.
			echo 'graph_title GPU clock'
			echo "graph_args -l 0 -u $maxclock"
			echo 'graph_vlabel MHz'
			echo 'graph_category gpu'
			echo "graph_info Core and memory clock info for AMD GPUs"
			for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
				gpuName=$(gpu_name "$nGpusCounter")
				field_description "memclock${nGpusCounter}" "Memory clock information" "Memory clock" "$gpuName"
				field_description "coreclock${nGpusCounter}" "Core clock information" "Core clock" "$gpuName"
			done
			;;
		fan)
			echo 'graph_title GPU fan speed'
			echo 'graph_args -l 0 -u 100'
			echo 'graph_vlabel Percentage'
			echo 'graph_category gpu'
			echo "graph_info Fan speed of AMD GPUs"
			for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
				gpuName=$(gpu_name "$nGpusCounter")
				field_description "fan${nGpusCounter}" "Fan speed information" "Fan speed" "$gpuName"
			done
			;;
		load)
			echo 'graph_title GPU load'
			echo 'graph_args -l 0 -u 100'
			echo 'graph_vlabel Percentage'
			echo 'graph_category gpu'
			echo "graph_info GPU load"
			for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
				gpuName=$(gpu_name "$nGpusCounter")
				field_description "load${nGpusCounter}" "Load information" "Load" "$gpuName"
			done
			;;
		vcore)
			echo 'graph_title GPU core voltage'
			echo 'graph_vlabel mV'
			echo 'graph_category gpu'
			echo "graph_info GPU core voltage"
			for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
				gpuName=$(gpu_name "$nGpusCounter")
				field_description "vcore${nGpusCounter}" "Vcore information" "Core voltage" "$gpuName"
			done
			;;
		*)
			echo "Can't run without a proper symlink. Exiting."
			echo "Try running munin-node-configure --suggest."
			exit 1
			;;
	esac

	exit 0
fi
197
198
# Get and print requested value for all available GPUs.
# Each reading is extracted from aticonfig's text output with grep; when
# nothing can be extracted (aticonfig failed or printed something unexpected)
# the value is reported as "U", Munin's marker for an unknown reading, rather
# than as a field line with no value at all.
# NOTE(review): DISPLAY is presumably needed for aticonfig to reach the X
# server -- confirm.
export DISPLAY=:0
for ((nGpusCounter = 0; nGpusCounter < nGpus; nGpusCounter++)); do
	case "$name" in
		temp)
			value=$("$atiConfigExec" --adapter="$nGpusCounter" --odgt | grep "Sensor 0: Temperature" | grep -o "[0-9]*\.[0-9]*")
			echo "temp${nGpusCounter}.value ${value:-U}"
			;;
		clocks)
			# The "Current Clocks" line yields two numbers: core first,
			# memory second.
			value=$("$atiConfigExec" --adapter="$nGpusCounter" --odgc | grep "Current Clocks" | grep -o "[0-9]*")
			coreClock=$(echo "$value" | sed -n 1p)
			echo "coreclock${nGpusCounter}.value ${coreClock:-U}"
			memClock=$(echo "$value" | sed -n 2p)
			echo "memclock${nGpusCounter}.value ${memClock:-U}"
			;;
		fan)
			value=$("$atiConfigExec" --adapter="$nGpusCounter" --pplib-cmd "get fanspeed 0" | grep "Fan Speed" | grep -o "[0-9]*")
			echo "fan${nGpusCounter}.value ${value:-U}"
			;;
		load)
			value=$("$atiConfigExec" --adapter="$nGpusCounter" --odgc | grep "GPU load" | grep -o "[0-9]*")
			echo "load${nGpusCounter}.value ${value:-U}"
			;;
		vcore)
			value=$("$atiConfigExec" --adapter="$nGpusCounter" --pplib-cmd "get activity" | grep "VDDC" | grep -o "[0-9]*")
			echo "vcore${nGpusCounter}.value ${value:-U}"
			;;
		*)
			echo "Can't run without a proper symlink. Exiting."
			echo "Try running munin-node-configure --suggest."
			exit 1
			;;
	esac
done
235
236 758ca724 Nuno Fachada
# TODO Follow multigraph suggestion from Flameeyes to look into multigraph plugins http://munin-monitoring.org/wiki/MultigraphSampleOutput, in order to reduce the amount of round trips to get the data.
237
238 426bba44 Nuno Fachada
239