Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / gpu / amd_gpu_ @ 17f78427

Historique | Voir | Annoter | Télécharger (7 ko)

1
#!/bin/bash
2
# -*- bash -*-
3

    
4
: << =cut
5

    
6
=head1 NAME
7

    
8
amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility,
9
usually bundled with AMD GPU driver, to obtain information. To use this
10
plugin you have to make sure aticonfig will run without an active X
11
server (i.e. without anyone being logged in via the GUI). For more
12
information about this issue visit the link below:
13
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/
14

    
15
=head1 CONFIGURATION
16

    
17
This is a wildcard plugin. The suffix of the symlink name (the part after
18
"amd_gpu_") selects the value to monitor: temp, clocks, fan, load or vcore.
19

    
20
This plugin uses the following configuration variables:
21

    
22
 [amd_gpu_*]
23
  user root
24
  env.aticonfexec - Location of aticonfig executable.
25
  env.warning - Warning temperature
26
  env.critical - Critical temperature
27

    
28
=head2 DEFAULT CONFIGURATION
29

    
30
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig and
31
assume warning and critical temperatures of 75 and 95 degrees celsius, respectively.
32

    
33
=head2 EXAMPLE WILDCARD USAGE
34

    
35
C<ln -s /usr/share/munin/plugins/amd_gpu_ /etc/munin/plugins/amd_gpu_temp>
36

    
37
...will monitor the temperature of available AMD GPUs.
38

    
39
=head1 TODO
40

    
41
=over 4
42

    
43
=item *
44

    
45
Use multigraphs for multiple GPUs (http://munin-monitoring.org/wiki/MultigraphSampleOutput).
46

    
47
=back
48

    
49
=head1 AUTHOR
50

    
51
Nuno Fachada
52
faken@fakenmc.com
53

    
54
=head1 LICENSE
55

    
56
 GNU General Public License, version 2
57
 http://www.gnu.org/licenses/gpl-2.0.html
58

    
59
=head1 MAGIC MARKERS
60

    
61
 #%# family=auto
62
 #%# capabilities=autoconf suggest
63

    
64
=cut
65

    
66
# Determine name of parameter to monitor: the file name this script was
# invoked as, with the leading "amd_gpu_" removed (a symlink called
# amd_gpu_temp yields "temp"). Parameter expansion is used instead of
# basename/sed so that a path containing whitespace is handled correctly.
name=${0##*/}
name=${name#amd_gpu_}

# Get location of aticonfig executable (env.aticonfexec) or use default
atiConfigExec=${aticonfexec:-/usr/bin/aticonfig}
71

    
72
# Check if autoconf was requested
if [ "$1" = "autoconf" ]; then
	# Autoconf only returns yes if aticonfig exists and is executable.
	# The path is quoted so a location containing whitespace is tested as one word.
	if [ -x "$atiConfigExec" ]; then
		echo yes
	else
		echo "no (aticonfig executable not found)"
	fi
	# The exit status is 0 in both cases; the answer is the yes/no line itself.
	exit 0
fi
83

    
84
# Check if suggest was requested: print every wildcard suffix this plugin
# can be linked as, one per line, then stop.
case "$1" in
	suggest)
		printf '%s\n' temp clocks fan load vcore
		exit 0
		;;
esac
93

    
94
# Get number of GPUs
# The raw adapter listing is kept in nGpusOutput because the config step
# extracts the adapter names from it.
nGpusOutput=$("$atiConfigExec" --list-adapters)

nGpus=$(echo "$nGpusOutput" | wc -l)
nGpus=$((nGpus - 2)) # Last two lines don't matter
# Empty output (for example when aticonfig is missing or fails) counts as a
# single line, which gives a negative result here; treat anything below one
# as "no GPUs" instead of continuing with a bogus count.
if [ "$nGpus" -lt 1 ]; then
	# Exit if no GPUs found; the diagnostic goes to stderr so it is not
	# mistaken for plugin data on stdout.
	echo "No AMD GPUs detected. Exiting." >&2
	exit 1
fi
104

    
105
# Print the adapter name listed for adapter index $1 in the cached
# "aticonfig --list-adapters" output held in $nGpusOutput.
gpu_name() {
	echo "$nGpusOutput" | grep " $1\. " | cut -f 3 -d "." | sed -r 's/^[0-9]+ //'
}

# Read "aticonfig --odgc" output on stdin and print the largest number that
# appears on its "Peak Range" line(s); prints 0 when no number is found.
# The numbers are consumed line by line, so the result does not depend on
# how the running bash version word-splits here-strings.
max_peak_clock() {
	grep "Peak Range" | grep -o "[0-9]*" | {
		peak=0
		while read -r candidate; do
			if [ "$candidate" -gt "$peak" ]; then
				peak=$candidate
			fi
		done
		echo "$peak"
	}
}

# Check if config was requested
if [ "$1" = "config" ]; then

	# Configure graph depending on which quantity will be plotted
	case "$name" in
		temp)
			echo 'graph_title GPU temperature'
			echo 'graph_args -l 20 -u 120'
			echo 'graph_vlabel Degrees (C)'
			echo 'graph_category sensors'
			echo "graph_info Temperature information for AMD GPUs"
			for ((gpu = 0; gpu < nGpus; gpu++)); do
				gpuName=$(gpu_name "$gpu")
				# Thresholds come from env.warning / env.critical (defaults 75 / 95)
				echo "temp${gpu}.warning ${warning:-75}"
				echo "temp${gpu}.critical ${critical:-95}"
				echo "temp${gpu}.info Temperature information for $gpuName"
				echo "temp${gpu}.label Temperature ($gpuName)"
			done
			;;
		clocks)
			# First determine the highest configurable peak clock (aticonfig is
			# invoked without --adapter here) to use as the graph's upper limit...
			maxclock=$("$atiConfigExec" --odgc | max_peak_clock)
			# ...then output config data.
			echo 'graph_title GPU clock'
			echo "graph_args -l 0 -u $maxclock"
			echo 'graph_vlabel MHz'
			echo 'graph_category htc'
			echo "graph_info Core and memory clock info for AMD GPUs"
			for ((gpu = 0; gpu < nGpus; gpu++)); do
				gpuName=$(gpu_name "$gpu")
				echo "memclock${gpu}.info Memory clock information for $gpuName"
				echo "memclock${gpu}.label Memory clock ($gpuName)"
				echo "coreclock${gpu}.info Core clock information for $gpuName"
				echo "coreclock${gpu}.label Core clock ($gpuName)"
			done
			;;
		fan)
			echo 'graph_title GPU fan speed'
			echo 'graph_args -l 0 -u 100'
			echo 'graph_vlabel Percentage'
			echo 'graph_category sensors'
			echo "graph_info Fan speed of AMD GPUs"
			for ((gpu = 0; gpu < nGpus; gpu++)); do
				gpuName=$(gpu_name "$gpu")
				echo "fan${gpu}.info Fan speed information for $gpuName"
				echo "fan${gpu}.label Fan speed ($gpuName)"
			done
			;;
		load)
			echo 'graph_title GPU load'
			echo 'graph_args -l 0 -u 100'
			echo 'graph_vlabel Percentage'
			echo 'graph_category htc'
			echo "graph_info GPU load"
			for ((gpu = 0; gpu < nGpus; gpu++)); do
				gpuName=$(gpu_name "$gpu")
				echo "load${gpu}.info Load information for $gpuName"
				echo "load${gpu}.label Load ($gpuName)"
			done
			;;
		vcore)
			echo 'graph_title GPU core voltage'
			echo 'graph_vlabel mV'
			echo 'graph_category sensors'
			echo "graph_info GPU core voltage"
			for ((gpu = 0; gpu < nGpus; gpu++)); do
				gpuName=$(gpu_name "$gpu")
				echo "vcore${gpu}.info Vcore information for $gpuName"
				echo "vcore${gpu}.label Core voltage ($gpuName)"
			done
			;;
		*)
			# The link name did not end in one of the supported suffixes
			echo "Can't run without a proper symlink. Exiting."
			echo "Try running munin-node-configure --suggest."
			exit 1
			;;
	esac

	exit 0
fi
207

    
208
# Emit the "<field>.value" line(s) of the selected quantity for every GPU.
# DISPLAY is exported as :0 before aticonfig is queried.
export DISPLAY=:0
for ((gpu = 0; gpu < nGpus; gpu++)); do
	case $name in
		temp)
			# Decimal number taken from the "Sensor 0: Temperature" line
			reading=$($atiConfigExec --adapter=$gpu --odgt | grep "Sensor 0: Temperature" | grep -o "[0-9]*\.[0-9]*")
			echo "temp${gpu}.value $reading"
			;;
		clocks)
			# Numbers from the "Current Clocks" line, one per line:
			# the first is reported as core clock, the second as memory clock
			reading=$($atiConfigExec --adapter=$gpu --odgc | grep "Current Clocks" | grep -o "[0-9]*")
			echo "coreclock${gpu}.value $(sed -n 1p <<< "$reading")"
			echo "memclock${gpu}.value $(sed -n 2p <<< "$reading")"
			;;
		fan)
			reading=$($atiConfigExec --adapter=$gpu --pplib-cmd "get fanspeed 0" | grep "Fan Speed" | grep -o "[0-9]*")
			echo "fan${gpu}.value $reading"
			;;
		load)
			reading=$($atiConfigExec --adapter=$gpu --odgc | grep "GPU load" | grep -o "[0-9]*")
			echo "load${gpu}.value $reading"
			;;
		vcore)
			reading=$($atiConfigExec --adapter=$gpu --pplib-cmd "get activity" | grep "VDDC" | grep -o "[0-9]*")
			echo "vcore${gpu}.value $reading"
			;;
		*)
			# Unknown suffix in the link name: report it and abort
			echo "Can't run without a proper symlink. Exiting."
			echo "Try running munin-node-configure --suggest."
			exit 1
			;;
	esac
done
245

    
246