-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollectPerformanceData.sh
410 lines (345 loc) · 14.8 KB
/
collectPerformanceData.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#!/bin/bash
# shellcheck shell=bash
## The following is automatically generated code, do not manually modify.
## Template is available in # scripts/commons-templates.sh
## START AUTOGENERATED CODE
# shellcheck disable=SC2034
SCRIPT_VERSION='1662995874'
# Useful variables
# HERE: physical (symlink-resolved via cd -P) directory holding this script.
HERE="$(
  cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# Logging functions
# LOGGING_LEVEL: threshold consulted by __log; CBSUPPORT_LOGGING_LEVEL
# overrides the DEBUG default.
LOGGING_LEVEL="${CBSUPPORT_LOGGING_LEVEL:-DEBUG}"
# Log a message at INFO level.
# Arguments: $1 - message text
log_info() {
  __log 'INFO' "${1}"
}
# Log a message at DEBUG level.
# Arguments: $1 - message text
log_debug() {
  __log 'DEBUG' "${1}"
}
# Log a message at ERROR level.
# Arguments: $1 - message text
log_error() {
  __log 'ERROR' "${1}"
}
# Print a three-line banner: a rule of '#', the title prefixed by "# ",
# and a second rule. Always printed, regardless of LOGGING_LEVEL.
# Arguments: $1 - title text
log_header() {
  local -r rule='##################'
  printf '%s\n' "${rule}" "# ${1}" "${rule}"
}
# Print a horizontal rule made of dashes. Always printed, regardless of
# LOGGING_LEVEL.
log_separator() {
  # %s keeps printf from parsing the leading dashes as options.
  printf '%s\n' '---------------------'
}
# A simpler solution would be to use associative arrays, but we cannot assume bash 4...
# Map a level name to its numeric rank on stdout:
# ERROR -> 2, INFO -> 1, anything else (including DEBUG) -> 0.
# Arguments: $1 - level name
__level_to_int() {
  case "${1}" in
    'ERROR') echo 2 ;;
    'INFO') echo 1 ;;
    *) echo 0 ;;
  esac
}
# Print "[LEVEL] message" on stdout unless LEVEL ranks below LOGGING_LEVEL.
# Globals:   LOGGING_LEVEL (read)
# Arguments: $1 - level name, $2 - message text
# Returns:   0 whether or not the message was printed
__log() {
  local -r severity="${1}"
  local -r message="${2}"
  local rank threshold
  rank="$(__level_to_int "${severity}")"
  threshold="$(__level_to_int "${LOGGING_LEVEL}")"
  if ((rank < threshold)); then
    # Below the configured threshold: stay silent.
    return 0
  fi
  printf '%s\n' "[${severity}] ${message}"
}
# Check that a tool is available and, when it is not, optionally log a
# DEBUG hint.
# Arguments: $1 - command name
#            $2 - command run to decide whether to log; defaults to "true"
#                 (callers are expected to pass true or false)
# Returns:   0 when the tool is installed; otherwise the status of the
#            "$2 && log_debug" chain (non-zero when $2 is false).
check_tool() {
  local -r cmd="${1}"
  local -r verbose="${2:-true}"
  if is_tool_installed "${cmd}"; then
    return 0
  fi
  # Kept in a subshell so that whatever ${verbose} runs cannot affect the caller.
  ("${verbose}" && log_debug "${cmd} is recommended but it's not installed.")
}
# Succeed when the given command name (or path) resolves via `command -v`.
# Produces no output.
# Arguments: $1 - command name or path
is_tool_installed() {
  local -r tool="${1}"
  # Only the exit status matters; discard both stdout and stderr.
  command -v "${tool}" &>/dev/null
}
# Print the invoked script's file name without directory and without a
# trailing ".sh".
script_name() {
  local -r invoked_as="$0"
  basename "${invoked_as}" .sh
}
# Verify that the current user can create files in a directory; exits the
# script with status 1 (after logging an error) when it cannot.
#
# The probe uses a unique, freshly generated file name and relies on touch's
# exit status. A fixed name such as testFile.txt would report "writable" for
# a read-only directory that already contains that file, and removing it
# afterwards would delete a file the script did not create.
#
# Globals:   HERE (read, only for the log message)
# Arguments: $1 - directory to probe
#            $2 - name of the environment variable to mention in the error
# Returns:   0 when writable; exits otherwise (also when pushd/popd fail)
function canWrite() {
  local -r directory="${1}"
  local -r name="${2}"
  local probe=".canWrite.$$.${RANDOM}.tmp"
  log_debug "Moving to ${directory}"
  pushd "${directory}" >/dev/null || exit
  # Never reuse a name that already exists in the directory.
  while [ -e "${probe}" ]; do
    probe=".canWrite.$$.${RANDOM}.tmp"
  done
  #check if the directory can be written to by the user that is running the script, i.e. user
  if touch "${probe}" 2>/dev/null && [ -e "${probe}" ]; then
    log_debug 'This directory can be written to by the script'
  else
    log_error "This directory cannot be written to by the script. Please either run this script from a directory that can be written to or use the optional environment variable: ${name} ."
    exit 1
  fi
  rm -f -- "${probe}"
  log_debug "Moving back to current dir ${HERE}"
  popd >/dev/null || exit
}
## END AUTOGENERATED CODE
####################################################################################
# This script is used to collect data for
# 'RequiredData: Performance, Hang or High CPU Issues for a Java process running on Linux'
#
#####################################################################################
# Print the usage text (arguments and optional environment variables) on
# stdout. Any arguments passed to this function are ignored.
print_help() {
  local prog
  prog="$(basename "$0")"
  cat <<EOM
Unable to find required PID argument. Please rerun the script as follows:
${prog} PID [duration] [frequency]
PID: Java process (Jenkins, CI, CD) PID
duration: Tests duration time in seconds (default 60 seconds)
frequency: Number of seconds that will wait until next data require (default 5 seconds)
Optional environment vars
JAVA_HOME used to locate JDK
JATTACH_HOME path to directory containing jattach (optional: is used only if no JDK is found and jattach is not in the path)
BUSYBOX_HOME path to directory containing busybox (optional)
JAVA_USERID Java userid if this script is run as root instead of the userid running the Java process
PERFORMANCE_DATA_OUTPUT_DIR output dir
In case no JDK is found, the script will try to use jattach: https://github.com/apangin/jattach
Run ${prog} --help to print help.
EOM
}
# Run a command, routing it through ${BUSYBOX_HOME}/busybox when
# BUSYBOX_HOME is non-empty and running it directly otherwise.
# Globals:   BUSYBOX_HOME (read)
# Arguments: the command followed by its arguments
# Returns:   the exit status of the command that was run
busybox_wrapper() {
  if [ -z "${BUSYBOX_HOME}" ]; then
    "$@"
    return
  fi
  "${BUSYBOX_HOME}/busybox" "$@"
}
# Succeed when BUSYBOX_HOME is non-empty, or else when the named tool is
# installed. Note that with BUSYBOX_HOME set the tool itself is not checked.
# Globals:   BUSYBOX_HOME (read)
# Arguments: $1 - command name
busybox_or_tool_installed() {
  local -r tool="${1}"
  if [ -z "${BUSYBOX_HOME}" ]; then
    is_tool_installed "${tool}"
    return
  fi
  return 0
}
# Pre-flight checks: confirm the output directory is writable and, when
# busybox is not in use, log a hint for each recommended tool that is missing.
# Globals:   PERFORMANCE_DATA_OUTPUT_DIR (read), BUSYBOX_HOME (read)
# Arguments: none are used
script_validation() {
  local tool
  log_debug "Script Validation Results"
  canWrite "${PERFORMANCE_DATA_OUTPUT_DIR}" 'PERFORMANCE_DATA_OUTPUT_DIR'
  log_debug "Moving to ${PERFORMANCE_DATA_OUTPUT_DIR}"
  if [ -z "${BUSYBOX_HOME}" ]; then
    for tool in 'top' 'vmstat' 'netstat' 'iostat' 'nfsiostat' 'nfsstat' 'sar'; do
      check_tool "${tool}"
    done
  fi
}
# Render every archived sysstat data file (sa[0-9]*) found in a directory into
# a set of text reports, one sub-directory per data file under
# <output dir>/sysstat/.
#
# Collection is skipped entirely when today's data file (sa<day-of-month>)
# is missing from the directory.
#
# The data file's name is derived from the path's last component, so the
# directory may live at any depth (the previous awk '{print $5}' only worked
# for directories exactly three levels deep such as /var/log/sa). All working
# variables are local, so the caller's TEMP_DIR and friends are not reassigned.
#
# Arguments: $1 - directory holding the sa data files
#            $2 - output root; "$2/sysstat" must already exist
# Outputs:   13 report files per data file
function collect_sar() {
  local -r sa_dir="${1}"
  local -r out_dir="${2}"
  local sa_file sa_name report_dir
  log_info "Taking sar data collection"
  if [ ! -f "${sa_dir}/sa$(date +%d)" ]; then
    log_info "sysstat package installed but no log files exist; Aborting sar collection"
    return
  fi
  for sa_file in "${sa_dir}"/sa[0-9]*; do
    # Skip directories or an unmatched glob pattern.
    [ -f "${sa_file}" ] || continue
    sa_name="${sa_file##*/}"
    report_dir="${out_dir}/sysstat/${sa_name}"
    # -p: tolerate a directory left over from an earlier call.
    mkdir -p "${report_dir}"
    sar -f "${sa_file}" >"${report_dir}/01.sar.$(date +%Y%m%d%H%M%S).out"
    sar -AC --human -f "${sa_file}" >"${report_dir}/02.sar-A.$(date +%Y%m%d%H%M%S).out"
    sar -BC --human -f "${sa_file}" >"${report_dir}/03.sar-B.$(date +%Y%m%d%H%M%S).out"
    sar -bC --human -f "${sa_file}" >"${report_dir}/04.sar-b.$(date +%Y%m%d%H%M%S).out"
    sar -dC --human -f "${sa_file}" >"${report_dir}/05.sar-d.$(date +%Y%m%d%H%M%S).out"
    sar -HC --human -f "${sa_file}" >"${report_dir}/06.sar-H.$(date +%Y%m%d%H%M%S).out"
    sar -C -n ALL --human -f "${sa_file}" >"${report_dir}/07.sar-n.$(date +%Y%m%d%H%M%S).out"
    sar -C -P ALL --human -f "${sa_file}" >"${report_dir}/08.sar-P.$(date +%Y%m%d%H%M%S).out"
    sar -qC --human -f "${sa_file}" >"${report_dir}/09.sar-q.$(date +%Y%m%d%H%M%S).out"
    sar -Cr ALL --human -f "${sa_file}" >"${report_dir}/10.sar-r.$(date +%Y%m%d%H%M%S).out"
    sar -SC --human -f "${sa_file}" >"${report_dir}/11.sar-S.$(date +%Y%m%d%H%M%S).out"
    sar -C -u ALL --human -f "${sa_file}" >"${report_dir}/12.sar-U.$(date +%Y%m%d%H%M%S).out"
    sar -vC --human -f "${sa_file}" >"${report_dir}/13.sar-v.$(date +%Y%m%d%H%M%S).out"
  done
}
# Positional arguments: PID [duration] [frequency].
# duration and frequency are in seconds and default to 60 and 5.
duration=60
frequency=5
if [ $# -eq 1 ]; then
  # A lone "--help" prints the usage text and exits successfully.
  if [ "$1" = "--help" ]; then
    print_help
    exit 0
  fi
  pid="${1}"
elif [ $# -eq 2 ]; then
  pid="${1}"
  duration="${2}"
elif [ $# -eq 3 ]; then
  pid="${1}"
  duration="${2}"
  frequency="${3}"
else
  print_help
  exit 1
fi
# Both values feed an integer comparison and a subtraction in the collection
# loop. Reject anything that is not a positive decimal integer up front:
#   - 0 or a negative frequency would make the loop run forever,
#   - non-numeric input breaks the "-gt" test,
#   - a leading zero (e.g. 08) is read as octal by shell arithmetic.
for __setting in "duration:${duration}" "frequency:${frequency}"; do
  case "${__setting#*:}" in
    '' | *[!0-9]* | 0*)
      log_error "Invalid ${__setting%%:*} '${__setting#*:}': it must be a positive integer number of seconds (no leading zero)."
      exit 1
      ;;
  esac
done
unset __setting
# Default the output directory to the current working directory.
[ -n "${PERFORMANCE_DATA_OUTPUT_DIR}" ] || {
  PERFORMANCE_DATA_OUTPUT_DIR="$(pwd)"
  log_debug "Output dir ${PERFORMANCE_DATA_OUTPUT_DIR}"
}
# it would be simpler to symlink the busybox executable to all the commands but this requires
# root :(
# When BUSYBOX_HOME is given, the busybox binary must actually be there.
if [ -n "${BUSYBOX_HOME}" ]; then
  log_debug "BUSYBOX_HOME is set. Looking for the binary in ${BUSYBOX_HOME}"
  busybox_bin="${BUSYBOX_HOME}/busybox"
  is_tool_installed "${busybox_bin}" || {
    log_error 'It appears busybox is not installed despite specifying a BUSYBOX_HOME'
    exit 1
  }
fi
# Writability and recommended-tool checks (the argument is not used by the callee).
script_validation "${0}"
# Names (or full paths) of the tools able to produce a thread dump.
# They start as bare command names looked up on the PATH.
declare jcmd_bin='jcmd' jstack_bin='jstack' jattach_bin='jattach'
if [ -z "${JAVA_HOME}" ]; then
  log_debug 'JAVA_HOME is NOT set. Looking for a JDK on the PATH.'
else
  # shellcheck disable=SC2016
  log_debug 'JAVA_HOME is set. Looking for JDK tools in ${JAVA_HOME}/bin.'
  jcmd_bin="${JAVA_HOME}/bin/jcmd"
  jstack_bin="${JAVA_HOME}/bin/jstack"
fi
# Fall back to jattach only when neither JDK tool is available; give up when
# jattach cannot be found either.
if ! { is_tool_installed "${jcmd_bin}" || is_tool_installed "${jstack_bin}"; }; then
  log_debug 'jcmd or jstack not found. Looking for jattach'
  if [ -z "${JATTACH_HOME}" ]; then
    log_debug 'JATTACH_HOME is NOT set. Looking for jattach on the PATH.'
  else
    log_debug "JATTACH_HOME is set. Looking for the binary in ${JATTACH_HOME}"
    jattach_bin="${JATTACH_HOME}/jattach"
  fi
  is_tool_installed "${jattach_bin}" || {
    log_error 'Could not find a JDK nor jattach. Either the full Java JDK and jattach are not installed or they are not the path of the user that is running the Java process.'
    exit 1
  }
fi
# Optional "sudo -u <user>" prefix; stored as a plain string and expanded
# unquoted by write_threads so that it splits into separate words.
declare cmd_prefix=''
if [ -n "${JAVA_USERID}" ]; then
  cmd_prefix="sudo -u ${JAVA_USERID}"
  log_debug "user ${JAVA_USERID}"
fi
# Write a thread dump of a Java process to a file using the first available
# tool, tried in this order: jcmd, jstack, jattach.
# Globals:   jcmd_bin, jstack_bin, jattach_bin, cmd_prefix (all read)
# Arguments: $1 - PID of the Java process
#            $2 - path of the file to write
# Returns:   the exit status of the tool that ran; 0 (and no file written)
#            when none of the three tools is installed
write_threads() {
  local -r target_pid="$1"
  local -r dump_file="$2"
  # ${cmd_prefix} is deliberately unquoted: it is either empty or a
  # multi-word prefix that must split into separate words.
  if is_tool_installed "${jcmd_bin}"; then
    ${cmd_prefix} "${jcmd_bin}" "${target_pid}" Thread.print -l >"${dump_file}"
    return
  fi
  if is_tool_installed "${jstack_bin}"; then
    ${cmd_prefix} "${jstack_bin}" -l "${target_pid}" >"${dump_file}"
    return
  fi
  if is_tool_installed "${jattach_bin}"; then
    ${cmd_prefix} "${jattach_bin}" "${target_pid}" threaddump >"${dump_file}"
  fi
}
# Working directory for this run, named after the PID and the start time,
# with one sub-directory per kind of collected data.
TEMP_DIR="${PERFORMANCE_DATA_OUTPUT_DIR}/tmp.${pid}.$(date +%Y%m%d%H%M%S)"
log_debug "Temporary dir ${TEMP_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir "${TEMP_DIR}"/{iostat,threads,netstat,topdashHOutput,topOutput,vmstat,nfsiostat,nfsstat,sysstat}
# Announce the run; the first line goes to both the console and mode.txt,
# the run parameters are appended to mode.txt only.
log_info "The collectPerformanceData.sh script $SCRIPT_VERSION is starting in custom mode." | tee "${TEMP_DIR}/mode.txt"
{
  log_info "The pid is $pid"
  log_info "The custom duration is $duration"
  log_info "The custom thread dump generation frequency is $frequency"
} >>"${TEMP_DIR}/mode.txt"
# Tell the user what is about to happen and how the two settings interact.
log_debug "The custom mode should only be used if requested && if data should be collected for longer than 1 minute"
log_info "The collectPerformanceData.sh script will run for $duration seconds."
log_info "It will generate a full data generation (threadDump, iostat, vmstat, netstat, top) every $frequency seconds."
log_debug ">>>>>>>>>>>>>>>The frequency Has To Divide into the duration by a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 60 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 5 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>Setting the frequency to low, i.e. 1 second, may cause the data to be inconclusive.<<<<<<<<<<<<<<<"
# Main collection loop: one full round of data collection per iteration.
# Each round launches the system collectors in the background, takes a thread
# dump, waits for everything to finish, then sleeps ${frequency} seconds.
# ${duration} is decremented by ${frequency} per round, so the loop ends once
# it reaches zero or below.
while [ "${duration}" -gt 0 ]; do
if busybox_or_tool_installed 'top'; then
# Whole-system snapshot from top in batch mode, single iteration.
# COLUMNS=300 is set for this command only (presumably to avoid truncated lines).
log_info "Taking top data collection."
COLUMNS=300 busybox_wrapper top -b -n 1 >"$TEMP_DIR"/topOutput/topOutput."$(date +%Y%m%d%H%M%S)".txt &
# Same snapshot with top's -H flag added. The output is not filtered by
# PID; the PID only appears in the file name.
log_info "Taking TopdashH data collection."
busybox_wrapper top -bH -n 1 >"$TEMP_DIR"/topdashHOutput/topdashHOutput."$pid"."$(date +%Y%m%d%H%M%S)".txt &
fi
if is_tool_installed 'vmstat'; then
# vmstat snapshot, run in the background (not routed through busybox).
log_info "Taking vmstat data collection."
vmstat >"$TEMP_DIR"/vmstat/vmstat."$(date +%Y%m%d%H%M%S)".out &
fi
if busybox_or_tool_installed 'netstat'; then
# netstat snapshot, run in the background.
log_info "Taking netstat collection."
# stderr is redirected to /dev/null to get rid of the annoying message for non root users
busybox_wrapper netstat -pan 2>/dev/null >"$TEMP_DIR"/netstat/netstat."$(date +%Y%m%d%H%M%S)".out &
fi
if busybox_or_tool_installed 'iostat'; then
# iostat snapshot, run in the background; stderr is discarded.
log_info "Taking iostat data collection."
busybox_wrapper iostat -t 2>/dev/null >"$TEMP_DIR"/iostat/iostat."$(date +%Y%m%d%H%M%S)".out &
fi
if is_tool_installed 'nfsiostat'; then
# nfsiostat snapshot, run in the background.
log_info 'Taking nfsiostat data collection.'
nfsiostat >"$TEMP_DIR"/nfsiostat/nfsiostat."$(date +%Y%m%d%H%M%S)".out &
fi
if is_tool_installed 'nfsstat'; then
# nfsstat -c snapshot, run in the background.
log_info 'Taking nfsstat data collection.'
nfsstat -c >"$TEMP_DIR"/nfsstat/nfsstat."$(date +%Y%m%d%H%M%S)".out &
fi
# Take a thread dump in the background, writing it to a timestamped file.
THREADS_FILENAME="$TEMP_DIR"/threads/threads.$pid."$(date +%Y%m%d%H%M%S)".txt
write_threads "${pid}" "$THREADS_FILENAME" &
# Remember the background job's PID; $! must be read before any other
# background job is started.
THREAD_DUMP_PID=$!
# NOTE: this message is logged before the dump has actually completed.
log_info "Collected a threadDump for PID $pid."
# Wait for the thread dump job specifically, so that its exit status is available
wait $THREAD_DUMP_PID
# Exit status of write_threads (must be captured right after the wait above)
THREAD_DUMP_PID_STATUS=$?
# Barrier: wait for all remaining background collectors of this round
wait
# A failed thread dump aborts the whole run and discards everything collected so far.
if [ $THREAD_DUMP_PID_STATUS -ne 0 ]; then
rm -r "$TEMP_DIR"
log_error 'The script failed to collect a thread dump. Maybe it is not launched with the same user that the Java process is running as. Try with sudo -u <JAVA_USERID> >>>>>>>>>>>>>>>'
exit 1
fi
# Pause for ${frequency} seconds (this also happens after the final round).
log_info "A new collection will start in ${frequency} seconds."
sleep "${frequency}"
# Count down the remaining run time.
duration=$((duration - frequency))
done
# Taking sar (sysstat) data collection
# Taken outside of main collection loop since this is static archived data.
# The archive directory is looked up in /var/log/sysstat first, then in
# /var/log/sa. When sar reports are skipped, a marker file explaining why is
# written inside ${TEMP_DIR}/sysstat: only that directory (not the TEMP_DIR
# root) is part of the final archive, so the marker would otherwise be lost.
if is_tool_installed 'sar'; then
  if [ -d /var/log/sysstat ]; then
    collect_sar "/var/log/sysstat" "${TEMP_DIR}"
  elif [ -d /var/log/sa/ ]; then
    collect_sar "/var/log/sa" "${TEMP_DIR}"
  else
    log_info "Neither /var/log/sysstat/ nor /var/log/sa/ exist, skipping sar reports"
    echo "$(date) Skipping sysstat reports: Logs do not exist" >"${TEMP_DIR}/sysstat/sysstat_skipped.$(date +%Y%m%d%H%M%S).log"
  fi
else
  log_info "sysstat package not installed skipping"
  echo "$(date) Skipping sysstat reports: Not Installed" >"${TEMP_DIR}/sysstat/sysstat_skipped.$(date +%Y%m%d%H%M%S).log"
fi
# Package everything collected into a gzip-compressed tar archive, copy it
# next to the temporary directory (unless CBSUPPORT_OUTPUT names the archive
# location), then remove the temporary directory.
#
# If tar or cp fails the script stops with status 1 and keeps the temporary
# directory, so collected data is never deleted without a usable archive.
log_info "Packaging data and preparing for cleanup."
log_debug "Moving to ${TEMP_DIR}"
pushd "${TEMP_DIR}" >/dev/null || exit
PERFORMANCE_DATA_ARCHIVE_NAME="${CBSUPPORT_OUTPUT:-performanceData.$pid.output.tar.gz}"
if ! tar -czf "${PERFORMANCE_DATA_ARCHIVE_NAME}" topOutput topdashHOutput mode.txt threads vmstat netstat iostat nfsiostat nfsstat sysstat; then
  log_error "Failed to create the archive ${PERFORMANCE_DATA_ARCHIVE_NAME}. The collected data has been kept in ${TEMP_DIR}"
  exit 1
fi
# not needed by cbsupport since we archive in place
if [ -z "$CBSUPPORT_OUTPUT" ]; then
  if ! cp "${PERFORMANCE_DATA_ARCHIVE_NAME}" ..; then
    log_error "Failed to copy ${PERFORMANCE_DATA_ARCHIVE_NAME} out of ${TEMP_DIR}. The collected data has been kept there."
    exit 1
  fi
fi
# Leave the temporary directory before deleting it: removing it from the
# inside fails when TEMP_DIR is a relative path.
log_debug "Moving back to current dir ${HERE}"
popd >/dev/null || exit
log_info "Cleanup files"
# Remove the whole temporary directory (raw data and the in-place archive copy)
rm -r "$TEMP_DIR"
# Notify end user. Do not do it when running in the context of cbsupport as the message is misleading for the end user.
if [ -z "$CBSUPPORT_OUTPUT" ]; then
  log_info "The temporary dir \"${TEMP_DIR}\" has been deleted"
  log_info "The collectPerformanceData.sh script in CUSTOM MODE is complete."
  log_info "The Output files are contained within !>>>! ${PERFORMANCE_DATA_ARCHIVE_NAME} !<<<!"
  log_info "Please upload the ${PERFORMANCE_DATA_ARCHIVE_NAME} archive to your ticket for review."
fi