|
|
@ -1,49 +1,109 @@ |
|
|
|
#!/bin/bash |
|
|
|
#!/bin/bash |
|
|
|
# 20151028 |
|
|
|
# 20151028 |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
# Note: original extraction command on turing: |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
# qstat -f | grep -ve '^[-# ]' -e '^queuename' | less |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Option defaults.  `${var=0}' assigns only when the variable is unset, so a
# value supplied by the caller or the environment is kept.  The expansions are
# quoted so that a caller-supplied value is never word-split or glob-expanded.
#
#   optShowDisabledNodes  non-zero => nodes whose states field contains "d"
#                         are included instead of being skipped.
#   optPrintRaw           non-zero => the stats processor also prints each
#                         accepted input line.
: "${optShowDisabledNodes=0}"
: "${optPrintRaw=0}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function node_slot_stats_raw()
# Prints the node stats from `qstat -f' in raw format:
#  - the "queuename ..." column header is passed through when it is the
#    first input line;
#  - only lines that start with a letter and have 5 or 6 fields are kept,
#    which is what leaves out the computational jobs running on the nodes;
#  - disabled nodes (a "d" in the 6th field) are not printed unless
#    optShowDisabledNodes is non-zero.
#
# Globals:   optShowDisabledNodes (read; treated as 0 when unset or empty,
#            otherwise an empty value would compare as "not 0" inside awk
#            and silently enable the disabled nodes)
# Outputs:   the selected `qstat -f' lines, unmodified, on stdout
# Returns:   exit status of the gawk stage (unless pipefail is enabled)
{
  qstat -f \
    | gawk -v optShowDisabledNodes="${optShowDisabledNodes:-0}" \
      '
      # Column header of the listing: print it and do not test it again.
      FNR == 1 && $1 == "queuename" { print; next; }

      # Valid host status line; field 6 (states) is absent on 5-field lines,
      # in which case the "d" test below trivially passes.
      ($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) && (optShowDisabledNodes != 0 || ($6 !~ /d/)) {
        print
      }
      '
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function node_slot_stats_per_machine_type()
# Prints status of slot availability per machine type (defined as
# hosts with the same base hostname, e.g. "c6-" or "c8-").
# Originally implemented based on the naming of hosts on Turing cluster.
#
# Example output: (changes depending on what's disabled and the load of the cluster)
#
#     MACHTYPE NODE CORES  used  free  resv
#     c6         15   240    77   163     0
#     c8         40   768   569   199     0
#     cr         74  1480   988   492     0
#     crhimem     3    96     0    96     0
#     crphi      10   200    48   152     0
#     d430       49  1568  1292   276     0
#     d730       10   280    10   270     0
#
# FIXME: If a machine is covered by more than one queue, this will cause the counts
# to be overestimated. Must register if a machine has been encountered and not
# re-account that machine.
#
# Takes no arguments: the live `qstat -f' listing is piped on stdin into
# _Process_node_slot_stats_per_machine_type, which does all the parsing
# and reporting.
{
  qstat -f | _Process_node_slot_stats_per_machine_type
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function _Process_node_slot_stats_per_machine_type() |
|
|
|
|
|
|
|
# Processing part of the routine above. |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
gawk \ |
|
|
|
|
|
|
|
-v optShowDisabledNodes="$optShowDisabledNodes" \ |
|
|
|
|
|
|
|
-v optPrintRaw="$optPrintRaw" \ |
|
|
|
|
|
|
|
'#### |
|
|
|
BEGIN { |
|
|
|
BEGIN { |
|
|
|
STDERR = "/dev/stderr" |
|
|
|
STDERR = "/dev/stderr" |
|
|
|
|
|
|
|
hostnames_seen[-1234] = 0 |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
FNR == 1 && $1 == "queuename" { next; } |
|
|
|
FNR == 1 && $1 == "queuename" { next; } |
|
|
|
|
|
|
|
|
|
|
|
# Valid host status field |
|
|
|
# Valid host status field |
|
|
|
($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) { |
|
|
|
($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) { |
|
|
|
#print($0) |
|
|
|
|
|
|
|
queue_node = $1 |
|
|
|
queue_node = $1 |
|
|
|
core_usage_combo = $3 |
|
|
|
core_usage_combo = $3 |
|
|
|
states = $6 # if any |
|
|
|
states = $6 # if any |
|
|
|
|
|
|
|
|
|
|
|
# skip disabled hosts |
|
|
|
# skip disabled hosts |
|
|
|
if (states ~ /d/) next; |
|
|
|
if (states ~ /d/ && (optShowDisabledNodes==0)) next; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (optPrintRaw != 0) print($0) |
|
|
|
|
|
|
|
|
|
|
|
# gawk extension of match: |
|
|
|
# gawk extension of match: |
|
|
|
if (! match(queue_node, /^([^@]+)@([^-]+)-(.*)$/, Strs)) |
|
|
|
if (match(queue_node, /^([^@]+)@([^-]+)-(.*)$/, Strs)) |
|
|
|
{ |
|
|
|
|
|
|
|
print("Invalid queue/host combo: " queue_node) > STDERR |
|
|
|
|
|
|
|
next |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
else |
|
|
|
|
|
|
|
{ |
|
|
|
{ |
|
|
|
queue = Strs[1] |
|
|
|
queue = Strs[1] |
|
|
|
hostkind = Strs[2] |
|
|
|
hostkind = Strs[2] |
|
|
|
hostnum = Strs[3] |
|
|
|
hostnum = Strs[3] |
|
|
|
|
|
|
|
hostname = hostkind "-" hostnum |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
else |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
print("Invalid queue/host combo: " queue_node) > STDERR |
|
|
|
|
|
|
|
next |
|
|
|
} |
|
|
|
} |
|
|
|
split(core_usage_combo, Strs, "/") |
|
|
|
split(core_usage_combo, Strs, "/") |
|
|
|
slots_resv = Strs[1] |
|
|
|
slots_resv = Strs[1] |
|
|
|
slots_used = Strs[2] |
|
|
|
slots_used = Strs[2] |
|
|
|
slots_tot = Strs[3] |
|
|
|
slots_tot = Strs[3] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Avoiding double counting: |
|
|
|
|
|
|
|
if (hostname in hostname_seen) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
print("Host already seen: " hostname) > STDERR |
|
|
|
|
|
|
|
next |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
mach_node_count[hostkind] = mach_node_count[hostkind] + 1 |
|
|
|
mach_node_count[hostkind] = mach_node_count[hostkind] + 1 |
|
|
|
mach_node_slot_count[hostkind] = slots_tot # assume homogenous! This DOES NOT work with c8-type nodes! |
|
|
|
mach_node_slot_count[hostkind] = slots_tot # assume homogenous! This DOES NOT work with c8-type nodes! |
|
|
|
mach_slots_tot[hostkind] = mach_slots_tot[hostkind] + slots_tot |
|
|
|
mach_slots_tot[hostkind] = mach_slots_tot[hostkind] + slots_tot |
|
|
@ -76,7 +136,36 @@ function report_node_stats() |
|
|
|
END { |
|
|
|
END { |
|
|
|
report_node_stats() |
|
|
|
report_node_stats() |
|
|
|
} |
|
|
|
} |
|
|
|
' |
|
|
|
' \ |
|
|
|
|
|
|
|
"$@" |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function node_slot_stats_per_machine_type_f()
# File-based variant of node_slot_stats_per_machine_type: instead of piping a
# live `qstat -f' listing, hands a file name to
# _Process_node_slot_stats_per_machine_type, which forwards its arguments to
# gawk as input operands.
#
# Arguments: $1 - file to process (presumably a saved `qstat -f' listing;
#                 confirm against callers).  Any further arguments are ignored.
# Returns:   exit status of the processing function
{
  _Process_node_slot_stats_per_machine_type "$1"
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Command-line dispatch.
#
# Arguments: $1 - action:
#                   --raw | raw
#                       print the filtered `qstat -f' node lines
#                   --stats | stats | (empty or missing)
#                       per-machine-type slot statistics
#                   --stats-with-disabled | stats-with-disabled
#                       same statistics with optShowDisabledNodes=1
#            $2 - optional input file for the two stats actions; when it is
#                 non-empty the file-based variant is used instead of a live
#                 qstat call
# Returns:   status of the selected routine; exits with status 2 after
#            printing a message on stderr when the action is not recognized
main() {
  case "$1" in
    (--raw|raw)
      node_slot_stats_raw
      ;;
    (--stats|stats|"")
      if [[ -n "$2" ]]; then
        node_slot_stats_per_machine_type_f "$2"
      else
        node_slot_stats_per_machine_type
      fi
      ;;
    (--stats-with-disabled|stats-with-disabled)
      # The assignment prefix applies only for the duration of the call.
      if [[ -n "$2" ]]; then
        optShowDisabledNodes=1 node_slot_stats_per_machine_type_f "$2"
      else
        optShowDisabledNodes=1 node_slot_stats_per_machine_type
      fi
      ;;
    (*)
      printf 'Unknown action: %s\n' "$1" >&2
      exit 2
      ;;
  esac
}

main "$@"
|
|
|