#!/bin/bash
#
# 20151028
#
# Summarizes node/slot availability as reported by `qstat -f`.
#
# Usage:
#   SCRIPT [--stats] [FILE]               per-machine-type slot summary (default)
#   SCRIPT --stats-with-disabled [FILE]   same, but disabled nodes are included
#   SCRIPT --raw                          filtered queue-instance lines of `qstat -f`
#
# When FILE is given, previously saved `qstat -f` output is read from FILE
# instead of running qstat.
#
# Environment (both default to 0):
#   optShowDisabledNodes   non-zero: keep nodes whose state field contains "d"
#   optPrintRaw            non-zero: the stats also echo every host line that
#                          passes the disabled-node filter
#
# Note: original extraction command on turing:
#
#     qstat -f | grep -ve '^[-# ]' -e '^queuename' | less
#

: "${optShowDisabledNodes=0}"
: "${optPrintRaw=0}"

#######################################
# Runs gawk when it is installed, otherwise whatever `awk` is on PATH.
# The awk programs in this file use POSIX awk features only, so either works.
# Arguments: passed through to the awk interpreter unchanged
#######################################
_awk() {
  if command -v gawk >/dev/null 2>&1; then
    gawk "$@"
  else
    awk "$@"
  fi
}

#######################################
# Prints the node stats from `qstat -f' in raw format:
#   - not printing disabled nodes (unless optShowDisabledNodes is non-zero)
#   - not showing the computational jobs that are running on these nodes
# Globals: optShowDisabledNodes (read)
# Outputs: the header line plus one line per queue instance, on stdout
#######################################
node_slot_stats_raw() {
  qstat -f \
    | _awk -v optShowDisabledNodes="$optShowDisabledNodes" '
        # Keep the column header so the listing stays self-describing.
        FNR == 1 && $1 == "queuename" { print; next }

        # Valid host status line: starts with a letter and has 5 fields,
        # or 6 when a state is present.
        ($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) &&
        (optShowDisabledNodes != 0 || ($6 !~ /d/)) { print }
      '
}

#######################################
# Prints status of slot availability per machine type (defined as
# hosts with the same base hostname, e.g. "c6-" or "c8-").
# Originally implemented based on the naming of hosts on Turing cluster.
#
# Example output: (changes depending on what's disabled and the load of
# the cluster)
#
#   MACHTYPE         NODE CORES  used  free  resv
#   c6                 15   240    77   163     0
#   c8                 40   768   569   199     0
#   cr                 74  1480   988   492     0
#   crhimem             3    96     0    96     0
#   crphi              10   200    48   152     0
#   d430               49  1568  1292   276     0
#   d730               10   280    10   270     0
#
# A machine covered by more than one queue is accounted only once (its
# first queue instance); later instances are reported on stderr and skipped.
#
# Globals: optShowDisabledNodes, optPrintRaw (read)
#######################################
node_slot_stats_per_machine_type() {
  qstat -f | _Process_node_slot_stats_per_machine_type
}

#######################################
# Processing part of node_slot_stats_per_machine_type.
# Globals:   optShowDisabledNodes, optPrintRaw (read)
# Arguments: optional input file(s) holding `qstat -f` output; stdin if none
# Outputs:   summary table on stdout; diagnostics on stderr
#######################################
_Process_node_slot_stats_per_machine_type() {
  _awk \
    -v optShowDisabledNodes="$optShowDisabledNodes" \
    -v optPrintRaw="$optPrintRaw" \
    '####
    BEGIN {
      STDERR = "/dev/stderr"
    }

    # Column header of the listing
    FNR == 1 && $1 == "queuename" { next }

    # Valid host status line
    ($0 ~ /^[A-Za-z]/) && (NF == 5 || NF == 6) {
      queue_node = $1
      core_usage_combo = $3
      states = $6  # if any

      # skip disabled hosts
      if (states ~ /d/ && optShowDisabledNodes == 0) next

      if (optPrintRaw != 0) print($0)

      # Split "queue@kind-num" by hand (portable stand-in for the gawk-only
      # three-argument match): the queue name must be non-empty and the
      # host part must contain a dash that is not its first character.
      at_pos = index(queue_node, "@")
      hostname = (at_pos > 1) ? substr(queue_node, at_pos + 1) : ""
      dash_pos = index(hostname, "-")
      if (dash_pos < 2) {
        print("Invalid queue/host combo: " queue_node) > STDERR
        next
      }
      hostkind = substr(hostname, 1, dash_pos - 1)

      # Field 3 is "resv/used/tot"
      split(core_usage_combo, usage, "/")
      slots_resv = usage[1]
      slots_used = usage[2]
      slots_tot = usage[3]

      # Avoiding double counting: a host served by several queues shows up
      # once per queue, so only its first appearance is accounted.
      if (hostname in hostname_seen) {
        print("Host already seen: " hostname) > STDERR
        next
      }
      hostname_seen[hostname] = 1

      mach_node_count[hostkind] += 1
      mach_slots_tot[hostkind] += slots_tot
      mach_slots_used[hostkind] += slots_used
      mach_slots_resv[hostkind] += slots_resv
    }

    # Prints the summary table, one row per machine type, sorted by name.
    function report_node_stats(    machs, machs_count, i, j, mach) {
      machs_count = 0
      for (mach in mach_node_count) {
        machs_count += 1
        machs[machs_count] = mach
      }

      # Insertion sort with forced string comparison (portable stand-in
      # for the gawk-only asort).
      for (i = 2; i <= machs_count; ++i) {
        mach = machs[i]
        for (j = i - 1; j >= 1 && (machs[j] "") > (mach ""); --j) {
          machs[j + 1] = machs[j]
        }
        machs[j + 1] = mach
      }

      printf("%-16s %4s %5s %5s %5s %5s\n",
             "MACHTYPE", "NODE", "CORES", "used", "free", "resv")
      for (i = 1; i <= machs_count; ++i) {
        mach = machs[i]
        printf("%-16s %4d %5d %5d %5d %5d\n",
               mach,
               mach_node_count[mach],
               mach_slots_tot[mach],
               mach_slots_used[mach],
               mach_slots_tot[mach] - mach_slots_used[mach] - mach_slots_resv[mach],
               mach_slots_resv[mach])
      }
    }

    END {
      report_node_stats()
    }
    ' \
    "$@"
}

#######################################
# Same report as node_slot_stats_per_machine_type, but reads saved
# `qstat -f` output from a file instead of running qstat.
# Arguments: $1 - path to the saved output
#######################################
node_slot_stats_per_machine_type_f() {
  _Process_node_slot_stats_per_machine_type "$1"
}

#######################################
# Command-line dispatch.
# Arguments: $1 - action (raw | stats | stats-with-disabled, with or
#                 without leading "--"; empty means stats)
#            $2 - optional input file for the stats actions
# Returns:   exits with status 2 on an unknown action
#######################################
main() {
  case "${1-}" in
    --raw | raw)
      node_slot_stats_raw
      ;;
    --stats | stats | "")
      if [[ -n "${2-}" ]]; then
        node_slot_stats_per_machine_type_f "$2"
      else
        node_slot_stats_per_machine_type
      fi
      ;;
    --stats-with-disabled | stats-with-disabled)
      optShowDisabledNodes=1
      if [[ -n "${2-}" ]]; then
        node_slot_stats_per_machine_type_f "$2"
      else
        node_slot_stats_per_machine_type
      fi
      ;;
    *)
      printf 'Unknown action: %s\n' "$1" >&2
      exit 2
      ;;
  esac
}

main "$@"