nodes based on SLURM's "sinfo" output.master
parent
18d79dd34b
commit
19c833c3ff
1 changed files with 68 additions and 0 deletions
@ -0,0 +1,68 @@ |
|||||||
|
#!/bin/bash
#
# Query the status of active nodes from sinfo
#
# Behavior is controlled through environment variables rather than
# command-line options; the variables are listed in DOC below.

# Human-readable description of the script and of the environment variables
# it honors.
# NOTE(review): DOC is not referenced by the code visible in this file --
# presumably kept as inline documentation or for a future help option.
DOC="Get information about active nodes from SLURM perspective.

Environment variables that affect this script:

* OUTDIR
* TIMESTAMP
* SINFO_NODES

"

# Default OUTDIR to the current directory when it is unset or empty.
# The expansion is quoted so that the resulting value is not word-split or
# glob-expanded when handed to the no-op ':' builtin.
: "${OUTDIR:=.}"
||||||
|
|
||||||
|
# Decide the TIMESTAMP suffix used in every output file name.
#
# * A non-empty TIMESTAMP equal to "-", "none" or "(none)" means "no suffix"
#   and is blanked.  This is applied whether node info is fetched or reused,
#   so a placeholder never leaks into file names ("(none)" is also what the
#   reuse message below prints for a blank TIMESTAMP).
# * An empty/unset TIMESTAMP gets a generated "_YYYY-MM-DDTHH.MM.SS" suffix,
#   but only when we are about to fetch fresh data from SLURM.
if [ -n "${TIMESTAMP}" ]; then
    case "${TIMESTAMP}" in
        (-|"(none)"|none)
            TIMESTAMP=  # BLANK
            ;;
    esac
elif [ -z "${SINFO_NODES}" ]; then
    TIMESTAMP=$(date +"_%Y-%m-%dT%H.%M.%S")
fi

# If the output of "sinfo -N" is not specified, we will fetch
# the output from SLURM and include additional information.
if [ -z "${SINFO_NODES}" ]; then
    # FIXME: Yeah I know this can run into race condition, oh well.
    # sinfo -N should be considered the authoritative output.

    # The first two dumps are supplementary information only.
    sinfo    > "${OUTDIR}/sinfo${TIMESTAMP}.txt"
    sinfo -s > "${OUTDIR}/sinfo-s${TIMESTAMP}.txt"

    # Everything downstream is derived from this file, so do not continue
    # with an empty or partial node list if the query (or the write) fails.
    if ! sinfo -N > "${OUTDIR}/sinfo-N${TIMESTAMP}.txt"; then
        echo "Error: 'sinfo -N' failed; cannot build node lists" >&2
        exit 1
    fi
    SINFO_NODES="${OUTDIR}/sinfo-N${TIMESTAMP}.txt"
else
    # Reuse mode: the caller supplied a previously saved "sinfo -N" output.
    if [ ! -r "${SINFO_NODES}" ]; then
        echo "Error: cannot read node info file: ${SINFO_NODES}" >&2
        exit 1
    fi
    echo "Reusing node info from ${SINFO_NODES}"
    echo "Assigned TIMESTAMP=${TIMESTAMP:-(none)}"
fi
||||||
|
|
||||||
|
# The `sinfo -N` listing has one row per node:partition pair, so a node that
# is in several partitions appears on several rows.

# Build the unique, sorted list of active node names:
#   - NR > 1 skips the header row,
#   - rows whose 4th column contains "fail" or "down" are dropped,
#   - column 1 (the node name) is kept and de-duplicated.
awk 'NR > 1 && $4 !~ /fail|down/ { print $1 }' "${SINFO_NODES}" \
    | sort \
    | uniq \
    > "${OUTDIR}/nodes-active${TIMESTAMP}.txt"
||||||
|
|
||||||
|
# Count active nodes per node type.
#
# Steps: skip the header row, drop rows whose 4th column contains "fail" or
# "down", reduce to unique node names, strip the trailing host number (-NNN)
# to obtain the node type, then count how many nodes share each type.
#
# `uniq -c` only merges ADJACENT identical lines.  Sorting the full node names
# does not guarantee that equal types end up adjacent once the suffix is
# removed (e.g. a-1, a-1-2, a-2 -> a, a-1, a; or a locale whose collation
# ignores '-'), so the stripped names are sorted again before counting.
tail -n +2 "${SINFO_NODES}" \
    | awk '$4 !~ /fail|down/ {print $1}' \
    | sort \
    | uniq \
    | sed -e 's/-[0-9][0-9]*$//' \
    | sort \
    | uniq -c > "${OUTDIR}/nodes-active-types${TIMESTAMP}.txt"
||||||
|
|
||||||
|
#tail -n +2 "${OUTDIR}/sinfo-N_${TIMESTAMP}.txt" | sed -e 's/-[0-9][0-9]*$//' | sort | uniq -c > "${OUTDIR}/sinfo-active-nodes-types_${TIMESTAMP}.txt" |
||||||
|
|
||||||
|
# Count ALL nodes per node type, regardless of the state column.
#
# Steps: skip the header row, take the node name (column 1), reduce to unique
# node names, strip the trailing host number (-NNN) to obtain the node type,
# then count how many nodes share each type.
#
# The stripped names are sorted again before `uniq -c` because uniq only
# merges adjacent identical lines, and the order of the full node names does
# not guarantee that equal types stay adjacent after the suffix is removed.
tail -n +2 "${SINFO_NODES}" \
    | awk '{print $1}' \
    | sort \
    | uniq \
    | sed -e 's/-[0-9][0-9]*$//' \
    | sort \
    | uniq -c > "${OUTDIR}/nodes-all-types${TIMESTAMP}.txt"
||||||
|
|
Loading…
Reference in new issue