@ -6,6 +6,49 @@
# A tool that dumps every imaginable piece of information I want to get
# from an SGE-managed cluster.
"""
This is a tool that dumps every imaginable piece of information I want to
get from an SGE-managed cluster.
This tool runs at user level, so it can only gather information that an
ordinary user can mine from the cluster.
Currently, the following kinds of information can be dumped:
- cpufreq
- lscpu
- lspci
- free memory (the `free` command)
- uname
- dmesg
- mount
- df
Typical workflow
----------------
As a starter, use the routine `Gather_all` to gather all information bits
from the compute nodes. This is an expensive gather operation; it may
take a while to complete.
There is a tool called `test_accessible_hosts` that reads the list of
nodes from the SGE `qhost` command, then checks the availability of every
node by performing ssh into each one.
Analysis: CPU variety
---------------------
To summarize the kinds of CPUs available on the compute nodes, as well
as to list the nodes that have them, use `summarize_cpu` and
`print_summarize_cpu`.
This tool requires that the output of `gather_cpuinfo` has been saved
to `cluster-info/$HOSTNAME/cpuinfo.txt` files, where $HOSTNAME stands
for the host basename (without domain qualifier) of every compute
node.
The routine that does them all is `analyze_cpu_composition`.
"""
import os
import re
import subprocess
@ -291,6 +334,20 @@ def tally_summarize_cpu(summary):
def analyze_cpu_composition():
    """Analyze the CPU composition of an SGE cluster and print the summary.

    The work is done in up to three steps, the first two only when needed:

    1. If the module-level ``NODE_GOOD_LIST`` is empty, call
       ``test_accessible_hosts()`` and store its two results in
       ``NODE_GOOD_LIST`` and ``NODE_BAD_LIST``.
    2. If the first good node has no ``cpuinfo.txt`` file under
       ``ROOT_DIR/<host basename>/``, call ``gather_cpu_info`` on the
       whole good-node list.
    3. Pass the good-node list to ``summarize_cpu`` and hand the result
       to ``print_summarize_cpu``.

    If no node is accessible even after step 1, a warning is printed and
    the function returns without doing the analysis (instead of failing
    with ``IndexError`` on the empty list).

    Returns:
        None. The summary is printed, not returned.
    """
    # Both lists are rebound below, hence the ``global`` declaration.
    # ROOT_DIR is only read, so it needs none.
    global NODE_GOOD_LIST, NODE_BAD_LIST

    def cpuinfo_path(host):
        """Return the cpuinfo.txt path for *host* under ROOT_DIR."""
        # Per-host directories are named by host basename, i.e. the part
        # of the host name before the first dot (no domain qualifier).
        basename = host.split(".")[0]
        return os.path.join(ROOT_DIR, basename, "cpuinfo.txt")

    if not NODE_GOOD_LIST:
        print(" Warning: need to test node accesibility... ")
        NODE_GOOD_LIST, NODE_BAD_LIST = test_accessible_hosts()

    if not NODE_GOOD_LIST:
        # Nothing to probe or summarize; indexing [0] below would raise.
        print("Warning: no accessible nodes found; skipping CPU analysis.")
        return

    # Only the first node is probed, as a cheap indicator of whether the
    # cpuinfo gathering has been run at all.
    if not os.path.exists(cpuinfo_path(NODE_GOOD_LIST[0])):
        print(" Warning: need to gather cpuinfo... ")
        # Most likely you haven't run gather_cpuinfo then...
        # NOTE(review): the module docstring calls this routine
        # `gather_cpuinfo`, while the call here is `gather_cpu_info` --
        # confirm which name is actually defined in this module.
        gather_cpu_info(NODE_GOOD_LIST)

    summary = summarize_cpu(NODE_GOOD_LIST)
    print_summarize_cpu(summary)
@ -327,4 +384,3 @@ def Gather_all():
print ( " \n Gathering df... " )
gather_df ( NODE_GOOD_LIST )