@ -189,3 +189,112 @@ class ArrayStat(object):
else :
out . write ( rslt )
out . flush ( )
def study_sparsity ( M , binrange = ( - 16.5 , 6.5 , 1.0 ) , kind = ' log10x ' , minzero = 1e-30 , args = { } ) :
# Adapted from V2b_inspect.py research module.
""" Study the distribution of values or sparsity of a given array, M,
using histogram .
Returns a tuple of length N ; the first ( N - 2 ) elements are the
histogram ( ) output , while the last two are the number of elements
whose values are smaller ( greater ) than the lower ( upper ) bound of
the histogram bind range .
Argument ' kind ' : By default , the ' x ' values of the histogram is on
logarithmic - 10 scale .
Other common log scales can be chosen using ' log2x ' and ' logx ' .
Choose ' linx ' if you want the linear scale instead .
Argument ' binrange ' can be one of the following :
- 3 - tuple : the left edge of the first bin , the right edge of the last bin ,
and the width of each bin
- 2 - tuple : the left MIDPOINT of the first bin , the right MIDPOINT
of the last bin ; the width is assumed to be 1.
The defaults are a 3 - tuple , intended for log10x scale , where
we check order - of - magnitudes between 1e-16 and 1e+5 ( roughly ) .
"""
from numpy import abs , count_nonzero , arange , histogram , log10 , log2 , log
if kind == ' log10x ' : # usual way, for broad view
log_val = log10 ( abs ( M . flatten ( ) ) + minzero )
elif kind == ' log2x ' :
log_val = log2 ( abs ( M . flatten ( ) ) + minzero )
elif kind == ' logx ' :
log_val = log ( abs ( M . flatten ( ) ) + minzero )
elif kind == ' linx ' : # linear x scale, usually for more detailed view
log_val = abs ( M . flatten ( ) )
else :
raise ValueError , " Invalid kind= %s " % ( str ( kind ) )
if len ( binrange ) == 3 :
binedges = numpy . arange ( * binrange )
elif len ( binrange ) == 2 :
l = binrange [ 0 ] - 0.5
r = binrange [ 1 ] + 0.5
binedges = numpy . arange ( l , r )
else :
raise ValueError , " Invalid binrange parameter value "
#print binedges
hist = histogram ( log_val , bins = binedges , * * args )
# Count values outside the range being considered:
l = 0
r = 0
llim = binedges [ 0 ]
rlim = binedges [ - 1 ]
l = count_nonzero ( log_val < llim )
r = count_nonzero ( log_val > rlim )
return hist + ( l , r )
def print_histogram ( hist , xaxis = ' midpt ' ,
# output formatting
xwidth = 3 , xprec = 0 ,
out = sys . stdout ) :
# Adapted from V2b_inspect.py research module.
""" Prints histogram in an easier way to visualize in text fashion.
"""
if len ( hist ) > = 4 :
# special case: for study_sparsity output.
( bar , edges , lexcess , rexcess ) = hist
else :
# for all other cases--
( bar , edges ) = hist [ : 2 ]
lexcess = 0
rexcess = 0
if xaxis == ' midpt ' :
midpt = ( edges [ 1 : ] + edges [ : - 1 ] ) * 0.5
elif xaxis == ' leftpt ' :
midpt = edges [ : - 1 ]
elif xaxis == ' rightpt ' :
midpt = edges [ 1 : ]
else :
raise ValueError , " Invalid xaxis parameter value "
width = max ( int ( numpy . log10 ( max ( bar . max ( ) , lexcess , rexcess ) ) + 1 ) , xwidth )
#print "width = %d" % width
barfmt = " % " + str ( width ) + " d "
midfmt = " % " + str ( width ) + " . " + str ( xprec ) + " f "
if out == str :
from StringIO import StringIO
out = StringIO ( )
def retval ( ) :
return out . getvalue ( )
else :
def retval ( ) :
return None
def Print ( x ) :
out . write ( x )
out . write ( ( barfmt % lexcess ) \
+ " " . join ( [ barfmt % i for i in bar ] ) \
+ ( barfmt % rexcess ) \
+ " \n " )
out . write ( " " * width + " < " \
+ " " . join ( [ midfmt % i for i in midpt ] ) \
+ " " * width + " > " \
+ " \n " )
return retval ( )