* Module array_stat: Added study_sparsity() and print_histogram(),

adapted from V2b_inspect research module.
master
Wirawan Purwanto 11 years ago
parent aaa06dc531
commit bb3db2667a
  1. 109
      math/stats/array_stats.py

@ -189,3 +189,112 @@ class ArrayStat(object):
else: else:
out.write(rslt) out.write(rslt)
out.flush() out.flush()
def study_sparsity(M, binrange=(-16.5,6.5,1.0), kind='log10x', minzero=1e-30, args={}):
# Adapted from V2b_inspect.py research module.
"""Study the distribution of values or sparsity of a given array, M,
using histogram.
Returns a tuple of length N; the first (N-2) elements are the
histogram() output, while the last two are the number of elements
whose values are smaller (greater) than the lower (upper) bound of
the histogram bind range.
Argument 'kind': By default, the 'x' values of the histogram is on
logarithmic-10 scale.
Other common log scales can be chosen using 'log2x' and 'logx'.
Choose 'linx' if you want the linear scale instead.
Argument 'binrange' can be one of the following:
- 3-tuple: the left edge of the first bin, the right edge of the last bin,
and the width of each bin
- 2-tuple: the left MIDPOINT of the first bin, the right MIDPOINT
of the last bin; the width is assumed to be 1.
The defaults are a 3-tuple, intended for log10x scale, where
we check order-of-magnitudes between 1e-16 and 1e+5 (roughly).
"""
from numpy import abs, count_nonzero, arange, histogram, log10, log2, log
if kind == 'log10x': # usual way, for broad view
log_val = log10(abs(M.flatten()) + minzero)
elif kind == 'log2x':
log_val = log2(abs(M.flatten()) + minzero)
elif kind == 'logx':
log_val = log(abs(M.flatten()) + minzero)
elif kind == 'linx': # linear x scale, usually for more detailed view
log_val = abs(M.flatten())
else:
raise ValueError, "Invalid kind=%s" % (str(kind))
if len(binrange) == 3:
binedges = numpy.arange(*binrange)
elif len(binrange) == 2:
l = binrange[0]-0.5
r = binrange[1]+0.5
binedges = numpy.arange(l,r)
else:
raise ValueError, "Invalid binrange parameter value"
#print binedges
hist = histogram(log_val, bins=binedges, **args)
# Count values outside the range being considered:
l = 0
r = 0
llim = binedges[0]
rlim = binedges[-1]
l = count_nonzero(log_val < llim)
r = count_nonzero(log_val > rlim)
return hist + (l,r)
def print_histogram(hist, xaxis='midpt',
# output formatting
xwidth=3, xprec=0,
out=sys.stdout):
# Adapted from V2b_inspect.py research module.
"""Prints histogram in an easier way to visualize in text fashion.
"""
if len(hist) >= 4:
# special case: for study_sparsity output.
(bar,edges,lexcess,rexcess) = hist
else:
# for all other cases--
(bar,edges) = hist[:2]
lexcess = 0
rexcess = 0
if xaxis == 'midpt':
midpt = (edges[1:] + edges[:-1]) * 0.5
elif xaxis == 'leftpt':
midpt = edges[:-1]
elif xaxis == 'rightpt':
midpt = edges[1:]
else:
raise ValueError, "Invalid xaxis parameter value"
width = max(int(numpy.log10(max(bar.max(), lexcess, rexcess))+1), xwidth)
#print "width = %d" % width
barfmt = " %" + str(width) + "d"
midfmt = " %" + str(width) + "." + str(xprec) + "f"
if out == str:
from StringIO import StringIO
out = StringIO()
def retval():
return out.getvalue()
else:
def retval():
return None
def Print(x):
out.write(x)
out.write((barfmt % lexcess) \
+ "".join([ barfmt % i for i in bar ]) \
+ (barfmt % rexcess) \
+ "\n")
out.write(" " * width + "<" \
+ "".join([ midfmt % i for i in midpt ]) \
+ " " * width + ">" \
+ "\n")
return retval()

Loading…
Cancel
Save