#
# wpylib.math.fitting.stochastic module
#
# Created: 20150528
# Wirawan Purwanto
#
# NOTE: Imported verbatim from Cr2_analysis_cbs.py, CVS rev 1.143.master.
#
# Dependencies:
# - numpy
# - scipy
# - matplotlib (for visualization routines)
#

"""
wpylib.math.fitting.stochastic module

Tools for stochastic curve fitting.
"""

import numpy

from wpylib.math.fitting import fit_func_base

# NOTE: this module also refers to the helpers errorbar, errorbar_mean,
# errorbar_err, text_output, matrix_str, and array_hstack. Their import
# statements were not part of the imported snippet; they are assumed to be
# provided elsewhere in wpylib.


class StochasticFitting(object):
    """Standard stochastic fit procedure.

    Fits a model function to data points with error bars, then re-fits many
    synthetic datasets drawn from the Gaussian uncertainties of the data in
    order to estimate the uncertainties of the fit parameters.
    """
    debug = 0
    dbg_guess_params = True
    def_opt_report_final_params = 3

    def __init__(self):
        # use the deterministic (non-stochastic) fit result as the initial
        # guess for every Monte Carlo fit:
        self.use_nlf_guess = 1
        # weight each data point by dy**(-2) in the fits:
        self.use_dy_weights = True

    def init_func(self, func):
        """Sets the fit function (a fit_func_base instance)."""
        self.func = func

    def init_samples(self, x, y, dy):
        """
        Initializes the sample data against which we will perform
        the stochastic fitting.
        This function takes N measurement samples:
        - the (multidimensional) domain points, x
        - the measured target points, y
        - the uncertainties of the target points, dy
        """
        x = fit_func_base.domain_array(x)
        if not (len(x[0]) == len(y) == len(dy)):
            raise TypeError, "Lengths of the x, y, dy arrays are not identical."

        # fix (or, actually, provide an accommodation for) a common "mistake"
        # for a 1-D domain: make it standard by adding the "first" dimension
        if len(x.shape) == 1:
            x = x.reshape((1, x.shape[0]))

        self.samples_x = x
        self.samples_y = numpy.array(y)
        self.samples_dy = numpy.array(dy)
        # fit weights: inverse variance of each data point
        self.samples_wt = (self.samples_dy)**(-2)

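    # Usage sketch for sample initialization (hedged; the array contents
    # below are made-up illustration values, not from this module):
    #
    #   fitter = StochasticFitting()
    #   fitter.init_samples(x=[1.6, 1.7, 1.8, 1.9],       # 1-D domain
    #                       y=[-0.233, -0.241, -0.238, -0.229],
    #                       dy=[0.004, 0.003, 0.003, 0.005])
    #   # After this call, samples_x has the standardized 2-D shape
    #   # (ndim, N) == (1, 4), and samples_wt == dy**(-2).
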
    def init_rng(self, seed=None, rng_class=numpy.random.RandomState):
        """Initializes a standard random number generator for use in
        the fitting routine."""
        if seed is None:
            seed = numpy.random.randint(numpy.iinfo(int).max)
        print "Using random seed: ", seed
        self.rng_seed = seed
        self.rng = rng_class(seed)

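    # Reproducibility sketch (hedged illustration): passing the same seed
    # recreates the same sequence of synthetic datasets.
    #
    #   fitter.init_rng(seed=12345)
    #   # ... run the MC fitting ...
    #   fitter.init_rng(seed=12345)   # replays the identical MC stream
    #
    # numpy.random.RandomState(seed) is a self-contained generator, so two
    # StochasticFitting objects with different seeds do not interfere.
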
    def num_fit_params(self):
        """An ad-hoc way to determine the number of fitting parameters.

        FIXME: There is still no a priori way to find the number of
        fit parameters in the fit_func_base class or its derivatives.

        There are a few after-the-fact ways to determine this:

        1) Once the "deterministic" nonlinear fit is done, you can find the
           number of parameters by

               len(self.log_nlf_params)

        2) Once the stochastic fit is done, you can also find the number
           of fit parameters by

               len(self.log_mc_params[0])
        """
        try:
            return len(self.log_nlf_params)
        except (AttributeError, TypeError):
            pass
        try:
            return len(self.log_mc_params[0])
        except (AttributeError, IndexError, TypeError):
            pass
        raise RuntimeError, "Cannot determine the number of fit parameters."

    def nlfit1(self):
        """Performs the non-stochastic, standard nonlinear fit."""
        from numpy.linalg import norm

        if self.use_dy_weights:
            dy = self.samples_dy
        else:
            dy = None
        rslt = self.func.fit(self.samples_x, self.samples_y, dy=dy)
        self.log_nlf_params = rslt
        self.nlf_f = self.func(self.log_nlf_params, self.samples_x)

        last_fit = self.func.last_fit
        mval_resid = self.nlf_f - self.samples_y
        # unweighted and dy**(-2)-weighted sums of squared residuals
        self.nlf_ussr = norm(mval_resid)**2
        self.nlf_wssr = norm(mval_resid / self.samples_dy)**2
        self.nlf_funcalls = last_fit['funcalls']
        self.nlf_rec = last_fit

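    # Note on the residual statistics above: with r_i = f(x_i) - y_i,
    #
    #   USSR = sum_i r_i**2               (unweighted SSR)
    #   WSSR = sum_i (r_i / dy_i)**2      (weighted SSR, i.e. chi-squared)
    #
    # both computed here as squared 2-norms. A quick numeric check
    # (illustration only):
    #
    #   from numpy.linalg import norm
    #   import numpy
    #   r  = numpy.array([0.3, -0.4])
    #   dy = numpy.array([0.1, 0.2])
    #   norm(r)**2          # == 0.25
    #   norm(r / dy)**2     # == 13.0  (9.0 + 4.0)
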
    def mcfit_step1_toss_dice_(self):
        """Generates a single Monte Carlo (synthetic) dataset for the
        mcfit_step1_ procedure."""
        # y'_i = y_i + dy_i * g_i, where g_i ~ N(0,1)
        self.dice_dy = self.rng.normal(size=len(self.samples_dy))
        self.dice_y = self.samples_y + self.samples_dy * self.dice_dy

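    # The dice toss above is a parametric bootstrap: each synthetic dataset
    # is drawn from independent Gaussians centered at the measured y values
    # with the measured uncertainties dy. A minimal standalone sketch of the
    # same resampling (illustration only; the numbers are made up):
    #
    #   import numpy
    #   rng = numpy.random.RandomState(42)
    #   y  = numpy.array([1.00, 1.10, 1.25])
    #   dy = numpy.array([0.02, 0.02, 0.03])
    #   dice_y = y + dy * rng.normal(size=len(dy))
    #   # Fitting many such dice_y realizations and collecting the fitted
    #   # parameters yields the sampling distribution of the parameters.
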
    def mcfit_step1_(self):
        """Performs a single Monte Carlo data fit."""
        # Var name conventions:
        # - dice_* = values related to one "dice toss" (synthetic resampling)
        #   of the sample
        # - mval_* = values related to the mean values of the samples
        #   (i.e. samples_y)
        # FIXME: In the future this *could* be run in parallel, but the state
        # vars (such as dice_y, dice_dy, etc.) must be stored on a per-thread
        # basis.
        from numpy.linalg import norm
        self.mcfit_step1_toss_dice_()
        if self.use_dy_weights:
            dy = self.samples_dy
        else:
            dy = None
        rslt = self.func.fit(self.samples_x, self.dice_y, dy=dy,
                             Guess=self.dice_param_guess,
                            )
        # fit result of the stochastic (synthetic) data
        self.dice_params = rslt
        self.log_mc_params.append(rslt)
        self.dice_f = self.func(self.dice_params, self.samples_x)

        if self.dbg_guess_params:
            self.log_guess_params.append(self.func.guess_params)

        last_fit = self.func.last_fit
        dice_resid = self.dice_f - self.dice_y
        mval_resid = self.dice_f - self.samples_y
        # *_ussr = unweighted sum of squared residuals;
        # *_wssr = dy**(-2)-weighted sum of squared residuals
        dice_ussr = norm(dice_resid)**2
        dice_wssr = norm(dice_resid / self.samples_dy)**2
        mval_ussr = norm(mval_resid)**2
        mval_wssr = norm(mval_resid / self.samples_dy)**2
        self.log_mc_stats.append((dice_ussr, dice_wssr, mval_ussr, mval_wssr))
        self.log_mc_funcalls.append(last_fit['funcalls'])

    def mcfit_step1_viz_(self, save=True):
        """Generates a visual representation of the last MC fit step."""
        from matplotlib import pyplot
        if not hasattr(self, "fig"):
            self.fig = pyplot.figure()
        self.fig.clf()
        ax = self.fig.add_subplot(1, 1, 1)
        title = "MC fit step %d" % self.mcfit_iter_num
        ax.set_title(title)
        x, y, dy = self.samples_x[0], self.samples_y, self.samples_dy
        # original data with error bars
        ax.errorbar(x=x, y=y, yerr=dy,
                    fmt="x", color="SlateGray", label="QMC",
                   )
        samples_xmin = x.min()
        samples_xmax = x.max()
        samples_xrange = samples_xmax - samples_xmin
        samples_ymin = y.min()
        samples_ymax = y.max()
        samples_yrange = samples_ymax - samples_ymin
        # dense abscissa for drawing smooth fitted curves, padded by 3%
        # beyond the sampled x range
        len_plot_x = 10 * len(y)
        plot_x = numpy.linspace(start=samples_xmin - 0.03 * samples_xrange,
                                stop=samples_xmax + 0.03 * samples_xrange,
                                num=len_plot_x,
                                endpoint=True)
        # deterministic (nonlinear) fit curve
        ax.plot(plot_x, self.func(self.nlf_rec.xopt, [plot_x]), "-",
                color="SlateGray", label="nlfit")
        # current synthetic dataset and its fit
        ax.errorbar(x=x, y=self.dice_y, yerr=dy,
                    fmt="or", label="MC toss",
                   )
        ax.plot(plot_x, self.func(self.dice_params, [plot_x]), "-",
                color="salmon", label="MC fit")

        samples_dy_max = numpy.max(self.samples_dy)
        ax.set_ylim((samples_ymin - samples_dy_max * 8,
                     samples_ymax + samples_dy_max * 8))
        if save:
            self.fig.savefig("mcfit-%04d.png" % self.mcfit_iter_num)

    def mcfit_loop_begin_(self):
        """Performs final initialization before firing up mcfit_loop1_.
        This needs to be done only before the first mcfit_loop1_() call;
        if more samples are collected later, then this routine should NOT be
        called again, or else all the accumulators would be reset."""
        self.log_guess_params = []
        self.log_mc_params = []
        self.log_mc_stats = []
        self.log_mc_funcalls = []
        if self.use_nlf_guess:
            print "Using guess param from NLF: ",
            self.nlfit1()
            self.dice_param_guess = self.log_nlf_params
            print self.log_nlf_params
        else:
            self.dice_param_guess = None

    def mcfit_loop_end_(self):
        """Performs final postprocessing after the MC fitting loop:
        - Repackages log_mc_stats and log_mc_params as numpy arrays of structs
        """
        # Number of fit parameters:
        num_params = len(self.log_mc_params[0])
        try:
            pnames = self.func.param_names
            assert len(pnames) == num_params  # otherwise the dtype would be faulty
            if self.func.use_lmfit_method:
                # lmfit-style parameters are also stored as plain floats
                ptype = float
            else:
                ptype = float
            param_dtype = [ (i, ptype) for i in pnames ]
        except (AttributeError, AssertionError):
            # fall back to generic parameter names C0, C1, ...
            param_dtype = [ ("C" + str(i), float) for i in xrange(num_params) ]
        stats_dtype = [ (i, float) for i in ('dice_ussr', 'dice_wssr', 'mval_ussr', 'mval_wssr') ]

        # Can't initialize the self.mc_params array in a single step with the
        # numpy.array construction function; we must copy the records one by
        # one. The reason: each element of the log_mc_params list is already
        # a numpy ndarray object.
        self.mc_params = numpy.empty((len(self.log_mc_params),), dtype=param_dtype)
        for (i, p) in enumerate(self.log_mc_params):
            if self.func.use_lmfit_method:
                self.mc_params[i] = tuple(p)
            else:
                self.mc_params[i] = p
        self.mc_stats = numpy.array(self.log_mc_stats, dtype=stats_dtype)
        self.fit_parameters = [ p[0] for p in param_dtype ]

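    # The repacking above relies on numpy structured arrays: assigning a
    # tuple to one element fills all named fields at once. A minimal
    # standalone sketch (illustration only; "a" and "b" are made-up
    # parameter names):
    #
    #   import numpy
    #   dt = [("a", float), ("b", float)]
    #   mc = numpy.empty((2,), dtype=dt)
    #   mc[0] = (1.0, 2.0)
    #   mc[1] = (1.1, 1.9)
    #   mc["a"]      # -> array([ 1. ,  1.1])  (per-parameter column)
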
    def mcfit_analysis_(self):
        """Performs analysis of the Monte Carlo fitting.
        This version does no weighting or filtering based on any cutoff
        criteria."""
        flds = self.fit_parameters  # == self.mc_params.dtype.names
        rslt = {}
        for F in flds:
            # the parameter estimate is the mean over the MC samples, and
            # its uncertainty is their standard deviation
            mean = numpy.average(self.mc_params[F])
            err = numpy.std(self.mc_params[F])
            rslt[F] = errorbar(mean, err)
        self.final_mc_params = rslt

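    # Note: numpy.std above uses its default ddof=0 (population standard
    # deviation); contrast with mcfit_eval below, which defaults to ddof=1.
    # For a large number of MC iterations the difference is negligible.
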
    def mcfit_loop1_(self, num_iter, save_fig=0):
        """Performs the Monte Carlo fit simulation after the
        input parameters are set up."""

        for i in xrange(num_iter):
            self.mcfit_iter_num = i
            if self.debug >= 2:
                print "mcfit_loop1_: iteration %d" % i
            self.mcfit_step1_()
            if save_fig:
                self.mcfit_step1_viz_(save=True)

    def mcfit_report_final_params(self, format=None):
        """Prints the final fitted parameters in one of several formats."""
        if format is None:
            format = getattr(self, "opt_report_final_params", self.def_opt_report_final_params)
        if format in (None, False, 0):
            return  # quiet!

        parm = self.final_mc_params
        if format == 3:
            # single-line report
            print "Final parameters :",
            print " ".join([
                "%s" % (parm[k],) for k in self.fit_parameters
            ])
        elif format == 2:
            # one parameter per line
            print "Final parameters:"
            print "\n".join([
                "  %s = %s" % (k, parm[k])
                for k in self.fit_parameters
            ])
        elif format == 1:
            # raw dict dump
            print "Final parameters:"
            print parm

    def mcfit_run1(self, x=None, y=None, dy=None, data=None, func=None, rng_params=None,
                   num_iter=100, save_fig=False):
        """The main routine to perform the stochastic fit."""
        # NOTE: use `is (not) None` tests here; `==`/`!=` comparisons would
        # broadcast elementwise on numpy array arguments.
        if data is not None:
            raise NotImplementedError
        elif dy is not None:
            # (x, y, dy) given separately: assume OK
            pass
        elif y is not None and dy is None:
            # y carries its own errorbars: split it into mean and error parts
            y_orig = y
            y = errorbar_mean(y_orig)
            dy = errorbar_err(y_orig)
        else:
            raise TypeError, "Invalid argument combination for the input data."

        if func is not None:
            self.init_func(func)
        if not hasattr(self, "func"):
            raise RuntimeError, \
                "The fit function in the fitting object is undefined."
        self.init_samples(x=x,
                          y=y,
                          dy=dy,
                          )
        if rng_params is not None:
            self.init_rng(**rng_params)
        elif not hasattr(self, "rng"):
            self.init_rng()

        self.mcfit_loop_begin_()
        self.mcfit_loop1_(num_iter=num_iter, save_fig=save_fig)
        self.mcfit_loop_end_()
        self.mcfit_analysis_()
        self.mcfit_report_final_params()
        return self.final_mc_params

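    # End-to-end usage sketch (hedged; `my_func` stands for a hypothetical
    # fit_func_base subclass with the fit()/__call__() interface used above,
    # and the data values are made up):
    #
    #   fitter = StochasticFitting()
    #   params = fitter.mcfit_run1(x=[1.5, 1.6, 1.7, 1.8],
    #                              y=[-0.21, -0.24, -0.23, -0.20],
    #                              dy=[0.01, 0.01, 0.01, 0.01],
    #                              func=my_func,
    #                              rng_params={'seed': 12345},
    #                              num_iter=200)
    #   # params is a dict mapping each parameter name to an errorbar
    #   # (mean +/- standard deviation over the MC fits); the same dict is
    #   # kept in fitter.final_mc_params.
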
    # The two routines below give a convenient way to evaluate the
    # fitted curve at arbitrary x values (good so long as they are not
    # far outside the range given by self.samples_x).

    def mcfit_eval_raw(self, x=None, yscale=1.0):
        """Evaluates the curve (y) values for a given set of x value(s).
        This routine generates the raw values based on the stochastically
        sampled parameter values."""
        if x is None:
            x = self.samples_x
        else:
            x = fit_func_base.domain_array(x)

        xlen = len(x[0])
        mc_curve_y = numpy.empty((len(self.mc_params), xlen))
        # The following loop could have been written as a batch operation,
        # but that requires some nontrivial changes in the convention of how
        # fit_func_base.__call__() is written.
        # Double broadcasting and other dimensional retrofitting/reduction
        # ('dot product'?) may be required.
        # Example: in the harm_fit_func class, the statement
        #     xdisp = (x[0] - C[2])
        # would have to be changed because the length of x[0]
        # (which is the number of data points in the "x" argument)
        # and the length of C[2]
        # (which is the number of MC iterations)
        # will not match--and these numbers must NOT be subtracted elementwise!
        for (i, ppp) in enumerate(self.mc_params):
            mc_curve_y[i] = self.func(ppp, x)

        return mc_curve_y

    def mcfit_eval(self, x=None, yscale=1.0, ddof=1, outfmt=0):
        """Evaluates the curve (y) values for a given set of x value(s).
        This routine generates the finalized values (with errorbar estimates)
        based on the stochastically sampled parameter values."""

        # WARNING: CONVENTION CHANGES FROM ORIGINAL make_curve_errorbar() ROUTINE!
        # The default delta degrees of freedom (ddof) should be 1 because we
        # need to take one out for the average itself.
        # If you need to reproduce old results, you can revert to ddof=0.
        if x is None:
            x = self.samples_x
        else:
            x = fit_func_base.domain_array(x)
        mc_curve_y = self.mcfit_eval_raw(x=x)

        xlen = len(x[0])
        final_mc_curve = numpy.empty((xlen,), dtype=[('val', float), ('err', float)])
        # pointwise mean and standard deviation across the MC curves
        final_mc_curve['val'] = numpy.average(mc_curve_y, axis=0)
        final_mc_curve['err'] = numpy.std(mc_curve_y, axis=0, ddof=ddof)
        if yscale != 1.0:
            final_mc_curve['val'] *= yscale
            final_mc_curve['err'] *= yscale
        if outfmt == 0:
            pass  # already in that format (structured array with val/err fields)
        elif outfmt == 1:
            # formatted as an array of "errorbar" objects
            final_mc_curve = numpy.array([ errorbar(y, dy) for (y, dy) in final_mc_curve ],
                                         dtype=errorbar)
        else:
            raise ValueError, "Unsupported outfmt value=%s." % (outfmt)
        return final_mc_curve

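    # Sketch of evaluating the fitted curve with errorbars on a dense grid
    # (hedged illustration; assumes the fit has already been run):
    #
    #   import numpy
    #   plot_x = numpy.linspace(1.5, 1.8, 61)
    #   curve = fitter.mcfit_eval(x=[plot_x])
    #   curve['val']   # pointwise mean of the MC curves
    #   curve['err']   # pointwise std dev (ddof=1) -> errorbar band
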
    def mcfit_dump_param_samples(self, out):
        """Dumps the generated parameter samples for diagnostic purposes."""
        O = text_output(out)
        pnames = self.mc_params.dtype.names
        snames = self.mc_stats.dtype.names
        # header: parameter names ; statistics names ; function-call count
        O.write("# %s ; %s ; nfev\n" % (" ".join(pnames), " ".join(snames)))
        O.write(matrix_str(array_hstack([ self.mc_params[k] for k in pnames ] + \
                                        [ self.mc_stats[k] for k in snames ] + \
                                        [ self.log_mc_funcalls ]),
                           " %#17.10g") + "\n")
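
# Sketch of dumping the MC parameter samples for inspection (hedged;
# text_output is assumed to accept a filename or an open stream):
#
#   fitter.mcfit_dump_param_samples("mcfit-samples.dat")
#   # The output is a commented header followed by one row per MC iteration:
#   # columns = fitted parameters, then the four USSR/WSSR statistics,
#   # then the fit routine's function-call count (nfev).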