1
0
forked from IPKM/nmreval
nmreval/nmreval/fit/result.py
2022-03-28 16:28:15 +02:00

345 lines
11 KiB
Python

import pathlib
import re
from collections import OrderedDict
import numpy as np
from scipy.stats import f as fdist
from scipy.interpolate import interp1d
from ..data.points import Points
from .parameter import Parameter
from ..data.signals import Signal
from ..utils.text import convert
class FitResultCreator:
@staticmethod
def make_from_session(x_orig, y_orig, idx, kwargs) -> (dict, list):
params = OrderedDict()
for key, pbest, err in zip(kwargs['pnames'], kwargs['parameter'], kwargs['error']):
params[key] = Parameter(pbest)
params[key].error = err
_x = kwargs['x']
_y = kwargs['y']
if len(_x) != len(x_orig):
f = interp1d(_x, _y)
rng = (x_orig >= np.min(_x)) * (x_orig <= np.max(_x))
resid = f(x_orig[rng]) - y_orig[rng]
else:
resid = kwargs['y'] - y_orig
stats = FitResultCreator.calc_statistics(resid, _y)
return FitResult(kwargs['x'], kwargs['y'], x_orig, y_orig, params, dict(kwargs['choice']), resid, 0, 0,
kwargs['name'], stats, idx), []
@staticmethod
def make_with_model(model, x_orig, y_orig, p, fun_kwargs, idx, nobs, nvar, corr, pcorr) -> (dict, list):
if np.all(x_orig > 0) and (np.max(x_orig) > 100 * np.min(x_orig)):
islog = True
else:
islog = False
if len(x_orig) < 51:
if islog:
_x = np.logspace(np.log10(np.min(x_orig)), np.log10(np.max(x_orig)), num=10*x_orig.size-9)
else:
_x = np.linspace(np.min(x_orig), np.max(x_orig), num=10*x_orig.size-9)
else:
_x = x_orig
try:
pnames = model.pnames
except AttributeError:
pnames = model.params
parameters = OrderedDict([(k, v) for k, v in zip(pnames, p)])
p_final = [p.value for p in parameters.values()]
part_functions = []
if model.is_multi:
for sub_y in model.sub(p_final, _x, **fun_kwargs):
if np.iscomplexobj(sub_y):
part_functions.append(Signal(_x, sub_y))
else:
part_functions.append(Points(_x, sub_y))
_y = model.func(p_final, _x, **fun_kwargs)
resid = model.func(p_final, x_orig, **fun_kwargs) - y_orig
stats = FitResultCreator.calc_statistics(_y, resid, nobs, nvar)
varied = [p.var for p in parameters.values()]
if corr is None:
correlation = np.eye(len(p_final), len(p_final))
partial_correlation = np.eye(len(p_final), len(p_final))
else:
if len(corr) < len(p_final):
correlation = np.eye(len(p_final), len(p_final))
partial_correlation = np.eye(len(p_final), len(p_final))
# probably some fixed variables
j = 0
for i in range(len(corr)):
while not varied[j]:
j += 1
correlation[j, varied] = corr[i]
partial_correlation[j, varied] = pcorr[i]
j += 1
else:
correlation = corr
partial_correlation = pcorr
return (
FitResult(_x, _y, x_orig, y_orig, parameters, fun_kwargs, resid,
nobs, nvar, model.name, stats,
idx=idx, corr=correlation, pcorr=partial_correlation,
islog=islog, iscomplex=model.is_complex),
part_functions,
)
@staticmethod
def calc_statistics(y, residual, nobs=None, nvar=None):
chi = (residual**2).sum()
try:
r = 1 - chi/((y-np.mean(y))**2).sum()
except RuntimeWarning:
r = -9999
if nobs is None:
nobs = 1
if nvar is None:
nvar = 0
dof = nobs - nvar
loglikehood = nobs * np.log(chi / nobs)
stats = {
'chi^2': chi,
'R^2': r,
'AIC': loglikehood + 2 * nvar,
'BIC': loglikehood + np.log(nobs) * nvar
}
if dof != 0:
stats['adj. R^2'] = 1 - (nobs-1)/dof * (1-r)
stats['red. chi^2'] = chi / dof if dof != 0 else 0
if dof != 1:
stats['AICc'] = stats['AIC'] + 2*(nvar+1)*nvar / (dof-1)
return stats
class FitResult(Points):
def __init__(self, x, y, x_data, y_data, params, fun_kwargs, resid, nobs, nvar, name, stats,
idx=None, corr=None, pcorr=None, islog=False, iscomplex=None,
**kwargs):
self.parameter, name = self._prepare_names(params, name)
super().__init__(x=x, y=y, name=name, **kwargs)
self.residual = resid
self.idx = idx
self.statistics = stats
self.nobs = nobs
self.nvar = nvar
self.fun_kwargs = fun_kwargs
self.correlation = corr
self.partial_correlation = pcorr
self.islog = islog
self.iscomplex = iscomplex
self.x_data = x_data
self.y_data = y_data
self._model_name = name
@staticmethod
def _prepare_names(parameter: dict, modelname: str):
pattern = re.compile(r'(\\?\w[\\\w .-]*(?:_{[\w\\ .-]})?)(\(\d+\))')
split_funcs = {g2: g1 for (g1, g2) in pattern.findall(modelname)}
parameter_dic = {}
for pname, pvalue in parameter.items():
nice_name = pname
nice_func = ''
m = pattern.match(pname)
if m:
func_number = m.group(2)
nice_name = m.group(1)
if func_number in split_funcs:
nice_func = split_funcs[func_number]
pvalue.name = nice_name
pvalue.function = nice_func
parameter_dic[pname] = pvalue
modelname = re.sub(r'\(\d+\)', '', modelname)
if modelname[0] == '(' and modelname[-1] == ')':
modelname = modelname[1:-1]
return parameter_dic, modelname
@property
def model_name(self):
return self._model_name
def __len__(self):
return len(self.parameter)
def __repr__(self):
try:
return 'Fit: ' + self.name
except AttributeError:
return 'FitObject'
@property
def p_final(self):
return [pp.value for pp in self.parameter.values()]
@property
def dof(self):
return self.nobs-self.nvar
def pprint(self, statistics=True, correlations=True):
print('Fit result:')
print(' model :', self.name)
print(' #data :', self.nobs)
print(' #var :', self.nvar)
print('\nParameter')
print(self._parameter_string())
if statistics:
print('Statistics')
for k, v in self.statistics.items():
print(f' {k} : {v:.4f}')
if correlations and self.correlation is not None:
print('\nCorrelation (partial corr.)')
print(self._correlation_string())
print()
def _parameter_string(self):
ret_val = ''
for pval in self.parameter.values():
ret_val += convert(str(pval), old='tex', new='str') + '\n'
if self.fun_kwargs:
for k, v in self.fun_kwargs.items():
ret_val += f' {k}: {v}\n'
return ret_val
def _correlation_string(self):
ret_val = ''
for p_i, p_j, corr_ij, pcorr_ij in self.correlation_list():
ret_val += ' {} / {} : {:.4f} ({:.4f})\n'.format(convert(p_i, old='tex', new='str'),
convert(p_j, old='tex', new='str'),
corr_ij, pcorr_ij)
return ret_val
def correlation_list(self, limit=0.1):
correlations = []
if self.correlation is not None:
pnames = list(self.parameter.keys())
corr = np.triu(self.correlation, k=1)
for i, j in zip(*np.unravel_index(np.argsort(np.abs(corr), axis=None)[::-1],
self.correlation.shape)):
if i == j:
continue
if abs(corr[i, j]) < limit:
break
correlations.append((pnames[i], pnames[j], self.correlation[i, j], self.partial_correlation[i, j]))
return correlations
def savetxt(self, fname, err=True):
header = self.name + '\n'
for pval in self.parameter.values():
header += '{}\t{}\t{}\n'.format(convert(pval.name, old='tex', new='str'), pval.value, pval.error)
if self.fun_kwargs:
for k, v in self.fun_kwargs.items():
header += '{}\t{}\n'.format(convert(k, old='tex', new='str'),
convert(str(v), old='tex', new='str'))
if self.iscomplex == 'complex':
np.savetxt(fname, np.c_[self.x, self.y.real, self.y.imag], header=header)
else:
np.savetxt(fname, np.c_[self.x, self.y], header=header)
def save_parameter(self, fname: str, label: str = None, overwrite: bool = False):
path = pathlib.Path(fname)
if not path.is_file():
overwrite = True
writemode = 'w' if overwrite else 'a'
if label is None:
label = self.value
with path.open(writemode) as f:
if overwrite or not path.exists():
f.write('# label(1)\t')
for i, pname in enumerate(self.parameter.keys()):
raw_name = convert(pname, old='tex', new='str')
f.write(f'{raw_name}({2*i+2})\t{raw_name}_err({2*i+3})\t')
f.write('\n')
f.write(str(label).replace(' ', '') + '\t')
for p in self.parameter.values():
if p.error is not None:
err = p.error
else:
err = 0.
f.write(f'{p.value:.8e}\t{err:.8e}\t')
if self.fun_kwargs:
f.write('# ')
for k, v in self.fun_kwargs.items():
f.write(f"{convert(k, old='tex', new='str')}: {convert(str(v), old='tex', new='str')}\t")
f.write('\n')
def f_test(self, chi2: float, dof: float):
if 'red. chi^2' not in self.statistics or dof == self.dof:
f_value = 1e318
else:
f_value = (chi2-self.statistics['chi^2']) / (dof-self.dof) / self.statistics['red. chi^2']
return f_value, 1-fdist.cdf(f_value, dof-self.dof, self.dof)
def get_state(self):
state = super().get_state()
for attr in ['idx', 'fun_kwargs', 'nobs', 'nvar',
'islog', 'iscomplex', 'x_data', 'y_data']:
state[attr] = getattr(self, attr)
state['name'] = self._model_name
state['corr'] = self.correlation
state['pcorr'] = self.partial_correlation
state['stats'] = self.statistics
state['resid'] = self.residual
state['params'] = {k: v.get_state() for k, v in self.parameter.items()}
state['mode'] = 'fit'
return state
@staticmethod
def set_state(state, **kwargs):
state['params'] = {k: Parameter.set_state(v) for k, v in state.pop('params').items()}
data = FitResult(**state)
return data