# Source code for pyrfu.pyrf.optimize_nbins_1d
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 3rd party imports
import numpy as np
__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2023"
__license__ = "MIT"
__version__ = "2.4.2"
__status__ = "Prototype"
def optimize_nbins_1d(x, n_min: int = 1, n_max: int = 100) -> int:
    r"""Estimate the number of bins of a 1d histogram that minimizes the
    risk function in [1]_ , obtained by direct decomposition of the MISE
    following the method described in [2]_ .

    For every candidate number of bins :math:`n` the data are histogrammed
    and the cost

    .. math::

        C(n) = \frac{2 \bar{k} - v}{\Delta^2}

    is evaluated, where :math:`\bar{k}` and :math:`v` are the mean and the
    (biased) variance of the bin counts and :math:`\Delta` is the data range
    divided by :math:`n`. The candidate with the smallest cost is returned.

    Parameters
    ----------
    x : xarray.DataArray or array_like
        Input time series. It is converted with ``numpy.asarray`` and all of
        its values are used, whatever its shape.
    n_min : int, Optional
        Smallest number of bins to test (inclusive). Must be at least 1.
        Default is 1.
    n_max : int, Optional
        Upper bound of the number of bins to test (exclusive): the
        candidates are ``n_min, ..., n_max - 1``. Default is 100.

    Returns
    -------
    opt_n_x : int
        Number of bins that minimizes the cost function. If several
        candidates share the minimum, the smallest one is returned.

    Raises
    ------
    ValueError
        If ``n_min`` is smaller than 1 or if ``n_max`` is not strictly
        larger than ``n_min`` (no candidate number of bins to test).

    Notes
    -----
    If all values of ``x`` are identical the bin width is zero and the cost
    is not finite; the returned value is then not meaningful.

    References
    ----------
    .. [1]  Rudemo, M. (1982) Empirical Choice of Histograms and Kernel Density
            Estimators. Scandinavian Journal of Statistics, 9, 65-78.

    .. [2]  Shimazaki H. and Shinomoto S., A method for selecting the bin size
            of a time histogram Neural Computation (2007) Vol. 19(6), 1503-1527

    """

    # Fail early with an explicit message instead of an obscure NumPy error
    # raised later by ``np.histogram`` (bins < 1) or ``np.argmin`` (no
    # candidates).
    if n_min < 1:
        raise ValueError("n_min must be at least 1")

    if n_max <= n_min:
        raise ValueError("n_max must be strictly larger than n_min")

    # Convert once so that plain sequences are accepted and the conversion
    # is not repeated for every candidate inside the loop.
    values = np.asarray(x)
    x_min, x_max = np.min(values), np.max(values)

    # Candidate numbers of bins (n_max itself is excluded)
    ns_x = np.arange(n_min, n_max)

    # Bin size associated with each candidate
    ds_x = (x_max - x_min) / ns_x

    # Cost function evaluated for each candidate
    cs_x = np.zeros(ds_x.shape)

    for i, n_x in enumerate(ns_x):
        # Event counts in the bins of the 1-dimensional histogram
        k_i, _ = np.histogram(values, bins=n_x)

        k_ = np.mean(k_i)  # Mean of event count
        v_ = np.var(k_i)  # (Biased) variance of event count

        # The cost function
        cs_x[i] = (2 * k_ - v_) / ds_x[i] ** 2

    # Optimal number of bins: first candidate reaching the minimum cost
    opt_n_x = int(ns_x[np.argmin(cs_x)])

    return opt_n_x