Source code for pyrfu.pyrf.optimize_nbins_1d

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# 3rd party imports
import numpy as np

# Module metadata
__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2023"
__license__ = "MIT"
__version__ = "2.4.2"
__status__ = "Prototype"


def optimize_nbins_1d(x, n_min: int = 1, n_max: int = 100) -> int:
    r"""Estimate the optimal number of bins for a 1d histogram.

    The returned number of bins minimizes the risk function in [1]_ ,
    obtained by direct decomposition of the MISE following the method
    described in [2]_ .

    Parameters
    ----------
    x : xarray.DataArray
        Input time series. Any object that ``numpy.asarray`` can convert
        (e.g. ``numpy.ndarray`` or a list) is accepted as well.
    n_min : int, Optional
        Minimum number of bins. Default is 1.
    n_max : int, Optional
        Upper bound of the number of bins. The bound is exclusive, i.e. the
        tested numbers of bins are ``n_min, ..., n_max - 1``. Default is 100.

    Returns
    -------
    opt_n_x : int
        Number of bins that minimizes the cost function. If all the samples
        share the same value, the cost function is undefined (zero bin
        width) and ``n_min`` is returned.

    Raises
    ------
    ValueError
        If ``x`` is empty, if ``n_min`` is smaller than 1 or if ``n_max`` is
        not strictly larger than ``n_min``.

    References
    ----------
    .. [1] Rudemo, M. (1982) Empirical Choice of Histograms and Kernel Density
            Estimators. Scandinavian Journal of Statistics, 9, 65-78.

    .. [2] Shimazaki H. and Shinomoto S., A method for selecting the bin size
            of a time histogram Neural Computation (2007) Vol. 19(6),
            1503-1527

    """

    # Convert once so that the same values are used for the range and for
    # every histogram.
    values = np.asarray(x)

    if values.size == 0:
        raise ValueError("x must contain at least one sample")

    if n_min < 1:
        raise ValueError(f"n_min must be at least 1, got {n_min}")

    if n_max <= n_min:
        raise ValueError(
            f"n_max must be strictly larger than n_min, got n_min={n_min} "
            f"and n_max={n_max}"
        )

    x_min, x_max = np.min(values), np.max(values)

    # Constant data: every bin width would be zero and the cost function
    # would divide by zero, so fall back to the smallest number of bins.
    if x_max == x_min:
        return int(n_min)

    # Candidate numbers of bins (n_max excluded)
    ns_x = np.arange(n_min, n_max)

    # Bin size associated with each candidate
    ds_x = (x_max - x_min) / ns_x

    cs_x = np.empty(ds_x.shape)

    # Computation of the cost function for each candidate
    for i, (n_x, d_x) in enumerate(zip(ns_x, ds_x)):
        # Explicit range: same edges as the default, without rescanning the
        # data for its extrema at every iteration.
        counts, _ = np.histogram(values, bins=n_x, range=(x_min, x_max))

        # The mean and the (biased) variance are simply computed from the
        # event counts in all the bins of the 1-dimensional histogram.
        cs_x[i] = (2 * np.mean(counts) - np.var(counts)) / d_x**2

    # Optimal bin size selection: candidate with the minimum cost
    opt_n_x = int(ns_x[np.argmin(cs_x)])

    return opt_n_x