Source code for locan.utils.statistics

"""

Statistics related tools.

"""

from __future__ import annotations

import logging
from typing import NamedTuple

import numpy as np
import numpy.typing as npt

__all__: list[str] = [
    "weighted_mean_variance",
    "ratio_fwhm_to_sigma",
    "biased_variance",
]

logger = logging.getLogger(__name__)



[docs]
class WeightedMeanVariance(NamedTuple):
    weighted_mean: float | npt.NDArray[np.float64]
    weighted_mean_variance: float | npt.NDArray[np.float64]




[docs]
def weighted_mean_variance(
    values: npt.ArrayLike, weights: npt.ArrayLike | None
) -> WeightedMeanVariance:
    """
    Compute weighted mean (average)
    and the corresponding weighted mean variance [1]_.

    Parameters
    ----------
    values
        Values from which to compute the weighted average.
    weights
        Weights to use for weighted average.

    Returns
    -------
    WeightedMeanVariance
        weighted_mean, weighted_mean_variance

    References
    ----------
    .. [1] Donald F. Gatz, Luther Smith,
       The standard error of a weighted mean concentration —
       I. Bootstrapping vs other methods.
       Atmospheric Environment 29(11), 1995: 1185-1193,
       https://doi.org/10.1016/1352-2310(94)00210-C.
    """
    values = np.asarray(values)
    if values.ndim > 1:
        raise ValueError("values must be 1-dimensional.")
    if weights is None:
        weights = np.ones(shape=values.shape)
    else:
        weights = np.asarray(weights)
    if values.shape != weights.shape:
        raise TypeError("Shape of values and weights must be the same.")

    n_weights = len(weights)
    if n_weights == 1:
        weighted_average = np.mean(a=values)
        weighted_mean_variance_ = 0
    else:
        weighted_average = np.average(a=values, weights=weights)
        weights_mean = np.mean(a=weights)
        weighted_mean_variance_ = (
            n_weights
            / (n_weights - 1)
            / np.sum(weights) ** 2
            * (
                sum((weights * values - weights_mean * weighted_average) ** 2)
                - 2
                * weighted_average
                * sum(
                    (weights - weights_mean)
                    * (weights * values - weights_mean * weighted_average)
                )
                + weighted_average**2 * sum((weights - weights_mean) ** 2)
            )
        )

        # Check for negative values that should not occur
        # but did result with some floating values
        if (
            weighted_mean_variance_ != 0
            and weighted_mean_variance_[weighted_mean_variance_ < 0].any()  # type: ignore
        ):
            if np.ndim(weighted_mean_variance_) == 0:
                weighted_mean_variance_ = 0
            else:
                weighted_mean_variance_[weighted_mean_variance_ < 0] = 0  # type: ignore
            logger.warning(
                "Negative values for weighted_mean_variance occurred and were set to "
                "zero."
            )

    return WeightedMeanVariance(
        weighted_mean=weighted_average,
        weighted_mean_variance=weighted_mean_variance_,
    )




[docs]
def ratio_fwhm_to_sigma() -> float:
    """
    The numeric value of the ratio between full-width-half-max (fwhm) width
    and standard deviation (sigma) of a normal distribution.

    Returns
    -------
    float
    """
    return_value = 2 * np.sqrt(2 * np.log(2))
    return float(return_value)




[docs]
def biased_variance(
    variance: npt.ArrayLike, n_samples: npt.ArrayLike
) -> npt.NDArray[np.float64]:
    """
    The sample variance is biased if not corrected by Bessel's correction.
    This function yields the biased variance by applying the inverse
    correction.

    .. math::

        E(variance_{biased}) = variance * (1 - 1 / localization_counts).

    Parameters
    ----------
    variance
        An unbiased variance.
    n_samples
        Number of samples from which the biased sample variance would be
        computed.

    Returns
    -------
    npt.NDArray[np.float64]
    """
    variance = np.asarray(variance)
    n_samples = np.asarray(n_samples)
    return_value: npt.NDArray[np.float64] = variance * (1 - 1 / n_samples)
    return return_value