Source code for locan.analysis.localization_property

"""

Analyze localization property.

Localizations come with a range of properties including position coordinates,
emission strength, local background etc..
Most properties represent random variables that were drawn from an unknown
probability distribution.
It is often useful to analyze the properties from all localizations within a
selection and estimate the corresponding probability distribution.

"""

from __future__ import annotations

import logging
import sys
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

if TYPE_CHECKING:
    import matplotlib as mpl
    import pandas as pd

    from locan.data.locdata import LocData

from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis, _list_parameters

__all__: list[str] = ["LocalizationProperty"]

logger = logging.getLogger(__name__)


# The algorithms


def _localization_property(
    locdata: LocData, loc_property: str = "intensity", index: str | None = None
) -> pd.DataFrame:
    if index is None:
        results = locdata.data[[loc_property]]
    else:
        results = locdata.data[[loc_property, index]].set_index(index)
        results = results.sort_index()

    return results


# The specific analysis classes



[docs]
class LocalizationProperty(_Analysis):
    """
    Analyze localization property with respect to probability density or
    variation over a specified index.

    Parameters
    ----------
    meta : locan.analysis.metadata_analysis_pb2.AMetadata
        Metadata about the current analysis routine.
    loc_property : str
        The property to analyze.
    index : str | None
        The property name that should serve as index (i.e. x-axis in x-y-plot)

    Attributes
    ----------
    count : int
        A counter for counting instantiations (class attribute).
    parameter : dict[str, Any]
        A dictionary with all settings for the current computation.
    meta : locan.analysis.metadata_analysis_pb2.AMetadata
        Metadata about the current analysis routine.
    results : pandas.DataFrame
        Computed results.
    distribution_statistics : Distribution_stats | None
        Distribution parameters derived from MLE fitting of results.
    """

    def __init__(
        self,
        meta: metadata_analysis_pb2.AMetadata | None = None,
        loc_property: str = "intensity",
        index: str | None = None,
    ) -> None:
        parameters = self._get_parameters(locals())
        super().__init__(**parameters)

        self.results: pd.DataFrame | None = None
        self.distribution_statistics: _DistributionFits | None = None


[docs]
    def compute(self, locdata: LocData) -> Self:
        """
        Run the computation.

        Parameters
        ----------
        locdata
            Localization data.

        Returns
        -------
        Self
        """
        if not len(locdata):
            logger.warning("Locdata is empty.")
            return self

        self.results = _localization_property(locdata=locdata, **self.parameter)
        return self



[docs]
    def fit_distributions(
        self,
        distribution: str | stats.rv_continuous = stats.expon,
        with_constraints: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Fit probability density functions to the distributions of
        `loc_property` values in the results
        using MLE (scipy.stats).

        If with_constraints is true we put the following constraints on the
        fit procedure:
        If distribution is expon then
        `floc=np.min(self.analysis_class.results[self.loc_property].values)`.

        Parameters
        ----------
        distribution
            Distribution model to fit.
        with_constraints
            Flag to use predefined constraints on fit parameters.
        kwargs
            Other parameters are passed to `scipy.stat.distribution.fit()`.
        """
        if self:
            self.distribution_statistics = _DistributionFits(self)
            self.distribution_statistics.fit(
                distribution, with_constraints=with_constraints, **kwargs
            )
        else:
            logger.warning("No results available to fit.")



[docs]
    def plot(
        self, ax: mpl.axes.Axes | None = None, window: int = 1, **kwargs: Any
    ) -> mpl.axes.Axes:
        """
        Provide plot as :class:`matplotlib.axes.Axes` object showing the
        running average of results over window size.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
            The axes on which to show the image
        window
            Window for running average that is applied before plotting.
        kwargs
            Other parameters passed to :func:`matplotlib.pyplot.plot`.

        Returns
        -------
        matplotlib.axes.Axes
            Axes object with the plot.
        """
        if ax is None:
            ax = plt.gca()

        if self.results is None:
            return ax

        self.results.rolling(window=window, center=True).mean().plot(
            ax=ax, **dict(dict(legend=False), **kwargs)
        )
        ax.set(
            title=f"{self.parameter['loc_property']}({self.parameter['index']})\n (window={window})",
            xlabel=self.parameter["index"],
            ylabel=self.parameter["loc_property"],
        )

        return ax



[docs]
    def hist(
        self,
        ax: mpl.axes.Axes | None = None,
        bins: int | Sequence[int | float] | str = "auto",
        log: bool = True,
        fit: bool | None = True,
        **kwargs: Any,
    ) -> mpl.axes.Axes:
        """
        Provide histogram as :class:`matplotlib.axes.Axes` object showing
        hist(results). Nan entries are ignored.

        Parameters
        ----------
        ax
            The axes on which to show the image
        bins
            Bin specifications (passed to :func:`matplotlib.hist`).
        log
            Flag for plotting on a log scale.
        fit
            Flag indicating if distribution fit is shown.
            The fit will only be computed if `distribution_statistics` is None.
        kwargs
            Other parameters passed to :func:`matplotlib.pyplot.hist`.

        Returns
        -------
        matplotlib.axes.Axes
            Axes object with the plot.
        """
        if ax is None:
            ax = plt.gca()

        if self.results is None:
            return ax

        ax.hist(
            self.results.dropna(axis=0).values,
            bins=bins,
            **dict(dict(density=True, log=log), **kwargs),
        )
        ax.set(
            title=self.parameter["loc_property"],
            xlabel=self.parameter["loc_property"],
            ylabel="PDF",
        )

        # fit distributions:
        if fit:
            if self.distribution_statistics is None:
                self.fit_distributions()

            assert (  # type narrowing # noqa: S101
                self.distribution_statistics is not None
            )
            self.distribution_statistics.plot(ax=ax)

        return ax




# todo add Dependence_stats to fit a plot to a linear function, log function, or exponential decay.


class _DistributionFits:
    """
    Handle for distribution fits.

    This class is typically instantiated by LocalizationProperty methods.
    It holds the statistical parameters derived by fitting the result
    distributions using MLE (scipy.stats).
    Statistical parameters are defined as described in
    :ref:(https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous.html)

    Parameters
    ----------
    analyis_class : LocalizationPrecision
        The analysis class with result data to fit.

    Attributes
    ----------
    analyis_class : LocalizationProperty
        The analysis class with result data to fit.
    loc_property : str
        The LocData property for which to fit an appropriate distribution.
    distribution : scipy.stats.rv_continuous | None
        Distribution model to fit.
    parameters : list[str]
        Distribution parameters.
    """

    def __init__(self, analysis_class: LocalizationProperty) -> None:
        self.analysis_class = analysis_class
        self.loc_property = self.analysis_class.parameter["loc_property"]
        self.distribution: stats.rv_continuous | None = None
        self.parameters: list[str] = []

    def fit(
        self,
        distribution: stats.rv_continuous,
        with_constraints: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Fit scipy.stats.rv_continuous to analysis_class.results[loc_property].

        If with_constraints is true we put the following constraints on the
        fit procedure:
        If distribution is expon then
        floc=np.min(self.analysis_class.results[self.loc_property].values).

        Parameters
        ----------
        distribution
            Distribution model to fit.
        with_constraints
            Flag to use predefined constraints on fit parameters.
        kwargs
            Other parameters are passed to `scipy.stats.rv_continuous.fit()`.
        """
        if self.analysis_class.results is None:
            logger.warning("No results available to fit.")
            return None
        self.distribution = distribution
        for param in _list_parameters(distribution):
            self.parameters.append(self.loc_property + "_" + param)

        if with_constraints and self.distribution == stats.expon:
            # MLE fit of exponential distribution with constraints
            fit_results = stats.expon.fit(
                self.analysis_class.results[self.loc_property].values,
                **dict(
                    dict(
                        floc=np.min(
                            self.analysis_class.results[self.loc_property].values
                        )
                    ),
                    **kwargs,
                ),
            )
            for parameter, result in zip(self.parameters, fit_results):
                setattr(self, parameter, result)
        else:
            fit_results = self.distribution.fit(
                self.analysis_class.results[self.loc_property].values, **kwargs
            )
            for parameter, result in zip(self.parameters, fit_results):
                setattr(self, parameter, result)

    def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes:
        """
        Provide plot as :class:`matplotlib.axes.Axes` object showing the
        probability distribution functions of fitted results.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
            The axes on which to show the image.
        kwargs
            Other parameters passed to :func:`matplotlib.pyplot.plot`.

        Returns
        -------
        matplotlib.axes.Axes
            Axes object with the plot.
        """
        if ax is None:
            ax = plt.gca()

        if self.distribution is None:
            return ax

        # plot fit curve
        parameter = self.parameter_dict().values()
        x_values = np.linspace(
            self.distribution.ppf(0.001, *parameter),
            self.distribution.ppf(0.999, *parameter),
            100,
        )
        ax.plot(
            x_values,
            self.distribution.pdf(x_values, *parameter),
            "r-",
            **dict(
                dict(lw=3, alpha=0.6, label=str(self.distribution.name) + " pdf"),
                **kwargs,
            ),
        )

        return ax

    def parameter_dict(self) -> dict[str, float]:
        """Dictionary of fitted parameters."""
        return {k: self.__dict__[k] for k in self.parameters}