Source code for locan.analysis.grouped_property_expectation

"""

Analyze grouped property expectations.

As an example, assume a collection of locdatas representing localization
clusters.
Some property of locdata `loc_property`(i.e. cluster property; e.g. intensity)
might depend on another property `other_loc_property` (e.g. number of
localizations in cluster).
For inspection, all values of loc_property are grouped by other_loc_property
and mean with standard deviations are displayed.

See Also
--------
position_variance_expectation
convex_hull_expectation
"""

from __future__ import annotations

import logging
import sys
from collections.abc import Mapping, Sequence
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import boost_histogram as bh
import matplotlib.pyplot as plt
import pandas as pd

from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis
from locan.process.aggregate import Bins

if TYPE_CHECKING:
    import matplotlib as mpl

    from locan.data.locdata import LocData

__all__: list[str] = ["GroupedPropertyExpectation"]

logger = logging.getLogger(__name__)


[docs] @dataclass(repr=False) class GroupedPropertyExpectationResults: values: pd.DataFrame = field(default_factory=pd.DataFrame) # with index being reference_index # with columns: loc_property, other_loc_property, value_to_expectation_ratio grouped: pd.DataFrame = field(default_factory=pd.DataFrame)
# with index being other_loc_property # with columns: loc_property_mean, loc_property_std, expectation
[docs] class GroupedPropertyExpectation(_Analysis): """ Analyze variation of localization property in relation to other localization property that is grouped. Parameters ---------- meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. loc_property: str | None The localization property to analyze. other_loc_property: str | None The localization property to group on. expectation : int | float | Mapping | pd.Series[Any] | None The expected value for all or each other localization property. Attributes ---------- count : int A counter for counting instantiations (class attribute). parameter : dict[str, Any] A dictionary with all settings for the current computation. meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. results : GroupedPropertyExpectationResults Computed results. distribution_statistics : Distribution_stats, None Distribution parameters derived from MLE fitting of results.grouped. """ def __init__( self, meta: metadata_analysis_pb2.AMetadata | None = None, loc_property: str | None = None, other_loc_property: str | None = None, expectation: int | float | Mapping[str, Any] | pd.Series[Any] | None = None, ) -> None: parameters = self._get_parameters(locals()) super().__init__(**parameters) self.expectation = expectation self.results = None self.distribution_statistics = None
[docs] def compute(self, locdata: LocData) -> Self: """ Run the computation. Parameters ---------- locdata Localization data. Returns ------- Self """ if not len(locdata): logger.warning("Locdata is empty.") return self loc_property = self.parameter["loc_property"] other_loc_property = self.parameter["other_loc_property"] self.results = GroupedPropertyExpectationResults() self.results.values = locdata.data.loc[:, [loc_property, other_loc_property]] grouped = self.results.values.groupby(other_loc_property) self.results.grouped[loc_property + "_mean"] = grouped.mean() self.results.grouped[loc_property + "_std"] = grouped.std() if self.expectation is None: self.results.grouped["expectation"] = pd.NA elif isinstance(self.expectation, (int, float)): self.results.grouped["expectation"] = self.expectation elif isinstance(self.expectation, pd.Series): indices = self.results.grouped.index self.results.grouped["expectation"] = self.expectation.loc[indices] elif isinstance(self.expectation, Mapping): self.results.grouped["expectation"] = [ self.expectation[index_] for index_ in self.results.grouped.index ] self.results.values["expectation"] = self.results.grouped.loc[ self.results.values[other_loc_property], "expectation" ].to_numpy() self.results.values["value_to_expectation_ratio"] = ( self.results.values[loc_property] / self.results.values["expectation"] ) return self
[docs] def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the variances as function of localization counts. Parameters ---------- ax The axes on which to show the image kwargs Other parameters passed to :func:`matplotlib.pyplot.plot`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax self.results.values.plot( kind="scatter", alpha=0.2, x=self.parameter["other_loc_property"], y=self.parameter["loc_property"], color="gray", ax=ax, **dict(dict(legend=False), **kwargs), ) self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["loc_property"] + "_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="black", ax=ax, ) ax.set( title="Grouped Property Expectation", xlabel=self.parameter["other_loc_property"], ylabel=self.parameter["loc_property"], ) return ax
[docs] def hist( self, ax: mpl.axes.Axes | None = None, bins: Bins | bh.axis.Axis | bh.axis.AxesTuple | None = None, n_bins: int | Sequence[int] | None = None, bin_size: float | Sequence[float] | Sequence[Sequence[float]] | None = None, bin_edges: Sequence[float] | Sequence[Sequence[float]] | None = None, bin_range: ( tuple[float, float] | Sequence[float] | Sequence[Sequence[float]] | None ) = None, log: bool = True, fit: bool = False, **kwargs: Any, ) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the 2-dimensional histogram. Parameters ---------- ax The axes on which to show the image bins The bin specification as defined in :class:`Bins` bin_edges Bin edges for all or each dimension with shape (dimension, n_bin_edges). bin_range Minimum and maximum edge for all or each dimensions with shape (2,) or (dimension, 2). n_bins The number of bins for all or each dimension. 5 yields 5 bins in all dimensions. (2, 5) yields 2 bins for one dimension and 5 for the other dimension. bin_size The size of bins for all or each bin and for all or each dimension with shape (dimension,) or (dimension, n_bins). 5 would describe bin_size of 5 for all bins in all dimensions. ((2, 5),) yield bins of size (2, 5) for one dimension. (2, 5) yields bins of size 2 for one dimension and 5 for the other dimension. ((2, 5), (1, 3)) yields bins of size (2, 5) for one dimension and (1, 3) for the other dimension. To specify arbitrary sequence of `bin_size` use `bin_edges` instead. log Flag for plotting on a log scale. fit Flag indicating if distribution fit is shown. kwargs Other parameters passed to :func:`matplotlib.pyplot.pcolormesh`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax fig = ax.get_figure() other_loc_property = self.parameter["other_loc_property"] loc_property = self.parameter["loc_property"] if all( item_ is None for item_ in [bins, n_bins, bin_size, bin_edges, bin_range] ): max_other_loc_property_ = self.results.grouped.index.max() max_loc_property_ = self.results.values[loc_property].max() if log: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 1, max_other_loc_property_, transform=bh.axis.transform.log, ), bh.axis.Regular( 50, 1, max_loc_property_, transform=bh.axis.transform.log ), ) ) else: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 1, max_other_loc_property_ ), bh.axis.Regular(50, 1, max_loc_property_), ) ) bins = Bins(bins=axes) else: try: bins = Bins( bins, n_bins, bin_size, bin_edges, bin_range, labels=[loc_property, other_loc_property], ) except ValueError as exc: # todo: check if message is appropriate # the error is raised again only to adapt the message. raise ValueError( "Bin dimension and len of `loc_properties` is incompatible." ) from exc axes = bins.boost_histogram_axes histogram = bh.Histogram(*axes) histogram.reset() histogram.fill( self.results.values[other_loc_property], self.results.values[loc_property] ) mesh = ax.pcolormesh(*histogram.axes.edges.T, histogram.view().T, **kwargs) fig.colorbar(mesh) # type: ignore[union-attr] self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["loc_property"] + "_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="white", ax=ax, ) ax.set( title="Grouped Property Expectation", xlabel=self.parameter["other_loc_property"], ylabel=self.parameter["loc_property"], ) if log: ax.set(xscale="log", yscale="log") if fit: raise NotImplementedError return ax