Source code for locan.analysis.position_variance_expectation

"""

Analyze variance of localization coordinates.

Localization coordinates in localization clusters come with a certain variance.
In resolution-limited clusters the variance is determined by the
localization precision.

A close look at the variance of localization coordinates as function of
localization counts helps to characterize localization clusters [1]_.

References
----------
.. [1] Ebert V, Eiring P, Helmerich DA, Seifert R, Sauer M, Doose S.
   Convex hull as diagnostic tool in single-molecule localization microscopy.
   Bioinformatics 38(24), 2022, 5421-5429, doi: 10.1093/bioinformatics/btac700.

"""

# todo add fit procedure to estimate variances
from __future__ import annotations

import logging
import sys
from collections.abc import Mapping, Sequence
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import boost_histogram as bh
import matplotlib.pyplot as plt
import pandas as pd

from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis
from locan.data.validation import _check_loc_properties
from locan.process.aggregate import Bins
from locan.utils.statistics import biased_variance

if TYPE_CHECKING:
    import matplotlib as mpl

    from locan.data.locdata import LocData

__all__: list[str] = ["PositionVarianceExpectation"]

logger = logging.getLogger(__name__)


# The algorithms


def _property_variances(
    collection: LocData,
    loc_property: str,
    biased: bool = False,
) -> pd.DataFrame:
    assert collection is not None and isinstance(  # type narrowing # noqa: S101
        collection.references, Sequence
    )
    loc_property = _check_loc_properties(collection, loc_properties=loc_property)[0]
    loc_property_var = loc_property + "_var"

    ddof = 0 if biased else 1
    results_df = pd.DataFrame()
    results_df["localization_count"] = collection.data.loc[:, "localization_count"]
    results_df[loc_property_var] = [
        reference_.data[loc_property].var(ddof=ddof)
        for reference_ in collection.references
    ]

    return results_df


# The specific analysis classes


[docs] @dataclass(repr=False) class PositionVarianceExpectationResults: values: pd.DataFrame = field(default_factory=pd.DataFrame) # with index being reference_index # with columns: loc_property, other_loc_property, value_to_expectation_ratio grouped: pd.DataFrame = field(default_factory=pd.DataFrame)
# with index being other_loc_property # with columns: loc_property_mean, loc_property_std, expectation
[docs] class PositionVarianceExpectation(_Analysis): """ Analyze variation of localization properties in relation to expected variations. Parameters ---------- meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. loc_property: str The localization property to analyze. expectation : int | float | Mapping[str, Any] | pd.Series[Any] | None The expected variance for all or each localization property. The expected variance equals the squared localization precision for localization position coordinates. biased : bool Flag to use biased or unbiased (Bessel-corrected) variance Attributes ---------- count : int A counter for counting instantiations (class attribute). parameter : dict A dictionary with all settings for the current computation. meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. results : PositionVarianceExpectationResults Computed results. distribution_statistics : Distribution_stats, None Distribution parameters derived from MLE fitting of results. """ def __init__( self, meta: metadata_analysis_pb2.AMetadata | None = None, loc_property: str = "position_x", expectation: int | float | Mapping[str, Any] | pd.Series[Any] | None = None, biased: bool = True, ) -> None: parameters = self._get_parameters(locals()) super().__init__(**parameters) self.results = None self.distribution_statistics = None
[docs] def compute(self, locdata: LocData) -> Self: """ Run the computation. Parameters ---------- locdata Localization data. Returns ------- Self """ if not len(locdata): logger.warning("Locdata is empty.") return self loc_property = self.parameter["loc_property"] + "_var" other_loc_property = "localization_count" self.results = PositionVarianceExpectationResults() self.results.values = _property_variances( collection=locdata, loc_property=self.parameter["loc_property"], biased=self.parameter["biased"], ) grouped = self.results.values.groupby(other_loc_property) self.results.grouped[loc_property + "_mean"] = grouped.mean() self.results.grouped[loc_property + "_std"] = grouped.std() expectation = self.parameter["expectation"] if expectation is None: self.results.grouped["expectation"] = pd.NA elif isinstance(expectation, (int, float)) and self.parameter["biased"] is True: n_samples = self.results.grouped.index expectation = biased_variance(variance=expectation, n_samples=n_samples) expectation = pd.Series(data=expectation, index=n_samples) self.results.grouped["expectation"] = expectation elif isinstance(expectation, (int, float)): self.results.grouped["expectation"] = expectation elif isinstance(expectation, pd.Series): indices = self.results.grouped.index self.results.grouped["expectation"] = expectation.loc[indices] elif isinstance(expectation, Mapping): self.results.grouped["expectation"] = [ expectation[index_] for index_ in self.results.grouped.index ] self.results.values["expectation"] = self.results.grouped.loc[ self.results.values[other_loc_property], "expectation" ].to_numpy() self.results.values["value_to_expectation_ratio"] = ( self.results.values[loc_property] / self.results.values["expectation"] ) return self
[docs] def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the variances as function of localization counts. Parameters ---------- ax The axes on which to show the image kwargs Other parameters passed to :func:`matplotlib.pyplot.plot`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax self.results.values.plot( kind="scatter", alpha=0.2, x="localization_count", y=self.parameter["loc_property"] + "_var", color="gray", ax=ax, **dict(dict(legend=False), **kwargs), ) self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["loc_property"] + "_var_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="black", ax=ax, ) ax.set( title="Position Variance Expectation", xlabel="localization_count", ylabel=self.parameter["loc_property"] + "_var", ) return ax
[docs] def hist( self, ax: mpl.axes.Axes | None = None, bins: Bins | bh.axis.Axis | bh.axis.AxesTuple | None = None, n_bins: int | Sequence[int] | None = None, bin_size: float | Sequence[float] | Sequence[Sequence[float]] | None = None, bin_edges: Sequence[float] | Sequence[Sequence[float]] | None = None, bin_range: ( tuple[float, float] | Sequence[float] | Sequence[Sequence[float]] | None ) = None, log: bool = True, fit: bool = False, **kwargs: Any, ) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the 2-dimensional histogram of variances and localization counts. Parameters ---------- ax The axes on which to show the image bins The bin specification as defined in :class:`Bins` bin_edges Bin edges for all or each dimension with shape (dimension, n_bin_edges). bin_range Minimum and maximum edge for all or each dimensions with shape (2,) or (dimension, 2). n_bins The number of bins for all or each dimension. 5 yields 5 bins in all dimensions. (2, 5) yields 2 bins for one dimension and 5 for the other dimension. bin_size The size of bins for all or each bin and for all or each dimension with shape (dimension,) or (dimension, n_bins). 5 would describe bin_size of 5 for all bins in all dimensions. ((2, 5),) yield bins of size (2, 5) for one dimension. (2, 5) yields bins of size 2 for one dimension and 5 for the other dimension. ((2, 5), (1, 3)) yields bins of size (2, 5) for one dimension and (1, 3) for the other dimension. To specify arbitrary sequence of `bin_size` use `bin_edges` instead. log Flag for plotting on a log scale. fit Flag indicating if distribution fit is shown. The fit will only be computed if `distribution_statistics` is None. kwargs Other parameters passed to :func:`matplotlib.pyplot.pcolormesh`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax fig = ax.get_figure() other_loc_property = "localization_count" loc_property = self.parameter["loc_property"] + "_var" if all( item_ is None for item_ in [bins, n_bins, bin_size, bin_edges, bin_range] ): max_other_loc_property_ = self.results.grouped.index.max() max_loc_property_ = self.results.values[loc_property].max() if log: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 1, max_other_loc_property_, transform=bh.axis.transform.log, ), bh.axis.Regular( 50, 1, max_loc_property_, transform=bh.axis.transform.log ), ) ) else: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 1, max_other_loc_property_ ), bh.axis.Regular(50, 1, max_loc_property_), ) ) bins = Bins(bins=axes) else: try: bins = Bins( bins, n_bins, bin_size, bin_edges, bin_range, labels=[loc_property, other_loc_property], ) except ValueError as exc: # todo: check if message is appropriate # the error is raised again only to adapt the message. raise ValueError( "Bin dimension and len of `loc_properties` is incompatible." ) from exc axes = bins.boost_histogram_axes histogram = bh.Histogram(*axes) histogram.reset() histogram.fill( self.results.values[other_loc_property], self.results.values[loc_property] ) mesh = ax.pcolormesh(*histogram.axes.edges.T, histogram.view().T, **kwargs) fig.colorbar(mesh) # type:ignore[union-attr] self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["loc_property"] + "_var_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="white", ax=ax, ) ax.set( title="Position Variance Expectation", xlabel=other_loc_property, ylabel=loc_property, ) if log: ax.set(xscale="log", yscale="log") if fit: raise NotImplementedError return ax