Source code for locan.analysis.convex_hull_expectation

"""

Analyze geometrical properties of the convex hull of localization coordinates.

Localization coordinates in localization clusters come with a certain variance.
In resolution-limited clusters the variance is determined by the
localization precision. Accordingly, the geometrical properties of convex hulls
also vary and can be analyzed as function of
localization counts to help characterize localization clusters [1]_.

References
----------
.. [1] Ebert V, Eiring P, Helmerich DA, Seifert R, Sauer M, Doose S.
   Convex hull as diagnostic tool in single-molecule localization microscopy.
   Bioinformatics 38(24), 2022, 5421-5429, doi: 10.1093/bioinformatics/btac700.

"""

# todo add fit procedure to estimate variance_estimate
from __future__ import annotations

import importlib.resources as importlib_resources
import logging
import sys
from collections.abc import Iterable, Sequence
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import TYPE_CHECKING, Any, Literal, NamedTuple

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

if TYPE_CHECKING:
    from locan.data.locdata import LocData

import boost_histogram as bh
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
import pandas as pd
import scipy.integrate as integrate
import scipy.special as special

from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis
from locan.process.aggregate import Bins

if TYPE_CHECKING:
    import matplotlib as mpl

__all__: list[str] = ["ConvexHullExpectation", "ConvexHullExpectationBatch"]

logger = logging.getLogger(__name__)


[docs] class ConvexHullProperty(Enum): REGION_MEASURE_2D = auto() SUBREGION_MEASURE_2D = auto() REGION_MEASURE_3D = auto() SUBREGION_MEASURE_3D = auto()
# might be extended - # but the previous entries must correspond to ConvexHullExpectationResource ConvexHullExpectationResource = dict( REGION_MEASURE_2D="lookup_table_area_2d.npy", SUBREGION_MEASURE_2D="lookup_table_peri_2d.npy", REGION_MEASURE_3D="lookup_table_vol_3d.npy", SUBREGION_MEASURE_3D="lookup_table_area_3d.npy", )
[docs] class ConvexHullExpectationValues(NamedTuple): n_points: list[int] | npt.NDArray[np.int64] expectation: npt.NDArray[np.int64 | np.float64] std_pos: npt.NDArray[np.float64] std_neg: npt.NDArray[np.float64]
def _get_resource( resource_directory: str, resource: str ) -> ConvexHullExpectationValues: """ Get convex hull property values from resource files produced by numerical simulations. Note ----- Original data can be found at https://github.com/super-resolution/Ebert-et-al-2022-supplement Parameters ---------- resource_directory Directory with resources resource Name of resource file within resource_directory Returns ------- ConvexHullExpectationValues """ resource_ = importlib_resources.files(resource_directory).joinpath(resource) resource_values = np.load(str(resource_)) if "2d" in resource: # hard coded corresponding to n_points in resources n_points = list(range(3, 201)) elif "3d" in resource: n_points = list(range(4, 201)) else: raise ValueError("resource is neither '2d' nor '3d'") if len(n_points) != resource_values.shape[1]: raise ValueError("The resource files are not correct.") return ConvexHullExpectationValues( n_points=n_points, expectation=resource_values[0], std_pos=resource_values[1], std_neg=resource_values[2], ) # The algorithms
[docs] def compute_convex_hull_region_measure_2d(n_points: int, sigma: float = 1) -> float: """ Compute the expected convex hull area for `n_points` data points that are normal distributed in 2-dimensional space with standard deviation `sigma`. Parameters ---------- n_points Number of points of convex hull sigma Standard deviation of normal-distributed point coordinates Returns ------- float Expectation value for convex hull area """ def pdf(x: float) -> float: return_value = 1 / (np.sqrt(2 * np.pi)) * np.exp((-1 / 2) * (x**2)) return float(return_value) def cdf(x: float) -> float: return_value = 1 / 2 * (1 + special.erf(x / np.sqrt(2))) return float(return_value) try: area = ( 3 * np.pi * special.binom(n_points, 3) * integrate.quad( lambda x: cdf(x) ** (n_points - 3) * pdf(x) ** 3, -np.inf, np.inf )[0] ) return_value = sigma**2 * area except ZeroDivisionError: return_value = np.nan return float(return_value)
def _get_convex_hull_property_expectation( convex_hull_property: str | ConvexHullProperty, n_points: npt.ArrayLike, sigma: float = 1, ) -> ConvexHullExpectationValues: """ Get the expected convex hull property for `n_points` data points that are normal distributed in 2- or 3-dimensional space with standard deviation `sigma`. Parameters ---------- convex_hull_property Choose property and dimension of convex hull n_points Number of points of convex hull sigma Standard deviation of normal-distributed point coordinates Returns ------- ConvexHullExpectationValues Expectation values for convex hull property """ try: convex_hull_property = convex_hull_property.name.upper() # type: ignore except AttributeError: convex_hull_property = convex_hull_property.upper() # type: ignore convex_hull_expectation_values = _get_resource( resource_directory="locan.analysis.resources.convex_hull_expectation", resource=ConvexHullExpectationResource[convex_hull_property], ) n_points, _, indices = np.intersect1d( n_points, convex_hull_expectation_values.n_points, return_indices=True ) if convex_hull_property == "REGION_MEASURE_2D": factor = sigma**2 elif convex_hull_property == "SUBREGION_MEASURE_2D": factor = sigma elif convex_hull_property == "REGION_MEASURE_3D": factor = sigma**3 logger.warning("The expectation is scaled by sigma^2") elif convex_hull_property == "SUBREGION_MEASURE_3D": factor = sigma**2 logger.warning("The expectation is scaled by sigma^2") else: raise ValueError("convex_hull_property is undefined") result = ConvexHullExpectationValues( n_points=n_points, expectation=convex_hull_expectation_values.expectation[indices] * factor, # type: ignore[arg-type] std_pos=convex_hull_expectation_values.std_pos[indices] * factor, std_neg=convex_hull_expectation_values.std_neg[indices] * factor, ) return result # The specific analysis classes
[docs] @dataclass(repr=False) class ConvexHullExpectationResults: values: pd.DataFrame = field(default_factory=pd.DataFrame) # with index being reference_index # with columns: loc_property, other_loc_property, value_to_expectation_ratio grouped: pd.DataFrame = field(default_factory=pd.DataFrame)
# with index being other_loc_property # with columns: loc_property_mean, loc_property_std, expectation
[docs] class ConvexHullExpectation(_Analysis): """ Analyze geometrical properties of the convex hull of localization coordinates in relation to expected values. Parameters ---------- meta : locan.analysis.metadata_analysis_pb2.AMetadata | None Metadata about the current analysis routine. convex_hull_property : Literal["region_measure_ch", "subregion_measure_ch"] One of 'region_measure_ch' (i.e. area or volume) or 'subregion_measure_ch' (i.e. circumference or surface.) expected_variance : float | Iterable[float] | None The expected variance for all or each localization property. The expected variance equals the squared localization precision for localization position coordinates. Attributes ---------- count : int A counter for counting instantiations (class attribute). parameter : dict[str, Any] A dictionary with all settings for the current computation. meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. results : ConvexHullExpectationResults | None Computed results. distribution_statistics : dict[str, Any] | None Distribution parameters derived from MLE fitting of results. """ def __init__( self, meta: metadata_analysis_pb2.AMetadata | None = None, convex_hull_property: Literal[ "region_measure_ch", "subregion_measure_ch" ] = "region_measure_ch", expected_variance: float | Iterable[float] | None = None, ) -> None: if convex_hull_property not in ["region_measure_ch", "subregion_measure_ch"]: raise TypeError( "convex_hull_property must be one of " "[region_measure_ch, subregion_measure_ch]" ) parameters = self._get_parameters(locals()) super().__init__(**parameters) self.expected_variance = expected_variance self.results: ConvexHullExpectationResults | None = None self.distribution_statistics: dict[str, Any] | None = None self._dimension: int | None = None
[docs] def compute(self, locdata: LocData) -> Self: """ Run the computation. Parameters ---------- locdata : LocData Localization data. Returns ------- Self """ if not len(locdata): logger.warning("Locdata is empty.") return self self.results = ConvexHullExpectationResults() assert self.results is not None # type narrowing # noqa: S101 loc_property = self.parameter["convex_hull_property"] self._dimension = locdata.dimension try: self.results.values = locdata.data.loc[ :, ["localization_count", loc_property] # type: ignore[list-item] ] except KeyError: locdata.update_convex_hulls_in_references() self.results.values = locdata.data.loc[ :, ["localization_count", loc_property] # type: ignore[list-item] ] grouped = self.results.values.groupby("localization_count") self.results.grouped[loc_property + "_mean"] = grouped.mean() self.results.grouped[loc_property + "_std"] = grouped.std() convex_hull_property_ = loc_property[:-2] + f"{self._dimension}d" convex_hull_property_ = ConvexHullProperty[convex_hull_property_.upper()] if self.expected_variance is None: self.results.grouped["expectation"] = pd.NA self.results.grouped["expectation_std_pos"] = pd.NA self.results.grouped["expectation_std_neg"] = pd.NA else: convex_hull_expectation_values = _get_convex_hull_property_expectation( n_points=self.results.grouped.index, convex_hull_property=convex_hull_property_, sigma=np.sqrt(self.expected_variance), # type: ignore[arg-type] ) new_dict = dict( expectation=convex_hull_expectation_values.expectation, expectation_std_pos=convex_hull_expectation_values.std_pos, expectation_std_neg=convex_hull_expectation_values.std_neg, ) new_df = pd.DataFrame( new_dict, index=convex_hull_expectation_values.n_points ) self.results.grouped = pd.merge( self.results.grouped, new_df, how="outer", left_index=True, right_index=True, ) self.results.values = pd.merge( self.results.values, self.results.grouped["expectation"], how="outer", left_on="localization_count", right_index=True, ) self.results.values["value_to_expectation_ratio"] = ( self.results.values[loc_property] / self.results.values["expectation"] ) return self
[docs] def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the convex_hull_property as function of localization counts. Parameters ---------- ax The axes on which to show the image kwargs Other parameters passed to :func:`matplotlib.pyplot.plot`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax self.results.values.plot( kind="scatter", alpha=0.2, x="localization_count", y=self.parameter["convex_hull_property"], color="gray", ax=ax, **dict(dict(legend=False), **kwargs), ) self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["convex_hull_property"] + "_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="black", ax=ax, ) if not self.results.grouped.expectation_std_pos.isna().all(): df = ( self.results.grouped.expectation + self.results.grouped.expectation_std_pos ) df.plot( kind="line", linestyle=":", color="gray", ax=ax, ) if not self.results.grouped.expectation_std_neg.isna().all(): df = ( self.results.grouped.expectation - self.results.grouped.expectation_std_neg ) df.plot( kind="line", linestyle=":", color="gray", ax=ax, ) ax.set( title="Convex Hull Expectation", xlabel="localization_count", ylabel=self.parameter["convex_hull_property"], ) return ax
[docs] def hist( self, ax: mpl.axes.Axes | None = None, bins: Bins | bh.axis.Axis | bh.axis.AxesTuple | None = None, n_bins: int | Sequence[int] | None = None, bin_size: float | Sequence[float] | Sequence[Sequence[float]] | None = None, bin_edges: Sequence[float] | Sequence[Sequence[float]] | None = None, bin_range: ( tuple[float, float] | Sequence[float] | Sequence[Sequence[float]] | None ) = None, log: bool = True, fit: bool = False, **kwargs: Any, ) -> mpl.axes.Axes: """ Provide plot as :class:`matplotlib.axes.Axes` object showing the 2-dimensional histogram of convex_hull_property and localization counts. Parameters ---------- ax The axes on which to show the image bins The bin specification as defined in :class:`Bins` bin_edges Bin edges for all or each dimension with shape (dimension, n_bin_edges). bin_range Minimum and maximum edge for all or each dimensions with shape (2,) or (dimension, 2). n_bins The number of bins for all or each dimension. 5 yields 5 bins in all dimensions. (2, 5) yields 2 bins for one dimension and 5 for the other dimension. bin_size The size of bins for all or each bin and for all or each dimension with shape (dimension,) or (dimension, n_bins). 5 would describe bin_size of 5 for all bins in all dimensions. ((2, 5),) yield bins of size (2, 5) for one dimension. (2, 5) yields bins of size 2 for one dimension and 5 for the other dimension. ((2, 5), (1, 3)) yields bins of size (2, 5) for one dimension and (1, 3) for the other dimension. To specify arbitrary sequence of `bin_size` use `bin_edges` instead. log Flag for plotting on a log scale. fit Flag indicating if distribution fit is shown. The fit will only be computed if `distribution_statistics` is None. kwargs Other parameters passed to :func:`matplotlib.pyplot.pcolormesh`. Returns ------- matplotlib.axes.Axes Axes object with the plot. """ if ax is None: ax = plt.gca() if self.results is None: return ax fig = ax.get_figure() other_loc_property = "localization_count" loc_property = self.parameter["convex_hull_property"] if all( item_ is None for item_ in [bins, n_bins, bin_size, bin_edges, bin_range] ): max_other_loc_property_ = self.results.grouped.index.max() max_loc_property_ = self.results.values[loc_property].max() if log: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 3, max_other_loc_property_, transform=bh.axis.transform.log, ), bh.axis.Regular( 50, 1, max_loc_property_, transform=bh.axis.transform.log ), ) ) else: axes = bh.axis.AxesTuple( ( bh.axis.Regular( max_other_loc_property_, 3, max_other_loc_property_ ), bh.axis.Regular(50, 1, max_loc_property_), ) ) bins = Bins(bins=axes) else: try: bins = Bins( bins, n_bins, bin_size, bin_edges, bin_range, labels=[loc_property, other_loc_property], ) except ValueError as exc: # todo: check if message is appropriate # the error is raised again only to adapt the message. raise ValueError( "Bin dimension and len of `loc_properties` is incompatible." ) from exc axes = bins.boost_histogram_axes histogram = bh.Histogram(*axes) histogram.reset() histogram.fill( self.results.values[other_loc_property], self.results.values[loc_property] ) mesh = ax.pcolormesh(*histogram.axes.edges.T, histogram.view().T, **kwargs) fig.colorbar(mesh) # type: ignore[union-attr] self.results.grouped.plot( kind="line", marker=".", x=None, y=self.parameter["convex_hull_property"] + "_mean", color="blue", ax=ax, ) if not self.results.grouped.expectation.isna().all(): self.results.grouped.plot( kind="line", x=None, y="expectation", color="white", ax=ax, ) if not self.results.grouped.expectation_std_pos.isna().all(): df = ( self.results.grouped.expectation + self.results.grouped.expectation_std_pos ) df.plot( kind="line", linestyle=":", color="gray", ax=ax, ) if not self.results.grouped.expectation_std_neg.isna().all(): df = ( self.results.grouped.expectation - self.results.grouped.expectation_std_neg ) df.plot( kind="line", linestyle=":", color="gray", ax=ax, ) ax.set( title="Convex Hull Expectation", xlabel=other_loc_property, ylabel=loc_property, ) if log: ax.set(xscale="log", yscale="log") if fit: raise NotImplementedError return ax
[docs] class ConvexHullExpectationBatch(_Analysis): """ Analyze geometrical properties of the convex hull of localization coordinates in relation to expected values. See Also -------- :class:`ConvexHullExpectation` Parameters ---------- meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. convex_hull_property : Literal['region_measure_ch', 'subregion_measure_ch'] One of 'region_measure_ch' (i.e. area or volume) or 'subregion_measure_ch' (i.e. circumference or surface.) expected_variance : float | Iterable[float] | None The expected variance for all or each localization property. The expected variance equals the squared localization precision for localization position coordinates. Attributes ---------- count : int A counter for counting instantiations (class attribute). parameter : dict A dictionary with all settings for the current computation. meta : locan.analysis.metadata_analysis_pb2.AMetadata Metadata about the current analysis routine. results : ConvexHullExpectationResults Computed results. distribution_statistics : Distribution_stats, None Distribution parameters derived from MLE fitting of results. """ def __init__( self, meta: metadata_analysis_pb2.AMetadata | None = None, convex_hull_property: Literal[ "region_measure_ch", "subregion_measure_ch" ] = "region_measure_ch", expected_variance: float | Iterable[float] | None = None, ) -> None: if convex_hull_property not in ["region_measure_ch", "subregion_measure_ch"]: raise TypeError( "convex_hull_property must be one of " "[region_measure_ch, subregion_measure_ch]" ) parameters = self._get_parameters(locals()) super().__init__(**parameters) self.expected_variance = expected_variance self.results = None self.batch = None self._dimension: int | None = None self._class = ConvexHullExpectation( convex_hull_property=convex_hull_property, expected_variance=expected_variance, ) self.distribution_statistics = None
[docs] def compute(self, locdatas: Iterable[LocData]) -> Self: """ Run the computation. Parameters ---------- locdatas : Iterable[LocData] Localization data. Returns ------- Self """ convex_hull_expectation_batch: list[ConvexHullExpectation] = [] dimensions: list[int | None] = [] for locdata_ in locdatas: che = ConvexHullExpectation( convex_hull_property=self.parameter["convex_hull_property"], expected_variance=self.parameter["expected_variance"], ).compute(locdata_) convex_hull_expectation_batch.append(che) dimensions.append(che._dimension) if bool(locdatas): dimensions_ = set(dimensions) if len(dimensions_) == 1: # check if all are equal self._dimension = dimensions_.pop() else: raise ValueError("The dimensions of all locdata must be the same.") self.from_batch(batch=convex_hull_expectation_batch) return self
[docs] def from_batch( self, batch: Iterable[ConvexHullExpectation], dimension: int | None = None ) -> Self: if not bool(batch) or all(item_.results is None for item_ in batch): logger.warning("The batch is empty.") return self loc_property = self.parameter["convex_hull_property"] if self._dimension is None: if dimension is not None: self._dimension = dimension else: dimension_ = set(item_._dimension for item_ in batch) if len(dimension_) == 1: self._dimension = dimension_.pop() self.results = ConvexHullExpectationResults() self.results.values = pd.concat( [item_.results.values for item_ in batch if item_.results is not None], ignore_index=True, ) grouped = self.results.values.groupby("localization_count") self.results.grouped.loc[:, loc_property + "_mean"] = grouped[ loc_property ].mean() self.results.grouped.loc[:, loc_property + "_std"] = grouped[loc_property].std() convex_hull_property_ = loc_property[:-2] + f"{self._dimension}d" convex_hull_property_ = ConvexHullProperty[convex_hull_property_.upper()] if self.expected_variance is None: self.results.grouped.loc[:, "expectation"] = pd.NA # type: ignore self.results.grouped.loc[:, "expectation_std_pos"] = pd.NA # type: ignore self.results.grouped.loc[:, "expectation_std_neg"] = pd.NA # type: ignore else: convex_hull_expectation_values = _get_convex_hull_property_expectation( n_points=self.results.grouped.index, convex_hull_property=convex_hull_property_, sigma=np.sqrt(self.expected_variance), # type: ignore ) new_dict = dict( expectation=convex_hull_expectation_values.expectation, expectation_std_pos=convex_hull_expectation_values.std_pos, expectation_std_neg=convex_hull_expectation_values.std_neg, ) new_df = pd.DataFrame( new_dict, index=convex_hull_expectation_values.n_points ) self.results.grouped = pd.merge( self.results.grouped, new_df, how="outer", left_index=True, right_index=True, ) self._class.results = self.results return self
[docs] def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes: return self._class.plot(ax=ax, **kwargs)
[docs] def hist( self, ax: mpl.axes.Axes | None = None, bins: Bins | bh.axis.Axis | bh.axis.AxesTuple | None = None, n_bins: int | Sequence[int] | None = None, bin_size: float | Sequence[float] | Sequence[Sequence[float]] | None = None, bin_edges: Sequence[float] | Sequence[Sequence[float]] | None = None, bin_range: ( tuple[float, float] | Sequence[float] | Sequence[Sequence[float]] | None ) = None, log: bool = True, fit: bool = False, **kwargs: Any, ) -> mpl.axes.Axes: return self._class.hist( ax=ax, bins=bins, n_bins=n_bins, bin_size=bin_size, bin_edges=bin_edges, bin_range=bin_range, log=log, fit=fit, **kwargs, )