"""
Coordinate-based colocalization.
Colocalization is estimated by computing a colocalization index for each
localization using the so-called coordinate-based colocalization
algorithm [1]_.
References
----------
.. [1] Malkusch S, Endesfelder U, Mondry J, Gelléri M, Verveer PJ, Heilemann M.,
Coordinate-based colocalization analysis of single-molecule localization microscopy data.
Histochem Cell Biol. 2012, 137(1):1-10.
doi: 10.1007/s00418-011-0880-5
"""
from __future__ import annotations
import logging
import sys
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, Literal
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self
if TYPE_CHECKING:
import matplotlib as mpl
from locan.data.locdata import LocData
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
import pandas as pd
from scipy.stats import spearmanr
from sklearn.neighbors import NearestNeighbors
from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis
__all__: list[str] = ["CoordinateBasedColocalization"]
logger = logging.getLogger(__name__)
# The algorithm
def _coordinate_based_colocalization(
points: npt.ArrayLike,
other_points: npt.ArrayLike | None = None,
radius: int | float = 100,
n_steps: int = 10,
) -> npt.NDArray[np.float64]:
"""
Compute a colocalization index for each localization by coordinate-based
colocalization.
Parameters
----------
points
Array of points with shape (n_points, dimensions)
for which CBC values are computed.
other_points
Array of points with shape (n_points, dimensions) to be
compared with points. If None other_points are set to points.
radius
The maximum radius up to which nearest neighbors are determined
n_steps
The number of bins from which Spearman correlation is computed.
Returns
-------
npt.NDArray[np.float64]
An array with coordinate-based colocalization coefficients for each
input point.
"""
points = np.asarray(points)
# sampled radii
radii = np.linspace(0, radius, n_steps + 1)
# nearest neighbors within radius
nneigh_1 = NearestNeighbors(radius=radius, metric="euclidean").fit(points)
distances_1 = np.array(nneigh_1.radius_neighbors()[0])
if other_points is None:
nneigh_2 = NearestNeighbors(radius=radius, metric="euclidean").fit(points)
else:
nneigh_2 = NearestNeighbors(radius=radius, metric="euclidean").fit(other_points)
distances_2 = np.array(nneigh_2.radius_neighbors(points)[0])
# CBC for each point
correlation = np.empty(len(points))
for i, (d_1, d_2) in enumerate(zip(distances_1, distances_2)):
if len(d_1) and len(d_2):
# binning
hist_1 = np.histogram(d_1, bins=radii, range=(0, radius))[0]
hist_2 = np.histogram(d_2, bins=radii, range=(0, radius))[0]
# normalization
values_1 = np.cumsum(hist_1) * radius**2 / radii[1:] ** 2 / len(d_1)
values_2 = np.cumsum(hist_2) * radius**2 / radii[1:] ** 2 / len(d_2)
# Spearman rank correlation
rho, _pval = spearmanr(values_1, values_2)
correlation[i] = rho
else:
correlation[i] = np.nan
# CBC normalization for each point
max_distances = np.array(
[np.max(d, initial=0) for d in distances_2]
) # max is set to 0 for empty arrays.
norm_spearmanr = np.exp(-1 * max_distances / radius)
correlation = correlation * norm_spearmanr
return correlation # type: ignore[no-any-return]
# The specific analysis classes
[docs]
class CoordinateBasedColocalization(_Analysis):
"""
Compute a colocalization index for each localization by coordinate-based
colocalization (CBC).
The colocalization index is calculated for each localization in `locdata`
by finding nearest neighbors in `locdata` or `other_locdata` within
`radius`. A normalized number of nearest neighbors at a certain radius is
computed for `n_steps` equally-sized steps of increasing radii ranging
from 0 to `radius`.
The Spearman rank correlation coefficent is computed for these values and
weighted by Exp[-nearestNeighborDistance/distanceMax].
Parameters
----------
meta : locan.analysis.metadata_analysis_pb2.AMetadata
Metadata about the current analysis routine.
radius : int | float
The maximum radius up to which nearest neighbors are determined
n_steps : int
The number of bins from which Spearman correlation is computed.
Attributes
----------
count : int
A counter for counting instantiations.
parameter : dict
A dictionary with all settings for the current computation.
meta : locan.analysis.metadata_analysis_pb2.AMetadata
Metadata about the current analysis routine.
results : pandas.DataFrame
Coordinate-based colocalization coefficients for each input point.
"""
count = 0
def __init__(
self,
meta: metadata_analysis_pb2.AMetadata | None = None,
radius: int | float = 100,
n_steps: int = 10,
) -> None:
parameters = self._get_parameters(locals())
super().__init__(**parameters)
self.results = None
[docs]
def compute(self, locdata: LocData, other_locdata: LocData | None = None) -> Self:
"""
Run the computation.
Parameters
----------
locdata
Localization data for which CBC values are computed.
other_locdata
Localization data to be colocalized.
If None other_locdata is set to locdata.
Returns
-------
Self
"""
if not len(locdata):
logger.warning("Locdata is empty.")
return self
points = locdata.coordinates
if other_locdata is not None:
other_points = other_locdata.coordinates
id_ = other_locdata.meta.identifier
else:
other_points = None
id_ = "self"
self.results = pd.DataFrame(
{
f"colocalization_cbc_{id_}": _coordinate_based_colocalization(
points, other_points, **self.parameter
)
}
)
return self
[docs]
def hist(
self,
ax: mpl.axes.Axes | None = None,
bins: int | Sequence[int | float] | Literal["auto"] = (-1, -0.3333, 0.3333, 1),
density: bool = True,
**kwargs: Any,
) -> mpl.axes.Axes:
"""
Provide histogram as :class:`matplotlib.axes.Axes` object showing
hist(results).
Parameters
----------
ax
The axes on which to show the image
bins
Bin specification as used in :func:`matplotlib.hist`.
density
Flag for normalization as used in :func:`matplotlib.hist`.
True returns probability density function;
None returns counts.
kwargs
Other parameters passed to :func:`matplotlib.hist`.
Returns
-------
matplotlib.axes.Axes
Axes object with the plot.
"""
if ax is None:
ax = plt.gca()
if self.results is None:
return ax
ax.hist(
self.results.iloc[:, 0].values,
bins=bins,
density=density,
label="cbc",
**kwargs,
)
ax.set(
title="CBC histogram",
xlabel="colocalization_cbc",
ylabel="pdf" if density else "counts",
)
ax.legend(loc="best")
return ax