"""
Compute Ripley's k function.
Spatial clustering of localization data is characterized by Ripley's k or
related l and h functions [1]_.
Ripley's k function is computed for 2D and 3D data for a series of radii as
described in [2]_ in order to provide evidence for deviations from a spatially
homogeneous Poisson process (i.e. complete spatial randomness, CSR).
Ripley' s k function is estimated by summing all points or over test points
being a random subset of all points:
.. math::
k(r) = \\frac{1}{\\lambda (n-1)} \\sum_{i=1}^{n} N_{p_{i}}(r)
here :math:`p_i` is the :math:`i^{th}` point of n test points,
:math:`N_{p_{i}}` is the number of points within the region of radius r
around :math:`p_{i}`, and :math:`\\lambda` is the density of all points.
We follow the definition of l and h functions in [2]_. Ripley's l function is:
.. math::
l(r) &= \\sqrt{k(r)) / \\pi} \\qquad \\text{in 2D}
l(r) &= \\sqrt[3]{\\frac{3}{4 \\pi} k(r)} \\qquad \\text{in 3D}
And Ripley's h function is:
.. math::
h(r) = l(r) - r
References
----------
.. [1] B.D. Ripley, Modelling spatial patterns. Journal of the Royal
Statistical Society, 1977, 172–212.
.. [2] Kiskowski, M. A., Hancock, J. F., and Kenworthy, A. K.,
On the use of Ripley's K-function and its
derivatives to analyze domain size.
Biophysical journal, 2009, 97, 1095–1103.
"""
from __future__ import annotations
import logging
import sys
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Literal
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self
if TYPE_CHECKING:
import matplotlib as mpl
from locan.data.locdata import LocData
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from locan.analysis import metadata_analysis_pb2
from locan.analysis.analysis_base import _Analysis
__all__: list[str] = ["RipleysKFunction", "RipleysLFunction", "RipleysHFunction"]
logger = logging.getLogger(__name__)
# The algorithms
def _ripleys_k_function(
points: npt.ArrayLike,
radii: Iterable[float],
region_measure: float = 1,
other_points: npt.ArrayLike | None = None,
) -> npt.NDArray[np.float64]:
"""
Compute Ripley's K function for two- or three-dimensional data at the given radii.
Parameters
----------
points
2D or 3D points on which to estimate Ripley's K function.
radii
Radii at which to compute Ripley's k function.
region_measure
region measure (area or volume) for points.
other_points
2D or 3D points from which to estimate Ripley's K function (e.g. subset of points). For None other_points
is set to points (default).
Returns
-------
npt.NDArray[np.float64]
Ripley's K function evaluated at input radii with shape (n_radii,).
"""
points = np.asarray(points)
if other_points is None:
other_points = points
else:
other_points = np.asarray(other_points)
nn = NearestNeighbors(metric="euclidean").fit(points)
ripley_0 = []
for radius in radii:
indices_list = nn.radius_neighbors(other_points, radius, return_distance=False)
ripley_0.append(np.array([len(indices) - 1 for indices in indices_list]).sum())
ripley = (
region_measure / (len(points) * (len(other_points) - 1)) * np.array(ripley_0)
)
return ripley
def _ripleys_l_function(
points: npt.ArrayLike,
radii: Iterable[float],
region_measure: float = 1,
other_points: npt.ArrayLike | None = None,
) -> npt.NDArray[np.float64]:
"""
Evaluates Ripley's L function which is different for 2D and 3D data points.
For parameter description see _ripleys_k_function.
"""
if np.shape(points)[1] == 2:
return_value: npt.NDArray[np.float64] = np.sqrt(
_ripleys_k_function(points, radii, region_measure, other_points) / np.pi
)
elif np.shape(points)[1] == 3:
return_value = np.cbrt(
_ripleys_k_function(points, radii, region_measure, other_points)
* 3
/ 4
/ np.pi
)
return return_value
def _ripleys_h_function(
points: npt.ArrayLike,
radii: Iterable[float],
region_measure: float = 1,
other_points: npt.ArrayLike | None = None,
) -> npt.NDArray[np.float64]:
"""
Evaluates Ripley's H function. For parameter description
see _ripleys_k_function.
"""
return_value: npt.NDArray[np.float64] = _ripleys_l_function(
points, radii, region_measure, other_points
) - np.asarray(radii)
return return_value
# The specific analysis classes
[docs]
class RipleysKFunction(_Analysis):
"""
Compute Ripley's K function for two- or three-dimensional data at the given radii.
Parameters
----------
meta : locan.analysis.metadata_analysis_pb2.AMetadata | None
Metadata about the current analysis routine.
radii: Iterable[float] | None
Radii at which to compute Ripley's k function.
region_measure : float | Literal['bb']
Region measure (area or volume) for point region. For 'bb' the region
measure of the bounding_box is used.
Attributes
----------
count : int
A counter for counting instantiations.
parameter : dict
A dictionary with all settings for the current computation.
meta : locan.analysis.metadata_analysis_pb2.AMetadata
Metadata about the current analysis routine.
results : pandas.DataFrame
Data frame with radii as provided and Ripley's K function.
"""
count = 0
def __init__(
self,
meta: metadata_analysis_pb2.AMetadata | None = None,
radii: Iterable[float] | None = None,
region_measure: float | Literal["bb"] = "bb",
) -> None:
radii = np.linspace(0, 100, 10) if radii is None else radii
parameters = self._get_parameters(locals())
super().__init__(**parameters)
self.results = None
[docs]
def compute(self, locdata: LocData, other_locdata: LocData | None = None) -> Self:
"""
Run the computation.
Parameters
----------
locdata
Localization data with 2D or 3D coordinates on which to estimate
Ripley's K function.
other_locdata
Other localization data from which to estimate Ripley's K function
(e.g. subset of points).
For None other_points is set to points (default).
Returns
-------
Self
"""
if not len(locdata):
logger.warning("Locdata is empty.")
return self
points = locdata.coordinates
if other_locdata is not None:
other_points = other_locdata.coordinates
else:
other_points = None
# choose the right region_measure
# todo: add other hull regions
if self.parameter["region_measure"] == "bb":
region_measure = float(locdata.properties["region_measure_bb"])
else:
region_measure = self.parameter["region_measure"]
ripley = _ripleys_k_function(
points=points,
radii=self.parameter["radii"],
region_measure=region_measure,
other_points=other_points,
)
self.results = pd.DataFrame(
{"radius": self.parameter["radii"], "Ripley_k_data": ripley}
)
self.results = self.results.set_index("radius")
return self
[docs]
def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes:
return plot(self=self, ax=ax, **kwargs)
[docs]
class RipleysLFunction(_Analysis):
"""
Compute Ripley's L function for two- or three-dimensional data at the
given radii.
Parameters
----------
meta : locan.analysis.metadata_analysis_pb2.AMetadata | None
Metadata about the current analysis routine.
radii: npt.ArrayLike | None
Radii at which to compute Ripley's k function.
region_measure : float | Literal["bb"]
Region measure (area or volume) for point region. For 'bb' the region
measure of the bounding_box is used.
Attributes
----------
count : int
A counter for counting instantiations.
parameter : dict
A dictionary with all settings for the current computation.
meta : locan.analysis.metadata_analysis_pb2.AMetadata
Metadata about the current analysis routine.
results : pandas.DataFrame
Data frame with radii as provided and Ripley's L function.
"""
count = 0
def __init__(
self,
meta: metadata_analysis_pb2.AMetadata | None = None,
radii: npt.ArrayLike | None = None,
region_measure: float | Literal["bb"] = "bb",
) -> None:
radii = np.linspace(0, 100, 10) if radii is None else radii
parameters = self._get_parameters(locals())
super().__init__(**parameters)
self.results = None
[docs]
def compute(self, locdata: LocData, other_locdata: LocData | None = None) -> Self:
"""
Run the computation.
Parameters
----------
locdata
Localization data with 2D or 3D coordinates on which to estimate
Ripley's L function.
other_locdata
Other localization data from which to estimate Ripley's L function
(e.g. subset of points).
For None other_points is set to points (default).
Returns
-------
Self
"""
if not len(locdata):
logger.warning("Locdata is empty.")
return self
points = locdata.coordinates
if other_locdata is not None:
other_points = other_locdata.coordinates
else:
other_points = None
# choose the right region_measure
# todo: add other hull regions
if self.parameter["region_measure"] == "bb":
region_measure = float(locdata.properties["region_measure_bb"])
else:
region_measure = self.parameter["region_measure"]
ripley = _ripleys_l_function(
points=points,
radii=self.parameter["radii"],
region_measure=region_measure,
other_points=other_points,
)
self.results = pd.DataFrame(
{"radius": self.parameter["radii"], "Ripley_l_data": ripley}
)
self.results = self.results.set_index("radius")
return self
[docs]
def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes:
return plot(self, ax, **kwargs)
[docs]
class RipleysHFunction(_Analysis):
"""
Compute Ripley's H function for two- or three-dimensional data at the given radii.
Parameters
----------
meta : locan.analysis.metadata_analysis_pb2.AMetadata | None
Metadata about the current analysis routine.
radii: npt.ArrayLike | None
Radii at which to compute Ripley's k function.
region_measure : float | Literal['bb']
Region measure (area or volume) for point region. For 'bb' the region
measure of the bounding_box is used.
Attributes
----------
count : int
A counter for counting instantiations.
parameter : dict[str, Any]
A dictionary with all settings for the current computation.
meta : locan.analysis.metadata_analysis_pb2.AMetadata
Metadata about the current analysis routine.
results : pd.DataFrame | None
Data frame with radii as provided and Ripley's H function.
Ripley_h_maximum : pd.DataFrame | None
Data frame with radius and Ripley's H value for the radius at which
the H function has its maximum.
"""
count = 0
def __init__(
self,
meta: metadata_analysis_pb2.AMetadata | None = None,
radii: npt.ArrayLike | None = None,
region_measure: float | Literal["bb"] = "bb",
) -> None:
radii = np.linspace(0, 100, 10) if radii is None else radii
parameters: dict[str, Any] = self._get_parameters(locals())
super().__init__(**parameters)
self.results: pd.DataFrame | None = None
self._Ripley_h_maximum: pd.DataFrame | None = None
[docs]
def compute(self, locdata: LocData, other_locdata: LocData | None = None) -> Self:
"""
Run the computation.
Parameters
----------
locdata
Localization data with 2D or 3D coordinates on which to estimate
Ripley's H function.
other_locdata
Other localization data from which to estimate Ripley's H function
(e.g. subset of points).
For None other_points is set to points (default).
Returns
-------
Self
"""
if not len(locdata):
logger.warning("Locdata is empty.")
return self
# reset secondary results
self._Ripley_h_maximum = None
points = locdata.coordinates
if other_locdata is not None:
other_points = other_locdata.coordinates
else:
other_points = None
# choose the right region_measure
# todo: add other hull regions
if self.parameter["region_measure"] == "bb":
region_measure = float(locdata.properties["region_measure_bb"])
else:
region_measure = self.parameter["region_measure"]
ripley = _ripleys_h_function(
points=points,
radii=self.parameter["radii"],
region_measure=region_measure,
other_points=other_points,
)
self.results = pd.DataFrame(
{"radius": self.parameter["radii"], "Ripley_h_data": ripley}
)
self.results = self.results.set_index("radius")
return self
@property
def Ripley_h_maximum(self) -> pd.DataFrame | None:
if self.results is None:
self._Ripley_h_maximum = None
elif self._Ripley_h_maximum is None:
index = self.results["Ripley_h_data"].idxmax()
self._Ripley_h_maximum = pd.DataFrame(
{"radius": index, "Ripley_h_maximum": self.results.loc[index]}
)
return self._Ripley_h_maximum
@Ripley_h_maximum.deleter
def Ripley_h_maximum(self) -> None:
self._Ripley_h_maximum = None
[docs]
def plot(self, ax: mpl.axes.Axes | None = None, **kwargs: Any) -> mpl.axes.Axes:
return plot(self, ax, **kwargs)
# Interface functions
[docs]
def plot(
self: _Analysis, ax: mpl.axes.Axes | None = None, **kwargs: Any
) -> mpl.axes.Axes:
"""
Provide plot of results as :class:`matplotlib.axes.Axes` object.
Parameters
----------
ax
The axes on which to show the image
kwargs
Other parameters passed to :func:`matplotlib.pyplot.plot`.
Returns
-------
matplotlib.axes.Axes
Axes object with the plot.
"""
if ax is None:
ax = plt.gca()
if self.results is None:
return ax
self.results.plot(ax=ax, **kwargs)
if self.results.columns[0] == "Ripley_k_data":
ylabel = "K-function"
elif self.results.columns[0] == "Ripley_l_data":
ylabel = "L-function"
elif self.results.columns[0] == "Ripley_h_data":
ylabel = "H-function"
else:
ylabel = ""
ax.set(title="Ripley's " + ylabel, xlabel="radius", ylabel=ylabel)
return ax