Source code for locan.data.locdata

"""

A class to carry localization data.

"""

from __future__ import annotations

import copy
import logging
import os
import sys
import warnings
from collections.abc import Callable, Iterable, Sequence
from itertools import accumulate
from typing import TYPE_CHECKING, Any, BinaryIO, Literal, TypeVar

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import numpy as np
import numpy.typing as npt
import pandas as pd
from google.protobuf import json_format

try:
    from scipy.spatial import QhullError
except ImportError:
    from scipy.spatial.qhull import QhullError  # needed for Python 3.7

import locan.data.hulls
from locan import (  # is required to use locdata_id as global variable  # noqa: F401
    locdata_id,
)
from locan.constants import PROPERTY_KEYS, PropertyKey
from locan.data import metadata_pb2
from locan.data.locdata_utils import _dataframe_to_pandas, _get_linked_coordinates
from locan.data.metadata_utils import (
    _modify_meta,
    merge_metadata,
    metadata_to_formatted_string,
)
from locan.data.regions.region import Region, RoiRegion

if TYPE_CHECKING:
    from locan.locan_types import DataFrame  # noqa F401

__all__: list[str] = ["LocData"]

logger = logging.getLogger(__name__)

T_LocData = TypeVar("T_LocData", bound="LocData")


class LocData:
    """
    Container for localization data, aggregated properties and metadata.

    Data consist of individual elements being either localizations or other
    `LocData` objects. Both localizations and `LocData` objects have
    properties. Properties come from the original data or are added by
    analysis procedures.

    On instantiation the class counter is incremented, the aggregated
    properties are computed from the data, and the metadata message is
    initialised (identifier, creation time, source, state, element and frame
    count) and then merged with the user-supplied `meta`.

    Parameters
    ----------
    references
        A `LocData` reference or an array with references to `LocData`
        objects referring to the selected localizations in dataset.
    dataframe
        Dataframe with localization data.
        An empty dataframe is used if None.
    indices
        Indices for dataframe in references that makes up the data.
        `indices` refers to index label, not position.
    meta
        Metadata about the current dataset and its history.

    Attributes
    ----------
    references : LocData | list[LocData] | None
        A LocData reference or an array with references to LocData objects
        referring to the selected localizations in dataframe.
    dataframe : pandas.DataFrame
        Dataframe with localization data.
    indices : slice | list[int] | None
        Indices for dataframe in references that makes up the data.
    meta : locan.data.metadata_pb2.Metadata
        Metadata about the current dataset and its history.
    properties : dict[str, Any]
        Properties generated from data.
    coordinate_keys : list[str]
        The available coordinate properties.
    uncertainty_keys : list[str]
        The available uncertainty properties.
    dimension : int
        Number of coordinates available for each localization
        (i.e. size of `coordinate_keys`).
    """

    count = 0
    """int: A counter for counting LocData instantiations (class attribute)."""

    def __init__(
        self,
        references: LocData | Iterable[LocData] | None = None,
        dataframe: pd.DataFrame | None = None,
        indices: (
            int
            | list[int | bool]
            | npt.NDArray[np.int64 | np.bool_]
            | slice
            | pd.Index[int]
            | None
        ) = None,
        meta: (
            metadata_pb2.Metadata
            | dict[str, Any]
            | str
            | bytes
            | os.PathLike[Any]
            | BinaryIO
            | None
        ) = None,
    ):
        # Keep track of the number of instances (decremented in __del__).
        self.__class__.count += 1

        self.references: LocData | Iterable[LocData] | None = references
        self.dataframe: pd.DataFrame = (
            pd.DataFrame() if dataframe is None else dataframe
        )
        self.indices: (
            int
            | list[int | bool]
            | npt.NDArray[np.int64 | np.bool_]
            | slice
            | pd.Index[int]
            | None
        ) = indices
        self.meta: metadata_pb2.Metadata = metadata_pb2.Metadata()
        self.properties: dict[str, Any] = {}

        # regions and hulls
        # All of these are caches; the hull caches are filled on first access
        # of the corresponding property.
        self._region: Region | None = None
        self._bounding_box: locan.data.hulls.BoundingBox | None = None
        self._oriented_bounding_box: locan.data.hulls.OrientedBoundingBox | None = None
        self._convex_hull: locan.data.hulls.ConvexHull | None = None
        self._alpha_shape: locan.data.hulls.AlphaShape | None = None
        self._inertia_moments: locan.process.properties.misc.InertiaMoments | None = (
            None
        )

        # coordinate_keys is derived from self.data, so references, dataframe
        # and indices must be set before this line.
        self.dimension: int = len(self.coordinate_keys)

        self._update_properties()

        # meta
        # The module-level counter provides a running identifier.
        global locdata_id
        locdata_id += 1
        self.meta.identifier = str(locdata_id)

        self.meta.creation_time.GetCurrentTime()
        self.meta.source = metadata_pb2.DESIGN
        self.meta.state = metadata_pb2.RAW

        self.meta.element_count = len(self.data.index)
        if "frame" in self.data.columns:
            self.meta.frame_count = len(self.data["frame"].unique())

        # The defaults set above are merged with the user-supplied metadata.
        self.meta = merge_metadata(metadata=self.meta, other_metadata=meta)

    @property
    def coordinate_keys(self) -> list[str]:
        """list[str]: Coordinate property keys that are columns in `self.data`."""
        return [
            label_
            for label_ in PropertyKey.coordinate_keys()
            if label_ in self.data.columns
        ]

    @property
    def uncertainty_keys(self) -> list[str]:
        """list[str]: Uncertainty property keys that are columns in `self.data`."""
        return [
            label_
            for label_ in PropertyKey.uncertainty_keys()
            if label_ in self.data.columns
        ]

    def _update_properties(
        self, update_function: dict[str, Callable[..., Any]] | None = None
    ) -> Self:
        """
        Compute properties from localization data.

        `self.properties` is rebuilt from scratch.
        For each loc_property in update_function the supplied callable is
        applied to the corresponding data column.
        For the remaining properties the following defaults are used
        if the loc_properties are available:

            coordinates: values returned by `_get_linked_coordinates`
            `intensity`: sum
            `local_background`: mean
            `frame`: min

        Finally, the bounding box property is requested.

        Parameters
        ----------
        update_function
            mapping of localization property onto callable to compute property
            from corresponding localization data

        Returns
        -------
        Self
        """
        self.properties = dict()

        self.properties["localization_count"] = len(self.data.index)

        # Candidate properties that are present in the data ...
        properties_for_update = [
            loc_property_
            for loc_property_ in [
                *self.coordinate_keys,
                "frame",
                "intensity",
                "local_background",
            ]
            if loc_property_ in self.data.columns
        ]
        # ... minus those that are handled by a user-supplied callable.
        if update_function is not None:
            properties_for_update = [
                loc_property_
                for loc_property_ in properties_for_update
                if loc_property_ not in update_function.keys()
            ]

        # localization coordinates
        # The default is only applied if no coordinate is overridden.
        if all(c_label_ in properties_for_update for c_label_ in self.coordinate_keys):
            self.properties.update(_get_linked_coordinates(locdata=self.data))

        if "intensity" in properties_for_update:
            self.properties["intensity"] = np.sum(self.data["intensity"])

        if "local_background" in properties_for_update:
            self.properties["local_background"] = np.mean(self.data["local_background"])

        if "frame" in properties_for_update:
            self.properties["frame"] = np.min(self.data["frame"])

        if update_function is not None:
            for loc_property_, function_ in update_function.items():
                self.properties[loc_property_] = function_(self.data[loc_property_])

        self.bounding_box  # update self._bounding_box  # noqa B018
        return self

    def __del__(self) -> None:
        """Updating the counter upon deletion of class instance."""
        self.__class__.count -= 1

    def __len__(self) -> int:
        """
        Return the length of data, i.e. the number of elements
        (localizations or collection elements).
        """
        return len(self.data.index)

    def __getstate__(self) -> dict[str, Any]:
        """
        Modify pickling behavior.

        The returned state is a copy of `self.__dict__` in which the protobuf
        `meta` message is replaced by its JSON string representation.
        """
        # Copy the object's state from self.__dict__ to avoid modifying the original state.
        state = self.__dict__.copy()
        # Serialize the unpicklable protobuf entries.
        json_string = json_format.MessageToJson(
            self.meta, always_print_fields_with_no_presence=False
        )
        state["meta"] = json_string
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        """
        Modify pickling behavior.

        Counterpart of `__getstate__`: the JSON string stored under "meta" is
        parsed back into a `metadata_pb2.Metadata` message.
        """
        # Restore instance attributes.
        self.__dict__.update(state)
        # Restore protobuf class for meta attribute
        self.meta = metadata_pb2.Metadata()
        self.meta = json_format.Parse(state["meta"], self.meta)

    def __copy__(self) -> LocData:
        """
        Create a shallow copy of locdata (keeping all references) with the
        following exceptions:
        (i) The class variable `count` is increased for the copied LocData
        object.
        (ii) Metadata keeps the original meta.creation_time while
        meta.modification_time and meta.history is updated.

        The region of the original is assigned to the copy.
        """
        new_locdata = LocData(self.references, self.dataframe, self.indices, meta=None)
        new_locdata._region = self._region
        # meta
        meta_ = _modify_meta(
            self, new_locdata, function_name="LocData.copy", parameter=None, meta=None
        )
        new_locdata.meta = meta_
        return new_locdata

    def __deepcopy__(self, memodict: dict[Any, Any] | None = None) -> LocData:
        """
        Create a deep copy of locdata (including all references) with the
        following exceptions:
        (i) The class variable `count` is increased for all deepcopied LocData
        objects.
        (ii) Metadata keeps the original meta.creation_time while
        meta.modification_time and meta.history is updated.

        Note that the region object is assigned to the copy without being
        deep-copied.
        """
        if memodict is None:
            memodict = {}
        new_locdata = LocData(
            copy.deepcopy(self.references, memodict),
            copy.deepcopy(self.dataframe, memodict),
            copy.deepcopy(self.indices, memodict),
            meta=None,
        )
        new_locdata._region = self._region
        # meta
        meta_ = _modify_meta(
            self,
            new_locdata,
            function_name="LocData.deepcopy",
            parameter=None,
            meta=None,
        )
        new_locdata.meta = meta_
        return new_locdata

    @property
    def bounding_box(self) -> locan.data.hulls.BoundingBox | None:
        """
        Hull object: Return an object representing the axis-aligned minimal
        bounding box.

        The hull is computed on first access and cached.
        A UserWarning is issued and None is returned if the computation
        raises a ValueError.
        """
        if self._bounding_box is None:
            try:
                self._bounding_box = locan.data.hulls.BoundingBox(self.coordinates)
            except ValueError:
                warnings.warn(
                    "Properties related to bounding box could not be computed.",
                    UserWarning,
                    stacklevel=1,
                )
        self._update_properties_bounding_box()
        return self._bounding_box

    def _update_properties_bounding_box(self) -> None:
        """Write the bounding-box related entries into `self.properties`."""
        if self._bounding_box is not None:
            self.properties["region_measure_bb"] = self._bounding_box.region_measure
            # A density is only set for a non-zero region measure.
            if self._bounding_box.region_measure:
                self.properties["localization_density_bb"] = (
                    self.properties["localization_count"]
                    / self._bounding_box.region_measure
                )
            if self._bounding_box.subregion_measure:
                self.properties["subregion_measure_bb"] = (
                    self._bounding_box.subregion_measure
                )

    @property
    def convex_hull(self) -> locan.data.hulls.ConvexHull | None:
        """
        Hull object: Return an object representing the convex hull of all
        localizations.

        The hull is computed on first access and cached.
        A UserWarning is issued and None is returned if the computation
        raises a TypeError or QhullError.
        """
        if self._convex_hull is None:
            try:
                self._convex_hull = locan.data.hulls.ConvexHull(self.coordinates)
            except (TypeError, QhullError):
                warnings.warn(
                    "Properties related to convex hull could not be computed.",
                    UserWarning,
                    stacklevel=1,
                )
        self._update_properties_convex_hull()
        return self._convex_hull

    def _update_properties_convex_hull(self) -> None:
        """Write the convex-hull related entries into `self.properties`."""
        if self._convex_hull is not None:
            self.properties["region_measure_ch"] = self._convex_hull.region_measure
            # A density is only set for a non-zero region measure.
            if self._convex_hull.region_measure:
                self.properties["localization_density_ch"] = (
                    self.properties["localization_count"]
                    / self._convex_hull.region_measure
                )
            if self._convex_hull.subregion_measure:
                self.properties["subregion_measure_ch"] = (
                    self._convex_hull.subregion_measure
                )

    @property
    def oriented_bounding_box(self) -> locan.data.hulls.OrientedBoundingBox | None:
        """
        Hull object: Return an object representing the oriented minimal
        bounding box.

        The hull is computed on first access and cached.
        A UserWarning is issued and None is returned if the computation
        raises a TypeError.
        """
        if self._oriented_bounding_box is None:
            try:
                self._oriented_bounding_box = locan.data.hulls.OrientedBoundingBox(
                    self.coordinates
                )
            except TypeError:
                warnings.warn(
                    "Properties related to oriented bounding box could not be computed.",
                    UserWarning,
                    stacklevel=1,
                )
        self._update_properties_oriented_bounding_box()
        return self._oriented_bounding_box

    def _update_properties_oriented_bounding_box(self) -> None:
        """
        Write the entries related to the oriented bounding box into
        `self.properties`.
        """
        if self._oriented_bounding_box is not None:
            self.properties["region_measure_obb"] = (
                self._oriented_bounding_box.region_measure
            )
            # A density is only set for a non-zero region measure.
            if self._oriented_bounding_box.region_measure:
                self.properties["localization_density_obb"] = (
                    self.properties["localization_count"]
                    / self._oriented_bounding_box.region_measure
                )
            if self._oriented_bounding_box.subregion_measure:
                self.properties["subregion_measure_obb"] = (
                    self._oriented_bounding_box.subregion_measure
                )
            # todo: add 3D properties
            # Note that `circularity_obb` is taken from the hull's
            # `elongation` attribute.
            if self.dimension == 2:
                self.properties["orientation_obb"] = self._oriented_bounding_box.angle
                self.properties["circularity_obb"] = (
                    self._oriented_bounding_box.elongation
                )

    @property
    def alpha_shape(self) -> locan.data.hulls.AlphaShape | None:
        """
        Hull object: Return an object representing the alpha-shape of all
        localizations.

        The cached value is returned as is; it is not computed on access.
        """
        return self._alpha_shape
def update_alpha_shape(self, alpha: float) -> Self:
    """
    Compute the alpha shape for the given `alpha` and store it in
    `self.alpha_shape`.

    An existing alpha shape is re-used and only its `alpha` attribute is
    changed. A UserWarning is issued if a TypeError is raised on the way.
    The alpha-shape related properties are refreshed in any case.

    Parameters
    ----------
    alpha
        Alpha parameter specifying a unique alpha complex.

    Returns
    -------
    Self
        The modified object
    """
    existing_shape = self._alpha_shape
    try:
        if existing_shape is not None:
            existing_shape.alpha = alpha
        else:
            self._alpha_shape = locan.data.hulls.AlphaShape(
                points=self.coordinates, alpha=alpha
            )
    except TypeError:
        warnings.warn(
            "Properties related to alpha shape could not be computed.",
            UserWarning,
            stacklevel=1,
        )

    self._update_properties_alpha_shape()
    return self
def _update_properties_alpha_shape(self) -> None: if self._alpha_shape is not None: self.properties["region_measure_as"] = self._alpha_shape.region_measure try: self.properties["localization_density_as"] = ( self._alpha_shape.n_points_alpha_shape / self._alpha_shape.region_measure ) except ZeroDivisionError: self.properties["localization_density_as"] = float("nan")
def update_alpha_shape_in_references(self, alpha: float) -> Self:
    """
    Compute the alpha shape for each element in `locdata.references` and
    update `locdata.dataframe` with the properties of all elements.

    Nothing is changed if `locdata.references` is not a list.

    Parameters
    ----------
    alpha
        Alpha parameter specifying a unique alpha complex.

    Returns
    -------
    Self
        The modified object
    """
    if not isinstance(self.references, list):
        return self

    for element in self.references:
        element.update_alpha_shape(alpha=alpha)

    element_properties = pd.DataFrame(
        [element.properties for element in self.references]
    )
    element_properties.index = self.data.index

    if self.dataframe is None:
        self.dataframe = element_properties
        return self

    # Existing columns are updated in place; only the remaining columns
    # are appended.
    self.dataframe.update(element_properties)
    known_columns = [
        name for name in element_properties.columns if name in self.dataframe.columns
    ]
    element_properties.drop(columns=known_columns, inplace=True, errors="ignore")
    self.dataframe = pd.concat([self.dataframe, element_properties], axis=1)
    return self
@property def inertia_moments(self) -> locan.process.properties.misc.InertiaMoments | None: """ Inertia moments are returned as computed by :func:`locan.process.properties.inertia_moments`. """ if self._inertia_moments is None: try: self._inertia_moments = locan.process.properties.inertia_moments( self.coordinates ) except TypeError: warnings.warn( "Properties related to inertia_moments could not be computed.", UserWarning, stacklevel=1, ) self._update_properties_inertia_moments() return self._inertia_moments def _update_properties_inertia_moments(self) -> None: if self._inertia_moments is not None: self.properties["orientation_im"] = self._inertia_moments.orientation self.properties["circularity_im"] = self._inertia_moments.eccentricity
def update_inertia_moments_in_references(self) -> Self:
    """
    Compute inertia_moments for each element in locdata.references and
    update locdata.dataframe with the properties of all elements.

    Nothing is changed if locdata.references is not a list.

    Returns
    -------
    Self
        The modified object
    """
    if not isinstance(self.references, list):
        return self

    for element in self.references:
        # Requesting the property refreshes the element's properties.
        element.inertia_moments  # noqa B018

    element_properties = pd.DataFrame(
        [element.properties for element in self.references]
    )
    element_properties.index = self.data.index

    if self.dataframe is None:
        self.dataframe = element_properties
        return self

    # Existing columns are updated in place; only the remaining columns
    # are appended.
    self.dataframe.update(element_properties)
    known_columns = [
        name for name in element_properties.columns if name in self.dataframe.columns
    ]
    element_properties.drop(columns=known_columns, inplace=True, errors="ignore")
    self.dataframe = pd.concat([self.dataframe, element_properties], axis=1)
    return self
@property
def region(self) -> Region | None:
    """RoiRegion object: Return the region that supports all localizations."""
    return self._region


@region.setter
def region(self, region: Region | None) -> None:
    """
    Set the region and update the region related properties.

    Parameters
    ----------
    region
        A `Region` or `RoiRegion`, None, or (legacy) a dict with the keyword
        arguments for `RoiRegion`.

    Raises
    ------
    TypeError
        If `region` is of an unsupported type or if the dimension of the
        region differs from `self.dimension`.
    """
    # Legacy code to deal with deprecated RoiLegacy_0:
    # the dict must be converted before any attribute of region is accessed.
    if isinstance(region, dict):
        region = RoiRegion(**region)

    if region is not None:
        if not isinstance(region, (Region, RoiRegion)):
            raise TypeError(
                "region must be a Region, a RoiRegion, a dict with RoiRegion "
                "arguments, or None."
            )
        if region.dimension != self.dimension:
            raise TypeError(
                "Region dimension and coordinates dimension must be identical."
            )
        if len(self) != len(region.contains(self.coordinates)):
            logger.warning("Not all coordinates are within region.")

    self._region = region

    # property for region measures
    if self._region is not None:
        # Measure and density are only set for a non-zero region measure.
        if self._region.region_measure:
            self.properties["region_measure"] = self._region.region_measure
            self.properties["localization_density"] = (
                self.meta.element_count / self._region.region_measure
            )
        if self._region.subregion_measure:
            self.properties["subregion_measure"] = self._region.subregion_measure


@property
def data(self) -> pd.DataFrame:
    """
    pandas.DataFrame: Return all elements either copied from the reference
    or referencing the current dataframe.

    If `self.references` is a `LocData` object, the rows selected by
    `self.indices` are taken from the data of the reference and merged
    (outer join on the index) with `self.dataframe`.
    In all other cases `self.dataframe` is returned.
    """
    if isinstance(self.references, LocData):
        # We refer to the localization data by its index label, not position;
        # in other words we decided not to use iloc but loc.
        # Missing labels raise a KeyError in pandas. Also see:
        # https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
        try:
            df = self.references.data.loc[self.indices]  # type: ignore
        except KeyError:
            # Fall back to those labels that exist in the reference.
            df = self.references.data.loc[
                self.references.data.index.intersection(self.indices)  # type: ignore
            ]
        df = pd.merge(
            df, self.dataframe, left_index=True, right_index=True, how="outer"
        )
        return df
    else:
        return self.dataframe


@property
def coordinates(self) -> npt.NDArray[np.float64]:
    """npt.NDArray[float]: Return all coordinate values."""
    return_value: npt.NDArray[np.float64] = self.data[self.coordinate_keys].values
    return return_value


@property
def centroid(self) -> npt.NDArray[np.float64]:
    """
    npt.NDArray[np.float64]: Return coordinate values of the centroid
    (being the property values for all coordinate labels).
    """
    return np.array(
        [
            self.properties[coordinate_label]
            for coordinate_label in self.coordinate_keys
        ]
    )
@classmethod
def from_dataframe(
    cls: type[T_LocData],  # noqa: UP006
    dataframe: DataFrame | None = None,
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Create new LocData object from DataFrame with localization data.

    The dataframe is passed through `_dataframe_to_pandas`; an empty
    pandas.DataFrame is used if the result is None.

    Parameters
    ----------
    dataframe
        Localization data.
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with dataframe representing the given data.
    """
    pandas_frame = _dataframe_to_pandas(dataframe=dataframe, allow_copy=True)
    if pandas_frame is None:
        pandas_frame = pd.DataFrame()

    # Defaults for this constructor; they are merged with the given meta.
    default_meta = metadata_pb2.Metadata()
    default_meta.source = metadata_pb2.DESIGN
    default_meta.state = metadata_pb2.RAW
    default_meta.history.add(name="LocData.from_dataframe")
    merged_meta = merge_metadata(metadata=default_meta, other_metadata=meta)

    return cls(dataframe=pandas_frame, meta=merged_meta)
@classmethod
def from_coordinates(
    cls: type[T_LocData],  # noqa: UP006
    coordinates: npt.ArrayLike | None = None,
    coordinate_labels: Sequence[str] | None = None,
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Create new LocData object from a sequence of localization coordinates.

    Parameters
    ----------
    coordinates
        Sequence of tuples with localization coordinates
        with shape (n_localizations, dimension)
    coordinate_labels
        The available coordinate properties.
        If None, the first `dimension` labels of
        `position_x`, `position_y`, `position_z` are used.
        The labels are only checked if `coordinates` is not empty.
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with dataframe representing the given
        coordinates.

    Raises
    ------
    ValueError
        If any of `coordinate_labels` is not in `PROPERTY_KEYS`.
    """
    points = np.array([]) if coordinates is None else np.asarray(coordinates)

    if not np.size(points):
        dataframe = pd.DataFrame()
    else:
        dimension = len(points[0])
        if coordinate_labels is None:
            default_labels = ["position_x", "position_y", "position_z"]
            coordinate_labels = default_labels[0:dimension]
        elif not all(label in PROPERTY_KEYS for label in coordinate_labels):
            raise ValueError(
                "The given coordinate_keys are not standard property keys."
            )
        dataframe = pd.DataFrame.from_records(data=points, columns=coordinate_labels)

    # Defaults for this constructor; they are merged with the given meta.
    default_meta = metadata_pb2.Metadata()
    default_meta.source = metadata_pb2.DESIGN
    default_meta.state = metadata_pb2.RAW
    default_meta.history.add(name="LocData.from_coordinates")
    merged_meta = merge_metadata(metadata=default_meta, other_metadata=meta)

    return cls(dataframe=dataframe, meta=merged_meta)
@classmethod
def from_selection(
    cls: type[T_LocData],  # noqa: UP006
    locdata: LocData,
    indices: (
        int
        | list[int | bool]
        | npt.NDArray[np.int64 | np.bool_]
        | slice
        | pd.Index[int]
        | None
    ) = None,
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Create new LocData object from selected elements in another `LocData`.

    The metadata of `locdata` is copied, the fields `identifier`,
    `element_count` and `frame_count` are cleared, and the history is
    extended before merging with `meta`. The region of `locdata` is
    assigned to the new object.

    Parameters
    ----------
    locdata
        Locdata object from which to select elements.
    indices
        Index labels for elements in locdata that make up the new data.
        Note that contrary to usual python slices, both the start and the
        stop are included (see pandas documentation).
        `Indices` refer to index value not position in list.
        If None, `slice(0, None)` is used.
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with dataframe representing the selected data.

    Note
    ----
    No error is raised if indices do not exist in locdata.
    """
    selected = slice(0, None) if indices is None else indices

    derived_meta = metadata_pb2.Metadata()
    derived_meta.CopyFrom(locdata.meta)
    # These fields are set anew when the new object is instantiated.
    for field_name in ("identifier", "element_count", "frame_count"):
        try:
            derived_meta.ClearField(field_name)
        except ValueError:
            pass

    derived_meta.modification_time.GetCurrentTime()
    derived_meta.state = metadata_pb2.MODIFIED
    derived_meta.ancestor_identifiers.append(locdata.meta.identifier)
    derived_meta.history.add(name="LocData.from_selection")

    merged_meta = merge_metadata(metadata=derived_meta, other_metadata=meta)

    selection = cls(references=locdata, indices=selected, meta=merged_meta)
    selection.region = locdata.region
    return selection
@classmethod
def from_collection(
    cls: type[T_LocData],  # noqa: UP006
    locdatas: Iterable[LocData],
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Create new LocData object by collecting LocData objects.

    Parameters
    ----------
    locdatas
        Locdata objects to collect.
        Any iterable is accepted; it is materialized into a list unless it
        already is a list (in which case the very same list is referenced).
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with references to the collected objects and
        a dataframe that carries the properties of each collected object.
    """
    # The iterable is traversed more than once and kept as `references`.
    # One-shot iterables (e.g. generators) would be exhausted after the first
    # pass, and other methods only deal with references that are a list.
    references = locdatas if isinstance(locdatas, list) else list(locdatas)

    dataframe = pd.DataFrame([ref.properties for ref in references])

    meta_ = metadata_pb2.Metadata()
    meta_.source = metadata_pb2.DESIGN
    meta_.state = metadata_pb2.RAW
    meta_.ancestor_identifiers[:] = [ref.meta.identifier for ref in references]
    meta_.history.add(name="LocData.from_collection")
    meta_ = merge_metadata(metadata=meta_, other_metadata=meta)

    return cls(references=references, dataframe=dataframe, meta=meta_)
@classmethod
def concat(
    cls: type[T_LocData],  # noqa: UP006
    locdatas: Iterable[LocData],
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Concatenate LocData objects.

    Parameters
    ----------
    locdatas
        Locdata objects to concatenate. Any iterable is accepted.
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with dataframe representing the concatenated
        data. The references of all objects are collected;
        `references` is None if every collected reference is None.
    """
    # The iterable is traversed several times; materialize it once so that
    # one-shot iterables (e.g. generators) are supported.
    locdatas = list(locdatas)

    dataframe = pd.concat(
        [locdata.data for locdata in locdatas], ignore_index=True, sort=False
    )

    # concatenate references also if None
    references: list[LocData | None] = []
    for locdata in locdatas:
        try:
            references.extend(locdata.references)  # type: ignore
        except TypeError:
            # references is a single LocData object or None
            references.append(locdata.references)  # type: ignore

    # Check if all elements are None.
    # An identity check is required: the truth value of a LocData object is
    # derived from its length, so empty LocData references would be taken
    # for None.
    all_none = all(reference is None for reference in references)
    new_references = None if all_none else references

    meta_ = metadata_pb2.Metadata()
    meta_.creation_time.GetCurrentTime()
    meta_.source = metadata_pb2.DESIGN
    meta_.state = metadata_pb2.MODIFIED
    meta_.ancestor_identifiers[:] = [dat.meta.identifier for dat in locdatas]
    meta_.history.add(name="concat")
    meta_ = merge_metadata(metadata=meta_, other_metadata=meta)

    return cls(references=new_references, dataframe=dataframe, meta=meta_)
@classmethod
def from_chunks(
    cls: type[T_LocData],  # noqa: UP006
    locdata: LocData,
    chunks: Sequence[tuple[int, ...]] | None = None,
    chunk_size: int | None = None,
    n_chunks: int | None = None,
    order: Literal["successive", "alternating"] = "successive",
    drop: bool = False,
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> T_LocData:
    """
    Divide locdata in chunks of localization elements.

    Parameters
    ----------
    locdata
        Locdata to divide.
    chunks
        Localization chunks as defined by a list of index-tuples.
        One of `chunks`, `chunk_size` or `n_chunks` must be different
        from None.
    chunk_size
        Number of consecutive localizations to form a single chunk of data.
        One of `chunks`, `chunk_size` or `n_chunks` must be different
        from None.
    n_chunks
        Number of chunks.
        One of `chunks`, `chunk_size` or `n_chunks` must be different
        from None.
        For order 'successive' all chunks but the last have equal size and
        every localization is part of exactly one chunk; the number of
        chunks can therefore differ from `n_chunks` if the localizations
        cannot be divided accordingly.
    order
        The order in which to select localizations.
        One of 'successive' or 'alternating'.
    drop
        If True the last chunk will be eliminated if it has fewer
        localizations than the other chunks.
    meta
        Metadata about the current dataset and its history.

    Returns
    -------
    LocData
        A new LocData instance with references and dataframe elements
        representing the individual chunks.

    Raises
    ------
    ValueError
        If not exactly one of `chunks`, `chunk_size` or `n_chunks` is given,
        if `chunk_size` or `n_chunks` is not positive,
        or if `order` is not implemented.
    """
    n_nones = sum(element is None for element in [chunks, chunk_size, n_chunks])

    if n_nones != 2:
        raise ValueError(
            "One and only one of `chunks`, `chunk_size` or `n_chunks` must "
            "be different from None."
        )
    elif chunks is not None:
        index_lists = list(chunks)
    else:
        n_localizations = len(locdata)

        if chunk_size is not None:
            if chunk_size < 1:
                raise ValueError("`chunk_size` must be a positive integer.")
            # number of chunks including a smaller last one (ceil division)
            n_chunks = -(-n_localizations // chunk_size)
        else:  # if n_chunks is not None
            assert n_chunks is not None  # type narrowing # noqa: S101
            if n_chunks < 1:
                raise ValueError("`n_chunks` must be a positive integer.")
            if (n_localizations % n_chunks) == 0:
                chunk_size = n_localizations // n_chunks
            else:
                chunk_size = n_localizations // (n_chunks - 1)
            # guard against a zero chunk size for very short locdata
            chunk_size = max(chunk_size, 1)

        # Evaluate the data property only once; it can be expensive.
        index = locdata.data.index

        if order == "successive":
            # Derive the chunk sizes from the actual number of localizations
            # such that they always add up to len(locdata): neither empty
            # chunks are created nor localizations dropped.
            n_full_chunks, remainder = divmod(n_localizations, chunk_size)
            chunk_sizes = [chunk_size] * n_full_chunks
            if remainder:
                chunk_sizes.append(remainder)
            boundaries = [0, *accumulate(chunk_sizes)]
            index_lists = [
                index[lower:upper]  # type: ignore
                for lower, upper in zip(boundaries[:-1], boundaries[1:])
            ]

        elif order == "alternating":
            index_lists = [
                index[i_chunk::n_chunks]  # type: ignore
                for i_chunk in range(n_chunks)
            ]

        else:
            raise ValueError(f"The order {order} is not implemented.")

    if drop and len(index_lists) > 1 and len(index_lists[-1]) < len(index_lists[0]):
        index_lists = index_lists[:-1]

    references = [
        LocData.from_selection(locdata=locdata, indices=list(index_list))
        for index_list in index_lists
    ]
    dataframe = pd.DataFrame([ref.properties for ref in references])

    meta_ = metadata_pb2.Metadata()
    meta_.creation_time.GetCurrentTime()
    meta_.source = metadata_pb2.DESIGN
    meta_.state = metadata_pb2.RAW
    meta_.ancestor_identifiers[:] = [ref.meta.identifier for ref in references]
    meta_.history.add(name="LocData.chunks")
    meta_ = merge_metadata(metadata=meta_, other_metadata=meta)

    return cls(references=references, dataframe=dataframe, meta=meta_)
def reset(self, reset_index: bool = False) -> Self:
    """
    Reset hulls and properties. This is needed after the dataframe
    attribute has been modified in place.

    All cached hulls and inertia moments are discarded and the properties
    are recomputed.

    Note
    ----
    Should be used with care because metadata is not updated accordingly.
    The region property is not changed.
    Better to just re-instantiate with `LocData.from_dataframe()` or
    use `locdata.update()`.

    Parameters
    ----------
    reset_index
        Flag indicating if the index is reset to integer values.
        If True the previous index values are discarded.

    Returns
    -------
    Self
        The modified object
    """
    if reset_index is True:
        self.dataframe.reset_index(drop=True, inplace=True)

    self.properties = {}
    # Invalidate all caches that are derived from the coordinates.
    for cache_name in (
        "_bounding_box",
        "_oriented_bounding_box",
        "_convex_hull",
        "_alpha_shape",
        "_inertia_moments",
    ):
        setattr(self, cache_name, None)

    self._update_properties()
    return self
def update(
    self,
    dataframe: pd.DataFrame | None,
    reset_index: bool = False,
    meta: (
        metadata_pb2.Metadata
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
        | None
    ) = None,
) -> Self:
    """
    Update the dataframe attribute in place.

    Use this function rather than setting locdata.dataframe directly in order
    to automatically update the attributes for dimension, hulls, properties,
    and metadata.

    If `self.references` is not None, `reduce()` is applied first and a
    warning is logged.

    Parameters
    ----------
    dataframe
        Dataframe with localization data.
        The object is returned unchanged if None.
    reset_index
        Flag indicating if the index is reset to integer values.
        If True the previous index values are discarded.
    meta : locan.data.metadata_pb2.Metadata | dict | str | bytes | os.PathLike | BinaryIO | None
        Metadata about the current dataset and its history.

    Returns
    -------
    Self
        The modified object
    """
    if dataframe is None:
        return self

    # Record the call parameters for the history entry.
    # dataframe is obvious and possibly large and should not be repeated
    # in meta.
    local_parameter = locals()
    local_parameter.pop("dataframe")

    if self.references is not None:
        self.reduce(reset_index=reset_index)
        logger.warning(
            "LocData.reduce() was applied since self.references was not None."
        )

    self.dataframe = dataframe
    self.dimension = len(self.coordinate_keys)
    # update hulls and properties
    self.reset(reset_index=reset_index)

    # update meta
    own_meta = self.meta
    own_meta.modification_time.GetCurrentTime()
    own_meta.state = metadata_pb2.MODIFIED
    own_meta.history.add(name="LocData.update", parameter=str(local_parameter))

    own_meta.element_count = len(self.data.index)
    if "frame" in self.data.columns:
        own_meta.frame_count = len(self.data["frame"].unique())

    self.meta = merge_metadata(metadata=self.meta, other_metadata=meta)

    return self
def reduce(self, reset_index: bool = False) -> Self:
    """
    Clean up references.

    The current data is stored in `LocData.dataframe`, and
    `LocData.references` and `LocData.indices` are set to None.

    Parameters
    ----------
    reset_index
        Flag indicating if the index is reset to integer values.
        If True the previous index values are discarded.

    Returns
    -------
    Self
        The modified object

    Raises
    ------
    ValueError
        If `references` is neither None, a `LocData` object nor a list.
    """
    if self.references is not None:
        if not isinstance(self.references, (LocData, list)):
            raise ValueError("references has undefined value.")
        # Materialize the data before the references are given up.
        self.dataframe = self.data
        self.indices = None
        self.references = None

    if reset_index is True:
        self.dataframe.reset_index(drop=True, inplace=True)

    return self
def update_convex_hulls_in_references(self) -> Self:
    """
    Compute the convex hull for each element in locdata.references and
    update locdata.dataframe with the properties of all elements.

    Nothing is changed if locdata.references is not a list.

    Returns
    -------
    Self
        The modified object
    """
    if not isinstance(self.references, list):
        return self

    for element in self.references:
        # Requesting the property refreshes the element's properties.
        element.convex_hull  # noqa B018

    element_properties = pd.DataFrame(
        [element.properties for element in self.references]
    )
    element_properties.index = self.data.index

    # Existing columns are updated in place; only the remaining columns
    # are appended.
    self.dataframe.update(element_properties)
    known_columns = [
        name for name in element_properties.columns if name in self.dataframe.columns
    ]
    element_properties.drop(columns=known_columns, inplace=True, errors="ignore")
    self.dataframe = pd.concat([self.dataframe, element_properties], axis=1)
    return self
def update_oriented_bounding_box_in_references(self) -> Self:
    """
    Compute the oriented bounding box for each element in
    locdata.references and update locdata.dataframe with the properties of
    all elements.

    Nothing is changed if locdata.references is not a list.

    Returns
    -------
    Self
        The modified object
    """
    if not isinstance(self.references, list):
        return self

    for element in self.references:
        # Requesting the property refreshes the element's properties.
        element.oriented_bounding_box  # noqa B018

    element_properties = pd.DataFrame(
        [element.properties for element in self.references]
    )
    element_properties.index = self.data.index

    # Existing columns are updated in place; only the remaining columns
    # are appended.
    self.dataframe.update(element_properties)
    known_columns = [
        name for name in element_properties.columns if name in self.dataframe.columns
    ]
    element_properties.drop(columns=known_columns, inplace=True, errors="ignore")
    self.dataframe = pd.concat([self.dataframe, element_properties], axis=1)
    return self
def projection(self, coordinate_labels: str | list[str]) -> LocData:
    """
    Reduce dimensions by projecting all localization coordinates onto
    selected coordinates.

    A deep copy of the object is updated with a dataframe from which all
    coordinate columns except the selected ones are removed.

    Parameters
    ----------
    coordinate_labels
        The coordinate labels to project onto.

    Returns
    -------
    LocData
    """
    # Record the call parameters for the history entry.
    local_parameter = locals()

    if isinstance(coordinate_labels, str):
        coordinate_labels = [coordinate_labels]

    projected: LocData = copy.deepcopy(self)

    # reduce coordinate dimensions
    dropped_labels = [
        key for key in self.coordinate_keys if key not in coordinate_labels
    ]
    kept_columns = [name for name in self.data.columns if name not in dropped_labels]
    dataframe = projected.data[kept_columns]

    # update
    # other updates are done in the coming update call.
    history_meta = metadata_pb2.Metadata()
    history_meta.history.add(
        name="LocData.projection", parameter=str(local_parameter)
    )
    projected = projected.update(dataframe=dataframe, meta=history_meta)

    return projected
def print_meta(self) -> None:
    """
    Print Locdata.metadata as formatted string.

    See Also
    --------
    :func:`locan.data.metadata_utils.metadata_to_formatted_string`
    """
    formatted = metadata_to_formatted_string(self.meta)
    print(formatted)
def print_summary(self) -> None:
    """
    Print a summary containing the most common metadata keys.

    The fields `file` and `modification_time` are only included if they are
    set in `self.meta`.
    """
    source_meta = self.meta
    summary = metadata_pb2.Metadata()

    if source_meta.HasField("file"):
        summary.file.CopyFrom(source_meta.file)
    for field_name in ("identifier", "comment"):
        setattr(summary, field_name, getattr(source_meta, field_name))
    summary.creation_time.CopyFrom(source_meta.creation_time)
    if source_meta.HasField("modification_time"):
        summary.modification_time.CopyFrom(source_meta.modification_time)
    for field_name in ("source", "state", "element_count", "frame_count"):
        setattr(summary, field_name, getattr(source_meta, field_name))

    print(metadata_to_formatted_string(summary))
def update_properties_in_references(
    self,
    properties: (
        dict[str, Iterable[Any]]
        | pd.Series[Any]
        | pd.DataFrame
        | Callable[..., Any]
        | None
    ) = None,
) -> Self:
    """
    Add properties for each element in self.references and update
    self.dataframe.

    Parameters
    ----------
    properties
        New property values for each reference (dict of iterables,
        pandas.Series or pandas.DataFrame with a range index) or a function
        that takes a LocData object and returns a mapping to update its
        properties with.

    Returns
    -------
    Self

    Raises
    ------
    TypeError
        If self.references is not a list.
    ValueError
        If the index of a pandas object is not a range index of the length
        of self.references.
    """
    elements = self.references
    if not isinstance(elements, list):
        raise TypeError("self.references must be a list of Locdata")

    if properties is None:
        pass
    elif isinstance(properties, dict):
        for key, values in properties.items():
            for element, value_ in zip(elements, values):
                element.properties[key] = value_
    elif isinstance(properties, pd.Series):
        if any(properties.index != range(len(elements))):
            raise ValueError(
                f"properties must have a range index of length {len(elements)}"
            )
        for element, value_ in zip(elements, properties.to_numpy()):
            element.properties[properties.name] = value_
    elif isinstance(properties, pd.DataFrame):
        if any(properties.index != range(len(elements))):
            raise ValueError(
                f"properties must have a range index of length {len(elements)}"
            )
        for name in properties.columns:
            for element, value_ in zip(elements, properties[name].to_numpy()):
                element.properties[name] = value_
    elif callable(properties):
        for element in elements:
            element.properties.update(properties(element))

    collected = pd.DataFrame([element.properties for element in elements])
    collected.index = self.data.index
    # self.update() gives up the references; they are restored afterwards.
    self.update(dataframe=collected)
    self.references = elements
    return self