# Source code for locan.locan_io.locdata.elyra_io

"""

File input/output for localization data in Elyra files.

"""

from __future__ import annotations

import io
import logging
import os
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from _typeshed import SupportsRead, SupportsReadline

import pandas as pd

import locan.constants
from locan.data import metadata_pb2
from locan.data.locdata import LocData
from locan.locan_io.locdata.utilities import (
    convert_property_names,
    convert_property_types,
    open_path_or_file_like,
)

__all__: list[str] = ["load_Elyra_header", "load_Elyra_file"]

logger = logging.getLogger(__name__)


def _read_Elyra_header(file: SupportsReadline[Any]) -> list[str]:
    """
    Read the header line from a Zeiss Elyra single-molecule localization file
    and identify column names.

    Exactly one line is consumed from `file`. That line is split at tab
    characters and the resulting identifiers are translated with
    `locan.constants.ELYRA_KEYS`.

    Parameters
    ----------
    file
        A text file-like object to read the header line from.

    Returns
    -------
    list[str]
        A list of valid dataset property keys as derived from the
        identifiers.
    """
    # Drop the complete line terminator. A caller-supplied file-like object
    # may hand over "\r\n" untranslated; removing only "\n" would leave a
    # stray "\r" glued to the last identifier.
    header = file.readline().rstrip("\r\n")

    # The header holds tab-separated column identifiers.
    identifiers = header.split("\t")

    return convert_property_names(
        properties=identifiers, property_mapping=locan.constants.ELYRA_KEYS
    )



[docs]
def load_Elyra_header(
    path: str | os.PathLike[Any] | SupportsRead[Any],
) -> list[str]:
    """
    Load the header of a Zeiss Elyra single-molecule localization file and
    identify column names.

    The source is opened with encoding 'latin-1' and only its header line is
    read.

    Parameters
    ----------
    path
        File path or file-like object for a file to load.

    Returns
    -------
    list[str]
        A list of valid dataset property keys as derived from the
        identifiers.
    """
    with open_path_or_file_like(path, encoding="latin-1") as file:
        column_keys = _read_Elyra_header(file)
    return column_keys




[docs]
def load_Elyra_file(
    path: str | os.PathLike[Any] | SupportsRead[Any],
    nrows: int | None = None,
    convert: bool = True,
    **kwargs: Any,
) -> LocData:
    """
    Load data from a Zeiss Elyra single-molecule localization file.

    The header line provides the column names; the remaining text up to the
    first NUL character is parsed as a tab-separated table.

    Parameters
    ----------
    path
        File path or file-like object for a file to load.
    nrows
        The number of localizations to load from file. None means that all
        available rows are loaded.
    convert
        If True convert types by applying type specifications in
        locan.constants.PROPERTY_KEYS.
    kwargs
        Other parameters passed to `pandas.read_csv()`.

    Returns
    -------
    LocData
        A new instance of LocData with all localizations.

    Note
    ----
    Data is loaded with encoding = 'latin-1' and only data before the first
    NUL character is returned.
    Additional information appended at the end of the file is thus ignored.
    """
    with open_path_or_file_like(path, encoding="latin-1") as file:
        column_names = _read_Elyra_header(file)
        # Keep only the text in front of the first NUL character; whatever
        # follows is appended metadata and not part of the table.
        table_text, _, _ = file.read().partition("\x00")

    # The table text is fully in memory, so parsing no longer needs the file.
    localizations = pd.read_csv(
        io.StringIO(table_text),
        sep="\t",
        skiprows=0,
        nrows=nrows,
        names=column_names,
        **kwargs,
    )

    if convert:
        localizations = convert_property_types(
            localizations, types=locan.constants.PROPERTY_KEYS
        )

    locdata = LocData.from_dataframe(dataframe=localizations)

    # Describe the origin of the data in the metadata.
    locdata.meta.source = metadata_pb2.EXPERIMENT
    locdata.meta.state = metadata_pb2.RAW
    locdata.meta.file.type = metadata_pb2.ELYRA
    locdata.meta.file.path = str(path)

    # Start with a fresh history that records only this loading step.
    del locdata.meta.history[:]
    locdata.meta.history.add(
        name="load_Elyra_file",
        parameter=f"path={str(path)}, nrows={nrows}",
    )

    return locdata