Source code for locan.locan_io.locdata.utilities

"""

Utility functions for file input/output of localization data.

"""

from __future__ import annotations

import io
import logging
import os
from collections.abc import Iterable, Mapping
from contextlib import closing
from typing import TYPE_CHECKING, Any, cast

if TYPE_CHECKING:
    from _typeshed import SupportsRead

import pandas as pd

from locan.constants import PROPERTY_KEYS

# Names exported by ``from <this module> import *``.
# NOTE(review): ``open_path_or_file_like`` is defined in this module but is
# not listed here - presumably intentional; confirm before adding it.
__all__: list[str] = ["convert_property_types", "convert_property_names"]

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def convert_property_types(
    dataframe: pd.DataFrame,
    types: Mapping[str, str | type],
    loc_properties: Iterable[str] | None = None,
) -> pd.DataFrame:
    """
    Return a copy of `dataframe` whose columns are cast as given in `types`.

    A target given as one of the strings 'integer', 'signed', 'unsigned' or
    'float' is handed to :func:`pandas.to_numeric` as its `downcast` argument
    (with ``errors="coerce"``). Any other target, e.g. a type object such as
    `int`, `str` or `np.float64`, is handed to :meth:`pandas.Series.astype`.

    Columns that are missing from `dataframe` or that have no entry in
    `types` are left unchanged.

    Parameters
    ----------
    dataframe
        Data to be converted. It is not modified.
    types
        Mapping of column names (loc_properties) to target types.
    loc_properties
        The columns in `dataframe` to be converted.
        If None, every column of `dataframe` is considered.

    Returns
    -------
    pandas.DataFrame
        A copy of `dataframe` with converted types.
    """
    # Type names that select the pandas.to_numeric(downcast=...) code path.
    downcast_names = ("integer", "signed", "unsigned", "float")

    converted = dataframe.copy()

    if loc_properties is None:
        candidates = dataframe.columns
    else:
        candidates = [name for name in loc_properties if name in dataframe.columns]

    # Only columns that actually have a requested target type are touched.
    requested = {name: types[name] for name in candidates if name in types}

    for name, target in requested.items():
        # Always read from the untouched input, write into the copy.
        column = dataframe[name]
        if isinstance(target, str) and target in downcast_names:
            converted[name] = pd.to_numeric(  # type: ignore
                column, downcast=target, errors="coerce"
            )
        else:
            converted[name] = column.astype(target)  # type: ignore

    return converted
def open_path_or_file_like(
    path_or_file_like: str | bytes | os.PathLike[Any] | int | SupportsRead[Any],
    mode: str = "r",
    encoding: str | None = None,
) -> Any:
    """
    Provide open-file context from `path_or_file_like` input.

    If `path_or_file_like` exposes the attributes ``seek``, ``read`` and
    ``close`` it is used as is and `mode` and `encoding` are ignored.
    Otherwise it is passed to the builtin :func:`open`.

    In both cases the file object is wrapped in
    :func:`contextlib.closing`, i.e. it is closed when the returned context
    is left - this includes a file-like object handed in by the caller.

    Parameters
    ----------
    path_or_file_like
        Identifier for file
    mode
        same as in `open()`
    encoding
        same as in `open()`

    Returns
    -------
    context for file object

    Raises
    ------
    TypeError
        If `path_or_file_like` is neither file-like nor accepted by `open()`.
    """
    try:
        # Duck-typing probe: a missing attribute raises AttributeError and
        # sends us to the `open()` branch. Every attribute is looked up; the
        # values themselves are irrelevant.
        for attr in ("seek", "read", "close"):
            getattr(path_or_file_like, attr)
    except (AttributeError, io.UnsupportedOperation):
        # The target type is spelled as a string so that nothing is evaluated
        # at runtime; the cast only informs static type checkers.
        path = cast(
            "str | bytes | os.PathLike[str] | os.PathLike[bytes] | int",
            path_or_file_like,
        )
        # Keep the try body down to the one call whose TypeError we translate.
        try:
            file = open(path, mode=mode, encoding=encoding)
        except TypeError as exc:
            raise TypeError(
                "path_or_file_like must be str, bytes, os.PathLike or file-like."
            ) from exc
    else:
        file = path_or_file_like
    return closing(file)
def convert_property_names(
    properties: Iterable[str],
    property_mapping: dict[str, str] | Iterable[dict[str, str]] | None = None,
) -> list[str]:
    """
    Translate property names into standard locan property names.

    A name that is already a key of `PROPERTY_KEYS` is kept. Any other name
    is replaced through `property_mapping` if it has an entry there.
    A name that is covered by neither is kept unchanged and a warning is
    logged.

    Parameters
    ----------
    properties
        Properties to be converted
    property_mapping
        Mappings between other property names and locan property names.
        Either a single dict or an iterable of dicts; several dicts are
        merged in order, so later entries override earlier ones.

    Returns
    -------
    list[str]
        Converted property names

    Raises
    ------
    TypeError
        If `property_mapping` is neither None, a dict nor an iterable.
    """
    # Collapse the accepted input shapes into one flat lookup table.
    lookup: dict[str, str]
    if property_mapping is None:
        lookup = {}
    elif isinstance(property_mapping, dict):
        lookup = property_mapping
    elif isinstance(property_mapping, Iterable):
        lookup = {}
        for partial_mapping in property_mapping:
            lookup.update(partial_mapping)  # type: ignore[arg-type]
    else:
        raise TypeError(f"property_mapping has wrong type {type(property_mapping)}")

    converted: list[str] = []
    for name in properties:
        if name in PROPERTY_KEYS:
            # Standard names take precedence over the user mapping.
            standard_name = name
        elif name in lookup:
            standard_name = lookup[name]
        else:
            logger.warning(f"Column {name} is not a Locan property standard.")
            standard_name = name
        converted.append(standard_name)
    return converted