Source code for locan.data.metadata_utils
"""
Deal with metadata in LocData objects.
Functions to modify metadata in LocData objects.
"""
from __future__ import annotations
import importlib
import logging
import os
from typing import TYPE_CHECKING, Any, BinaryIO
try:
import tomllib # type: ignore[import]
except ModuleNotFoundError:
import tomli as tomllib # type: ignore # for sys.version_info < (3, 11):
from google.protobuf import json_format, text_format
from google.protobuf.message import Message
from locan.data import metadata_pb2
if TYPE_CHECKING:
from locan.data.locdata import LocData
__all__: list[str] = [
"metadata_to_formatted_string",
"metadata_from_toml_string",
"load_metadata_from_toml",
"message_scheme",
"merge_metadata",
]
logger = logging.getLogger(__name__)
def _modify_meta(
    locdata: LocData,
    new_locdata: LocData,
    function_name: str | None = None,
    parameter: dict[str, Any] | None = None,
    meta: metadata_pb2.Metadata | None = None,
) -> metadata_pb2.Metadata:
    """
    Derive the metadata of a modified dataset from the metadata of its source.

    The metadata of `locdata` is copied. Identifier, element count and frame
    count are then taken from `new_locdata`, the creation time of
    `new_locdata` becomes the modification time, the state is set to
    ``MODIFIED``, the identifier of `locdata` is appended to the ancestor
    identifiers and one history entry is added. Finally `meta` is applied
    on top of the result.

    Parameters
    ----------
    locdata
        Original locdata before modification.
    new_locdata
        New locdata after modification.
    function_name
        Name of the function that was applied for modification.
    parameter
        Parameter of the function that was applied for modification;
        recorded in the history entry as ``str(parameter)``.
    meta
        Additional metadata. A message is merged with ``MergeFrom``; a dict
        is applied key by key with ``setattr``.

    Returns
    -------
    locan.data.metadata_pb2.Metadata
        Metadata about the current dataset and its history.
    """
    source_meta = locdata.meta
    target_meta = new_locdata.meta

    updated = metadata_pb2.Metadata()
    updated.CopyFrom(source_meta)

    # These fields describe the new dataset, not the one it was derived from.
    for field_name in ("identifier", "element_count", "frame_count"):
        setattr(updated, field_name, getattr(target_meta, field_name))

    updated.modification_time.CopyFrom(target_meta.creation_time)
    updated.state = metadata_pb2.MODIFIED
    updated.ancestor_identifiers.append(source_meta.identifier)
    updated.history.add(name=function_name, parameter=str(parameter))

    if isinstance(meta, dict):
        for field_name, field_value in meta.items():
            setattr(updated, field_name, field_value)
    elif meta is not None:
        updated.MergeFrom(meta)

    return updated
def _dict_to_protobuf(
    dictionary: dict[str, Any], message: Message, inplace: bool = False
) -> Message | None:
    """
    Parse dictionary with message attributes and their values in message.

    Keys that are not attributes of `message` are reported with a warning
    and skipped; all remaining keys are still processed.

    Parameters
    ----------
    dictionary
        Mapping of attribute names to values. A dict value is parsed
        recursively into the corresponding attribute. A list value is
        appended with ``extend``; if that raises ``TypeError`` each element
        is parsed into a new sub-message created with ``add()``.
        Any other value is assigned with ``setattr``.
    message
        Message to be filled.
    inplace
        If true, `message` itself is modified and None is returned.
        Otherwise a copy of `message` is filled and returned.

    Returns
    -------
    google.protobuf.message.Message | None
        The filled copy, or None if `inplace` is true.

    Raises
    ------
    AttributeError, TypeError
        If a value cannot be assigned and the attribute is neither a
        ``Timestamp`` nor a ``Duration``.
    """
    if not inplace:
        message_ = message.__class__()
        message_.CopyFrom(message)
        message = message_

    for key, value in dictionary.items():
        try:
            attr_ = getattr(message, key)
        except AttributeError as e:
            logger.warning("AttributeError while parsing: %s", e)
            # Skip only the unknown key; the remaining keys are still valid.
            continue

        if isinstance(value, dict):
            _dict_to_protobuf(dictionary=value, message=attr_, inplace=True)
        elif isinstance(value, list):
            try:
                attr_.extend(value)
            except TypeError:
                # Elements are not directly appendable: build one
                # sub-message per element instead.
                for element in value:
                    submessage = attr_.add()
                    _dict_to_protobuf(
                        dictionary=element, message=submessage, inplace=True
                    )
        else:
            try:
                setattr(message, key, value)
            except (AttributeError, TypeError):
                # Direct assignment failed; Timestamp and Duration are
                # filled through their own parsers instead.
                if attr_.DESCRIPTOR.name == "Timestamp":
                    attr_.FromJsonString(value)
                elif attr_.DESCRIPTOR.name == "Duration":
                    attr_.FromNanoseconds(value)
                else:
                    raise

    if inplace:
        return None
    return message
[docs]
def metadata_to_formatted_string(message: Message, **kwargs: Any) -> str:
    """
    Render a protobuf message as formatted text.

    Sub-messages named ``Timestamp`` or ``Duration`` are printed through
    their ``ToJsonString`` representation; everything else is left to
    :func:`google.protobuf.text_format.MessageToString`.

    Parameters
    ----------
    message
        Protobuf message like locan.data.metadata_pb2.Metadata
    kwargs
        Other kwargs that are passed to
        :func:`google.protobuf.text_format.MessageToString`.

    Returns
    -------
    str
        Formatted metadata string.
    """
    json_formatted_types = ("Timestamp", "Duration")

    def _format_well_known(
        message: Message, indent: int, as_one_line: bool
    ) -> str | None:
        # None is returned for every message this formatter does not handle.
        if message.DESCRIPTOR.name not in json_formatted_types:
            return None
        formatted: str | None = message.ToJsonString()  # type: ignore[attr-defined]
        return formatted

    return text_format.MessageToString(
        message, message_formatter=_format_well_known, **kwargs
    )
def _toml_dict_to_protobuf(toml_dict: dict[str, Any]) -> dict[str, Message]:
    """
    Turn toml dict into protobuf messages.

    The entry ``"messages"`` declares the messages to create: each item
    provides ``"name"``, ``"module"`` and ``"class_name"``. Every other
    top-level key must be one of the declared names and holds the values
    for that message.

    `toml_dict` is not modified.

    Parameters
    ----------
    toml_dict
        Dict from TOML string with metadata.

    Returns
    -------
    dict[str, google.protobuf.message.Message]
        Message instances with name as declared in toml file.

    Raises
    ------
    KeyError
        If ``"messages"`` is missing or a top-level key is not a declared
        message name.
    """
    # instantiate messages
    # NOTE(security): module names are taken from the TOML content and
    # imported dynamically; only load metadata files from trusted sources.
    instances: dict[str, Message] = {}
    for declaration in toml_dict["messages"]:
        module = importlib.import_module(declaration["module"])
        class_ = getattr(module, declaration["class_name"])
        instances[declaration["name"]] = class_()

    # parse values
    for message_name, dictionary in toml_dict.items():
        if message_name == "messages":
            # declarations were handled above; skipped here instead of
            # being popped so the caller's dict stays intact
            continue
        _dict_to_protobuf(
            dictionary=dictionary, message=instances[message_name], inplace=True
        )
    return instances
[docs]
def metadata_from_toml_string(toml_string: str | None) -> dict[str, Message] | None:
"""
Turn toml string into protobuf message instances.
Note
-----
Parses Timestamp elements from string '2022-05-14T06:58:00Z'.
Parses Duration elements from int in nanoseconds.
Parameters
----------
toml_string
TOML string with metadata.
Returns
-------
dict[str, google.protobuf.message.Message] | None
Message instances with name as declared in toml file.
"""
if toml_string is None:
return None
toml_dict = tomllib.loads(toml_string)
return _toml_dict_to_protobuf(toml_dict)
[docs]
def load_metadata_from_toml(
path_or_file_like: str | bytes | os.PathLike[Any] | BinaryIO | None,
) -> dict[str, Message] | None:
"""
Turn toml file into protobuf message instances.
Note
-----
Parses Timestamp elements from string '2022-05-14T06:58:00Z'.
Parses Duration elements from int in nanoseconds.
Parameters
----------
path_or_file_like
File path or file-like for a TOML file.
Returns
-------
dict[str, google.protobuf.message.Message] | None
Message instances with name as declared in toml file.
"""
if path_or_file_like is None:
return None
try:
toml_dict = tomllib.load(path_or_file_like) # type: ignore[arg-type]
except AttributeError:
with open(path_or_file_like, "rb") as file: # type: ignore[arg-type]
toml_dict = tomllib.load(file)
return _toml_dict_to_protobuf(toml_dict)
[docs]
def message_scheme(message: Message) -> dict[str, Any]:
    """
    Provide message scheme with defaults including nested messages.

    The message is first converted with
    :func:`google.protobuf.json_format.MessageToDict`. Afterwards the entry
    of every non-repeated message field is replaced by the scheme of that
    sub-message, and the entry of every repeated message field (map
    containers excluded) is replaced by the scheme of one newly added
    element.

    The passed `message` is not modified: new elements of repeated fields
    are added to an internal copy only.

    Parameters
    ----------
    message
        Protobuf message

    Returns
    -------
    dict[str, Any]
        A nested dictionary with all message fields including default values.
    """
    message_dict = json_format.MessageToDict(
        message,
        always_print_fields_with_no_presence=True,
        preserving_proto_field_name=True,
    )

    # `add()` below appends an element to the repeated field it is called
    # on. Probe a private copy so that the caller's message stays untouched.
    scratch = message.__class__()
    scratch.CopyFrom(message)

    for descriptor in scratch.DESCRIPTOR.fields:
        if descriptor.type != descriptor.TYPE_MESSAGE:
            continue
        attr_ = getattr(scratch, descriptor.name)
        if not descriptor.is_repeated:
            message_dict[descriptor.name] = message_scheme(attr_)
            continue
        container_name = type(attr_).__name__
        # Map containers are left as converted by MessageToDict.
        if (
            "ScalarMap" not in container_name
            and "MessageMapContainer" not in container_name
        ):
            message_dict[descriptor.name] = message_scheme(attr_.add())
    return message_dict
[docs]
def merge_metadata(
    metadata: metadata_pb2.Metadata | None = None,
    other_metadata: (
        metadata_pb2.Metadata
        | None
        | dict[str, Any]
        | str
        | bytes
        | os.PathLike[Any]
        | BinaryIO
    ) = None,
) -> metadata_pb2.Metadata:
    """
    Merge `other_metadata` into Locdata.meta.

    If `metadata` is given it is updated in place and returned;
    otherwise a new ``Metadata`` message is created.

    Parameters
    ----------
    metadata
        Original LocData metadata before modification
    other_metadata
        Metadata to be merged:

        * None: nothing is merged.
        * path (str, bytes, os.PathLike) or binary file-like object:
          a TOML file that is loaded with :func:`load_metadata_from_toml`;
          the message declared with name ``"metadata"`` is merged.
        * dict: each item is applied with ``setattr``.
        * anything else: merged with ``MergeFrom``.

    Returns
    -------
    locan.data.metadata_pb2.Metadata
        Merged metadata

    Raises
    ------
    KeyError
        If a TOML source does not declare a message named ``"metadata"``.
    """
    new_metadata = metadata_pb2.Metadata() if metadata is None else metadata

    if other_metadata is None:
        return new_metadata

    # Open binary files (e.g. io.BufferedReader, io.BytesIO) are not
    # instances of typing.BinaryIO, so file-likes are recognized by their
    # `read` attribute instead of an isinstance check.
    is_path = isinstance(other_metadata, (str, bytes, os.PathLike))
    is_file_like = hasattr(other_metadata, "read")

    if is_path or is_file_like:
        loaded = load_metadata_from_toml(other_metadata)  # type: ignore[arg-type]
        if loaded is not None:
            new_metadata.MergeFrom(loaded["metadata"])  # type: ignore[arg-type]
    elif isinstance(other_metadata, dict):
        for key, value in other_metadata.items():
            setattr(new_metadata, key, value)
    else:
        new_metadata.MergeFrom(other_metadata)
    return new_metadata