Source code for locan.datasets

"""
Utility functions to deal with exemplary datasets.

The data is located in a separate repository `https://github.com/super-resolution/LocanDatasets`.

When calling a function the datasets are expected to reside in a directory specified by the
`locan.constants.DATASETS_DIR` variable.
If the directory does not exist the exemplary files are downloaded from GitHub.
"""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any

from locan.configuration import DATASETS_DIR
from locan.dependencies import HAS_DEPENDENCY, needs_package
from locan.locan_io.locdata.asdf_io import load_asdf_file

if HAS_DEPENDENCY["httpx"]:
    import httpx

if TYPE_CHECKING:
    from locan.data.locdata import LocData


__all__: list[str] = ["load_npc", "load_tubulin"]


[docs] @needs_package("httpx") def load_npc(**kwargs: Any) -> LocData: """ Locdata representing nuclear pore complexes. The data was generated by dSTORM [1]_. It shows the gp210 protein of the nuclear pore complex labeled with AlexaFluor647. References ---------- .. [1] Löschberger A, van de Linde S, Dabauvalle MC, Rieger B, Heilemann M, Krohne G, Sauer M., Super-resolution imaging visualizes the eightfold symmetry of gp210 proteins around the nuclear pore complex and resolves the central channel with nanometer resolution. J Cell Sci. 2012, 125:570-5, doi: 10.1242/jcs.098822. Parameters ---------- kwargs Parameters passed to `locan.load_asdf_file()`. Returns ------- LocData """ path = Path(DATASETS_DIR) / "npc_gp210.asdf" if not path.exists(): DATASETS_DIR.mkdir(exist_ok=True) url = "https://raw.githubusercontent.com/super-resolution/LocanDatasets/main/smlm_data/npc_gp210.asdf" response = httpx.get(url, timeout=10) if response.status_code != httpx.codes.ok: # type: ignore raise ConnectionError("response.status_code != requests.codes.ok") with open(path, "wb") as file: for chunk in response.iter_bytes(chunk_size=128): file.write(chunk) locdata = load_asdf_file(path, **kwargs) return locdata
[docs] @needs_package("httpx") def load_tubulin(**kwargs: Any) -> LocData: """ Locdata representing microtubules. The data was generated by dSTORM [1]_. It shows alpha-tubulin as part of microtubules within COS-7 cells. Tubulin was targeted by primary IgG-antibodies labeled with AlexaFluor647 (2.1 degree of labeling) and recorded over 75_000 frames. References ---------- .. [1] Dominic A. Helmerich, Gerti Beliu, and Markus Sauer, Multiple-Labeled Antibodies Behave Like Single Emitters in Photoswitching Buffer ACS Nano 2020, 14, 10, 12629–12641, DOI: 10.1021/acsnano.0c06099 Parameters ---------- kwargs Parameters passed to `locan.load_rapidSTORM_file()`. Returns ------- LocData """ path = Path(DATASETS_DIR) / "tubulin_cos7.asdf" if not path.exists(): DATASETS_DIR.mkdir(exist_ok=True) url = "https://raw.githubusercontent.com/super-resolution/LocanDatasets/main/smlm_data/tubulin_cos7.asdf" response = httpx.get(url, timeout=10) if response.status_code != httpx.codes.ok: # type: ignore raise ConnectionError("response.status_code != httpx.codes.ok") with open(path, "wb") as file: for chunk in response.iter_bytes(chunk_size=128): file.write(chunk) locdata = load_asdf_file(path, **kwargs) return locdata