Tutorial about coordinate-based colocalization#

Coordinate-based colocalization of two selections is computed as introduced by Malkusch et al. (1). A local density of locdata A is compared with the local density of locdata B within a varying radius for each localization in selection A. Local densities at various radii are compared by Spearman-rank-correlation and weighted by an exponential factor including the Euclidean distance to the nearest neighbor for each localization. The colocalization coefficient can take a value between -1 and 1 with -1 indicating anti-correlation (i.e. nearby localizations of selection B), 0 no colocalization, 1 strong colocalization.

The colocalization coefficient is provided as property for each localization within the corresponding dataset. The property key refers to colocalizing locdata A with locdata B.

from pathlib import Path

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors

import locan as lc
/tmp/ipykernel_979/1945727434.py:6: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
lc.show_versions(system=False, dependencies=False, verbose=False)
Locan:
   version: 0.20.0.dev41+g755b969

Python:
   version: 3.11.6

Synthetic data#

Provide synthetic data made of clustered localizations that are normal distributed around their center positions.

rng = np.random.default_rng(seed=1)
def set_centers(n_centers_1d=3, feature_range = (0, 1000)):
    dist = (feature_range[1] - feature_range[0])/(n_centers_1d + 1)
    centers = np.mgrid[feature_range[0] + dist : feature_range[1] : dist, feature_range[0] + dist : feature_range[1] : dist].reshape(2, -1).T
    return centers
n_centers_1d = 3
feature_range = (0, 1000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)

locdata.print_summary()
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
identifier: "1"
comment: ""
source: SIMULATION
state: RAW
element_count: 900
frame_count: 0
creation_time {
  2024-03-14T11:05:27.455710Z
}

A second dataset is provided by shifting the first dataset by an offset.

locdata_trans = lc.transform_affine(locdata, offset=(20,0))
locdata_trans.print_summary()
identifier: "2"
comment: ""
source: SIMULATION
state: MODIFIED
element_count: 900
frame_count: 0
creation_time {
  2024-03-14T11:05:27.455710Z
}
modification_time {
  2024-03-14T11:05:27.470967Z
}
fig, ax = plt.subplots(nrows=1, ncols=1)
locdata.data.plot.scatter(x='position_x', y='position_y', ax=ax, color='Blue', label='origin')
locdata_trans.data.plot.scatter(x='position_x', y='position_y', ax=ax, color='Red', label='transform')
plt.show()
../../_images/e1a367d31a449e7a4409dfcb05e83e3e51f1d2d9e125bf09581c7f6f86b65d63.png

CBC computation#

cbc = lc.CoordinateBasedColocalization(radius=100, n_steps=10).compute(locdata=locdata, other_locdata=locdata_trans)
cbc.results.head()
colocalization_cbc_2
0 0.382530
1 0.280571
2 0.036730
3 0.152874
4 0.385790

Results#

points = locdata.coordinates
color = cbc.results['colocalization_cbc_2'].values

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
cax = axes[0].scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm= colors.Normalize(-1., 1.), label='points')
axes[0].set_title('CBC coefficients for original data')

# axes[1].scatter(x=points[:,0], y=points[:,1], marker='.', color='Blue', label='points')
# axes[1].scatter(x=points_trans[:,0], y=points_trans[:,1], marker='o', color='Red', label='transformed points')
locdata.data.plot.scatter(x='position_x', y='position_y', ax=axes[1], color='Blue', label='origin')
locdata_trans.data.plot.scatter(x='position_x', y='position_y', ax=axes[1], color='Red', label='transform')
axes[1].set_title('Transformed points')
plt.colorbar(cax, ax=axes[0])
plt.tight_layout()
plt.show()
../../_images/e0d9fb994683f96d158563022126b2d1e931f6484e8bd0e37177b6a6d36f8f43.png

CBC for various shifts#

n_centers_1d = 3 
feature_range = (0, 2000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)

locdata.print_summary()
identifier: "3"
comment: ""
source: SIMULATION
state: RAW
element_count: 900
frame_count: 0
creation_time {
  2024-03-14T11:05:29.468484Z
}
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset, 0)) for offset in offsets]
cbc_list = [lc.CoordinateBasedColocalization(radius=100, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for other_locdata in locdata_trans_list]
points = locdata.coordinates
n_rows = 1
n_cols = 4

fig = plt.figure(figsize=(15, 5))
for n, (cbc, offset) in enumerate(zip(cbc_list, offsets)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}')
plt.show()
../../_images/1deca7fd3241209e03b0254b86ab1d1c2d4b351af0e2eb0d5a6bbc3b02df7674.png

CBC on various length scales (for small cluster)#

n_centers_1d = 3 
feature_range = (0, 2000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset,0)) for offset in offsets]
radii = [50, 100, 150, 200, 250, 300, 350, 400]
cbc_list = [lc.CoordinateBasedColocalization(radius=radius, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for radius in radii 
            for other_locdata in locdata_trans_list
           ]
points = locdata.coordinates
params = [(radius, offset) for radius in radii for offset in offsets]

n_rows = len(radii)
n_cols = len(offsets)

fig = plt.figure(figsize=(15, 15))
for n, (cbc, (radius, offset)) in enumerate(zip(cbc_list, params)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    if not all(np.isnan(color)):
        ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}, radius: {radius}')
plt.tight_layout()
plt.show()
../../_images/64009e024d3ed155b2d98a77d2c31f113b8994d389c8bc0cda2487715f517585.png

CBC on various length scales (for larger cluster)#

n_centers_1d = 3 
feature_range = (0, 10_000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=100, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset,0)) for offset in offsets]
radii = [50, 100, 150, 200, 250, 300, 350, 400]
cbc_list = [lc.CoordinateBasedColocalization(radius=radius, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for radius in radii 
            for other_locdata in locdata_trans_list
           ]
points = locdata.coordinates
params = [(radius, offset) for radius in radii for offset in offsets]

n_rows = len(radii)
n_cols = len(offsets)

fig = plt.figure(figsize=(15, 15))
for n, (cbc, (radius, offset)) in enumerate(zip(cbc_list, params)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}, radius: {radius}')
plt.tight_layout()
plt.show()
../../_images/4bb654095e97a1b6e2bf65a6b2ea7f890f353234627de0c11bb08410bc14d875.png