Tutorial about coordinate-based colocalization

Coordinate-based colocalization of two selections is computed as introduced by Malkusch et al. (1). A local density of locdata A is compared with the local density of locdata B within a varying radius for each localization in selection A. Local densities at various radii are compared by Spearman-rank-correlation and weighted by an exponential factor including the Euclidean distance to the nearest neighbor for each localization. The colocalization coefficient can take a value between -1 and 1 with -1 indicating anti-correlation (i.e. nearby localizations of selection B), 0 no colocalization, 1 strong colocalization.

The colocalization coefficient is provided as property for each localization within the corresponding dataset. The property key refers to colocalizing locdata A with locdata B.

from pathlib import Path

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors

import locan as lc
lc.show_versions(system=False, dependencies=False, verbose=False)
Locan:
   version: 0.22.0.dev32+g4bfc3ab8b

Python:
   version: 3.11.14

Synthetic data

Provide synthetic data made of clustered localizations that are normal distributed around their center positions.

rng = np.random.default_rng(seed=1)
def set_centers(n_centers_1d=3, feature_range = (0, 1000)):
    dist = (feature_range[1] - feature_range[0])/(n_centers_1d + 1)
    centers = np.mgrid[feature_range[0] + dist : feature_range[1] : dist, feature_range[0] + dist : feature_range[1] : dist].reshape(2, -1).T
    return centers
n_centers_1d = 3
feature_range = (0, 1000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)

locdata.print_summary()
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
identifier: "1"
comment: ""
source: SIMULATION
state: RAW
element_count: 900
frame_count: 0
creation_time {
  2026-04-30T08:33:41.792546Z
}

A second dataset is provided by shifting the first dataset by an offset.

locdata_trans = lc.transform_affine(locdata, offset=(20,0))
locdata_trans.print_summary()
identifier: "2"
comment: ""
source: SIMULATION
state: MODIFIED
element_count: 900
frame_count: 0
creation_time {
  2026-04-30T08:33:41.792546Z
}
modification_time {
  2026-04-30T08:33:41.882659Z
}
fig, ax = plt.subplots(nrows=1, ncols=1)
locdata.data.plot.scatter(x='position_x', y='position_y', ax=ax, color='Blue', label='origin')
locdata_trans.data.plot.scatter(x='position_x', y='position_y', ax=ax, color='Red', label='transform')
plt.show()
../../_images/12c9fd2558979505cf5fe1e5dc21fca2228389e66ad7ff0d5bcf0ee7cec4dc80.png

CBC computation

cbc = lc.CoordinateBasedColocalization(radius=100, n_steps=10).compute(locdata=locdata, other_locdata=locdata_trans)
cbc.results.head()
colocalization_cbc_2
0 0.382530
1 0.280571
2 0.036730
3 0.152874
4 0.385790

Results

points = locdata.coordinates
color = cbc.results['colocalization_cbc_2'].values

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
cax = axes[0].scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm= colors.Normalize(-1., 1.), label='points')
axes[0].set_title('CBC coefficients for original data')

# axes[1].scatter(x=points[:,0], y=points[:,1], marker='.', color='Blue', label='points')
# axes[1].scatter(x=points_trans[:,0], y=points_trans[:,1], marker='o', color='Red', label='transformed points')
locdata.data.plot.scatter(x='position_x', y='position_y', ax=axes[1], color='Blue', label='origin')
locdata_trans.data.plot.scatter(x='position_x', y='position_y', ax=axes[1], color='Red', label='transform')
axes[1].set_title('Transformed points')
plt.colorbar(cax, ax=axes[0])
plt.tight_layout()
plt.show()
../../_images/6aee2812e5487c6873ae16ab2ca42127f0eaf2330374effeff01c60e0d46159a.png

CBC for various shifts

n_centers_1d = 3 
feature_range = (0, 2000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)

locdata.print_summary()
identifier: "3"
comment: ""
source: SIMULATION
state: RAW
element_count: 900
frame_count: 0
creation_time {
  2026-04-30T08:33:43.437022Z
}
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset, 0)) for offset in offsets]
cbc_list = [lc.CoordinateBasedColocalization(radius=100, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for other_locdata in locdata_trans_list]
points = locdata.coordinates
n_rows = 1
n_cols = 4

fig = plt.figure(figsize=(15, 5))
for n, (cbc, offset) in enumerate(zip(cbc_list, offsets)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}')
plt.show()
../../_images/be7080861d709756d21e67a57586e57ca0654c4254138569e5cd4587e652fb35.png

CBC on various length scales (for small cluster)

n_centers_1d = 3 
feature_range = (0, 2000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=20, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset,0)) for offset in offsets]
radii = [50, 100, 150, 200, 250, 300, 350, 400]
cbc_list = [lc.CoordinateBasedColocalization(radius=radius, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for radius in radii 
            for other_locdata in locdata_trans_list
           ]
points = locdata.coordinates
params = [(radius, offset) for radius in radii for offset in offsets]

n_rows = len(radii)
n_cols = len(offsets)

fig = plt.figure(figsize=(15, 15))
for n, (cbc, (radius, offset)) in enumerate(zip(cbc_list, params)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    if not all(np.isnan(color)):
        ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}, radius: {radius}')
plt.tight_layout()
plt.show()
../../_images/13f273d6048aadb870dd2271fa37cbeccae91a52dfec1bec5086067807872943.png

CBC on various length scales (for larger cluster)

n_centers_1d = 3 
feature_range = (0, 10_000)
centers = set_centers(n_centers_1d, feature_range)
offspring = rng.normal(loc=0, scale=100, size=(len(centers), 100, 2))
locdata = lc.simulate_cluster(centers=centers, region=[feature_range] * 2, offspring=offspring, clip=True, shuffle=True, seed=rng)
offsets = [0, 50, 100, 200]
locdata_trans_list = [lc.transform_affine(locdata, offset=(offset,0)) for offset in offsets]
radii = [50, 100, 150, 200, 250, 300, 350, 400]
cbc_list = [lc.CoordinateBasedColocalization(radius=radius, n_steps=10).compute(locdata=locdata, other_locdata=other_locdata) for radius in radii 
            for other_locdata in locdata_trans_list
           ]
points = locdata.coordinates
params = [(radius, offset) for radius in radii for offset in offsets]

n_rows = len(radii)
n_cols = len(offsets)

fig = plt.figure(figsize=(15, 15))
for n, (cbc, (radius, offset)) in enumerate(zip(cbc_list, params)):
    ax = fig.add_subplot(n_rows, n_cols, n+1)
    color = cbc.results.iloc[:, 0].values
    ax.scatter(x=points[:,0], y=points[:,1], marker='.', c=color, cmap='coolwarm', norm=colors.Normalize(-1., 1.))
    ax.set_title(f'offset: {offset}, radius: {radius}')
plt.tight_layout()
plt.show()
../../_images/05cdf0e20123bb5bd858b0f9ca8ef8442113a3c1d2a78eeebf9f7b791a3ba3e9.png