Stereo-Seq MOB Clustering Analysis
5: Stereo_seq_Clustering (MOB)¶
HiSTaR can process Stereo-Seq data to extract low-dimension representation. In this tutorial, we use mouse olfactory bulb (MOB) dataset to introduce the analysis.
Stereo-seq mouse olfactory bulb data comes from SEDR https://github.com/JinmiaoChenLab/SEDR_analyses
The complete experimental dataset is available here https://zenodo.org/records/15599070
In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import torch
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import os
from pathlib import Path
from sklearn.metrics import silhouette_score
import warnings
warnings.filterwarnings('ignore')
In [2]:
import HiSTaR
random_seed = 2023
HiSTaR.fix_seed(random_seed)
In [3]:
import matplotlib as mpl
mpl.rcParams.update({
'font.family': 'Arial',
'axes.labelweight': 'bold',
'axes.titleweight': 'bold',
'axes.titlesize': 12,
'figure.constrained_layout.use': True
})
mclust_palette = [
"#F3766E", "#5BB300", "#2E96FF", "#C655D9", "#FFB549",
"#00C6EA", "#9B8500", "#009E73", "#FF6EB4", "#7AFF33",
"#A837D8", "#FFD700", "#00FF99", "#FF7F50", "#8A2BE2",
"#4B0082", "#FF1493", "#32CD32", "#8B4513"
]
Loading data¶
In [4]:
data_root = Path('../data/Stereo-seq')
In [5]:
if not os.path.exists(data_root / 'raw.h5ad'):
counts = pd.read_csv(data_root / 'RNA_counts.tsv.gz', sep='\t', index_col=0).T
counts.index = [f'Spot_{i}' for i in counts.index]
adata = sc.AnnData(counts)
adata.X = csr_matrix(adata.X, dtype=np.float32)
df_pos = pd.read_csv(data_root / 'position.tsv', sep='\t')
adata.obsm['spatial'] = df_pos[['y','x']].values
used_barcode = pd.read_csv(os.path.join(data_root / 'used_barcodes.txt'), sep='\t', header=None)
used_barcode = used_barcode[0]
adata = adata[used_barcode,]
adata.write( data_root / 'raw.h5ad')
else:
adata = sc.read_h5ad( data_root / 'raw.h5ad')
Preprocessing¶
In [6]:
adata.layers['count'] = adata.X.toarray()
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.scale(adata)
from sklearn.decomposition import PCA
adata_X = PCA(n_components=200, random_state=42).fit_transform(adata.X)
adata.obsm['X_pca'] = adata_X
construct graph¶
In [7]:
# construct graph
graph_dict = HiSTaR.graph_construction(adata,12)
In [8]:
adata
Out[8]:
AnnData object with n_obs × n_vars = 19109 × 27106
var: 'mean', 'std'
obsm: 'spatial', 'X_pca'
layers: 'count'
Running HiSTaR¶
In [9]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
histar_net = HiSTaR.histar(adata.obsm['X_pca'], graph_dict, device=device)
histar_net.train()
histar_feat, _, _, _ = histar_net.process()
adata.obsm['HiSTaR'] = histar_feat
Clustering¶
In [10]:
#HiSTaR.configure_r_environment() # If you encounter problems loading R packages, you can manually configure your path in this function.
In [11]:
n_clusters = 7
HiSTaR.mclust_R(adata, n_clusters, use_rep='HiSTaR', key_added='HiSTaR_clust')
R[write to console]: __ __
____ ___ _____/ /_ _______/ /_
/ __ `__ \/ ___/ / / / / ___/ __/
/ / / / / / /__/ / /_/ (__ ) /_
/_/ /_/ /_/\___/_/\__,_/____/\__/ version 6.1.1
Type 'citation("mclust")' for citing this R package in publications.
fitting ... |======================================================================| 100%
Out[11]:
AnnData object with n_obs × n_vars = 19109 × 27106
obs: 'HiSTaR_clust'
var: 'mean', 'std'
obsm: 'spatial', 'X_pca', 'HiSTaR'
layers: 'count'
Visualization¶
In [12]:
adata.obs['HiSTaR_clust'] = adata.obs['HiSTaR_clust'].astype('category')
plt.figure(figsize=(4, 3), facecolor='white')
sc.pl.embedding(
adata,
basis="spatial",
color="HiSTaR_clust",
palette=mclust_palette,
s=12,
title=f"HiSTaR",
show=False,
frameon=False
)
ax = plt.gca()
ax.set_facecolor('white')
for spine in ax.spines.values():
spine.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
# plt.savefig('Stereo_seq_spatial.png',dpi=300, bbox_inches='tight')
<Figure size 400x300 with 0 Axes>
In [13]:
sc.pp.neighbors(adata,n_neighbors=12,use_rep='HiSTaR',key_added='HiSTaR_temp_neighbors')
sc.tl.umap(adata, neighbors_key='HiSTaR_temp_neighbors')
plt.figure(figsize=(4, 3), facecolor='white')
sc.pl.umap(
adata,
color='HiSTaR_clust',
palette=mclust_palette,
title=f'HiSTaR',
show=False,
s=12,
frameon=False
)
ax = plt.gca()
ax.set_facecolor('white')
ax.figure.set_facecolor('white')
plt.tight_layout()
# plt.savefig("Stereo_seq_umap.png",dpi=300, bbox_inches='tight')
<Figure size 400x300 with 0 Axes>