"""plotting functions"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from pandas.core.dtypes.common import is_numeric_dtype
import seaborn as sns
from adjustText import adjust_text
from pandas.api.types import (
is_string_dtype,
is_categorical_dtype,
)
from scipy.sparse import find
import warnings
# import plotly.express as px
# import plotly.graph_objects as go
from .._settings import settings
from ._utils import (
generate_palette
)
[docs]
def violin(adata,
           list_obs=None,
           list_var=None,
           jitter=0.4,
           size=1,
           log=False,
           pad=1.08,
           w_pad=None,
           h_pad=3,
           fig_size=(3, 3),
           fig_ncol=3,
           save_fig=False,
           fig_path=None,
           fig_name='plot_violin.pdf',
           **kwargs):
    """Violin plot

    One figure is drawn for `list_obs` (columns of `adata.obs`) and one for
    `list_var` (columns of `adata.var`). Every requested column gets its own
    panel holding a violin overlaid with a jittered strip plot.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    list_obs : `list`, optional (default: None)
        A list of observations to plot.
    list_var : `list`, optional (default: None)
        A list of variables to plot.
    jitter : `float`, optional (default: 0.4)
        Amount of jitter to apply.
    size : `int`, optional (default: 1)
        The marker size
    log : `bool`, optional (default: False)
        If True, values are transformed with ``np.log1p`` before plotting.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: 3, None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size.
    fig_size: `tuple`, optional (default: (3,3))
        Size of a single panel. If None, ``mpl.rcParams['figure.figsize']``
        is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: False)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_violin.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``sns.violinplot``

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If an entry of `list_obs` is not in `adata.obs_keys()` or an entry
        of `list_var` is not in `adata.var_keys()`.

    Notes
    -----
    When both `list_obs` and `list_var` are given and `save_fig` is True,
    both figures are written to the same path, so the `list_var` figure
    replaces the `list_obs` one.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')
    if list_obs is None:
        list_obs = []
    if list_var is None:
        list_var = []
    for obs in list_obs:
        if obs not in adata.obs_keys():
            raise ValueError(f"could not find {obs} in `adata.obs_keys()`")
    for var in list_var:
        if var not in adata.var_keys():
            raise ValueError(f"could not find {var} in `adata.var_keys()`")

    def _draw_panels(df_annot, keys):
        # Draw one violin + strip panel per entry of `keys`.
        df_plot = df_annot[keys].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        # The grid must be sized from the keys actually being plotted;
        # sizing the `list_var` figure from `list_obs` produced a 0-row
        # grid whenever only `list_var` was supplied.
        fig_nrow = int(np.ceil(len(keys)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                                  fig_size[1]*fig_nrow))
        for i, key in enumerate(keys):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.violinplot(ax=ax_i,
                           y=key,
                           data=df_plot,
                           inner=None,
                           **kwargs)
            sns.stripplot(ax=ax_i,
                          y=key,
                          data=df_plot,
                          color='black',
                          jitter=jitter,
                          s=size)
            ax_i.set_title(key)
            ax_i.set_ylabel('')
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            os.makedirs(fig_path, exist_ok=True)
            fig.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            # Only saved figures are closed; unsaved ones stay open so the
            # active backend can still display them.
            plt.close(fig)

    if len(list_obs) > 0:
        _draw_panels(adata.obs, list_obs)
    if len(list_var) > 0:
        _draw_panels(adata.var, list_var)
[docs]
def hist(adata,
         list_obs=None,
         list_var=None,
         kde=True,
         log=False,
         pad=1.08,
         w_pad=None,
         h_pad=3,
         fig_size=(3, 3),
         fig_ncol=3,
         save_fig=False,
         fig_path=None,
         fig_name='plot_histogram.pdf',
         **kwargs
         ):
    """histogram plot

    One figure is drawn for `list_obs` (columns of `adata.obs`) and one for
    `list_var` (columns of `adata.var`), with one histogram panel per
    requested column.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    list_obs : `list`, optional (default: None)
        A list of observations to plot.
    list_var : `list`, optional (default: None)
        A list of variables to plot.
    kde : `bool`, optional (default: True)
        If True, compute a kernel density estimate to smooth the distribution
        and show on the plot
    log : `bool`, optional (default: False)
        If True, values are transformed with ``np.log1p`` before plotting.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: 3, None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size.
    fig_size: `tuple`, optional (default: (3,3))
        Size of a single panel. If None, ``mpl.rcParams['figure.figsize']``
        is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: False)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_histogram.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``sns.histplot``

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If an entry of `list_obs` is not in `adata.obs_keys()` or an entry
        of `list_var` is not in `adata.var_keys()`.

    Notes
    -----
    When both `list_obs` and `list_var` are given and `save_fig` is True,
    both figures are written to the same path, so the `list_var` figure
    replaces the `list_obs` one.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')
    if list_obs is None:
        list_obs = []
    if list_var is None:
        list_var = []
    for obs in list_obs:
        if obs not in adata.obs_keys():
            raise ValueError(f"could not find {obs} in `adata.obs_keys()`")
    for var in list_var:
        if var not in adata.var_keys():
            raise ValueError(f"could not find {var} in `adata.var_keys()`")

    def _draw_panels(df_annot, keys):
        # Draw one histogram panel per entry of `keys`.
        df_plot = df_annot[keys].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        # The grid must be sized from the keys actually being plotted;
        # sizing the `list_var` figure from `list_obs` produced a 0-row
        # grid whenever only `list_var` was supplied.
        fig_nrow = int(np.ceil(len(keys)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                                  fig_size[1]*fig_nrow))
        for i, key in enumerate(keys):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.histplot(ax=ax_i,
                         x=key,
                         data=df_plot,
                         kde=kde,
                         **kwargs)
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            os.makedirs(fig_path, exist_ok=True)
            fig.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            # Only saved figures are closed; unsaved ones stay open so the
            # active backend can still display them.
            plt.close(fig)

    if len(list_obs) > 0:
        _draw_panels(adata.obs, list_obs)
    if len(list_var) > 0:
        _draw_panels(adata.var, list_var)
[docs]
def pca_variance_ratio(adata,
                       log=True,
                       show_cutoff=True,
                       fig_size=(4, 4),
                       save_fig=None,
                       fig_path=None,
                       fig_name='plot_variance_ratio.pdf',
                       pad=1.08,
                       w_pad=None,
                       h_pad=None,
                       **kwargs):
    """Plot the variance ratio.

    Draws the values stored in ``adata.uns['pca']['variance_ratio']``
    against their component index and, optionally, a dashed vertical line
    at ``adata.uns['pca']['n_pcs']``.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    log : `bool`, optional (default: True)
        If True, variance_ratio will be log-transformed.
    show_cutoff : `bool`, optional (default: True)
        If True, cutoff on `n_pcs` will be shown
        (the selected number is also printed).
    fig_size: `tuple`, optional (default: (4,4))
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_variance_ratio.pdf')
        if `save_fig` is True, specify figure name.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.plot``

    Returns
    -------
    None
    """
    fig_size = mpl.rcParams['figure.figsize'] if fig_size is None \
        else fig_size
    save_fig = settings.save_fig if save_fig is None else save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    pca_res = adata.uns['pca']
    component_ids = range(len(pca_res['variance_ratio']))
    fig = plt.figure(figsize=fig_size)

    ratios = pca_res['variance_ratio']
    plt.plot(component_ids, np.log(ratios) if log else ratios, **kwargs)

    if show_cutoff:
        n_pcs = pca_res['n_pcs']
        print(f'the number of selected PC is: {n_pcs}')
        plt.axvline(n_pcs, ls='--', c='red')

    plt.xlabel('Principal Component')
    plt.ylabel('Variance Ratio')
    for axis_name in ('x', 'y'):
        plt.locator_params(axis=axis_name, nbins=5)
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)

    if not save_fig:
        return
    if not os.path.exists(fig_path):
        os.makedirs(fig_path)
    out_file = os.path.join(fig_path, fig_name)
    plt.savefig(out_file, pad_inches=1, bbox_inches='tight')
    # Closed only after saving; unsaved figures remain open.
    plt.close(fig)
[docs]
def pcs_features(adata,
                 log=False,
                 size=3,
                 show_cutoff=True,
                 pad=1.08,
                 w_pad=None,
                 h_pad=None,
                 fig_size=(3, 3),
                 fig_ncol=3,
                 save_fig=None,
                 fig_path=None,
                 fig_name='plot_pcs_features.pdf',
                 **kwargs):
    """Plot features that contribute to the top PCs.

    For each of the ``adata.uns['pca']['n_pcs']`` components, the absolute
    loadings in ``adata.uns['pca']['PCs']`` are sorted in decreasing order
    and drawn as a scatter plot, one panel per component.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    log : `bool`, optional (default: False)
        If True, the absolute loadings will be log-transformed.
    size : `int`, optional (default: 3)
        The marker size
    show_cutoff : `bool`, optional (default: True)
        If True, a dashed line is drawn at the number of features stored
        in ``adata.uns['pca']['features']['pc_<i>']`` for each component.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        Size of a single panel. If None, ``mpl.rcParams['figure.figsize']``
        is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_pcs_features.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.scatter``

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    n_pcs = adata.uns['pca']['n_pcs']
    loadings = adata.uns['pca']['PCs']
    n_features = loadings.shape[0]

    fig_nrow = int(np.ceil(n_pcs/fig_ncol))
    fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05, fig_size[1]*fig_nrow))

    for i in range(n_pcs):
        ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
        # Absolute loadings of component i, largest first.
        ranked = np.sort(np.abs(loadings[:, i]))[::-1]
        if log:
            ranked = np.log(ranked)
        ax_i.scatter(range(n_features),
                     ranked,
                     s=size,
                     **kwargs)
        if show_cutoff:
            # Only look up the selected features when the cutoff is drawn,
            # so the plot also works when that entry is absent.
            n_ft_selected_i = len(adata.uns['pca']['features'][f'pc_{i}'])
            ax_i.axvline(n_ft_selected_i, ls='--', c='red')
        ax_i.set_xlabel('Features')
        ax_i.set_ylabel('Loadings')
        ax_i.locator_params(axis='x', nbins=3)
        ax_i.locator_params(axis='y', nbins=5)
        ax_i.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
        ax_i.set_title(f'PC {i}')

    fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        # Closed only after saving; unsaved figures remain open.
        plt.close(fig)
[docs]
def variable_genes(adata,
                   show_texts=False,
                   n_texts=10,
                   size=8,
                   text_size=10,
                   pad=1.08,
                   w_pad=None,
                   h_pad=None,
                   fig_size=(4, 4),
                   save_fig=None,
                   fig_path=None,
                   fig_name='plot_variable_genes.pdf',
                   **kwargs):
    """Plot highly variable genes.

    Scatter plot of ``adata.var['variances_norm']`` against
    ``adata.var['means']`` (log-scaled x axis). Genes flagged in
    ``adata.var['highly_variable']`` are drawn in a separate color.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    show_texts : `bool`, optional (default: False)
        If True, text annotation will be shown.
    n_texts : `int`, optional (default: 10)
        The number of texts to plot. The genes with the largest
        `variances_norm` are labelled.
    size : `int`, optional (default: 8)
        The marker size
    text_size : `int`, optional (default: 10)
        The text size
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (4,4))
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_variable_genes.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.scatter``

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    means = adata.var['means']
    variances_norm = adata.var['variances_norm']
    mask = adata.var['highly_variable']
    genes = adata.var_names

    fig, ax = plt.subplots(figsize=fig_size)
    ax.scatter(means[~mask],
               variances_norm[~mask],
               s=size,
               c='#1F2433',
               **kwargs)
    ax.scatter(means[mask],
               variances_norm[mask],
               s=size,
               c='#ce3746',
               **kwargs)
    ax.set_xscale(value='log')

    if show_texts:
        # Reverse first, then truncate: same ranking as `[-n:][::-1]` for
        # n > 0, but n_texts=0 yields no labels instead of all of them.
        ids = variances_norm.values.argsort()[::-1][:n_texts]
        # `ids` are positions, so index the Series with `.iloc`; plain
        # `series[i]` is a label lookup on a gene-name index.
        texts = [ax.text(means.iloc[i],
                         variances_norm.iloc[i],
                         genes[i],
                         fontdict={'family': 'serif',
                                   'color': 'black',
                                   'weight': 'normal',
                                   'size': text_size})
                 for i in ids]
        if texts:
            adjust_text(texts,
                        ax=ax,
                        arrowprops=dict(arrowstyle='-', color='black'))

    ax.set_xlabel('average expression')
    ax.set_ylabel('standardized variance')
    ax.locator_params(axis='x', tight=True)
    ax.locator_params(axis='y', tight=True)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        # Closed only after saving; unsaved figures remain open.
        plt.close(fig)
def _hue_is_discrete(values):
    """Return True when `values` holds string or categorical data."""
    return (is_string_dtype(values)
            or isinstance(values.dtype, pd.api.types.CategoricalDtype))


def _scatterplot2d(df,
                   x,
                   y,
                   list_hue=None,
                   hue_palette=None,
                   drawing_order='sorted',
                   dict_drawing_order=None,
                   size=8,
                   show_texts=False,
                   texts=None,
                   text_size=10,
                   text_expand=(1.05, 1.2),
                   fig_size=None,
                   fig_ncol=3,
                   fig_legend_ncol=1,
                   fig_legend_order=None,
                   vmin=None,
                   vmax=None,
                   alpha=0.8,
                   pad=1.08,
                   w_pad=None,
                   h_pad=None,
                   save_fig=None,
                   fig_path=None,
                   fig_name='scatterplot2d.pdf',
                   copy=False,
                   **kwargs):
    """2d scatter plot

    Draws one panel per entry of `list_hue` (a single uncolored panel when
    no hue is given). String/categorical hues are drawn with
    ``sns.scatterplot`` and a legend; all other hues are drawn with
    ``Axes.scatter`` and a colorbar.

    Parameters
    ----------
    df: `pd.DataFrame`
        Input data structure of shape (n_samples, n_features).
    x: `str`
        Variable in `df` that specify positions on the x axis.
    y: `str`
        Variable in `df` that specify positions on the y axis.
    list_hue: `list`, optional (default: None)
        A list of variables that will produce points with different colors.
        None or an empty list gives a single uncolored panel.
    hue_palette: `dict`, optional (default: None)
        Palettes keyed by hue name; only used for string/categorical hues.
    drawing_order: `str` (default: 'sorted')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    dict_drawing_order: `dict`, optional (default: None)
        Per-hue override of `drawing_order`.
    size: `int` (default: 8)
        Point size.
    show_texts : `bool`, optional (default: False)
        If True (and `texts` is given), text annotation will be shown.
    texts: `list` optional (default: None)
        Index labels of `df` to annotate.
    text_size : `int`, optional (default: 10)
        The text size.
    text_expand : `tuple`, optional (default: (1.05, 1.2))
        Passed to ``adjust_text`` as `expand_text`, `expand_points` and
        `expand_objects`.
    fig_size: `tuple`, optional (default: None)
        Size of a single panel. If None, ``mpl.rcParams['figure.figsize']``
        is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 0.8)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'scatterplot2d.pdf')
        if save_fig is True, specify figure name.
    copy: `bool`, optional (default: False)
        If True, return the list of created axes.
    **kwargs: `dict`, optional
        Forwarded to ``sns.scatterplot`` / ``Axes.scatter``.

    Returns
    -------
    list of axes if `copy` is True, otherwise None

    Raises
    ------
    ValueError
        If an entry of `list_hue` is not a column of `df`.
    TypeError
        If `fig_legend_order` is given but is not a dictionary.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    list_ax = []
    # An empty hue list is treated like "no hue"; it would otherwise lead
    # to a zero-column grid and a division by zero below.
    if list_hue is None or len(list_hue) == 0:
        list_hue = [None]
    else:
        for hue in list_hue:
            if hue not in df.columns:
                raise ValueError(f"could not find {hue}")
        if hue_palette is None:
            hue_palette = dict()
        assert isinstance(hue_palette, dict), "`hue_palette` must be dict"
        legend_order = {hue: np.unique(df[hue]) for hue in list_hue
                        if _hue_is_discrete(df[hue])}
        if fig_legend_order is not None:
            if not isinstance(fig_legend_order, dict):
                raise TypeError("`fig_legend_order` must be a dictionary")
            for hue in fig_legend_order:
                if hue in legend_order:
                    legend_order[hue] = fig_legend_order[hue]
                else:
                    print(f"{hue} is ignored for ordering legend labels "
                          "due to incorrect name or data type")
        if dict_drawing_order is None:
            dict_drawing_order = dict()
        assert drawing_order in ['sorted', 'random', 'original'],\
            "`drawing_order` must be one of ['original', 'sorted', 'random']"

    if len(list_hue) < fig_ncol:
        fig_ncol = len(list_hue)
    fig_nrow = int(np.ceil(len(list_hue)/fig_ncol))
    fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05, fig_size[1]*fig_nrow))

    for i, hue in enumerate(list_hue):
        ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
        if hue is None:
            sns.scatterplot(ax=ax_i,
                            x=x,
                            y=y,
                            data=df,
                            alpha=alpha,
                            linewidth=0,
                            s=size,
                            **kwargs)
        else:
            # Row order decides which points end up on top.
            param_drawing_order = dict_drawing_order.get(hue, drawing_order)
            if param_drawing_order == 'sorted':
                df_updated = df.sort_values(by=hue)
            elif param_drawing_order == 'random':
                df_updated = df.sample(frac=1, random_state=100)
            else:
                df_updated = df

            if _hue_is_discrete(df[hue]):
                sns.scatterplot(ax=ax_i,
                                x=x,
                                y=y,
                                hue=hue,
                                hue_order=legend_order[hue],
                                data=df_updated,
                                alpha=alpha,
                                linewidth=0,
                                palette=hue_palette.get(hue),
                                s=size,
                                **kwargs)
                ax_i.legend(bbox_to_anchor=(1, 0.5),
                            loc='center left',
                            ncol=fig_legend_ncol,
                            frameon=False,
                            )
            else:
                vmin_i = df[hue].min() if vmin is None else vmin
                vmax_i = df[hue].max() if vmax is None else vmax
                sc_i = ax_i.scatter(df_updated[x],
                                    df_updated[y],
                                    c=df_updated[hue],
                                    vmin=vmin_i,
                                    vmax=vmax_i,
                                    alpha=alpha,
                                    s=size,
                                    **kwargs)
                cbar = plt.colorbar(sc_i,
                                    ax=ax_i,
                                    pad=0.01,
                                    fraction=0.05,
                                    aspect=40)
                cbar.solids.set_edgecolor("face")
                cbar.ax.locator_params(nbins=5)

        if show_texts and texts is not None:
            # Draw on this panel explicitly rather than on whatever pyplot
            # currently considers the active axes.
            plt_texts = [ax_i.text(df[x][t],
                                   df[y][t],
                                   t,
                                   fontdict={'family': 'serif',
                                             'color': 'black',
                                             'weight': 'normal',
                                             'size': text_size})
                         for t in texts]
            adjust_text(plt_texts,
                        ax=ax_i,
                        expand_text=text_expand,
                        expand_points=text_expand,
                        expand_objects=text_expand,
                        arrowprops=dict(arrowstyle='->', color='black'))

        ax_i.set_xlabel(x)
        ax_i.set_ylabel(y)
        ax_i.locator_params(axis='x', nbins=5)
        ax_i.locator_params(axis='y', nbins=5)
        ax_i.tick_params(axis="both", labelbottom=True, labelleft=True)
        ax_i.set_title(hue)
        list_ax.append(ax_i)

    fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        # Closed only after saving; unsaved figures remain open.
        plt.close(fig)
    if copy:
        return list_ax
# def _scatterplot2d_plotly(df,
# x,
# y,
# list_hue=None,
# hue_palette=None,
# drawing_order='sorted',
# fig_size=None,
# fig_ncol=3,
# fig_legend_order=None,
# alpha=0.8,
# save_fig=None,
# fig_path=None,
# **kwargs):
# """interactive 2d scatter plot by Plotly
# Parameters
# ----------
# data: `pd.DataFrame`
# Input data structure of shape (n_samples, n_features).
# x: `str`
# Variable in `data` that specify positions on the x axis.
# y: `str`
# Variable in `data` that specify positions on the x axis.
# list_hue: `str`, optional (default: None)
# A list of variables that will produce points with different colors.
# drawing_order: `str` (default: 'sorted')
# The order in which values are plotted, This can be
# one of the following values
# - 'original': plot points in the same order as in input dataframe
# - 'sorted' : plot points with higher values on top.
# - 'random' : plot points in a random order
# fig_size: `tuple`, optional (default: None)
# figure size.
# fig_ncol: `int`, optional (default: 3)
# the number of columns of the figure panel
# fig_legend_order: `dict`,optional (default: None)
# Specified order for the appearance of the annotation keys.
# Only valid for categorical/string variable
# e.g. fig_legend_order = {'ann1':['a','b','c'],
# 'ann2':['aa','bb','cc']}
# fig_legend_ncol: `int`, optional (default: 1)
# The number of columns that the legend has.
# vmin,vmax: `float`, optional (default: None)
# The min and max values are used to normalize continuous values.
# If None, the respective min and max of continuous values is used.
# alpha: `float`, optional (default: 0.8)
# 0.0 transparent through 1.0 opaque
# pad: `float`, optional (default: 1.08)
# Padding between the figure edge and the edges of subplots,
# as a fraction of the font size.
# h_pad, w_pad: `float`, optional (default: None)
# Padding (height/width) between edges of adjacent subplots,
# as a fraction of the font size. Defaults to pad.
# save_fig: `bool`, optional (default: False)
# if True,save the figure.
# fig_path: `str`, optional (default: None)
# If save_fig is True, specify figure path.
# fig_name: `str`, optional (default: 'scatterplot2d.pdf')
# if save_fig is True, specify figure name.
# Returns
# -------
# None
# """
# if fig_size is None:
# fig_size = mpl.rcParams['figure.figsize']
# if save_fig is None:
# save_fig = settings.save_fig
# if fig_path is None:
# fig_path = os.path.join(settings.workdir, 'figures')
# for hue in list_hue:
# if(hue not in df.columns):
# raise ValueError(f"could not find {hue} in `df.columns`")
# if hue_palette is None:
# hue_palette = dict()
# assert isinstance(hue_palette, dict), "`hue_palette` must be dict"
# assert drawing_order in ['sorted', 'random', 'original'],\
# "`drawing_order` must be one of ['original', 'sorted', 'random']"
# legend_order = {hue: np.unique(df[hue]) for hue in list_hue
# if (is_string_dtype(df[hue])
# or is_categorical_dtype(df[hue]))}
# if(fig_legend_order is not None):
# if(not isinstance(fig_legend_order, dict)):
# raise TypeError("`fig_legend_order` must be a dictionary")
# for hue in fig_legend_order.keys():
# if(hue in legend_order.keys()):
# legend_order[hue] = fig_legend_order[hue]
# else:
# print(f"{hue} is ignored for ordering legend labels"
# "due to incorrect name or data type")
# if(len(list_hue) < fig_ncol):
# fig_ncol = len(list_hue)
# fig_nrow = int(np.ceil(len(list_hue)/fig_ncol))
# fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
# fig_size[1]*fig_nrow))
# for hue in list_hue:
# if hue in hue_palette.keys():
# palette = hue_palette[hue]
# else:
# palette = None
# if drawing_order == 'sorted':
# df_updated = df.sort_values(by=hue)
# elif drawing_order == 'random':
# df_updated = df.sample(frac=1, random_state=100)
# else:
# df_updated = df
# fig = px.scatter(df_updated,
# x=x,
# y=y,
# color=hue,
# opacity=alpha,
# color_continuous_scale=px.colors.sequential.Viridis,
# color_discrete_map=palette,
# **kwargs)
# fig.update_layout(legend={'itemsizing': 'constant'},
# width=500,
# height=500)
# fig.show(renderer="notebook")
# TO-DO add 3D plot
[docs]
def umap(adata,
         color=None,
         dict_palette=None,
         n_components=None,
         size=8,
         drawing_order='sorted',
         dict_drawing_order=None,
         show_texts=False,
         texts=None,
         text_size=10,
         text_expand=(1.05, 1.2),
         fig_size=None,
         fig_ncol=3,
         fig_legend_ncol=1,
         fig_legend_order=None,
         vmin=None,
         vmax=None,
         alpha=1,
         pad=1.08,
         w_pad=None,
         h_pad=None,
         save_fig=None,
         fig_path=None,
         fig_name='plot_umap.pdf',
         plolty=False,
         **kwargs):
    """ Plot coordinates in UMAP

    Plots the first two columns of ``adata.obsm['X_umap']``, one panel per
    entry of `color`. Palettes of non-numeric annotations are read from and
    cached in ``adata.uns['color']['<name>_color']``.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix with ``adata.obsm['X_umap']``.
    color: `list`, optional (default: None)
        A list of variables that will produce points with different colors.
        Each entry is looked up in `adata.obs_keys()` first and then in
        `adata.var_names`. A single name may be given as a string.
        e.g. color = ['anno1', 'anno2']
    dict_palette: `dict`,optional (default: None)
        A dictionary of palettes for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_palette = {'ann1': {},'ann2': {}}
        The passed dictionary itself is not modified.
    n_components: `int`, optional (default: None)
        Must be 2 or 3; capped at the number of available UMAP dimensions.
        Only validated here: the plot always uses UMAP1 and UMAP2.
    drawing_order: `str` (default: 'sorted')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    dict_drawing_order: `dict`,optional (default: None)
        A dictionary of drawing_order for different variables in `color`.
        e.g. dict_drawing_order = {'ann1': 'original','ann2': 'sorted'}
    size: `int` (default: 8)
        Point size.
    show_texts : `bool`, optional (default: False)
        If True, text annotation will be shown.
    text_size : `int`, optional (default: 10)
        The text size.
    texts: `list` optional (default: None)
        Point names to plot.
    text_expand : `tuple`, optional (default: (1.05, 1.2))
        Two multipliers (x, y) by which to expand the bounding box of texts
        when repelling them from each other/points/other objects.
    fig_size: `tuple`, optional (default: None)
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 1)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, `settings.save_fig` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under `settings.workdir` is used.
    fig_name: `str`, optional (default: 'plot_umap.pdf')
        if save_fig is True, specify figure name.
    plolty: `bool`, optional (default: False)
        If True (and `color` is given), only a message is printed because
        the Plotly backend is not available.
    **kwargs: `dict`, optional
        Forwarded to the underlying scatter plot helper.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `n_components` is not 2 or 3, or if an entry of `color` is found
        neither in `adata.obs_keys()` nor in `adata.var_names`.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    if n_components is None:
        n_components = min(3, adata.obsm['X_umap'].shape[1])
    if n_components not in [2, 3]:
        raise ValueError("n_components should be 2 or 3")
    if n_components > adata.obsm['X_umap'].shape[1]:
        print(f"`n_components` is greater than the available dimension.\n"
              f"It is corrected to {adata.obsm['X_umap'].shape[1]}")
        n_components = adata.obsm['X_umap'].shape[1]

    # Work on a shallow copy so palettes added below do not leak into the
    # dictionary owned by the caller.
    dict_palette = dict() if dict_palette is None else dict(dict_palette)

    # A bare string would otherwise be split into single characters by
    # the de-duplication step below.
    if isinstance(color, str):
        color = [color]

    df_plot = pd.DataFrame(index=adata.obs.index,
                           data=adata.obsm['X_umap'],
                           columns=['UMAP'+str(x+1) for x in
                                    range(adata.obsm['X_umap'].shape[1])])

    if color is None or len(color) == 0:
        _scatterplot2d(df_plot,
                       x='UMAP1',
                       y='UMAP2',
                       drawing_order=drawing_order,
                       size=size,
                       show_texts=show_texts,
                       text_size=text_size,
                       texts=texts,
                       text_expand=text_expand,
                       fig_size=fig_size,
                       alpha=alpha,
                       pad=pad,
                       w_pad=w_pad,
                       h_pad=h_pad,
                       save_fig=save_fig,
                       fig_path=fig_path,
                       fig_name=fig_name,
                       **kwargs)
        return

    color = list(dict.fromkeys(color))  # remove duplicate keys
    for ann in color:
        if ann in adata.obs_keys():
            df_plot[ann] = adata.obs[ann]
            if is_numeric_dtype(df_plot[ann]):
                continue
            # Non-numeric annotation: resolve its palette and keep
            # `adata.uns['color']` in sync.
            if 'color' not in adata.uns_keys():
                adata.uns['color'] = dict()
            stored = adata.uns['color']
            key_color = ann+'_color'
            if ann not in dict_palette:
                if (key_color in stored) \
                        and \
                        (all(np.isin(np.unique(df_plot[ann]),
                                     list(stored[key_color].keys())))):
                    # The stored palette covers every observed value.
                    dict_palette[ann] = stored[key_color]
                else:
                    dict_palette[ann] = generate_palette(adata.obs[ann])
                    stored[key_color] = dict_palette[ann].copy()
            elif key_color not in stored:
                stored[key_color] = dict_palette[ann].copy()
        elif ann in adata.var_names:
            df_plot[ann] = adata.obs_vector(ann)
        else:
            raise ValueError(f"could not find {ann} in `adata.obs.columns`"
                             " and `adata.var_names`")

    if plolty:
        print('Plotly is not supported yet.')
    else:
        _scatterplot2d(df_plot,
                       x='UMAP1',
                       y='UMAP2',
                       list_hue=color,
                       hue_palette=dict_palette,
                       drawing_order=drawing_order,
                       dict_drawing_order=dict_drawing_order,
                       size=size,
                       show_texts=show_texts,
                       text_size=text_size,
                       text_expand=text_expand,
                       texts=texts,
                       fig_size=fig_size,
                       fig_ncol=fig_ncol,
                       fig_legend_ncol=fig_legend_ncol,
                       fig_legend_order=fig_legend_order,
                       vmin=vmin,
                       vmax=vmax,
                       alpha=alpha,
                       pad=pad,
                       w_pad=w_pad,
                       h_pad=h_pad,
                       save_fig=save_fig,
                       fig_path=fig_path,
                       fig_name=fig_name,
                       **kwargs)
[docs]
def discretize(adata,
               kde=None,
               fig_size=(6, 6),
               pad=1.08,
               w_pad=None,
               h_pad=None,
               save_fig=None,
               fig_path=None,
               fig_name='plot_discretize.pdf',
               **kwargs):
    """Plot original data VS discretized data

    Draws two stacked histograms from the pre-binned values stored in
    ``adata.uns['disc']``: the top panel ('Original') uses ``hist_edges`` /
    ``hist_count`` and the bottom panel ('Discretized') uses ``bin_edges`` /
    ``bin_count``.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix. ``adata.uns['disc']`` must exist.
    kde : `bool`, optional (default: None)
        Deprecated and ignored. Passing any value other than None only
        triggers a ``DeprecationWarning``.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (6, 6))
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    save_fig: `bool`, optional (default: None)
        if True, save the figure (and close it afterwards).
        If None, ``settings.save_fig`` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under ``settings.workdir`` is used.
    fig_name: `str`, optional (default: 'plot_discretize.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to both histogram calls.
        ``linewidth`` defaults to 0 for the top panel unless given here.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If ``'disc'`` is not a key of ``adata.uns``.
    """
    # Warn exactly once; the version matches the one stated in the docstring.
    if kde is not None:
        warnings.warn("kde is not supported as of v0.2",
                      DeprecationWarning,
                      stacklevel=2)

    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    # Explicit raise (rather than `assert`) so the check survives `python -O`
    # while callers still see the same exception type.
    if 'disc' not in adata.uns_keys():
        raise AssertionError("please run `si.tl.discretize()` first")

    disc = adata.uns['disc']
    hist_edges = disc['hist_edges']
    hist_count = disc['hist_count']
    bin_edges = disc['bin_edges']
    bin_count = disc['bin_count']

    fig, axes = plt.subplots(2, 1, figsize=fig_size)
    ax_original, ax_discretized = axes[0], axes[1]

    # The data are already binned: feed the left edges as samples and the
    # stored counts as weights so each bar reproduces its stored count.
    # `linewidth=0` is only a default; a caller-supplied value wins instead
    # of raising "got multiple values for keyword argument".
    ax_original.hist(hist_edges[:-1],
                     hist_edges,
                     weights=hist_count,
                     **{'linewidth': 0, **kwargs})
    ax_discretized.hist(bin_edges[:-1],
                        bin_edges,
                        weights=bin_count,
                        **kwargs)

    for axis, title in ((ax_original, 'Original'),
                        (ax_discretized, 'Discretized')):
        axis.set_xlabel('Non-zero values')
        axis.set_ylabel('Count')
        axis.set_title(title)

    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)
[docs]
def node_similarity(adata,
                    bins=20,
                    log=True,
                    show_cutoff=True,
                    cutoff=None,
                    n_edges=5000,
                    fig_size=(5, 3),
                    pad=1.08,
                    w_pad=None,
                    h_pad=None,
                    save_fig=None,
                    fig_path=None,
                    fig_name='plot_node_similarity.pdf',
                    ):
    """Plot similarity scores of nodes

    Draws a histogram of the stored values of ``adata.X`` (``adata.X.data``)
    and, optionally, a vertical dashed line at the score cutoff.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
        NOTE(review): ``adata.X`` is accessed through ``.data`` / ``.size``,
        so it is presumably a scipy sparse matrix - confirm with callers.
    bins : `int`, optional (default: 20)
        The number of equal-width bins in the given range for histogram plot.
    log : `bool`, optional (default: True)
        If True, log scale will be used for y axis.
    show_cutoff : `bool`, optional (default: True)
        If True, cutoff on scores will be shown and the number of entries
        strictly above it is printed.
    cutoff: `int`, optional (default: None)
        Cutoff used to select edges. If None, it is derived from `n_edges`.
    n_edges: `int`, optional (default: 5000)
        The number of edges to select. Only used when `cutoff` is None.
        Values outside ``[1, number of stored scores]`` are clamped.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (5, 3))
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    save_fig: `bool`, optional (default: None)
        if True, save the figure (and close it afterwards).
        If None, ``settings.save_fig`` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under ``settings.workdir`` is used.
    fig_name: `str`, optional (default: 'plot_node_similarity.pdf')
        if `save_fig` is True, specify figure name.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `show_cutoff` is True and both `cutoff` and `n_edges` are None,
        or if a cutoff has to be derived but no scores are stored.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    mat_sim = adata.X
    fig, ax = plt.subplots(1, 1, figsize=fig_size)
    ax.hist(mat_sim.data, bins=bins)
    if log:
        ax.set_yscale('log')

    if show_cutoff:
        if cutoff is None:
            if n_edges is None:
                raise ValueError('"cutoff" or "n_edges" has to be specified')
            n_stored = mat_sim.size
            if n_stored == 0:
                raise ValueError('no similarity scores are stored in '
                                 '`adata.X`; cannot derive a cutoff')
            # Clamp the partition index: without this, n_edges larger than
            # the number of stored scores yields a negative index that
            # silently wraps around, and n_edges == 0 indexes out of range.
            kth = min(max(n_stored - n_edges, 0), n_stored - 1)
            cutoff = np.partition(mat_sim.data, kth)[kth]
        selected_rows, _, _ = find(mat_sim > cutoff)
        print(f'#selected edges: {len(selected_rows)}')
        # Draw on this figure's axes rather than the implicit current axes.
        ax.axvline(cutoff, ls='--', c='red')

    ax.set_xlabel('similarity scores')
    ax.set_title('Node similarity')
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)
[docs]
def svd_nodes(adata,
              comp1=1,
              comp2=2,
              color=None,
              dict_palette=None,
              cutoff=None,
              n_edges=5000,
              size=8,
              drawing_order='random',
              dict_drawing_order=None,
              fig_size=(4, 4),
              fig_ncol=3,
              fig_legend_ncol=1,
              fig_legend_order=None,
              alpha=1,
              pad=1.08,
              w_pad=None,
              h_pad=None,
              save_fig=None,
              fig_path=None,
              fig_name='plot_svd_nodes.pdf',
              vmin=None,
              vmax=None,
              **kwargs):
    """Plot SVD coordinates

    Combines ``adata.obsm['svd']`` (labelled group 'ref') and
    ``adata.varm['svd']`` (labelled group 'query') into one scatter plot.
    Two panels are always drawn first: 'group' and 'selected', where
    'selected' marks the rows/columns of ``adata.X`` that have at least one
    entry strictly greater than the cutoff. Panels for `color` follow.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    comp1: `int`, optional (default: 1)
        Component used for x axis (1-based).
    comp2: `int`, optional (default: 2)
        Component used for y axis (1-based).
    color: `list`, optional (default: None)
        A list of variables that will produce points with different colors.
        Each one must be present in both `adata.obs` and `adata.var`.
        e.g. color = ['anno1', 'anno2']
    cutoff: `int`, optional (default: None)
        Cutoff used to select edges. If None, it is derived from `n_edges`.
    n_edges: `int`, optional (default: 5000)
        The number of edges to select. Only used when `cutoff` is None.
        Values outside ``[1, number of stored scores]`` are clamped.
    dict_palette: `dict`,optional (default: None)
        A dictionary of palettes for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_palette = {'ann1': {},'ann2': {}}
        The passed dictionary is not modified. Entries for 'group' and
        'selected' are always replaced by fixed built-in palettes.
    drawing_order: `str` (default: 'random')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    dict_drawing_order: `dict`,optional (default: None)
        A dictionary of drawing_order for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_drawing_order = {'ann1': 'original','ann2': 'sorted'}
        The passed dictionary is not modified. 'group' is always drawn
        'random' and 'selected' always 'sorted'.
    size: `int` (default: 8)
        Point size.
    fig_size: `tuple`, optional (default: (4, 4))
        figure size. If None, ``mpl.rcParams['figure.figsize']`` is used.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 1)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: None)
        if True,save the figure. If None, ``settings.save_fig`` is used.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
        If None, the 'figures' folder under ``settings.workdir`` is used.
    fig_name: `str`, optional (default: 'plot_svd_nodes.pdf')
        if save_fig is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``_scatterplot2d``.

    Returns
    -------
    None
        As a side effect, palettes are recorded in ``adata.uns['color']``.

    Raises
    ------
    ValueError
        If both `cutoff` and `n_edges` are None, if a cutoff has to be
        derived but no scores are stored, or if an entry of `color` is not
        present in both `adata.obs` and `adata.var`.
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    mat_sim = adata.X
    if cutoff is None:
        if n_edges is None:
            raise ValueError('"cutoff" or "n_edges" has to be specified')
        n_stored = mat_sim.size
        if n_stored == 0:
            raise ValueError('no similarity scores are stored in '
                             '`adata.X`; cannot derive a cutoff')
        # Clamp the partition index: without this, n_edges larger than the
        # number of stored scores yields a negative index that silently
        # wraps around, and n_edges == 0 indexes out of range.
        kth = min(max(n_stored - n_edges, 0), n_stored - 1)
        cutoff = np.partition(mat_sim.data, kth)[kth]
    id_x, id_y, _ = find(mat_sim > cutoff)

    dims = [comp1 - 1, comp2 - 1]  # components are 1-based for the caller
    axis_labels = [f'Dim {comp1}', f'Dim {comp2}']

    df_plot_ref = pd.DataFrame(data=adata.obsm['svd'][:, dims],
                               index=adata.obs.index,
                               columns=axis_labels)
    df_plot_ref['group'] = 'ref'
    df_plot_ref['selected'] = 'no'
    df_plot_ref.loc[df_plot_ref.index[id_x], 'selected'] = 'yes'

    df_plot_query = pd.DataFrame(data=adata.varm['svd'][:, dims],
                                 index=adata.var.index,
                                 columns=axis_labels)
    df_plot_query['group'] = 'query'
    df_plot_query['selected'] = 'no'
    df_plot_query.loc[df_plot_query.index[id_y], 'selected'] = 'yes'

    df_plot = pd.concat([df_plot_ref, df_plot_query], axis=0)

    # Work on shallow copies so the caller's dictionaries are not mutated.
    dict_palette = dict() if dict_palette is None else dict(dict_palette)
    dict_palette['group'] = {'query': '#4c72b0', 'ref': '#dd8452'}
    dict_palette['selected'] = {'yes': '#000000', 'no': '#D4D3D3'}
    dict_drawing_order = (dict() if dict_drawing_order is None
                          else dict(dict_drawing_order))
    dict_drawing_order['group'] = 'random'
    dict_drawing_order['selected'] = 'sorted'

    # Merge into the existing palette store instead of replacing it, so
    # palettes saved earlier stay available to the reuse check below.
    stored_palettes = (dict(adata.uns['color'])
                       if 'color' in adata.uns_keys() else dict())
    stored_palettes.update(dict_palette)
    adata.uns['color'] = stored_palettes

    # dict.fromkeys drops duplicate keys while keeping first-seen order
    color = [] if color is None else list(dict.fromkeys(color))
    for ann in color:
        if not ((ann in adata.obs_keys()) and (ann in adata.var_keys())):
            raise ValueError(f"could not find {ann} in both "
                             "`adata.obs.columns`"
                             " and `adata.var.columns`")
        df_plot[ann] = pd.concat([adata.obs[ann], adata.var[ann]], axis=0)
        if is_numeric_dtype(df_plot[ann]):
            continue  # continuous values need no categorical palette
        palette_key = ann + '_color'
        if ann in dict_palette:
            # Caller supplied a palette: record it unless one is stored.
            if palette_key not in adata.uns['color']:
                adata.uns['color'][palette_key] = dict_palette[ann].copy()
            continue
        reusable = (
            palette_key in adata.uns['color']
            and all(np.isin(np.unique(df_plot[ann]),
                            list(adata.uns['color'][palette_key].keys()))))
        if reusable:
            dict_palette[ann] = adata.uns['color'][palette_key]
        else:
            # Build the palette from the combined obs+var column so that
            # categories occurring only in `adata.var` also get a colour.
            dict_palette[ann] = generate_palette(df_plot[ann])
            adata.uns['color'][palette_key] = dict_palette[ann].copy()

    color = ['group', 'selected'] + color
    _scatterplot2d(df_plot,
                   x=f'Dim {comp1}',
                   y=f'Dim {comp2}',
                   list_hue=color,
                   hue_palette=dict_palette,
                   drawing_order=drawing_order,
                   dict_drawing_order=dict_drawing_order,
                   size=size,
                   fig_size=fig_size,
                   fig_ncol=fig_ncol,
                   fig_legend_ncol=fig_legend_ncol,
                   fig_legend_order=fig_legend_order,
                   vmin=vmin,
                   vmax=vmax,
                   alpha=alpha,
                   pad=pad,
                   w_pad=w_pad,
                   h_pad=h_pad,
                   save_fig=save_fig,
                   fig_path=fig_path,
                   fig_name=fig_name,
                   **kwargs)