Source code for simba.plotting._plot

"""plotting functions"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from pandas.core.dtypes.common import is_numeric_dtype
import seaborn as sns
from adjustText import adjust_text
from pandas.api.types import (
    is_string_dtype,
    is_categorical_dtype,
)
from scipy.sparse import find
import warnings
# import plotly.express as px
# import plotly.graph_objects as go


from .._settings import settings
from ._utils import (
    generate_palette
)



[docs]
def violin(adata,
           list_obs=None,
           list_var=None,
           jitter=0.4,
           size=1,
           log=False,
           pad=1.08,
           w_pad=None,
           h_pad=3,
           fig_size=(3, 3),
           fig_ncol=3,
           save_fig=False,
           fig_path=None,
           fig_name='plot_violin.pdf',
           **kwargs):
    """Violin plot

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    list_obs : `list`, optional (default: None)
        A list of observations to plot.
    list_var : `list`, optional (default: None)
        A list of variables to plot.
    jitter : `float`, optional (default: 0.4)
        Amount of jitter to apply.
    size : `int`, optional (default: 1)
        The marker size
    log : `bool`, optional (default: False)
        If True, natural logarithm transformation will be performed.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_violin.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``sns.violinplot``

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')
    if list_obs is None:
        list_obs = []
    if list_var is None:
        list_var = []
    for obs in list_obs:
        if obs not in adata.obs_keys():
            raise ValueError(f"could not find {obs} in `adata.obs_keys()`")
    for var in list_var:
        if var not in adata.var_keys():
            raise ValueError(f"could not find {var} in `adata.var_keys()`")
    if len(list_obs) > 0:
        df_plot = adata.obs[list_obs].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        fig_nrow = int(np.ceil(len(list_obs)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                         fig_size[1]*fig_nrow))
        for i, obs in enumerate(list_obs):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.violinplot(ax=ax_i,
                           y=obs,
                           data=df_plot,
                           inner=None,
                           **kwargs)
            sns.stripplot(ax=ax_i,
                          y=obs,
                          data=df_plot,
                          color='black',
                          jitter=jitter,
                          s=size)

            ax_i.set_title(obs)
            ax_i.set_ylabel('')
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            if not os.path.exists(fig_path):
                os.makedirs(fig_path)
            plt.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            plt.close(fig)
    if len(list_var) > 0:
        df_plot = adata.var[list_var].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        fig_nrow = int(np.ceil(len(list_obs)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                                  fig_size[1]*fig_nrow))
        for i, var in enumerate(list_var):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.violinplot(ax=ax_i,
                           y=var,
                           data=df_plot,
                           inner=None,
                           **kwargs)
            sns.stripplot(ax=ax_i,
                          y=var,
                          data=df_plot,
                          color='black',
                          jitter=jitter,
                          s=size)

            ax_i.set_title(var)
            ax_i.set_ylabel('')
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            if not os.path.exists(fig_path):
                os.makedirs(fig_path)
            plt.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            plt.close(fig)




[docs]
def hist(adata,
         list_obs=None,
         list_var=None,
         kde=True,
         log=False,
         pad=1.08,
         w_pad=None,
         h_pad=3,
         fig_size=(3, 3),
         fig_ncol=3,
         save_fig=False,
         fig_path=None,
         fig_name='plot_histogram.pdf',
         **kwargs
         ):
    """histogram plot

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    list_obs : `list`, optional (default: None)
        A list of observations to plot.
    list_var : `list`, optional (default: None)
        A list of variables to plot.
    kde : `bool`, optional (default: True)
        If True, compute a kernel density estimate to smooth the distribution
        and show on the plot
    log : `bool`, optional (default: False)
        If True, natural logarithm transformation will be performed.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_violin.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``sns.histplot``

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')
    if list_obs is None:
        list_obs = []
    if list_var is None:
        list_var = []
    for obs in list_obs:
        if obs not in adata.obs_keys():
            raise ValueError(f"could not find {obs} in `adata.obs_keys()`")
    for var in list_var:
        if var not in adata.var_keys():
            raise ValueError(f"could not find {var} in `adata.var_keys()`")

    if len(list_obs) > 0:
        df_plot = adata.obs[list_obs].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        fig_nrow = int(np.ceil(len(list_obs)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                                  fig_size[1]*fig_nrow))
        for i, obs in enumerate(list_obs):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.histplot(ax=ax_i,
                         x=obs,
                         data=df_plot,
                         kde=kde,
                         **kwargs)
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            if not os.path.exists(fig_path):
                os.makedirs(fig_path)
            plt.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            plt.close(fig)
    if len(list_var) > 0:
        df_plot = adata.var[list_var].copy()
        if log:
            df_plot = pd.DataFrame(data=np.log1p(df_plot.values),
                                   index=df_plot.index,
                                   columns=df_plot.columns)
        fig_nrow = int(np.ceil(len(list_obs)/fig_ncol))
        fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
                                  fig_size[1]*fig_nrow))
        for i, var in enumerate(list_var):
            ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
            sns.histplot(ax=ax_i,
                         x=var,
                         data=df_plot,
                         kde=kde,
                         **kwargs)
            ax_i.locator_params(axis='y', nbins=6)
            ax_i.tick_params(axis="y", pad=-2)
            ax_i.spines['right'].set_visible(False)
            ax_i.spines['top'].set_visible(False)
        plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
        if save_fig:
            if not os.path.exists(fig_path):
                os.makedirs(fig_path)
            plt.savefig(os.path.join(fig_path, fig_name),
                        pad_inches=1,
                        bbox_inches='tight')
            plt.close(fig)




[docs]
def pca_variance_ratio(adata,
                       log=True,
                       show_cutoff=True,
                       fig_size=(4, 4),
                       save_fig=None,
                       fig_path=None,
                       fig_name='plot_variance_ratio.pdf',
                       pad=1.08,
                       w_pad=None,
                       h_pad=None,
                       **kwargs):
    """Plot the variance ratio.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    log : `bool`, optional (default: True)
        If True, variance_ratio will be log-transformed.
    show_cutoff : `bool`, optional (default: True)
        If True, cutoff on `n_pcs` will be shown
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        figure size.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_variance_ratio.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.plot``

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    n_components = len(adata.uns['pca']['variance_ratio'])

    fig = plt.figure(figsize=fig_size)
    if log:
        plt.plot(range(n_components),
                 np.log(adata.uns['pca']['variance_ratio']),
                 **kwargs)
    else:
        plt.plot(range(n_components),
                 adata.uns['pca']['variance_ratio'],
                 **kwargs)
    if show_cutoff:
        n_pcs = adata.uns['pca']['n_pcs']
        print(f'the number of selected PC is: {n_pcs}')
        plt.axvline(n_pcs, ls='--', c='red')
    plt.xlabel('Principal Component')
    plt.ylabel('Variance Ratio')
    plt.locator_params(axis='x', nbins=5)
    plt.locator_params(axis='y', nbins=5)
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        plt.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)




[docs]
def pcs_features(adata,
                 log=False,
                 size=3,
                 show_cutoff=True,
                 pad=1.08,
                 w_pad=None,
                 h_pad=None,
                 fig_size=(3, 3),
                 fig_ncol=3,
                 save_fig=None,
                 fig_path=None,
                 fig_name='plot_pcs_features.pdf',
                 **kwargs):
    """Plot features that contribute to the top PCs.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    log : `bool`, optional (default: True)
        If True, variance_ratio will be log-transformed.
    show_cutoff : `bool`, optional (default: True)
        If True, cutoff on `n_pcs` will be shown
    size : `int`, optional (default: 3)
        The marker size
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_pcs_features.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.scatter``

    Returns
    -------
    None
    """

    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    n_pcs = adata.uns['pca']['n_pcs']
    n_features = adata.uns['pca']['PCs'].shape[0]

    fig_nrow = int(np.ceil(n_pcs/fig_ncol))
    fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05, fig_size[1]*fig_nrow))

    for i in range(n_pcs):
        ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
        if log:
            ax_i.scatter(range(n_features),
                         np.log(np.sort(
                             np.abs(adata.uns['pca']['PCs'][:, i],))[::-1]),
                         s=size,
                         **kwargs)
        else:
            ax_i.scatter(range(n_features),
                         np.sort(
                             np.abs(adata.uns['pca']['PCs'][:, i],))[::-1],
                         s=size,
                         **kwargs)
        n_ft_selected_i = len(adata.uns['pca']['features'][f'pc_{i}'])
        if show_cutoff:
            ax_i.axvline(n_ft_selected_i, ls='--', c='red')
        ax_i.set_xlabel('Feautures')
        ax_i.set_ylabel('Loadings')
        ax_i.locator_params(axis='x', nbins=3)
        ax_i.locator_params(axis='y', nbins=5)
        ax_i.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
        ax_i.set_title(f'PC {i}')
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        plt.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)




[docs]
def variable_genes(adata,
                   show_texts=False,
                   n_texts=10,
                   size=8,
                   text_size=10,
                   pad=1.08,
                   w_pad=None,
                   h_pad=None,
                   fig_size=(4, 4),
                   save_fig=None,
                   fig_path=None,
                   fig_name='plot_variable_genes.pdf',
                   **kwargs):
    """Plot highly variable genes.

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    show_texts : `bool`, optional (default: False)
        If True, text annotation will be shown.
    n_texts : `int`, optional (default: 10)
        The number of texts to plot.
    size : `int`, optional (default: 8)
        The marker size
    text_size : `int`, optional (default: 10)
        The text size
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (3,3))
        figure size.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_variable_genes.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.scatter``

    Returns
    -------
    None
    """

    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    means = adata.var['means']
    variances_norm = adata.var['variances_norm']
    mask = adata.var['highly_variable']
    genes = adata.var_names

    fig, ax = plt.subplots(figsize=fig_size)
    ax.scatter(means[~mask],
               variances_norm[~mask],
               s=size,
               c='#1F2433',
               **kwargs)
    ax.scatter(means[mask],
               variances_norm[mask],
               s=size,
               c='#ce3746',
               **kwargs)
    ax.set_xscale(value='log')

    if show_texts:
        ids = variances_norm.values.argsort()[-n_texts:][::-1]
        texts = [plt.text(means[i], variances_norm[i], genes[i],
                 fontdict={'family': 'serif',
                           'color': 'black',
                           'weight': 'normal',
                           'size': text_size})
                 for i in ids]
        adjust_text(texts,
                    arrowprops=dict(arrowstyle='-', color='black'))

    ax.set_xlabel('average expression')
    ax.set_ylabel('standardized variance')
    ax.locator_params(axis='x', tight=True)
    ax.locator_params(axis='y', tight=True)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)



def _scatterplot2d(df,
                   x,
                   y,
                   list_hue=None,
                   hue_palette=None,
                   drawing_order='sorted',
                   dict_drawing_order=None,
                   size=8,
                   show_texts=False,
                   texts=None,
                   text_size=10,
                   text_expand=(1.05, 1.2),
                   fig_size=None,
                   fig_ncol=3,
                   fig_legend_ncol=1,
                   fig_legend_order=None,
                   vmin=None,
                   vmax=None,
                   alpha=0.8,
                   pad=1.08,
                   w_pad=None,
                   h_pad=None,
                   save_fig=None,
                   fig_path=None,
                   fig_name='scatterplot2d.pdf',
                   copy=False,
                   **kwargs):
    """2d scatter plot

    Parameters
    ----------
    data: `pd.DataFrame`
        Input data structure of shape (n_samples, n_features).
    x: `str`
        Variable in `data` that specify positions on the x axis.
    y: `str`
        Variable in `data` that specify positions on the x axis.
    list_hue: `str`, optional (default: None)
        A list of variables that will produce points with different colors.
    drawing_order: `str` (default: 'sorted')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    fig_size: `tuple`, optional (default: None)
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 0.8)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'scatterplot2d.pdf')
        if save_fig is True, specify figure name.
    Returns
    -------
    None
    """

    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    list_ax = list()
    if list_hue is None:
        list_hue = [None]
    else:
        for hue in list_hue:
            if hue not in df.columns:
                raise ValueError(f"could not find {hue}")
        if hue_palette is None:
            hue_palette = dict()
        assert isinstance(hue_palette, dict), "`hue_palette` must be dict"
        legend_order = {hue: np.unique(df[hue]) for hue in list_hue
                        if (is_string_dtype(df[hue])
                            or is_categorical_dtype(df[hue]))}
        if fig_legend_order is not None:
            if not isinstance(fig_legend_order, dict):
                raise TypeError("`fig_legend_order` must be a dictionary")
            for hue in fig_legend_order.keys():
                if hue in legend_order.keys():
                    legend_order[hue] = fig_legend_order[hue]
                else:
                    print(f"{hue} is ignored for ordering legend labels"
                          "due to incorrect name or data type")

    if dict_drawing_order is None:
        dict_drawing_order = dict()
    assert drawing_order in ['sorted', 'random', 'original'],\
        "`drawing_order` must be one of ['original', 'sorted', 'random']"

    if len(list_hue) < fig_ncol:
        fig_ncol = len(list_hue)
    fig_nrow = int(np.ceil(len(list_hue)/fig_ncol))
    fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05, fig_size[1]*fig_nrow))
    for i, hue in enumerate(list_hue):
        ax_i = fig.add_subplot(fig_nrow, fig_ncol, i+1)
        if hue is None:
            sc_i = sns.scatterplot(ax=ax_i,
                                   x=x,
                                   y=y,
                                   data=df,
                                   alpha=alpha,
                                   linewidth=0,
                                   s=size,
                                   **kwargs)
        else:
            if is_string_dtype(df[hue]) or is_categorical_dtype(df[hue]):
                if hue in hue_palette.keys():
                    palette = hue_palette[hue]
                else:
                    palette = None
                if hue in dict_drawing_order.keys():
                    param_drawing_order = dict_drawing_order[hue]
                else:
                    param_drawing_order = drawing_order
                if param_drawing_order == 'sorted':
                    df_updated = df.sort_values(by=hue)
                elif param_drawing_order == 'random':
                    df_updated = df.sample(frac=1, random_state=100)
                else:
                    df_updated = df
                sc_i = sns.scatterplot(ax=ax_i,
                                       x=x,
                                       y=y,
                                       hue=hue,
                                       hue_order=legend_order[hue],
                                       data=df_updated,
                                       alpha=alpha,
                                       linewidth=0,
                                       palette=palette,
                                       s=size,
                                       **kwargs)
                ax_i.legend(bbox_to_anchor=(1, 0.5),
                            loc='center left',
                            ncol=fig_legend_ncol,
                            frameon=False,
                            )
            else:
                vmin_i = df[hue].min() if vmin is None else vmin
                vmax_i = df[hue].max() if vmax is None else vmax
                if hue in dict_drawing_order.keys():
                    param_drawing_order = dict_drawing_order[hue]
                else:
                    param_drawing_order = drawing_order
                if param_drawing_order == 'sorted':
                    df_updated = df.sort_values(by=hue)
                elif param_drawing_order == 'random':
                    df_updated = df.sample(frac=1, random_state=100)
                else:
                    df_updated = df
                sc_i = ax_i.scatter(df_updated[x],
                                    df_updated[y],
                                    c=df_updated[hue],
                                    vmin=vmin_i,
                                    vmax=vmax_i,
                                    alpha=alpha,
                                    s=size,
                                    **kwargs)
                cbar = plt.colorbar(sc_i,
                                    ax=ax_i,
                                    pad=0.01,
                                    fraction=0.05,
                                    aspect=40)
                cbar.solids.set_edgecolor("face")
                cbar.ax.locator_params(nbins=5)
        if show_texts:
            if texts is not None:
                plt_texts = [plt.text(df[x][t],
                                      df[y][t],
                                      t,
                                      fontdict={'family': 'serif',
                                                'color': 'black',
                                                'weight': 'normal',
                                                'size': text_size})
                             for t in texts]
                adjust_text(plt_texts,
                            expand_text=text_expand,
                            expand_points=text_expand,
                            expand_objects=text_expand,
                            arrowprops=dict(arrowstyle='->', color='black'))
        ax_i.set_xlabel(x)
        ax_i.set_ylabel(y)
        ax_i.locator_params(axis='x', nbins=5)
        ax_i.locator_params(axis='y', nbins=5)
        ax_i.tick_params(axis="both", labelbottom=True, labelleft=True)
        ax_i.set_title(hue)
        list_ax.append(ax_i)
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        plt.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)
    if copy:
        return list_ax


# def _scatterplot2d_plotly(df,
#                           x,
#                           y,
#                           list_hue=None,
#                           hue_palette=None,
#                           drawing_order='sorted',
#                           fig_size=None,
#                           fig_ncol=3,
#                           fig_legend_order=None,
#                           alpha=0.8,
#                           save_fig=None,
#                           fig_path=None,
#                           **kwargs):
#     """interactive 2d scatter plot by Plotly

#     Parameters
#     ----------
#     data: `pd.DataFrame`
#         Input data structure of shape (n_samples, n_features).
#     x: `str`
#         Variable in `data` that specify positions on the x axis.
#     y: `str`
#         Variable in `data` that specify positions on the x axis.
#     list_hue: `str`, optional (default: None)
#         A list of variables that will produce points with different colors.
#     drawing_order: `str` (default: 'sorted')
#         The order in which values are plotted, This can be
#         one of the following values
#         - 'original': plot points in the same order as in input dataframe
#         - 'sorted' : plot points with higher values on top.
#         - 'random' : plot points in a random order
#     fig_size: `tuple`, optional (default: None)
#         figure size.
#     fig_ncol: `int`, optional (default: 3)
#         the number of columns of the figure panel
#     fig_legend_order: `dict`,optional (default: None)
#         Specified order for the appearance of the annotation keys.
#         Only valid for categorical/string variable
#         e.g. fig_legend_order = {'ann1':['a','b','c'],
#                                  'ann2':['aa','bb','cc']}
#     fig_legend_ncol: `int`, optional (default: 1)
#         The number of columns that the legend has.
#     vmin,vmax: `float`, optional (default: None)
#         The min and max values are used to normalize continuous values.
#         If None, the respective min and max of continuous values is used.
#     alpha: `float`, optional (default: 0.8)
#         0.0 transparent through 1.0 opaque
#     pad: `float`, optional (default: 1.08)
#         Padding between the figure edge and the edges of subplots,
#         as a fraction of the font size.
#     h_pad, w_pad: `float`, optional (default: None)
#         Padding (height/width) between edges of adjacent subplots,
#         as a fraction of the font size. Defaults to pad.
#     save_fig: `bool`, optional (default: False)
#         if True,save the figure.
#     fig_path: `str`, optional (default: None)
#         If save_fig is True, specify figure path.
#     fig_name: `str`, optional (default: 'scatterplot2d.pdf')
#         if save_fig is True, specify figure name.
#     Returns
#     -------
#     None
#     """

#     if fig_size is None:
#         fig_size = mpl.rcParams['figure.figsize']
#     if save_fig is None:
#         save_fig = settings.save_fig
#     if fig_path is None:
#         fig_path = os.path.join(settings.workdir, 'figures')

#     for hue in list_hue:
#         if(hue not in df.columns):
#             raise ValueError(f"could not find {hue} in `df.columns`")
#     if hue_palette is None:
#         hue_palette = dict()
#     assert isinstance(hue_palette, dict), "`hue_palette` must be dict"

#     assert drawing_order in ['sorted', 'random', 'original'],\
#         "`drawing_order` must be one of ['original', 'sorted', 'random']"

#     legend_order = {hue: np.unique(df[hue]) for hue in list_hue
#                     if (is_string_dtype(df[hue])
#                         or is_categorical_dtype(df[hue]))}
#     if(fig_legend_order is not None):
#         if(not isinstance(fig_legend_order, dict)):
#             raise TypeError("`fig_legend_order` must be a dictionary")
#         for hue in fig_legend_order.keys():
#             if(hue in legend_order.keys()):
#                 legend_order[hue] = fig_legend_order[hue]
#             else:
#                 print(f"{hue} is ignored for ordering legend labels"
#                       "due to incorrect name or data type")

#     if(len(list_hue) < fig_ncol):
#         fig_ncol = len(list_hue)
#     fig_nrow = int(np.ceil(len(list_hue)/fig_ncol))
#     fig = plt.figure(figsize=(fig_size[0]*fig_ncol*1.05,
#                      fig_size[1]*fig_nrow))
#     for hue in list_hue:
#         if hue in hue_palette.keys():
#             palette = hue_palette[hue]
#         else:
#             palette = None
#         if drawing_order == 'sorted':
#             df_updated = df.sort_values(by=hue)
#         elif drawing_order == 'random':
#             df_updated = df.sample(frac=1, random_state=100)
#         else:
#             df_updated = df
#         fig = px.scatter(df_updated,
#                          x=x,
#                          y=y,
#                          color=hue,
#                          opacity=alpha,
#                          color_continuous_scale=px.colors.sequential.Viridis,
#                          color_discrete_map=palette,
#                          **kwargs)
#         fig.update_layout(legend={'itemsizing': 'constant'},
#                           width=500,
#                           height=500)
#         fig.show(renderer="notebook")


# TO-DO add 3D plot

[docs]
def umap(adata,
         color=None,
         dict_palette=None,
         n_components=None,
         size=8,
         drawing_order='sorted',
         dict_drawing_order=None,
         show_texts=False,
         texts=None,
         text_size=10,
         text_expand=(1.05, 1.2),
         fig_size=None,
         fig_ncol=3,
         fig_legend_ncol=1,
         fig_legend_order=None,
         vmin=None,
         vmax=None,
         alpha=1,
         pad=1.08,
         w_pad=None,
         h_pad=None,
         save_fig=None,
         fig_path=None,
         fig_name='plot_umap.pdf',
         plolty=False,
         **kwargs):
    """ Plot coordinates in UMAP

    Parameters
    ----------
    data: `pd.DataFrame`
        Input data structure of shape (n_samples, n_features).
    x: `str`
        Variable in `data` that specify positions on the x axis.
    y: `str`
        Variable in `data` that specify positions on the x axis.
    color: `list`, optional (default: None)
        A list of variables that will produce points with different colors.
        e.g. color = ['anno1', 'anno2']
    dict_palette: `dict`,optional (default: None)
        A dictionary of palettes for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_palette = {'ann1': {},'ann2': {}}
    drawing_order: `str` (default: 'sorted')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    dict_drawing_order: `dict`,optional (default: None)
        A dictionary of drawing_order for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_drawing_order = {'ann1': 'original','ann2': 'sorted'}
    size: `int` (default: 8)
        Point size.
    show_texts : `bool`, optional (default: False)
        If True, text annotation will be shown.
    text_size : `int`, optional (default: 10)
        The text size.
    texts: `list` optional (default: None)
        Point names to plot.
    text_expand : `tuple`, optional (default: (1.05, 1.2))
        Two multipliers (x, y) by which to expand the bounding box of texts
        when repelling them from each other/points/other objects.
    fig_size: `tuple`, optional (default: None)
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 0.8)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_umap.pdf')
        if save_fig is True, specify figure name.
    Returns
    -------
    None
    """

    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    if n_components is None:
        n_components = min(3, adata.obsm['X_umap'].shape[1])
    if n_components not in [2, 3]:
        raise ValueError("n_components should be 2 or 3")
    if n_components > adata.obsm['X_umap'].shape[1]:
        print(f"`n_components` is greater than the available dimension.\n"
              f"It is corrected to {adata.obsm['X_umap'].shape[1]}")
        n_components = adata.obsm['X_umap'].shape[1]

    if dict_palette is None:
        dict_palette = dict()
    df_plot = pd.DataFrame(index=adata.obs.index,
                           data=adata.obsm['X_umap'],
                           columns=['UMAP'+str(x+1) for x in
                                    range(adata.obsm['X_umap'].shape[1])])
    if color is None:
        _scatterplot2d(df_plot,
                       x='UMAP1',
                       y='UMAP2',
                       drawing_order=drawing_order,
                       size=size,
                       show_texts=show_texts,
                       text_size=text_size,
                       texts=texts,
                       text_expand=text_expand,
                       fig_size=fig_size,
                       alpha=alpha,
                       pad=pad,
                       w_pad=w_pad,
                       h_pad=h_pad,
                       save_fig=save_fig,
                       fig_path=fig_path,
                       fig_name=fig_name,
                       **kwargs)
    else:
        color = list(dict.fromkeys(color))  # remove duplicate keys
        for ann in color:
            if ann in adata.obs_keys():
                df_plot[ann] = adata.obs[ann]
                if not is_numeric_dtype(df_plot[ann]):
                    if 'color' not in adata.uns_keys():
                        adata.uns['color'] = dict()

                    if ann not in dict_palette.keys():
                        if (ann+'_color' in adata.uns['color'].keys()) \
                            and \
                            (all(np.isin(np.unique(df_plot[ann]),
                                         list(adata.uns['color']
                                         [ann+'_color'].keys())))):
                            dict_palette[ann] = \
                                adata.uns['color'][ann+'_color']
                        else:
                            dict_palette[ann] = \
                                generate_palette(adata.obs[ann])
                            adata.uns['color'][ann+'_color'] = \
                                dict_palette[ann].copy()
                    else:
                        if ann+'_color' not in adata.uns['color'].keys():
                            adata.uns['color'][ann+'_color'] = \
                                dict_palette[ann].copy()

            elif ann in adata.var_names:
                df_plot[ann] = adata.obs_vector(ann)
            else:
                raise ValueError(f"could not find {ann} in `adata.obs.columns`"
                                 " and `adata.var_names`")
        if plolty:
            print('Plotly is not supported yet.')
            # _scatterplot2d_plotly(df_plot,
            #                       x='UMAP1',
            #                       y='UMAP2',
            #                       list_hue=color,
            #                       hue_palette=dict_palette,
            #                       drawing_order=drawing_order,
            #                       fig_size=fig_size,
            #                       fig_ncol=fig_ncol,
            #                       fig_legend_order=fig_legend_order,
            #                       alpha=alpha,
            #                       save_fig=save_fig,
            #                       fig_path=fig_path,
            #                       **kwargs)
        else:
            _scatterplot2d(df_plot,
                           x='UMAP1',
                           y='UMAP2',
                           list_hue=color,
                           hue_palette=dict_palette,
                           drawing_order=drawing_order,
                           dict_drawing_order=dict_drawing_order,
                           size=size,
                           show_texts=show_texts,
                           text_size=text_size,
                           text_expand=text_expand,
                           texts=texts,
                           fig_size=fig_size,
                           fig_ncol=fig_ncol,
                           fig_legend_ncol=fig_legend_ncol,
                           fig_legend_order=fig_legend_order,
                           vmin=vmin,
                           vmax=vmax,
                           alpha=alpha,
                           pad=pad,
                           w_pad=w_pad,
                           h_pad=h_pad,
                           save_fig=save_fig,
                           fig_path=fig_path,
                           fig_name=fig_name,
                           **kwargs)




[docs]
def discretize(adata,
               kde=None,
               fig_size=(6, 6),
               pad=1.08,
               w_pad=None,
               h_pad=None,
               save_fig=None,
               fig_path=None,
               fig_name='plot_discretize.pdf',
               **kwargs):
    """Plot original data VS discretized data

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    kde : `bool`, optional (default: None)
        If True, compute a kernel density estimate to smooth the distribution
        and show on the plot. Invalid as of v0.2.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (5,8))
        figure size.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_discretize.pdf')
        if `save_fig` is True, specify figure name.
    **kwargs: `dict`, optional
        Other keyword arguments are passed through to ``plt.hist()``

    Returns
    -------
    None
    """
    if kde is not None:
        warnings.warn("kde is not supported as of v0.2", DeprecationWarning)
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    assert 'disc' in adata.uns_keys(), \
        "please run `si.tl.discretize()` first"
    if kde is not None:
        warnings.warn("kde is no longer supported as of v1.1",
                      DeprecationWarning)

    hist_edges = adata.uns['disc']['hist_edges']
    hist_count = adata.uns['disc']['hist_count']
    bin_edges = adata.uns['disc']['bin_edges']
    bin_count = adata.uns['disc']['bin_count']

    fig, ax = plt.subplots(2, 1, figsize=fig_size)
    _ = ax[0].hist(hist_edges[:-1],
                   hist_edges,
                   weights=hist_count,
                   linewidth=0,
                   **kwargs)
    _ = ax[1].hist(bin_edges[:-1],
                   bin_edges,
                   weights=bin_count,
                   **kwargs)
    ax[0].set_xlabel('Non-zero values')
    ax[0].set_ylabel('Count')
    ax[0].set_title('Original')
    ax[1].set_xlabel('Non-zero values')
    ax[1].set_ylabel('Count')
    ax[1].set_title('Discretized')
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        plt.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)




[docs]
def node_similarity(adata,
                    bins=20,
                    log=True,
                    show_cutoff=True,
                    cutoff=None,
                    n_edges=5000,
                    fig_size=(5, 3),
                    pad=1.08,
                    w_pad=None,
                    h_pad=None,
                    save_fig=None,
                    fig_path=None,
                    fig_name='plot_node_similarity.pdf',
                    ):
    """Plot similarity scores of nodes

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    bins : `int`, optional (default: 20)
        The number of equal-width bins in the given range for histogram plot.
    log : `bool`, optional (default: True)
        If True, log scale will be used for y axis.
    show_cutoff : `bool`, optional (default: True)
        If True, cutoff on scores will be shown
    cutoff: `int`, optional (default: None)
        Cutoff used to select edges
    n_edges: `int`, optional (default: 5000)
        The number of edges to select.
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    fig_size: `tuple`, optional (default: (5,8))
        figure size.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_node_similarity.pdf')
        if `save_fig` is True, specify figure name.

    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    mat_sim = adata.X

    fig, ax = plt.subplots(1, 1, figsize=fig_size)
    ax.hist(mat_sim.data, bins=bins)
    if log:
        ax.set_yscale('log')
    if show_cutoff:
        if cutoff is None:
            if n_edges is None:
                raise ValueError('"cutoff" or "n_edges" has to be specified')
            else:
                cutoff = \
                    np.partition(mat_sim.data,
                                 (mat_sim.size-n_edges))[mat_sim.size-n_edges]
        id_x, id_y, _ = find(mat_sim > cutoff)
        print(f'#selected edges: {len(id_x)}')
        plt.axvline(cutoff, ls='--', c='red')
    ax.set_xlabel('similariy scores')
    ax.set_title('Node similarity')
    plt.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad)
    if save_fig:
        if not os.path.exists(fig_path):
            os.makedirs(fig_path)
        fig.savefig(os.path.join(fig_path, fig_name),
                    pad_inches=1,
                    bbox_inches='tight')
        plt.close(fig)




[docs]
def svd_nodes(adata,
              comp1=1,
              comp2=2,
              color=None,
              dict_palette=None,
              cutoff=None,
              n_edges=5000,
              size=8,
              drawing_order='random',
              dict_drawing_order=None,
              fig_size=(4, 4),
              fig_ncol=3,
              fig_legend_ncol=1,
              fig_legend_order=None,
              alpha=1,
              pad=1.08,
              w_pad=None,
              h_pad=None,
              save_fig=None,
              fig_path=None,
              fig_name='plot_svd_nodes.pdf',
              vmin=None,
              vmax=None,
              **kwargs):
    """Plot SVD coordinates

    Parameters
    ----------
    adata : `Anndata`
        Annotated data matrix.
    comp1: `int`, optional (default: 1)
        Component used for x axis.
    comp2: `int`, optional (default: 2)
        Component used for y axis.
    color: `list`, optional (default: None)
        A list of variables that will produce points with different colors.
        e.g. color = ['anno1', 'anno2']
    cutoff: `int`, optional (default: None)
        Cutoff used to select edges
    n_edges: `int`, optional (default: 5000)
        The number of edges to select
    dict_palette: `dict`,optional (default: None)
        A dictionary of palettes for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_palette = {'ann1': {},'ann2': {}}
    drawing_order: `str` (default: 'random')
        The order in which values are plotted, This can be
        one of the following values
        - 'original': plot points in the same order as in input dataframe
        - 'sorted' : plot points with higher values on top.
        - 'random' : plot points in a random order
    dict_drawing_order: `dict`,optional (default: None)
        A dictionary of drawing_order for different variables in `color`.
        Only valid for categorical/string variables
        e.g. dict_drawing_order = {'ann1': 'original','ann2': 'sorted'}
    size: `int` (default: 8)
        Point size.
    fig_size: `tuple`, optional (default: (4, 4))
        figure size.
    fig_ncol: `int`, optional (default: 3)
        the number of columns of the figure panel
    fig_legend_order: `dict`,optional (default: None)
        Specified order for the appearance of the annotation keys.
        Only valid for categorical/string variable
        e.g. fig_legend_order = {'ann1':['a','b','c'],'ann2':['aa','bb','cc']}
    fig_legend_ncol: `int`, optional (default: 1)
        The number of columns that the legend has.
    vmin,vmax: `float`, optional (default: None)
        The min and max values are used to normalize continuous values.
        If None, the respective min and max of continuous values is used.
    alpha: `float`, optional (default: 1)
        0.0 transparent through 1.0 opaque
    pad: `float`, optional (default: 1.08)
        Padding between the figure edge and the edges of subplots,
        as a fraction of the font size.
    h_pad, w_pad: `float`, optional (default: None)
        Padding (height/width) between edges of adjacent subplots,
        as a fraction of the font size. Defaults to pad.
    save_fig: `bool`, optional (default: False)
        if True,save the figure.
    fig_path: `str`, optional (default: None)
        If save_fig is True, specify figure path.
    fig_name: `str`, optional (default: 'plot_umap.pdf')
        if save_fig is True, specify figure name.
    Returns
    -------
    None
    """
    if fig_size is None:
        fig_size = mpl.rcParams['figure.figsize']
    if save_fig is None:
        save_fig = settings.save_fig
    if fig_path is None:
        fig_path = os.path.join(settings.workdir, 'figures')

    mat_sim = adata.X
    if cutoff is None:
        if n_edges is None:
            raise ValueError('"cutoff" or "n_edges" has to be specified')
        else:
            cutoff = \
                np.partition(mat_sim.data,
                             (mat_sim.size-n_edges))[mat_sim.size-n_edges]
    id_x, id_y, _ = find(mat_sim > cutoff)

    X_cca_ref = adata.obsm['svd']
    X_cca_query = adata.varm['svd']

    df_plot_ref = pd.DataFrame(data=X_cca_ref[:, [comp1-1, comp2-1]],
                               index=adata.obs.index,
                               columns=[f'Dim {comp1}', f'Dim {comp2}'])
    df_plot_ref['group'] = 'ref'
    df_plot_ref['selected'] = 'no'
    df_plot_ref.loc[df_plot_ref.index[id_x], 'selected'] = 'yes'
    df_plot_query = pd.DataFrame(data=X_cca_query[:, [comp1-1, comp2-1]],
                                 index=adata.var.index,
                                 columns=[f'Dim {comp1}', f'Dim {comp2}'])
    df_plot_query['group'] = 'query'
    df_plot_query['selected'] = 'no'
    df_plot_query.loc[df_plot_query.index[id_y], 'selected'] = 'yes'

    df_plot = pd.concat([df_plot_ref, df_plot_query], axis=0)
    if dict_palette is None:
        dict_palette = dict()
    dict_palette['group'] = {'query': '#4c72b0', 'ref': '#dd8452'}
    dict_palette['selected'] = {'yes': '#000000', 'no': '#D4D3D3'}
    if dict_drawing_order is None:
        dict_drawing_order = dict()
    dict_drawing_order['group'] = 'random'
    dict_drawing_order['selected'] = 'sorted'

    adata.uns['color'] = dict_palette.copy()
    if color is None:
        color = []
    else:
        color = list(dict.fromkeys(color))  # remove duplicate keys
    for ann in color:
        if (ann in adata.obs_keys()) and (ann in adata.var_keys()):
            df_plot[ann] = pd.concat([adata.obs[ann], adata.var[ann]], axis=0)
            if not is_numeric_dtype(df_plot[ann]):
                if ann not in dict_palette.keys():
                    if (ann+'_color' in adata.uns['color'].keys()) \
                        and \
                        (all(np.isin(np.unique(df_plot[ann]),
                                     list(adata.uns['color']
                                     [ann+'_color'].keys())))):
                        dict_palette[ann] = \
                            adata.uns['color'][ann+'_color']
                    else:
                        dict_palette[ann] = \
                            generate_palette(adata.obs[ann])
                        adata.uns['color'][ann+'_color'] = \
                            dict_palette[ann].copy()
                else:
                    if ann+'_color' not in adata.uns['color'].keys():
                        adata.uns['color'][ann+'_color'] = \
                            dict_palette[ann].copy()
        else:
            raise ValueError(f"could not find {ann} in both "
                             "`adata.obs.columns`"
                             " and `adata.var.columns`")
    color = ['group', 'selected'] + color
    _scatterplot2d(df_plot,
                   x=f'Dim {comp1}',
                   y=f'Dim {comp2}',
                   list_hue=color,
                   hue_palette=dict_palette,
                   drawing_order=drawing_order,
                   dict_drawing_order=dict_drawing_order,
                   size=size,
                   fig_size=fig_size,
                   fig_ncol=fig_ncol,
                   fig_legend_ncol=fig_legend_ncol,
                   fig_legend_order=fig_legend_order,
                   vmin=vmin,
                   vmax=vmax,
                   alpha=alpha,
                   pad=pad,
                   w_pad=w_pad,
                   h_pad=h_pad,
                   save_fig=save_fig,
                   fig_path=fig_path,
                   fig_name=fig_name,
                   **kwargs)