Source code for simba.preprocessing._qc

"""Quality Control"""

import numpy as np
from scipy.sparse import (
    issparse,
    csr_matrix,
)
import re


[docs] def cal_qc(adata, expr_cutoff=1): """Calculate quality control metrics. Parameters ---------- adata: AnnData Annotated data matrix. expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {'rna','atac'},case insensitive Returns ------- updates `adata` with the following fields. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The total read count for a feature. n_samples: `pandas.Series` (`adata.var['n_samples']`,dtype `int`) The number of samples that express at least one feature. pct_samples: `pandas.Series` (`adata.var['pct_samples']`,dtype `float`) The percentage of samples that express at least one feature. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The total read count for a sample. n_features: `pandas.Series` (`adata.obs['n_features']`,dtype `int`) The number of features expressed in a sample. pct_features: `pandas.Series` (`adata.obs['pct_features']`,dtype `float`) The percentage of features expressed in a sample. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) n_counts = adata.X.sum(axis=0).A1 adata.var['n_counts'] = n_counts n_samples = (adata.X >= expr_cutoff).sum(axis=0).A1 adata.var['n_samples'] = n_samples adata.var['pct_samples'] = n_samples/adata.shape[0] n_counts = adata.X.sum(axis=1).A1 adata.obs['n_counts'] = n_counts n_features = (adata.X >= expr_cutoff).sum(axis=1).A1 adata.obs['n_features'] = n_features adata.obs['pct_features'] = n_features/adata.shape[1]
[docs] def cal_qc_rna(adata, expr_cutoff=1): """Calculate quality control metrics. Parameters ---------- adata: AnnData Annotated data matrix. expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {'rna','atac'},case insensitive Returns ------- updates `adata` with the following fields. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The total read count for a gene. n_cells: `pandas.Series` (`adata.var['n_cells']`,dtype `int`) The number of cells that express at least one gene. pct_cells: `pandas.Series` (`adata.var['pct_cells']`,dtype `float`) The percentage of cells that express at least one gene. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The total read count for a cell. n_genes: `pandas.Series` (`adata.obs['n_genes']`,dtype `int`) The number of genes expressed in a cell. pct_genes: `pandas.Series` (`adata.obs['pct_genes']`,dtype `float`) The percentage of genes expressed in a cell. pct_mt: `pandas.Series` (`adata.obs['pct_mt']`,dtype `float`) the percentage of mitochondrial gene reads for a cell. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) n_counts = adata.X.sum(axis=0).A1 adata.var['n_counts'] = n_counts n_cells = (adata.X >= expr_cutoff).sum(axis=0).A1 adata.var['n_cells'] = n_cells adata.var['pct_cells'] = n_cells/adata.shape[0] n_counts = adata.X.sum(axis=1).A1 adata.obs['n_counts'] = n_counts n_features = (adata.X >= expr_cutoff).sum(axis=1).A1 adata.obs['n_genes'] = n_features adata.obs['pct_genes'] = n_features/adata.shape[1] r = re.compile("^MT-", flags=re.IGNORECASE) mt_genes = list(filter(r.match, adata.var_names)) if len(mt_genes) > 0: n_counts_mt = adata[:, mt_genes].X.sum(axis=1).A1 adata.obs['pct_mt'] = n_counts_mt/n_counts else: adata.obs['pct_mt'] = 0
[docs] def cal_qc_atac(adata, expr_cutoff=1): """Calculate quality control metrics. Parameters ---------- adata: AnnData Annotated data matrix. expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {'rna','atac'},case insensitive Returns ------- updates `adata` with the following fields. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The total read count for a peak. n_cells: `pandas.Series` (`adata.var['n_cells']`,dtype `int`) The total number of cells which have at least one peak. pct_cells: `pandas.Series` (`adata.var['pct_cells']`,dtype `float`) The percentage of cells which have at least one peak. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The total read count for a cell. n_peaks: `pandas.Series` (`adata.obs['n_peaks']`,dtype `int`) The number of peaks that are present in each cell. pct_peaks: `pandas.Series` (`adata.obs['pct_peaks']`,dtype `int`) The percentage of peaks that are present in each cell. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) n_counts = adata.X.sum(axis=0).A1 adata.var['n_counts'] = n_counts n_cells = (adata.X >= expr_cutoff).sum(axis=0).A1 adata.var['n_cells'] = n_cells adata.var['pct_cells'] = n_cells/adata.shape[0] n_counts = adata.X.sum(axis=1).A1 adata.obs['n_counts'] = n_counts n_features = (adata.X >= expr_cutoff).sum(axis=1).A1 adata.obs['n_peaks'] = n_features adata.obs['pct_peaks'] = n_features/adata.shape[1]
[docs] def filter_samples(adata, min_n_features=1, max_n_features=None, min_pct_features=None, max_pct_features=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out samples based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_features: `int`, optional (default: None) Minimum number of features expressed min_pct_features: `float`, optional (default: None) Minimum percentage of features expressed min_n_counts: `int`, optional (default: None) Minimum number of read count for one cell expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the gene is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of cells that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The number of read count each cell has. n_genes: `pandas.Series` (`adata.obs['n_genes']`,dtype `int`) The number of genes expressed in each cell. pct_genes: `pandas.Series` (`adata.obs['pct_genes']`,dtype `float`) The percentage of genes expressed in each cell. n_peaks: `pandas.Series` (`adata.obs['n_peaks']`,dtype `int`) The number of peaks expressed in each cell. pct_peaks: `pandas.Series` (`adata.obs['pct_peaks']`,dtype `int`) The percentage of peaks expressed in each cell. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.obs_keys(): n_counts = adata.obs['n_counts'] else: n_counts = np.sum(adata.X, axis=1).A adata.obs['n_counts'] = n_counts if 'n_features' in adata.obs_keys(): n_features = adata.obs['n_features'] else: n_features = np.sum(adata.X >= expr_cutoff, axis=1).A1 adata.obs['n_features'] = n_features if 'pct_features' in adata.obs_keys(): pct_features = adata.obs['pct_features'] else: pct_features = n_features/adata.shape[1] adata.obs['pct_features'] = pct_features print('before filtering: ') print(f"{adata.shape[0]} samples, {adata.shape[1]} feature") if sum(list(map(lambda x: x is None, [min_n_features, min_pct_features, min_n_counts, max_n_features, max_pct_features, max_n_counts]))) == 6: print('No filtering') else: cell_subset = np.ones(len(adata.obs_names), dtype=bool) if min_n_features is not None: print('filter samples based on min_n_features') cell_subset = (n_features >= min_n_features) & cell_subset if max_n_features is not None: print('filter samples based on max_n_features') cell_subset = (n_features <= max_n_features) & cell_subset if min_pct_features is not None: print('filter samples based on min_pct_features') cell_subset = (pct_features >= min_pct_features) & cell_subset if max_pct_features is not None: print('filter samples based on max_pct_features') cell_subset = (pct_features <= max_pct_features) & cell_subset if min_n_counts is not None: print('filter samples based on min_n_counts') cell_subset = (n_counts >= min_n_counts) & cell_subset if max_n_counts is not None: print('filter samples based on max_n_counts') cell_subset = (n_counts <= max_n_counts) & cell_subset adata._inplace_subset_obs(cell_subset) print('after filtering out low-quality samples: ') print(f"{adata.shape[0]} samples, {adata.shape[1]} feature") return None
[docs] def filter_cells_rna(adata, min_n_genes=None, max_n_genes=None, min_pct_genes=None, max_pct_genes=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out cells for RNA-seq based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_genes: `int`, optional (default: None) Minimum number of genes expressed min_pct_genes: `float`, optional (default: None) Minimum percentage of genes expressed min_n_counts: `int`, optional (default: None) Minimum number of read count for one cell expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the gene is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of cells that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The number of read count each cell has. n_genes: `pandas.Series` (`adata.obs['n_genes']`,dtype `int`) The number of genes expressed in each cell. pct_genes: `pandas.Series` (`adata.obs['pct_genes']`,dtype `float`) The percentage of genes expressed in each cell. n_peaks: `pandas.Series` (`adata.obs['n_peaks']`,dtype `int`) The number of peaks expressed in each cell. pct_peaks: `pandas.Series` (`adata.obs['pct_peaks']`,dtype `int`) The percentage of peaks expressed in each cell. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.obs_keys(): n_counts = adata.obs['n_counts'] else: n_counts = np.sum(adata.X, axis=1).A1 adata.obs['n_counts'] = n_counts if 'n_genes' in adata.obs_keys(): n_genes = adata.obs['n_genes'] else: n_genes = np.sum(adata.X >= expr_cutoff, axis=1).A1 adata.obs['n_genes'] = n_genes if 'pct_genes' in adata.obs_keys(): pct_genes = adata.obs['pct_genes'] else: pct_genes = n_genes/adata.shape[1] adata.obs['pct_genes'] = pct_genes print('before filtering: ') print(f"{adata.shape[0]} cells, {adata.shape[1]} genes") if sum(list(map(lambda x: x is None, [min_n_genes, min_pct_genes, min_n_counts, max_n_genes, max_pct_genes, max_n_counts]))) == 6: print('No filtering') else: cell_subset = np.ones(len(adata.obs_names), dtype=bool) if min_n_genes is not None: print('filter cells based on min_n_genes') cell_subset = (n_genes >= min_n_genes) & cell_subset if max_n_genes is not None: print('filter cells based on max_n_genes') cell_subset = (n_genes <= max_n_genes) & cell_subset if min_pct_genes is not None: print('filter cells based on min_pct_genes') cell_subset = (pct_genes >= min_pct_genes) & cell_subset if max_pct_genes is not None: print('filter cells based on max_pct_genes') cell_subset = (pct_genes <= max_pct_genes) & cell_subset if min_n_counts is not None: print('filter cells based on min_n_counts') cell_subset = (n_counts >= min_n_counts) & cell_subset if max_n_counts is not None: print('filter cells based on max_n_counts') cell_subset = (n_counts <= max_n_counts) & cell_subset adata._inplace_subset_obs(cell_subset) print('after filtering out low-quality cells: ') print(f"{adata.shape[0]} cells, {adata.shape[1]} genes") return None
[docs] def filter_cells_atac(adata, min_n_peaks=None, max_n_peaks=None, min_pct_peaks=None, max_pct_peaks=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out cells for ATAC-seq based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_peaks: `int`, optional (default: None) Minimum number of peaks expressed min_pct_peaks: `float`, optional (default: None) Minimum percentage of peaks expressed min_n_counts: `int`, optional (default: None) Minimum number of read count for one cell expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the gene is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of cells that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.obs['n_counts']`,dtype `int`) The number of read count each cell has. n_genes: `pandas.Series` (`adata.obs['n_genes']`,dtype `int`) The number of genes expressed in each cell. pct_genes: `pandas.Series` (`adata.obs['pct_genes']`,dtype `float`) The percentage of genes expressed in each cell. n_peaks: `pandas.Series` (`adata.obs['n_peaks']`,dtype `int`) The number of peaks expressed in each cell. pct_peaks: `pandas.Series` (`adata.obs['pct_peaks']`,dtype `int`) The percentage of peaks expressed in each cell. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.obs_keys(): n_counts = adata.obs['n_counts'] else: n_counts = np.sum(adata.X, axis=1).A1 adata.obs['n_counts'] = n_counts if 'n_peaks' in adata.obs_keys(): n_peaks = adata.obs['n_peaks'] else: n_peaks = np.sum(adata.X >= expr_cutoff, axis=1).A1 adata.obs['n_peaks'] = n_peaks if 'pct_peaks' in adata.obs_keys(): pct_peaks = adata.obs['pct_peaks'] else: pct_peaks = n_peaks/adata.shape[1] adata.obs['pct_peaks'] = pct_peaks print('before filtering: ') print(f"{adata.shape[0]} cells, {adata.shape[1]} peaks") if sum(list(map(lambda x: x is None, [min_n_peaks, min_pct_peaks, min_n_counts, max_n_peaks, max_pct_peaks, max_n_counts]))) == 6: print('No filtering') else: cell_subset = np.ones(len(adata.obs_names), dtype=bool) if min_n_peaks is not None: print('filter cells based on min_n_peaks') cell_subset = (n_peaks >= min_n_peaks) & cell_subset if max_n_peaks is not None: print('filter cells based on max_n_peaks') cell_subset = (n_peaks <= max_n_peaks) & cell_subset if min_pct_peaks is not None: print('filter cells based on min_pct_peaks') cell_subset = (pct_peaks >= min_pct_peaks) & cell_subset if max_pct_peaks is not None: print('filter cells based on max_pct_peaks') cell_subset = (pct_peaks <= max_pct_peaks) & cell_subset if min_n_counts is not None: print('filter cells based on min_n_counts') cell_subset = (n_counts >= min_n_counts) & cell_subset if max_n_counts is not None: print('filter cells based on max_n_counts') cell_subset = (n_counts <= max_n_counts) & cell_subset adata._inplace_subset_obs(cell_subset) print('after filtering out low-quality cells: ') print(f"{adata.shape[0]} cells, {adata.shape[1]} peaks") return None
[docs] def filter_genes(adata, min_n_cells=3, max_n_cells=None, min_pct_cells=None, max_pct_cells=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out features based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_cells: `int`, optional (default: 5) Minimum number of cells expressing one feature min_pct_cells: `float`, optional (default: None) Minimum percentage of cells expressing one feature min_n_counts: `int`, optional (default: None) Minimum number of read count for one feature expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of features that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The number of read count each gene has. n_cells: `pandas.Series` (`adata.var['n_cells']`,dtype `int`) The number of cells in which each gene is expressed. pct_cells: `pandas.Series` (`adata.var['pct_cells']`,dtype `float`) The percentage of cells in which each gene is expressed. """ feature = 'genes' if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.var_keys(): n_counts = adata.var['n_counts'] else: n_counts = np.sum(adata.X, axis=0).A1 adata.var['n_counts'] = n_counts if 'n_cells' in adata.var_keys(): n_cells = adata.var['n_cells'] else: n_cells = np.sum(adata.X >= expr_cutoff, axis=0).A1 adata.var['n_cells'] = n_cells if 'pct_cells' in adata.var_keys(): pct_cells = adata.var['pct_cells'] else: pct_cells = n_cells/adata.shape[0] adata.var['pct_cells'] = pct_cells print('Before filtering: ') print(str(adata.shape[0])+' cells, ' + str(adata.shape[1])+' '+feature) if sum(list(map(lambda x: x is None, [min_n_cells, min_pct_cells, min_n_counts, max_n_cells, max_pct_cells, max_n_counts, ]))) == 6: print('No filtering') else: feature_subset = np.ones(len(adata.var_names), dtype=bool) if min_n_cells is not None: print('Filter '+feature+' based on min_n_cells') feature_subset = (n_cells >= min_n_cells) & feature_subset if max_n_cells is not None: print('Filter '+feature+' based on max_n_cells') feature_subset = (n_cells <= max_n_cells) & feature_subset if min_pct_cells is not None: print('Filter '+feature+' based on min_pct_cells') feature_subset = (pct_cells >= min_pct_cells) & feature_subset if max_pct_cells is not None: print('Filter '+feature+' based on max_pct_cells') feature_subset = (pct_cells <= max_pct_cells) & feature_subset if min_n_counts is not None: print('Filter '+feature+' based on min_n_counts') feature_subset = (n_counts >= min_n_counts) & feature_subset if max_n_counts is not None: print('Filter '+feature+' based on max_n_counts') feature_subset = (n_counts <= max_n_counts) & feature_subset adata._inplace_subset_var(feature_subset) print('After filtering out low-expressed '+feature+': ') print(str(adata.shape[0])+' cells, ' + str(adata.shape[1])+' '+feature) return None
[docs] def filter_peaks(adata, min_n_cells=5, max_n_cells=None, min_pct_cells=None, max_pct_cells=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out features based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_cells: `int`, optional (default: 5) Minimum number of cells expressing one feature min_pct_cells: `float`, optional (default: None) Minimum percentage of cells expressing one feature min_n_counts: `int`, optional (default: None) Minimum number of read count for one feature expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of features that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The number of read count each gene has. n_cells: `pandas.Series` (`adata.var['n_cells']`,dtype `int`) The number of cells in which each gene is expressed. pct_cells: `pandas.Series` (`adata.var['pct_cells']`,dtype `float`) The percentage of cells in which each gene is expressed. """ feature = 'peaks' if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.var_keys(): n_counts = adata.var['n_counts'] else: n_counts = np.sum(adata.X, axis=0).A1 adata.var['n_counts'] = n_counts if 'n_cells' in adata.var_keys(): n_cells = adata.var['n_cells'] else: n_cells = np.sum(adata.X >= expr_cutoff, axis=0).A1 adata.var['n_cells'] = n_cells if 'pct_cells' in adata.var_keys(): pct_cells = adata.var['pct_cells'] else: pct_cells = n_cells/adata.shape[0] adata.var['pct_cells'] = pct_cells print('Before filtering: ') print(str(adata.shape[0])+' cells, ' + str(adata.shape[1])+' '+feature) if sum(list(map(lambda x: x is None, [min_n_cells, min_pct_cells, min_n_counts, max_n_cells, max_pct_cells, max_n_counts, ]))) == 6: print('No filtering') else: feature_subset = np.ones(len(adata.var_names), dtype=bool) if min_n_cells is not None: print('Filter '+feature+' based on min_n_cells') feature_subset = (n_cells >= min_n_cells) & feature_subset if max_n_cells is not None: print('Filter '+feature+' based on max_n_cells') feature_subset = (n_cells <= max_n_cells) & feature_subset if min_pct_cells is not None: print('Filter '+feature+' based on min_pct_cells') feature_subset = (pct_cells >= min_pct_cells) & feature_subset if max_pct_cells is not None: print('Filter '+feature+' based on max_pct_cells') feature_subset = (pct_cells <= max_pct_cells) & feature_subset if min_n_counts is not None: print('Filter '+feature+' based on min_n_counts') feature_subset = (n_counts >= min_n_counts) & feature_subset if max_n_counts is not None: print('Filter '+feature+' based on max_n_counts') feature_subset = (n_counts <= max_n_counts) & feature_subset adata._inplace_subset_var(feature_subset) print('After filtering out low-expressed '+feature+': ') print(str(adata.shape[0])+' cells, ' + str(adata.shape[1])+' '+feature) return None
[docs] def filter_features(adata, min_n_samples=5, max_n_samples=None, min_pct_samples=None, max_pct_samples=None, min_n_counts=None, max_n_counts=None, expr_cutoff=1): """Filter out features based on different metrics. Parameters ---------- adata: AnnData Annotated data matrix. min_n_cells: `int`, optional (default: 5) Minimum number of cells expressing one feature min_pct_cells: `float`, optional (default: None) Minimum percentage of cells expressing one feature min_n_counts: `int`, optional (default: None) Minimum number of read count for one feature expr_cutoff: `float`, optional (default: 1) Expression cutoff. If greater than expr_cutoff,the feature is considered 'expressed' assay: `str`, optional (default: 'rna') Choose from {{'rna','atac'}},case insensitive Returns ------- updates `adata` with a subset of features that pass the filtering. updates `adata` with the following fields if cal_qc() was not performed. n_counts: `pandas.Series` (`adata.var['n_counts']`,dtype `int`) The number of read count each gene has. n_cells: `pandas.Series` (`adata.var['n_cells']`,dtype `int`) The number of cells in which each gene is expressed. pct_cells: `pandas.Series` (`adata.var['pct_cells']`,dtype `float`) The percentage of cells in which each gene is expressed. """ if not issparse(adata.X): adata.X = csr_matrix(adata.X) if 'n_counts' in adata.var_keys(): n_counts = adata.var['n_counts'] else: n_counts = np.sum(adata.X, axis=0).A1 adata.var['n_counts'] = n_counts if 'n_samples' in adata.var_keys(): n_samples = adata.var['n_samples'] else: n_samples = np.sum(adata.X >= expr_cutoff, axis=0).A1 adata.var['n_samples'] = n_samples if 'pct_samples' in adata.var_keys(): pct_samples = adata.var['pct_samples'] else: pct_samples = n_samples/adata.shape[0] adata.var['pct_samples'] = pct_samples print('Before filtering: ') print(f"{adata.shape[0]} samples, {adata.shape[1]} features") if sum(list(map(lambda x: x is None, [min_n_samples, min_pct_samples, min_n_counts, max_n_samples, max_pct_samples, max_n_counts, ]))) == 6: print('No filtering') else: feature_subset = np.ones(len(adata.var_names), dtype=bool) if min_n_samples is not None: print('Filter features based on min_n_samples') feature_subset = (n_samples >= min_n_samples) & feature_subset if max_n_samples is not None: print('Filter features based on max_n_samples') feature_subset = (n_samples <= max_n_samples) & feature_subset if min_pct_samples is not None: print('Filter features based on min_pct_samples') feature_subset = (pct_samples >= min_pct_samples) & feature_subset if max_pct_samples is not None: print('Filter features based on max_pct_samples') feature_subset = (pct_samples <= max_pct_samples) & feature_subset if min_n_counts is not None: print('Filter features based on min_n_counts') feature_subset = (n_counts >= min_n_counts) & feature_subset if max_n_counts is not None: print('Filter features based on max_n_counts') feature_subset = (n_counts <= max_n_counts) & feature_subset adata._inplace_subset_var(feature_subset) print('After filtering out low-expressed features: ') print(f"{adata.shape[0]} samples, {adata.shape[1]} features") return None