Source code for scanpex.tl._gene_score

from typing import Union

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.stats import zscore


def sigmoid(x: Union[np.ndarray, pd.Series]) -> np.ndarray:
    """
    Apply sigmoid transformation subsequently to the Z-score transformation.

    Note that Z-score calculation requires a distribution (array-like),
    not a single scalar value.

    Parameters
    ----------
    x : Union[np.ndarray, pd.Series]
        Array-like data to be converted (e.g., a column of adata.obs).

    Returns
    -------
    np.ndarray
        Sigmoid-transformed Z-score of x (values between 0 and 1).
    """
    return 1 / (1 + np.exp(-zscore(x, nan_policy="omit")))


[docs] def prob_genes(adata: ad.AnnData, gene_list: list, **kwargs) -> Union[None, ad.AnnData]: """ Calculate gene signature scores and transform them to [0, 1] probability using the sigmoid-Z-score transformation. This is a wrapper of `scanpy.tl.score_genes`. It adds a `{score_name}_prob` column to `adata.obs`. Parameters ---------- adata : ad.AnnData Annotated data matrix. gene_list : list The list of genes to be scored. **kwargs Additional arguments passed to `scanpy.tl.score_genes`. (e.g., `score_name`, `ctrl_size`, `random_state`) Returns ------- Union[None, ad.AnnData] Returns `None` if `copy=False` (default), otherwise returns a copy of `adata`. The result is stored in `adata.obs['{score_name}_prob']`. """ score_name = kwargs.get("score_name", "score") result = sc.tl.score_genes(adata=adata, gene_list=gene_list, **kwargs) data = result if result is not None else adata data.obs[f"{score_name}_prob"] = sigmoid(data.obs[score_name]) return result
[docs] def score_genes_cell_cycle( adata: ad.AnnData, s_genes: list, g2m_genes: list, **kwargs ) -> None: """ Assign cell cycle phases based on "S_score" and "G2M_score". This is a wrapper of `scanpex.tl.prob_genes` and `scanpy.tl.score_genes_cell_cycle`. It calculates raw scores, probability scores (0-1), and assigns phases. Parameters ---------- adata : ad.AnnData Annotated data matrix. s_genes : list The list of S-phase related genes to be scored. g2m_genes : list The list of G2/M-phase related genes to be scored. **kwargs Additional arguments passed to `scanpy.tl.score_genes` and `scanpy.tl.score_genes_cell_cycle`. Returns ------- None The following columns are added to `adata.obs`: - "S_score", "S_score_prob" - "G2M_score", "G2M_score_prob" - "phase" (with "G2M" label prettified to "G2/M") """ prob_genes( adata=adata, gene_list=s_genes, score_name="S_score", copy=False, **kwargs ) prob_genes( adata=adata, gene_list=g2m_genes, score_name="G2M_score", copy=False, **kwargs ) sc.tl.score_genes_cell_cycle( adata=adata, s_genes=s_genes, g2m_genes=g2m_genes, copy=False, **kwargs ) adata.obs["phase"] = adata.obs["phase"].map(lambda p: p if p != "G2M" else "G2/M")
[docs] def curate_phase(adata: ad.AnnData, thresh: float = 0) -> None: """ Manually curate the threshold for cell cycle phase assignment. This function re-assigns phases (G1, S, G2/M) based on existing "S_score" and "G2M_score" using a user-defined threshold. Parameters ---------- adata : ad.AnnData Annotated data matrix with pre-assigned cell cycle scores (e.g., via `scanpex.tl.score_genes_cell_cycle`). thresh : float, optional (default: 0) The threshold value for determination. Scores below this value are considered G1 phase. Returns ------- None `adata.obs["phase"]` is overwritten with the new assignments. """ # Create flags using decimal place value logic # 10s place: S score exceeds threshold s_phase = adata.obs["S_score"].map(lambda s: 10 if s >= thresh else 0) # 100s place: G2M score exceeds threshold g2m_phase = adata.obs["G2M_score"].map(lambda s: 100 if s >= thresh else 0) # 1s place: G2M score is strictly larger than S score g2m_is_larger = (adata.obs["G2M_score"] - adata.obs["S_score"]).map( lambda s: 1 if s > 0 else 0 ) # Calculate unique key for each state # 0 (000) -> G1 (Both < thresh) # 1 (001) -> G1 (Both < thresh, G2M > S) ... unlikely but logically G1 # 10 (010) -> S (S >= thresh > G2M) # 101 (101) -> G2/M (G2M >= thresh > S) # 110 (110) -> S (Both >= thresh, S >= G2M) # 111 (111) -> G2/M (Both >= thresh, G2M > S) adata.obs["phase"] = ( (s_phase + g2m_phase + g2m_is_larger) .map( lambda s: {0: "G1", 1: "G1", 10: "S", 101: "G2/M", 110: "S", 111: "G2/M"}[s] ) .astype("category") )