diff --git a/LICENSE b/LICENSE index 6f78d4a9fd..bd681d6077 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ BSD 3-Clause License +Copyright (c) 2025 scverse Copyright (c) 2017 F. Alexander Wolf, P. Angerer, Theis Lab All rights reserved. diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index e69de29bb2..b191ce6234 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1 @@ +"""ASV benchmark suite for scanpy.""" diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index 93bb4623f9..8ae43914a2 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -32,7 +32,7 @@ def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... @cache def _pbmc68k_reduced() -> AnnData: - """A small datasets with a dense `.X`""" + """A small dataset with a dense `.X`.""" # noqa: D401 adata = sc.datasets.pbmc68k_reduced() assert isinstance(adata.X, np.ndarray) assert not np.isfortran(adata.X) @@ -179,11 +179,10 @@ def get_count_dataset( def param_skipper( param_names: Sequence[str], params: tuple[Sequence[object], ...] ) -> ParamSkipper: - """Creates a decorator that will skip all combinations that contain any of the given parameters. + """Create a decorator that will skip all combinations that contain any of the given parameters. Examples -------- - >>> param_names = ["letters", "numbers"] >>> params = [["a", "b"], [3, 4, 5]] >>> skip_when = param_skipper(param_names, params) @@ -194,6 +193,7 @@ def param_skipper( >>> run_as_asv_benchmark(func) b 4 b 5 + """ def skip(**skipped: AbstractSet) -> Callable[[C], C]: diff --git a/benchmarks/benchmarks/preprocessing_counts.py b/benchmarks/benchmarks/preprocessing_counts.py index 587b441e1c..a2130eafc6 100644 --- a/benchmarks/benchmarks/preprocessing_counts.py +++ b/benchmarks/benchmarks/preprocessing_counts.py @@ -1,6 +1,6 @@ -""" -This module will benchmark preprocessing operations in Scanpy that run on counts -API documentation: https://scanpy.readthedocs.io/en/stable/api/preprocessing.html +"""Benchmark preprocessing operations in Scanpy that run on counts. + +API documentation: <https://scanpy.readthedocs.io/en/stable/api/preprocessing.html>. """ from __future__ import annotations @@ -23,7 +23,7 @@ def setup(dataset: Dataset, layer: KeyCount, *_): - """Setup global variables before each benchmark.""" + """Set up global variables before each benchmark.""" global adata, batch_key adata, batch_key = get_count_dataset(dataset, layer=layer) assert "log1p" not in adata.uns diff --git a/benchmarks/benchmarks/preprocessing_log.py b/benchmarks/benchmarks/preprocessing_log.py index 6a04d4c7c6..f211919c63 100644 --- a/benchmarks/benchmarks/preprocessing_log.py +++ b/benchmarks/benchmarks/preprocessing_log.py @@ -1,6 +1,6 @@ -""" -This module will benchmark preprocessing operations in Scanpy that run on log-transformed data -API documentation: https://scanpy.readthedocs.io/en/stable/api/preprocessing.html +"""Benchmark preprocessing operations in Scanpy that run on log-transformed data. + +API documentation: <https://scanpy.readthedocs.io/en/stable/api/preprocessing.html>.
""" from __future__ import annotations @@ -25,7 +25,7 @@ def setup(dataset: Dataset, layer: KeyX, *_): - """Setup global variables before each benchmark.""" + """Set up global variables before each benchmark.""" global adata, batch_key adata, batch_key = get_dataset(dataset, layer=layer) diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py index cd22ea718c..db63213755 100644 --- a/benchmarks/benchmarks/tools.py +++ b/benchmarks/benchmarks/tools.py @@ -1,6 +1,6 @@ -""" -This module will benchmark tool operations in Scanpy -API documentation: https://scanpy.readthedocs.io/en/stable/api/tools.html +"""Benchmark tool operations in Scanpy. + +API documentation: . """ from __future__ import annotations diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py index 9c57c4e746..595b480b83 100755 --- a/ci/scripts/min-deps.py +++ b/ci/scripts/min-deps.py @@ -3,6 +3,7 @@ # requires-python = ">=3.11" # dependencies = [ "packaging" ] # /// +"""Parse a pyproject.toml file and output a list of minimum dependencies.""" from __future__ import annotations @@ -25,14 +26,13 @@ def min_dep(req: Requirement) -> Requirement: - """ - Given a requirement, return the minimum version specifier. + """Given a requirement, return the minimum version specifier. Example ------- - >>> min_dep(Requirement("numpy>=1.0")) + """ req_name = req.name if req.extras: @@ -58,6 +58,7 @@ def min_dep(req: Requirement) -> Requirement: def extract_min_deps( dependencies: Iterable[Requirement], *, pyproject ) -> Generator[Requirement, None, None]: + """Extract minimum dependencies from a list of requirements.""" dependencies = deque(dependencies) # We'll be mutating this project_name = pyproject["project"]["name"] @@ -77,8 +78,8 @@ def extract_min_deps( class Args(argparse.Namespace): - """\ - Parse a pyproject.toml file and output a list of minimum dependencies. + """Parse a pyproject.toml file and output a list of minimum dependencies. + Output is optimized for `[uv] pip install` (see `-o`/`--output` for details). 
""" @@ -89,10 +90,12 @@ class Args(argparse.Namespace): @classmethod def parse(cls, argv: Sequence[str] | None = None) -> Self: + """Parse CLI arguments.""" return cls.parser().parse_args(argv, cls()) @classmethod def parser(cls) -> argparse.ArgumentParser: + """Construct a CLI argument parser.""" parser = argparse.ArgumentParser( prog="min-deps", description=cls.__doc__, @@ -134,10 +137,12 @@ def parser(cls) -> argparse.ArgumentParser: @cached_property def pyproject(self) -> dict[str, Any]: + """Return the parsed `pyproject.toml`.""" return tomllib.loads(self._path.read_text()) @cached_property def extras(self) -> AbstractSet[str]: + """Return the extras to install.""" if self._extras: if self._all_extras: sys.exit("Cannot specify both --extras and --all-extras") @@ -148,6 +153,7 @@ def extras(self) -> AbstractSet[str]: def main(argv: Sequence[str] | None = None) -> None: + """Run main entry point.""" args = Args.parse(argv) project_name = args.pyproject["project"]["name"] diff --git a/ci/scripts/towncrier_automation.py b/ci/scripts/towncrier_automation.py index 10a8b0c9dc..e6bddcc0bd 100755 --- a/ci/scripts/towncrier_automation.py +++ b/ci/scripts/towncrier_automation.py @@ -2,6 +2,7 @@ # /// script # dependencies = [ "towncrier", "packaging" ] # /// +"""Script to automate towncrier release note PRs.""" from __future__ import annotations @@ -16,11 +17,14 @@ class Args(argparse.Namespace): + """Command line arguments.""" + version: str dry_run: bool def parse_args(argv: Sequence[str] | None = None) -> Args: + """Construct a CLI argument parser.""" parser = argparse.ArgumentParser( prog="towncrier-automation", description=( @@ -52,6 +56,7 @@ def parse_args(argv: Sequence[str] | None = None) -> Args: def main(argv: Sequence[str] | None = None) -> None: + """Run main entry point.""" args = parse_args(argv) # Run towncrier diff --git a/docs/conf.py b/docs/conf.py index 60da6f797d..7a9f1acebd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,3 +1,5 @@ +"""Configuration for Scanpy’s Sphinx documentation.""" + from __future__ import annotations import sys @@ -33,7 +35,7 @@ project = "Scanpy" author = "Scanpy development team" repository_url = "https://github.com/scverse/scanpy" -copyright = f"{datetime.now():%Y}, the Scanpy development team" +copyright = f"{datetime.now():%Y}, scverse" version = scanpy.__version__.replace(".dirty", "") # Bumping the version updates all docs, so don't do that diff --git a/docs/extensions/canonical_tutorial.py b/docs/extensions/canonical_tutorial.py index b459fbb059..bfee050ec7 100644 --- a/docs/extensions/canonical_tutorial.py +++ b/docs/extensions/canonical_tutorial.py @@ -1,3 +1,5 @@ +"""Extension for a stub ``canonical-tutorial`` directive.""" + from __future__ import annotations from typing import TYPE_CHECKING @@ -16,9 +18,10 @@ class CanonicalTutorial(SphinxDirective): required_arguments: ClassVar = 1 - def run(self) -> list[nodes.Node]: + def run(self) -> list[nodes.Node]: # noqa: D102 return [] def setup(app: Sphinx) -> None: + """App setup hook.""" app.add_directive("canonical-tutorial", CanonicalTutorial) diff --git a/docs/extensions/debug_docstrings.py b/docs/extensions/debug_docstrings.py index 208a8793b0..11572002e9 100644 --- a/docs/extensions/debug_docstrings.py +++ b/docs/extensions/debug_docstrings.py @@ -1,3 +1,5 @@ +"""Extension for debugging docstrings.""" + # Just do the following to see the rst of a function: # rm ./_build/doctrees/api/generated/scanpy..doctree; DEBUG=1 make html from __future__ import annotations @@ -14,10 +16,12 @@ def 
pd_new(app, what, name, obj, options, lines): # noqa: PLR0917 + """Wrap ``sphinx.ext.napoleon._process_docstring``.""" _pd_orig(app, what, name, obj, options, lines) print(*lines, sep="\n") def setup(app: Sphinx): + """App setup hook.""" if os.environ.get("DEBUG") is not None: sphinx.ext.napoleon._process_docstring = pd_new diff --git a/docs/extensions/function_images.py b/docs/extensions/function_images.py index 7a5e6f6d99..8beaea02a8 100644 --- a/docs/extensions/function_images.py +++ b/docs/extensions/function_images.py @@ -1,4 +1,4 @@ -"""Images for plot functions""" +"""Images for plot functions.""" from __future__ import annotations @@ -15,6 +15,7 @@ def insert_function_images( # noqa: PLR0917 app: Sphinx, what: str, name: str, obj: Any, options: Options, lines: list[str] ): + """Insert images for plot functions.""" path = app.config.api_dir / f"{name}.png" if what != "function" or not path.is_file(): return @@ -27,5 +28,6 @@ def insert_function_images( # noqa: PLR0917 def setup(app: Sphinx): + """App setup hook.""" app.add_config_value("api_dir", Path(), "env") app.connect("autodoc-process-docstring", insert_function_images) diff --git a/docs/extensions/git_ref.py b/docs/extensions/git_ref.py index dd61f0cce0..04dfab493f 100644 --- a/docs/extensions/git_ref.py +++ b/docs/extensions/git_ref.py @@ -13,13 +13,17 @@ def git(*args: str) -> str: + """Run a git command and return the output as a string.""" return subprocess.check_output(["git", *args]).strip().decode() # https://github.com/DisnakeDev/disnake/blob/7853da70b13fcd2978c39c0b7efa59b34d298186/docs/conf.py#L192 @lru_cache def get() -> str | None: - """Current git reference. Uses branch/tag name if found, otherwise uses commit hash""" + """Get current git reference. + + Uses branch/tag name if found, otherwise uses commit hash. 
+ """ git_ref = None try: git_ref = git("name-rev", "--name-only", "--no-undefined", "HEAD") @@ -37,8 +41,10 @@ def get() -> str | None: def set_ref(app: Sphinx, config: Config): + """`config-inited` hook to set `html_theme_options["repository_branch"]`.""" app.config["html_theme_options"]["repository_branch"] = get() or "main" def setup(app: Sphinx) -> None: + """App setup hook.""" app.connect("config-inited", set_ref) diff --git a/docs/extensions/has_attr_test.py b/docs/extensions/has_attr_test.py index d570498366..70a8967cb8 100644 --- a/docs/extensions/has_attr_test.py +++ b/docs/extensions/has_attr_test.py @@ -1,3 +1,5 @@ +"""Extension adding a jinja2 filter that tests if an object has an attribute.""" + from __future__ import annotations from inspect import get_annotations @@ -11,10 +13,12 @@ def has_member(obj_path: str, attr: str) -> bool: + """Test if an object has an attribute.""" # https://jinja.palletsprojects.com/en/3.0.x/api/#custom-tests obj = import_string(obj_path) return hasattr(obj, attr) or attr in get_annotations(obj) def setup(app: Sphinx): + """App setup hook.""" DEFAULT_NAMESPACE["has_member"] = has_member diff --git a/docs/extensions/param_police.py b/docs/extensions/param_police.py index 234ad28e62..9e74b6f1b6 100644 --- a/docs/extensions/param_police.py +++ b/docs/extensions/param_police.py @@ -1,3 +1,5 @@ +"""Extension to warn about numpydoc-style parameter types in docstrings.""" + from __future__ import annotations import warnings @@ -13,6 +15,7 @@ def scanpy_log_param_types(self, fields, field_role="param", type_role="type"): + """Wrap ``NumpyDocstring._format_docutils_params``.""" for _name, _type, _desc in fields: if not _type or not self._obj.__module__.startswith("scanpy"): continue @@ -23,6 +26,7 @@ def scanpy_log_param_types(self, fields, field_role="param", type_role="type"): def show_param_warnings(app, exception): + """Warn about numpydoc-style parameter types in docstring.""" import inspect for (fname, fun), params in param_warnings.items(): @@ -42,5 +46,6 @@ def show_param_warnings(app, exception): def setup(app: Sphinx): + """App setup hook.""" NumpyDocstring._format_docutils_params = scanpy_log_param_types app.connect("build-finished", show_param_warnings) diff --git a/docs/extensions/patch_myst_nb.py b/docs/extensions/patch_myst_nb.py index f370acaf5e..d4c47d4876 100644 --- a/docs/extensions/patch_myst_nb.py +++ b/docs/extensions/patch_myst_nb.py @@ -22,9 +22,11 @@ def get_cell_level_config( cell_metadata: dict[str, object], line: int | None = None, ): + """Correct version of ``MditRenderMixin.get_cell_level_config``.""" rv = get_orig(self, field, cell_metadata, line) return copy(rv) def setup(app: Sphinx): + """App setup hook.""" MditRenderMixin.get_cell_level_config = get_cell_level_config diff --git a/pyproject.toml b/pyproject.toml index 57f246d8cf..c5f625b120 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -227,6 +227,7 @@ select = [ "W", # Warning detected by Pycodestyle "UP", # pyupgrade "I", # isort + "D", # pydocstyle "TC", # manage type checking blocks "TID251", # Banned imports "ICN", # Follow import conventions @@ -249,13 +250,25 @@ ignore = [ "E741", # `Literal["..."] | str` is useful for autocompletion "PYI051", + # We ban blank lines before docstrings instead of the opposite + "D203", + # We want multiline summaries to start on the first line, not the second + "D213", + # TODO: replace our current param docs reuse with this and remove it here: + "D417", ] [tool.ruff.lint.per-file-ignores] # Do not assign a lambda expression, 
use a def "src/scanpy/tools/_rank_genes_groups.py" = [ "E731" ] +# No need for docstrings for all benchmarks +"benchmarks/**/*.py" = [ "D102", "D103" ] +# No need for docstrings for all test modules and test functions +"tests/**/*.py" = [ "D100", "D101", "D103" ] [tool.ruff.lint.isort] known-first-party = [ "scanpy", "testing.scanpy" ] required-imports = [ "from __future__ import annotations" ] +[tool.ruff.lint.pydocstyle] +convention = "numpy" [tool.ruff.lint.flake8-tidy-imports.banned-api] "pytest.importorskip".msg = "Use the “@needs” decorator/mark instead" "pandas.api.types.is_categorical_dtype".msg = "Use isinstance(s.dtype, CategoricalDtype) instead" diff --git a/src/scanpy/__main__.py b/src/scanpy/__main__.py index f0ec49f773..45d0f603d9 100644 --- a/src/scanpy/__main__.py +++ b/src/scanpy/__main__.py @@ -1,3 +1,5 @@ +"""Scanpy CLI entry point.""" + from __future__ import annotations from .cli import console_main diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py index 7b7ed2465f..0cdda60d80 100644 --- a/src/scanpy/_compat.py +++ b/src/scanpy/_compat.py @@ -100,8 +100,7 @@ def njit() -> Callable[[Callable[P, R]], Callable[P, R]]: ... def njit( fn: Callable[P, R] | None = None, / ) -> Callable[P, R] | Callable[[Callable[P, R]], Callable[P, R]]: - """\ - Jit-compile a function using numba. + """Jit-compile a function using numba. On call, this function dispatches to a parallel or sequential numba function, depending on if it has been called from a thread pool. @@ -159,8 +158,7 @@ def _is_in_unsafe_thread_pool() -> bool: @cache def _numba_threading_layer() -> Layer: - """\ - Get numba’s threading layer. + """Get numba’s threading layer. This function implements the algorithm as described in @@ -195,7 +193,6 @@ def _legacy_numpy_gen( random_state: _LegacyRandom | None = None, ) -> np.random.Generator: """Return a random generator that behaves like the legacy one.""" - if random_state is not None: if isinstance(random_state, np.random.RandomState): np.random.set_state(random_state.get_state(legacy=False)) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index fc9ead09b0..9a55b24172 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -63,9 +63,7 @@ def level(self) -> int: def override( self, verbosity: Verbosity | str | int ) -> Generator[Verbosity, None, None]: - """\ - Temporarily override verbosity - """ + """Temporarily override verbosity.""" settings.verbosity = verbosity yield self settings.verbosity = self @@ -88,9 +86,7 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]): class ScanpyConfig: - """\ - Config manager for scanpy. - """ + """Config manager for scanpy.""" N_PCS: int """Default number of principal components to use.""" @@ -158,8 +154,7 @@ def __init__( @property def verbosity(self) -> Verbosity: - """ - Verbosity level (default `warning`) + """Verbosity level (default `warning`). Level 0: only show 'error' messages. Level 1: also show 'warning' messages. @@ -239,8 +234,7 @@ def file_format_figs(self, figure_format: str): @property def autosave(self) -> bool: - """\ - Automatically save figures in :attr:`~scanpy._settings.ScanpyConfig.figdir` (default `False`). + """Automatically save figures in :attr:`~scanpy._settings.ScanpyConfig.figdir` (default `False`). Do not show plots/figures interactively. """ @@ -253,8 +247,7 @@ def autosave(self, autosave: bool): @property def autoshow(self) -> bool: - """\ - Automatically show figures if `autosave == False` (default `True`). 
+ """Automatically show figures if `autosave == False` (default `True`). There is no need to call the matplotlib pl.show() in this case. """ @@ -267,9 +260,7 @@ def autoshow(self, autoshow: bool): @property def writedir(self) -> Path: - """\ - Directory where the function scanpy.write writes to by default. - """ + """Directory where the function scanpy.write writes to by default.""" return self._writedir @writedir.setter @@ -279,9 +270,7 @@ def writedir(self, writedir: Path | str): @property def cachedir(self) -> Path: - """\ - Directory for cache files (default `'./cache/'`). - """ + """Directory for cache files (default `'./cache/'`).""" return self._cachedir @cachedir.setter @@ -291,9 +280,7 @@ def cachedir(self, cachedir: Path | str): @property def datasetdir(self) -> Path: - """\ - Directory for example :mod:`~scanpy.datasets` (default `'./data/'`). - """ + """Directory for example :mod:`~scanpy.datasets` (default `'./data/'`).""" return self._datasetdir @datasetdir.setter @@ -303,9 +290,7 @@ def datasetdir(self, datasetdir: Path | str): @property def figdir(self) -> Path: - """\ - Directory for saving figures (default `'./figures/'`). - """ + """Directory for saving figures (default `'./figures/'`).""" return self._figdir @figdir.setter @@ -315,8 +300,7 @@ def figdir(self, figdir: Path | str): @property def cache_compression(self) -> str | None: - """\ - Compression for `sc.read(..., cache=True)` (default `'lzf'`). + """Compression for `sc.read(..., cache=True)` (default `'lzf'`). May be `'lzf'`, `'gzip'`, or `None`. """ @@ -334,8 +318,7 @@ def cache_compression(self, cache_compression: str | None): @property def max_memory(self) -> int | float: - """\ - Maximum memory usage in Gigabyte. + """Maximum memory usage in Gigabyte. Is currently not well respected… """ @@ -348,8 +331,7 @@ def max_memory(self, max_memory: float): @property def n_jobs(self) -> int: - """\ - Default number of jobs/ CPUs to use for parallel computing. + """Default number of jobs/ CPUs to use for parallel computing. Set to `-1` in order to use all available cores. Not all algorithms support special behavior for numbers < `-1`, @@ -364,9 +346,7 @@ def n_jobs(self, n_jobs: int): @property def logpath(self) -> Path | None: - """\ - The file path `logfile` was set to. - """ + """The file path `logfile` was set to.""" return self._logpath @logpath.setter @@ -378,8 +358,7 @@ def logpath(self, logpath: Path | str | None): @property def logfile(self) -> TextIO: - """\ - The open file to write logs to. + """The open file to write logs to. Set it to a :class:`~pathlib.Path` or :class:`str` to open a new one. The default `None` corresponds to :obj:`sys.stdout` in jupyter notebooks @@ -402,9 +381,7 @@ def logfile(self, logfile: Path | str | TextIO | None): @property def categories_to_ignore(self) -> list[str]: - """\ - Categories that are omitted in plotting etc. - """ + """Categories that are omitted in plotting etc.""" return self._categories_to_ignore @categories_to_ignore.setter @@ -448,8 +425,7 @@ def set_figure_params( transparent: bool = False, ipython_format: str = "png2x", ) -> None: - """\ - Set resolution/size, styling and format of figures. + """Set resolution/size, styling and format of figures. Parameters ---------- @@ -481,6 +457,7 @@ def set_figure_params( ipython_format Only concerns the notebook/IPython environment; see :func:`~IPython.display.set_matplotlib_formats` for details. 
+ """ if self._is_run_from_ipython(): import IPython @@ -512,7 +489,7 @@ def set_figure_params( @staticmethod def _is_run_from_ipython(): - """Determines whether we're currently in IPython.""" + """Determine whether we're currently in IPython.""" import builtins return getattr(builtins, "__IPYTHON__", False) diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py index 1374133cd7..82d15174f5 100644 --- a/src/scanpy/_utils/__init__.py +++ b/src/scanpy/_utils/__init__.py @@ -1,4 +1,4 @@ -"""Utility functions and classes +"""Utility functions and classes. This file largely consists of the old _utils.py file. Over time, these functions should be moved of this file. @@ -17,7 +17,7 @@ from enum import Enum from functools import partial, reduce, singledispatch, wraps from operator import mul, or_, truediv -from textwrap import dedent +from textwrap import indent from types import MethodType, ModuleType, UnionType from typing import ( TYPE_CHECKING, @@ -66,6 +66,8 @@ _MemoryArray = NDArray | _CSMatrix _SupportedArray = _MemoryArray | DaskArray + _ForT = TypeVar("_ForT", bound=Callable | type) + SeedLike = int | np.integer | Sequence[int] | np.random.SeedSequence RNGLike = np.random.Generator | np.random.BitGenerator @@ -84,8 +86,8 @@ def __repr__(self) -> str: class RNGIgraph: - """ - Random number generator for ipgraph so global seed is not changed. + """Random number generator for ipgraph so global seed is not changed. + See :func:`igraph.set_random_number_generator` for the requirements. """ @@ -243,20 +245,35 @@ def annotate_doc_types(mod: ModuleType, root: str): c_or_f.getdoc = partial(getdoc, c_or_f) -def _doc_params(**kwds): - """\ - Docstrings should start with ``\\`` in the first line for proper formatting. - """ +_leading_whitespace_re = re.compile("(^[ ]*)(?:[^ \n])", re.MULTILINE) + - def dec(obj): - obj.__doc__ = dedent(obj.__doc__).format_map(kwds) +def _doc_params(**replacements: str): + def dec(obj: _ForT) -> _ForT: + assert obj.__doc__ + assert "\t" not in obj.__doc__ + + # The first line of the docstring is unindented, + # so find indent size starting after it. + start_line_2 = obj.__doc__.find("\n") + 1 + assert start_line_2 > 0, f"{obj.__name__} has single-line docstring." + n_spaces = min( + len(m.group(1)) + for m in _leading_whitespace_re.finditer(obj.__doc__[start_line_2:]) + ) + + # The placeholder is already indented, so only indent subsequent lines + indented_replacements = { + k: indent(v, " " * n_spaces)[n_spaces:] for k, v in replacements.items() + } + obj.__doc__ = obj.__doc__.format_map(indented_replacements) return obj return dec def _check_array_function_arguments(**kwargs): - """Checks for invalid arguments when an array is passed. + """Check for invalid arguments when an array is passed. Helper for functions that work on either AnnData objects or array-likes. """ @@ -270,8 +287,7 @@ def _check_array_function_arguments(**kwargs): def _check_use_raw( adata: AnnData, use_raw: None | bool, *, layer: str | None = None ) -> bool: - """ - Normalize checking `use_raw`. + """Normalize checking `use_raw`. My intentention here is to also provide a single place to throw a deprecation warning from in future. """ @@ -355,6 +371,7 @@ def compute_association_matrix_of_groups( asso_matrix Matrix where rows correspond to the predicted labels and columns to the reference labels, entries are proportional to degree of association. + """ if normalization not in {"prediction", "reference"}: msg = '`normalization` needs to be either "prediction" or "reference".' 
@@ -416,7 +433,7 @@ def get_associated_colors_of_groups( def identify_groups(ref_labels, pred_labels, *, return_overlaps: bool = False): - """Which predicted label explains which reference label? + """Identify which predicted label explains which reference label. A predicted label explains the reference label which maximizes the minimum of ``relative_overlaps_pred`` and ``relative_overlaps_ref``. @@ -431,6 +448,7 @@ def identify_groups(ref_labels, pred_labels, *, return_overlaps: bool = False): If ``return_overlaps`` is ``True``, this will in addition return the overlap of the reference group with the predicted group; normalized with respect to the reference group size and the predicted group size, respectively. + """ ref_unique, ref_counts = np.unique(ref_labels, return_counts=True) ref_dict = dict(zip(ref_unique, ref_counts)) @@ -494,7 +512,8 @@ def moving_average(a: np.ndarray, n: int): Returns ------- An array view storing the moving average. - """ + + """ # noqa: D401 ret = np.cumsum(a, dtype=float) ret[n:] = ret[n:] - ret[:-n] return ret[n - 1 :] / n @@ -517,13 +536,12 @@ def update_params( *, check: bool = False, ) -> dict[str, Any]: - """\ - Update old_params with new_params. + """Update `old_params` with `new_params`. - If check==False, this merely adds and overwrites the content of old_params. + If check==False, this merely adds and overwrites the content of `old_params`. If check==True, this only allows updating of parameters that are already - present in old_params. + present in `old_params`. Parameters ---------- @@ -534,6 +552,7 @@ def update_params( Returns ------- updated_params + """ updated_params = dict(old_params) if new_params: # allow for new_params to be None @@ -834,7 +853,7 @@ def aggregate_sum(*args, **kwargs): @singledispatch def check_nonnegative_integers(X: _SupportedArray) -> bool | DaskArray: - """Checks values of X to ensure it is count data""" + """Check values of X to ensure it is count data.""" raise NotImplementedError @@ -910,13 +929,14 @@ def select_groups( def warn_with_traceback(message, category, filename, lineno, file=None, line=None): # noqa: PLR0917 - """Get full tracebacks when warning is raised by setting + """Get full tracebacks when a warning is raised. warnings.showwarning = warn_with_traceback - See also + See Also -------- https://stackoverflow.com/questions/22373927/get-traceback-of-warnings + """ import traceback @@ -938,8 +958,7 @@ def subsample( subsample: int = 1, seed: int = 0, ) -> tuple[np.ndarray, np.ndarray]: - """\ - Subsample a fraction of 1/subsample samples from the rows of X. + """Subsample a fraction of 1/subsample samples from the rows of X. Parameters ---------- @@ -956,6 +975,7 @@ def subsample( Subsampled X. rows Indices of rows that are stored in Xsampled. + """ if subsample == 1 and seed == 0: return X, np.arange(X.shape[0], dtype=int) @@ -996,6 +1016,7 @@ def subsample_n( Subsampled X. rows Indices of rows that are stored in Xsampled. + """ if n < 0: msg = "n must be greater 0" @@ -1016,7 +1037,7 @@ def check_presence_download(filename: Path, backup_url): def lazy_import(full_name): - """Imports a module in a way that it’s only executed on member access""" + """Import a module in a way that it’s only executed on member access.""" try: return sys.modules[full_name] except KeyError: @@ -1050,7 +1071,6 @@ class NeighborsView: Parameters ---------- - adata AnnData object.
key @@ -1071,6 +1091,7 @@ class NeighborsView: adata.uns[key]['params'] adata.uns[key]['connectivities_key'] in adata.obsp 'params' in adata.uns[key] + """ def __init__(self, adata: AnnData, key=None): diff --git a/src/scanpy/_utils/_doctests.py b/src/scanpy/_utils/_doctests.py index 0b3be18bbe..1fd785d9cf 100644 --- a/src/scanpy/_utils/_doctests.py +++ b/src/scanpy/_utils/_doctests.py @@ -31,6 +31,5 @@ def decorator(func: F) -> F: def doctest_internet(func: F) -> F: """Mark function so doctest gets the internet mark.""" - func._doctest_internet = True return func diff --git a/src/scanpy/_utils/compute/is_constant.py b/src/scanpy/_utils/compute/is_constant.py index 43da127fb7..a342139e24 100644 --- a/src/scanpy/_utils/compute/is_constant.py +++ b/src/scanpy/_utils/compute/is_constant.py @@ -49,8 +49,7 @@ def is_constant(a: _Array, axis: Literal[0, 1]) -> NDArray[np.bool_]: ... def is_constant( a: NDArray, axis: Literal[0, 1] | None = None ) -> bool | NDArray[np.bool_]: - """ - Check whether values in array are constant. + """Check whether values in array are constant. Params ------ @@ -66,7 +65,6 @@ def is_constant( Example ------- - >>> a = np.array([[0, 1], [0, 0]]) >>> a array([[0, 1], @@ -77,6 +75,7 @@ def is_constant( array([ True, False]) >>> is_constant(a, axis=1) array([False, True]) + """ raise NotImplementedError() diff --git a/src/scanpy/cli.py b/src/scanpy/cli.py index c934292dba..e8b31185b8 100644 --- a/src/scanpy/cli.py +++ b/src/scanpy/cli.py @@ -1,3 +1,5 @@ +"""Definition for scanpy’s CLI entry point to be used programmatically.""" + from __future__ import annotations import os @@ -17,7 +19,7 @@ class _DelegatingSubparsersAction(_SubParsersAction): - """Like a normal subcommand action, but uses a delegator for more choices""" + """Like a normal subcommand action, but uses a delegator for more choices.""" def __init__(self, *args, _command: str, _runargs: dict[str, Any], **kwargs): super().__init__(*args, **kwargs) @@ -28,10 +30,7 @@ def __init__(self, *args, _command: str, _runargs: dict[str, Any], **kwargs): class _CommandDelegator(MutableMapping): - """\ - Provide the ability to delegate, - but don’t calculate the whole list until necessary - """ + """Provide the ability to delegate, but don’t calculate the whole list until necessary.""" def __init__(self, command: str, action: _DelegatingSubparsersAction, **runargs): self.command = command @@ -94,7 +93,7 @@ def commands(self) -> frozenset[str]: class _DelegatingParser(ArgumentParser): - """Just sets parse_args().func to run the subcommand""" + """Just sets parse_args().func to run the subcommand.""" def __init__(self, cd: _CommandDelegator, subcmd: str): super().__init__(f"{cd.command}-{subcmd}", add_help=False) @@ -121,8 +120,7 @@ def _cmd_settings() -> None: def main( argv: Sequence[str] | None = None, *, check: bool = True, **runargs ) -> CompletedProcess | None: - """\ - Run a builtin scanpy command or a scanpy-* subcommand. + """Run a builtin scanpy command or a scanpy-* subcommand. 
Uses :func:`subcommand.run` for the latter: `~run(['scanpy', *argv], **runargs)` @@ -149,10 +147,7 @@ def main( def console_main(): - """\ - This serves as CLI entry point and will not show a Python traceback - if a called command fails + """Serve as CLI entry point and don’t show a Python traceback if a called command fails.""" cmd = main(check=False) if cmd is not None: sys.exit(cmd.returncode) diff --git a/src/scanpy/datasets/_datasets.py b/src/scanpy/datasets/_datasets.py index 8859de4d74..4ffc6820f7 100644 --- a/src/scanpy/datasets/_datasets.py +++ b/src/scanpy/datasets/_datasets.py @@ -65,8 +65,7 @@ def blobs( n_observations: int = 640, random_state: _LegacyRandom = 0, ) -> AnnData: - """\ - Gaussian Blobs. + """Gaussian Blobs. Parameters ---------- @@ -93,6 +92,7 @@ def blobs( >>> sc.datasets.blobs() AnnData object with n_obs × n_vars = 640 × 11 obs: 'blobs' + """ import sklearn.datasets @@ -109,8 +109,7 @@ def blobs( @doctest_internet @check_datasetdir_exists def burczynski06() -> AnnData: - """\ - Bulk data with conditions ulcerative colitis (UC) and Crohn’s disease (CD) :cite:p:`Burczynski2006`. + """Bulk data with conditions ulcerative colitis (UC) and Crohn’s disease (CD) :cite:p:`Burczynski2006`. The study assesses transcriptional profiles in peripheral blood mononuclear cells from 42 healthy individuals, 59 CD patients, and 26 UC patients by @@ -126,6 +125,7 @@ def burczynski06() -> AnnData: >>> sc.datasets.burczynski06() AnnData object with n_obs × n_vars = 127 × 22283 obs: 'groups' + """ filename = settings.datasetdir / "burczynski06/GDS1615_full.soft.gz" url = "ftp://ftp.ncbi.nlm.nih.gov/geo/datasets/GDS1nnn/GDS1615/soft/GDS1615_full.soft.gz" @@ -133,12 +133,11 @@ def burczynski06() -> AnnData: def krumsiek11() -> AnnData: - """\ - Simulated myeloid progenitors :cite:p:`Krumsiek2011`. + r"""Simulated myeloid progenitors :cite:p:`Krumsiek2011`. The literature-curated boolean network from :cite:t:`Krumsiek2011` was used to simulate the data. It describes development to four cell fates annotated in - :attr:`~anndata.AnnData.obs`\\ `["cell_type"]`: + :attr:`~anndata.AnnData.obs`\ `["cell_type"]`: “monocyte” (`Mo`), “erythrocyte” (`Ery`), “megakaryocyte” (`Mk`) and “neutrophil” (`Neu`). See also the discussion of this data in :cite:t:`Wolf2019`. @@ -158,7 +157,8 @@ def krumsiek11() -> AnnData: AnnData object with n_obs × n_vars = 640 × 11 obs: 'cell_type' uns: 'iroot', 'highlights' - """ + + """ # noqa: D401 with settings.verbosity.override("error"): # suppress output... adata = read(HERE / "krumsiek11.txt", first_column_names=True) adata.uns["iroot"] = 0 @@ -178,13 +178,12 @@ def krumsiek11() -> AnnData: @doctest_needs("openpyxl") @check_datasetdir_exists def moignard15() -> AnnData: - """\ - Hematopoiesis in early mouse embryos :cite:p:`Moignard2015`. + r"""Hematopoiesis in early mouse embryos :cite:p:`Moignard2015`. The data was obtained using qRT–PCR. :attr:`~anndata.AnnData.X` contains the normalized dCt values from supp. table 7 of the publication. - :attr:`~anndata.AnnData.obs`\\ `["exp_groups"]` contains the stages derived by + :attr:`~anndata.AnnData.obs`\ `["exp_groups"]` contains the stages derived by flow sorting and GFP marker status: “primitive streak” (`PS`), “neural plate” (`NP`), “head fold” (`HF`), “four somite” blood/GFP⁺ (4SG), and “four somite” endothelial/GFP¯ (`4SFG`).
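Almost every hunk in this diff reshapes docstrings toward one pattern, following the ruff pydocstyle settings added in pyproject.toml above: rule family `D` with the numpy convention, `D203` ignored (no blank line before the docstring) and `D213` ignored (the summary sits on the first line), plus the blank line after the last section that many hunks add before the closing quotes. A minimal sketch of that target shape, with made-up names:

def example(x, *, flag=False):
    """Summarize on the first line, in the imperative, ending with a period.

    A longer description follows after one blank line.

    Parameters
    ----------
    x
        Input value.
    flag
        Illustrative option.

    Returns
    -------
    The unchanged input.

    """
    return x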
@@ -200,6 +199,7 @@ def moignard15() -> AnnData: AnnData object with n_obs × n_vars = 3934 × 42 obs: 'exp_groups' uns: 'iroot', 'exp_groups_colors' + """ filename = settings.datasetdir / "moignard15/nbt.3154-S3.xlsx" backup_url = "https://static-content.springer.com/esm/art%3A10.1038%2Fnbt.3154/MediaObjects/41587_2015_BFnbt3154_MOESM4_ESM.xlsx" @@ -233,8 +233,7 @@ def moignard15() -> AnnData: @doctest_internet @check_datasetdir_exists def paul15() -> AnnData: - """\ - Development of Myeloid Progenitors :cite:p:`Paul2015`. + """Development of Myeloid Progenitors :cite:p:`Paul2015`. Non-logarithmized raw data. @@ -253,6 +252,7 @@ def paul15() -> AnnData: AnnData object with n_obs × n_vars = 2730 × 3451 obs: 'paul15_clusters' uns: 'iroot' + """ import h5py @@ -292,8 +292,7 @@ def paul15() -> AnnData: def toggleswitch() -> AnnData: - """\ - Simulated toggleswitch. + """Simulated toggleswitch. Data obtained simulating a simple toggleswitch :cite:p:`Gardner2000` @@ -311,7 +310,8 @@ def toggleswitch() -> AnnData: utils.warn_names_duplicates("obs") AnnData object with n_obs × n_vars = 200 × 2 uns: 'iroot' - """ + + """ # noqa: D401 filename = HERE / "toggleswitch.txt" adata = read(filename, first_column_names=True) adata.uns["iroot"] = 0 @@ -320,13 +320,12 @@ def toggleswitch() -> AnnData: @filter_oldformatwarning def pbmc68k_reduced() -> AnnData: - """\ - Subsampled and processed 68k PBMCs. + r"""Subsampled and processed 68k PBMCs. `PBMC 68k dataset`_ from 10x Genomics. The original PBMC 68k dataset was preprocessed with steps including - :func:`~scanpy.pp.normalize_total`\\ [#norm]_ and :func:`~scanpy.pp.scale`. + :func:`~scanpy.pp.normalize_total`\ [#norm]_ and :func:`~scanpy.pp.scale`. It was saved keeping only 724 cells and 221 highly variable genes. The saved file contains the annotation of cell types (key: `'bulk_labels'`), @@ -351,8 +350,8 @@ def pbmc68k_reduced() -> AnnData: obsm: 'X_pca', 'X_umap' varm: 'PCs' obsp: 'distances', 'connectivities' - """ + """ filename = HERE / "10x_pbmc68k_reduced.h5ad" with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning, module="anndata") @@ -363,8 +362,7 @@ def pbmc68k_reduced() -> AnnData: @filter_oldformatwarning @check_datasetdir_exists def pbmc3k() -> AnnData: - """\ - 3k PBMCs from 10x Genomics. + r"""3k PBMCs from 10x Genomics. The data consists in 3k PBMCs from a Healthy Donor and is freely available from 10x Genomics (file_ from this webpage_). @@ -377,7 +375,7 @@ def pbmc3k() -> AnnData: .. note:: This downloads 5.9 MB of data upon the first call of the function and stores it in - :attr:`~scanpy._settings.ScanpyConfig.datasetdir`\\ `/pbmc3k_raw.h5ad`. + :attr:`~scanpy._settings.ScanpyConfig.datasetdir`\ `/pbmc3k_raw.h5ad`. The following code was run to produce the file. @@ -405,6 +403,7 @@ def pbmc3k() -> AnnData: >>> sc.datasets.pbmc3k() AnnData object with n_obs × n_vars = 2700 × 32738 var: 'gene_ids' + """ url = "https://falexwolf.de/data/pbmc3k_raw.h5ad" adata = read(settings.datasetdir / "pbmc3k_raw.h5ad", backup_url=url) @@ -415,8 +414,7 @@ def pbmc3k() -> AnnData: @filter_oldformatwarning @check_datasetdir_exists def pbmc3k_processed() -> AnnData: - """\ - Processed 3k PBMCs from 10x Genomics. + """Processed 3k PBMCs from 10x Genomics. Processed using the basic tutorial :doc:`/tutorials/basics/clustering-2017`. 
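The dataset loaders in the surrounding hunks share one loading pattern, visible verbatim in the pbmc3k hunk below: build a path under `settings.datasetdir` and pass a `backup_url` to `read`, which downloads the file on first use and reads the cached copy afterwards. A sketch of that pattern — the function name, path, and URL are placeholders, not from the diff:

from scanpy import settings
from scanpy.readwrite import read

def my_dataset():
    """Load an example dataset, downloading it on first use."""
    filename = settings.datasetdir / "my_dataset/raw.h5ad"  # placeholder path
    url = "https://example.com/raw.h5ad"  # placeholder URL
    # `read` uses the cached file if present, otherwise fetches `backup_url`.
    return read(filename, backup_url=url)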
@@ -444,7 +442,8 @@ def pbmc3k_processed() -> AnnData: obsm: 'X_pca', 'X_tsne', 'X_umap', 'X_draw_graph_fr' varm: 'PCs' obsp: 'distances', 'connectivities' - """ + + """ # noqa: D401 url = "https://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad" with warnings.catch_warnings(): @@ -459,8 +458,7 @@ def _download_visium_dataset( base_dir: Path | None = None, download_image: bool = False, ) -> Path: - """\ - Download Visium spatial data from 10x Genomics’ database. + """Download Visium spatial data from 10x Genomics’ database. Params ------ @@ -517,8 +515,7 @@ def visium_sge( *, include_hires_tiff: bool = False, ) -> AnnData: - """\ - Processed Visium Spatial Gene Expression data from 10x Genomics’ database. + """Processed Visium Spatial Gene Expression data from 10x Genomics’ database. .. deprecated:: 1.11.0 Use :func:`squidpy.datasets.visium` instead. @@ -541,15 +538,15 @@ def visium_sge( Examples -------- - >>> import scanpy as sc - >>> sc.datasets.visium_sge(sample_id='V1_Breast_Cancer_Block_A_Section_1') + >>> sc.datasets.visium_sge(sample_id="V1_Breast_Cancer_Block_A_Section_1") AnnData object with n_obs × n_vars = 3798 × 36601 obs: 'in_tissue', 'array_row', 'array_col' var: 'gene_ids', 'feature_types', 'genome' uns: 'spatial' obsm: 'spatial' - """ + + """ # noqa: D401 spaceranger_version = "1.1.0" if "V1_" in sample_id else "1.2.0" sample_dir = _download_visium_dataset( sample_id, spaceranger_version, download_image=include_hires_tiff diff --git a/src/scanpy/datasets/_ebi_expression_atlas.py b/src/scanpy/datasets/_ebi_expression_atlas.py index 05d5f88fc6..0379ead6f7 100644 --- a/src/scanpy/datasets/_ebi_expression_atlas.py +++ b/src/scanpy/datasets/_ebi_expression_atlas.py @@ -108,8 +108,7 @@ def read_expression_from_archive(archive: ZipFile) -> anndata.AnnData: def ebi_expression_atlas( accession: str, *, filter_boring: bool = False ) -> anndata.AnnData: - """\ - Load a dataset from the EBI Single Cell Expression Atlas. + """Load a dataset from the EBI Single Cell Expression Atlas. The atlas_ can be browsed online to find the ``accession`` you want. Downloaded datasets are saved in the directory specified by @@ -138,6 +137,7 @@ def ebi_expression_atlas( >>> sc.datasets.ebi_expression_atlas("E-MTAB-4888") # doctest: +ELLIPSIS AnnData object with n_obs × n_vars = 2261 × 23899 obs: 'Sample Characteristic[organism]', 'Sample Characteristic Ontology Term[organism]', ..., 'Factor Value[cell type]', 'Factor Value Ontology Term[cell type]' + """ experiment_dir = settings.datasetdir / accession dataset_path = experiment_dir / f"{accession}.h5ad" diff --git a/src/scanpy/datasets/_utils.py b/src/scanpy/datasets/_utils.py index fb0c609102..664b5ce9f1 100644 --- a/src/scanpy/datasets/_utils.py +++ b/src/scanpy/datasets/_utils.py @@ -27,9 +27,7 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: def filter_oldformatwarning(f: Callable[P, R]) -> Callable[P, R]: - """ - Filters anndata.OldFormatWarning from being thrown by the wrapped function. - """ + """Filter anndata.OldFormatWarning from being thrown by the wrapped function.""" @wraps(f) def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: diff --git a/src/scanpy/experimental/__init__.py b/src/scanpy/experimental/__init__.py index 1ad2751169..6bfafdbaf2 100644 --- a/src/scanpy/experimental/__init__.py +++ b/src/scanpy/experimental/__init__.py @@ -1,3 +1,5 @@ +"""Experimental functions and modules.""" + from __future__ import annotations from . 
import pp diff --git a/src/scanpy/experimental/pp/__init__.py b/src/scanpy/experimental/pp/__init__.py index 135840e2f2..d1b5f7aff6 100644 --- a/src/scanpy/experimental/pp/__init__.py +++ b/src/scanpy/experimental/pp/__init__.py @@ -1,3 +1,5 @@ +"""Experimental preprocessing functions.""" + from __future__ import annotations from scanpy.experimental.pp._highly_variable_genes import highly_variable_genes diff --git a/src/scanpy/experimental/pp/_highly_variable_genes.py b/src/scanpy/experimental/pp/_highly_variable_genes.py index 7ad9f36bd7..370090cdfe 100644 --- a/src/scanpy/experimental/pp/_highly_variable_genes.py +++ b/src/scanpy/experimental/pp/_highly_variable_genes.py @@ -48,10 +48,7 @@ def _calculate_res_sparse( n_cells: int, ) -> NDArray[np.float64]: def get_value(cell: int, sparse_idx: int, stop_idx: int) -> np.float64: - """ - This function navigates the sparsity of the CSC (Compressed Sparse Column) matrix, - returning the value at the specified cell location if it exists, or zero otherwise. - """ + """Return the value at the specified cell location if it exists, or zero otherwise.""" if sparse_idx < stop_idx and index[sparse_idx] == cell: return data[sparse_idx] else: @@ -325,8 +322,7 @@ def highly_variable_genes( subset: bool = False, inplace: bool = True, ) -> pd.DataFrame | None: - """\ - Select highly variable genes using analytic Pearson residuals :cite:p:`Lause2021`. + """Select highly variable genes using analytic Pearson residuals :cite:p:`Lause2021`. In :cite:t:`Lause2021`, Pearson residuals of a negative binomial offset model are computed (with overdispersion `theta` shared across genes). By default, overdispersion @@ -375,8 +371,8 @@ def highly_variable_genes( Notes ----- Experimental version of `sc.pp.highly_variable_genes()` - """ + """ logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): diff --git a/src/scanpy/experimental/pp/_normalization.py b/src/scanpy/experimental/pp/_normalization.py index ef3d0311d7..ef3587f065 100644 --- a/src/scanpy/experimental/pp/_normalization.py +++ b/src/scanpy/experimental/pp/_normalization.py @@ -95,8 +95,7 @@ def normalize_pearson_residuals( inplace: bool = True, copy: bool = False, ) -> AnnData | dict[str, np.ndarray] | None: - """\ - Applies analytic Pearson residual normalization, based on :cite:t:`Lause2021`. + """Apply analytic Pearson residual normalization, based on :cite:t:`Lause2021`. The residuals are based on a negative binomial offset model with overdispersion `theta` shared across genes. By default, residuals are clipped to `sqrt(n_obs)` @@ -126,8 +125,8 @@ def normalize_pearson_residuals( The used value of the clipping parameter. `.uns['pearson_residuals_normalization']['computed_on']` The name of the layer on which the residuals were computed. - """ + """ if copy: if not inplace: msg = "`copy=True` cannot be used with `inplace=False`." @@ -179,8 +178,7 @@ def normalize_pearson_residuals_pca( check_values: bool = True, inplace: bool = True, ) -> AnnData | None: - """\ - Applies analytic Pearson residual normalization and PCA, based on :cite:t:`Lause2021`. + """Apply analytic Pearson residual normalization and PCA, based on :cite:t:`Lause2021`. The residuals are based on a negative binomial offset model with overdispersion `theta` shared across genes. By default, residuals are clipped to `sqrt(n_obs)`, @@ -221,8 +219,8 @@ def normalize_pearson_residuals_pca( Ratio of explained variance. `.uns['pca']['variance']` Explained variance, equivalent to the eigenvalues of the covariance matrix. 
- """ + """ # Unify new mask argument and deprecated use_highly_varible argument _, mask_var = _handle_mask_var(adata, mask_var, use_highly_variable) del use_highly_variable diff --git a/src/scanpy/experimental/pp/_recipes.py b/src/scanpy/experimental/pp/_recipes.py index 02aabf09db..c511a3b60b 100644 --- a/src/scanpy/experimental/pp/_recipes.py +++ b/src/scanpy/experimental/pp/_recipes.py @@ -43,8 +43,7 @@ def recipe_pearson_residuals( check_values: bool = True, inplace: bool = True, ) -> tuple[AnnData, pd.DataFrame] | None: - """\ - Full pipeline for HVG selection and normalization by analytic Pearson residuals :cite:p:`Lause2021`. + """Full pipeline for HVG selection and normalization by analytic Pearson residuals :cite:p:`Lause2021`. Applies gene selection based on Pearson residuals. On the resulting subset, Pearson residual normalization and PCA are performed. @@ -106,8 +105,8 @@ def recipe_pearson_residuals( Ratio of explained variance. `.uns['pca']['variance']` Explained variance, equivalent to the eigenvalues of the covariance matrix. - """ + """ hvg_args = dict( flavor="pearson_residuals", n_top_genes=n_top_genes, diff --git a/src/scanpy/external/__init__.py b/src/scanpy/external/__init__.py index 6adb8cb9fc..2087fc2671 100644 --- a/src/scanpy/external/__init__.py +++ b/src/scanpy/external/__init__.py @@ -1,3 +1,5 @@ +"""External preprocessing and analysis tools and their plotting.""" + from __future__ import annotations import sys diff --git a/src/scanpy/external/exporting.py b/src/scanpy/external/exporting.py index 8379720ea6..7077eeb93b 100644 --- a/src/scanpy/external/exporting.py +++ b/src/scanpy/external/exporting.py @@ -1,6 +1,4 @@ -"""\ -Exporting to formats for other software. -""" +"""Exporting to formats for other software.""" from __future__ import annotations @@ -47,8 +45,7 @@ def spring_project( neighbors_key: str | None = None, overwrite: bool = False, ) -> None: - """\ - Exports to a SPRING project directory :cite:p:`Weinreb2017`. + """Export to a SPRING project directory :cite:p:`Weinreb2017`. Visualize annotation present in `adata`. By default, export all gene expression data from `adata.raw` and categorical and continuous annotations present in `adata.obs`. @@ -79,8 +76,8 @@ def spring_project( Examples -------- See this `tutorial `__. - """ + """ # need to get nearest neighbors first if neighbors_key is None: neighbors_key = "neighbors" @@ -273,8 +270,7 @@ def _get_edges(adata, neighbors_key=None): def write_hdf5_genes(E, gene_list, filename): - '''SPRING standard: filename = main_spring_dir + "counts_norm_sparse_genes.hdf5"''' - + """SPRING standard: filename = main_spring_dir + "counts_norm_sparse_genes.hdf5".""" E = E.tocsc() hf = h5py.File(filename, "w") @@ -295,8 +291,7 @@ def write_hdf5_genes(E, gene_list, filename): def write_hdf5_cells(E, filename): - '''SPRING standard: filename = main_spring_dir + "counts_norm_sparse_cells.hdf5"''' - + """SPRING standard: filename = main_spring_dir + "counts_norm_sparse_cells.hdf5".""" E = E.tocsr() hf = h5py.File(filename, "w") @@ -514,11 +509,10 @@ def cellbrowser( port: int | None = None, do_debug: bool = False, ): - """\ - Export adata to a UCSC Cell Browser project directory. If `html_dir` is - set, subsequently build the html files from the project directory into - `html_dir`. If `port` is set, start an HTTP server in the background and - serve `html_dir` on `port`. + """Export adata to a UCSC Cell Browser project directory. 
+ + If `html_dir` is set, subsequently build the html files from the project directory into `html_dir`. + If `port` is set, start an HTTP server in the background and serve `html_dir` on `port`. By default, export all gene expression data from `adata.raw`, the annotations `louvain`, `percent_mito`, `n_genes` and `n_counts` and the top @@ -587,8 +581,8 @@ def cellbrowser( -------- See this `tutorial `__. - """ + """ try: import cellbrowser.cellbrowser as cb except ImportError: diff --git a/src/scanpy/external/pl.py b/src/scanpy/external/pl.py index ce305e2f06..f9e4bd5d4c 100644 --- a/src/scanpy/external/pl.py +++ b/src/scanpy/external/pl.py @@ -1,3 +1,5 @@ +"""Plotting functions for external tools.""" + from __future__ import annotations import contextlib @@ -45,8 +47,7 @@ show_save_ax=doc_show_save_ax, ) def phate(adata: AnnData, **kwargs) -> list[Axes] | None: - """\ - Scatter plot in PHATE basis. + """Scatter plot in PHATE basis. Parameters ---------- @@ -74,15 +75,16 @@ def phate(adata: AnnData, **kwargs) -> list[Axes] | None: >>> data.shape (2000, 100) >>> adata = AnnData(data) - >>> adata.obs['branches'] = branches + >>> adata.obs["branches"] = branches >>> sce.tl.phate(adata, k=5, a=20, t=150) - >>> adata.obsm['X_phate'].shape + >>> adata.obsm["X_phate"].shape (2000, 2) >>> sce.pl.phate( ... adata, - ... color='branches', - ... color_map='tab20', + ... color="branches", + ... color_map="tab20", ... ) + """ return embedding(adata, "phate", **kwargs) @@ -95,8 +97,7 @@ def phate(adata: AnnData, **kwargs) -> list[Axes] | None: show_save_ax=doc_show_save_ax, ) def trimap(adata: AnnData, **kwargs) -> Axes | list[Axes] | None: - """\ - Scatter plot in TriMap basis. + """Scatter plot in TriMap basis. Parameters ---------- @@ -108,6 +109,7 @@ def trimap(adata: AnnData, **kwargs) -> Axes | list[Axes] | None: Returns ------- If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it. + """ return embedding(adata, "trimap", **kwargs) @@ -122,8 +124,7 @@ def trimap(adata: AnnData, **kwargs) -> Axes | list[Axes] | None: def harmony_timeseries( adata: AnnData, *, show: bool = True, return_fig: bool = False, **kwargs ) -> Axes | list[Axes] | None: - """\ - Scatter plot in Harmony force-directed layout basis. + """Scatter plot in Harmony force-directed layout basis. Parameters ---------- @@ -136,8 +137,8 @@ def harmony_timeseries( ------- If `return_fig` is True, a :class:`~matplotlib.figure.Figure`. If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it. - """ + """ tp_name = adata.uns["harmony_timepoint_var"] tps = adata.obs[tp_name].unique() @@ -175,8 +176,7 @@ def sam( s: float = 10.0, **kwargs: Any, ) -> Axes: - """\ - Scatter plot using the SAM projection or another input projection. + """Scatter plot using the SAM projection or another input projection. Parameters ---------- @@ -192,8 +192,8 @@ def sam( figure window. kwargs all keyword arguments in matplotlib.pyplot.scatter are eligible. - """ + """ if isinstance(projection, str): try: dt = adata.obsm[projection] @@ -283,9 +283,9 @@ def wishbone_marker_trajectory( save: str | bool | None = None, ax: Axes | None = None, ): - """\ - Plot marker trends along trajectory, and return trajectory branches for further - analysis and visualization (heatmap, etc..) + """Plot marker trends along trajectory, and return trajectory branches. + + Intended for further analysis and visualization (heatmap, etc.). 
Parameters ---------- @@ -318,8 +318,8 @@ def wishbone_marker_trajectory( Computed values for the first branch `branch2_wishbone` : :class:`pandas.DataFrame` (`adata.uns`) Computed values for the second branch. - """ + """ wb = _anndata_to_wishbone(adata) if figsize is None: diff --git a/src/scanpy/external/pp/__init__.py b/src/scanpy/external/pp/__init__.py index b14e222981..71b1dae74d 100644 --- a/src/scanpy/external/pp/__init__.py +++ b/src/scanpy/external/pp/__init__.py @@ -1,3 +1,5 @@ +"""External preprocessing functions.""" + from __future__ import annotations from sklearn.utils import deprecated diff --git a/src/scanpy/external/pp/_bbknn.py b/src/scanpy/external/pp/_bbknn.py index ee280cc824..939b67fb06 100644 --- a/src/scanpy/external/pp/_bbknn.py +++ b/src/scanpy/external/pp/_bbknn.py @@ -34,8 +34,7 @@ def bbknn( local_connectivity: int = 1, **kwargs, ) -> AnnData | None: - """\ - Batch balanced kNN :cite:p:`Polanski2019`. + """Batch balanced kNN :cite:p:`Polanski2019`. Batch balanced kNN alters the kNN procedure to identify each cell's top neighbours in each batch separately instead of the entire cell pool with no accounting for batch. @@ -129,6 +128,7 @@ def bbknn( Returns ------- The `adata` with the batch-corrected graph. + """ try: from bbknn import bbknn diff --git a/src/scanpy/external/pp/_dca.py b/src/scanpy/external/pp/_dca.py index 20a97034b8..799eeaa006 100644 --- a/src/scanpy/external/pp/_dca.py +++ b/src/scanpy/external/pp/_dca.py @@ -71,8 +71,7 @@ def dca( return_info: bool = False, copy: bool = False, ) -> AnnData | None: - """\ - Deep count autoencoder :cite:p:`Eraslan2019`. + """Deep count autoencoder :cite:p:`Eraslan2019`. Fits a count autoencoder to the raw count data given in the anndata object in order to denoise the data and to capture hidden representation of @@ -176,8 +175,8 @@ def dca( If `return_model` is given, trained model is returned. When both `copy` and `return_model` are true, a tuple of anndata and model is returned in that order. - """ + """ try: from dca.api import dca except ImportError: diff --git a/src/scanpy/external/pp/_harmony_integrate.py b/src/scanpy/external/pp/_harmony_integrate.py index 824309f817..0bb06b198c 100644 --- a/src/scanpy/external/pp/_harmony_integrate.py +++ b/src/scanpy/external/pp/_harmony_integrate.py @@ -1,6 +1,4 @@ -""" -Use harmony to integrate cells from different experiments. -""" +"""Use harmony to integrate cells from different experiments.""" from __future__ import annotations @@ -26,8 +24,7 @@ def harmony_integrate( adjusted_basis: str = "X_pca_harmony", **kwargs, ): - """\ - Use harmonypy :cite:p:`Korsunsky2019` to integrate different experiments. + """Use harmonypy :cite:p:`Korsunsky2019` to integrate different experiments. Harmony :cite:p:`Korsunsky2019` is an algorithm for integrating single-cell data from multiple experiments. This function uses the python @@ -79,14 +76,15 @@ def harmony_integrate( be a column in ``adata.obs`` giving the experiment each cell came from. - >>> adata.obs['batch'] = 1350*['a'] + 1350*['b'] + >>> adata.obs["batch"] = 1350 * ["a"] + 1350 * ["b"] Finally, run harmony. Afterwards, there will be a new table in ``adata.obsm`` containing the adjusted PC's. 
- >>> sce.pp.harmony_integrate(adata, 'batch') - >>> 'X_pca_harmony' in adata.obsm + >>> sce.pp.harmony_integrate(adata, "batch") + >>> "X_pca_harmony" in adata.obsm True + """ try: import harmonypy diff --git a/src/scanpy/external/pp/_hashsolo.py b/src/scanpy/external/pp/_hashsolo.py index dcb44239b1..9483680bdf 100644 --- a/src/scanpy/external/pp/_hashsolo.py +++ b/src/scanpy/external/pp/_hashsolo.py @@ -1,18 +1,31 @@ -""" -HashSolo script provides a probabilistic cell hashing demultiplexing method -which generates a noise distribution and signal distribution for -each hashing barcode from empirically observed counts. These distributions -are updates from the global signal and noise barcode distributions, which -helps in the setting where not many cells are observed. Signal distributions -for a hashing barcode are estimated from samples where that hashing barcode -has the highest count. Noise distributions for a hashing barcode are estimated -from samples where that hashing barcode is one the k-2 lowest barcodes, where -k is the number of barcodes. A doublet should then have its two highest -barcode counts most likely coming from a signal distribution for those barcodes. -A singlet should have its highest barcode from a signal distribution, and its -second highest barcode from a noise distribution. A negative two highest -barcodes should come from noise distributions. We test each of these -hypotheses in a bayesian fashion, and select the most probable hypothesis. +"""A probabilistic cell hashing demultiplexing method. + +HashSolo generates a noise distribution and signal distribution +for each hashing barcode from empirically observed counts. +These distributions are updated from the global signal and noise barcode distributions, +which helps in the setting where not many cells are observed. +For a hashing barcode: + +Signal distributions + are estimated from samples where that hashing barcode has the highest count. + +Noise distributions + are estimated from samples where that hashing barcode is one of the k-2 lowest barcodes, + where k is the number of barcodes. + +We test each of the following hypotheses in a Bayesian fashion, +and select the most probable hypothesis. + +A doublet + should have its two highest barcode counts most likely + coming from a signal distribution for those barcodes. + +A singlet + should have its highest barcode from a signal distribution, + and its second highest barcode from a noise distribution. + +A negative + should have its two highest barcodes coming from noise distributions. """ from __future__ import annotations @@ -38,7 +51,7 @@ def _calculate_log_likelihoods( data: np.ndarray, number_of_noise_barcodes: int ) -> tuple[NDArray[np.float64], NDArray[np.float64], dict[int, str]]: - """Calculate log likelihoods for each hypothesis, negative, singlet, doublet + """Calculate log likelihoods for each hypothesis, negative, singlet, doublet. Parameters ---------- @@ -53,13 +66,15 @@ def _calculate_log_likelihoods( a 2d np.array log likelihood of each hypothesis all_indices counter_to_barcode_combo + """ def gaussian_updates( data: np.ndarray, mu_o: float, std_o: float ) -> tuple[float, float]: - """Update parameters of your gaussian - https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf + """Update parameters of your gaussian. + + See <https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf>.
Parameters ---------- @@ -76,6 +91,7 @@ def gaussian_updates( of gaussian std of gaussian + """ lam_o = 1 / (std_o**2) n = len(data) @@ -227,8 +243,7 @@ def gaussian_updates( def _calculate_bayes_rule( data: np.ndarray, priors: ArrayLike, number_of_noise_barcodes: int ) -> dict[str, np.ndarray]: - """ - Calculate bayes rule from log likelihoods + """Calculate Bayes rule from log likelihoods. Parameters ---------- @@ -255,6 +270,7 @@ def _calculate_bayes_rule( A 2d np.array probability of each hypothesis `"log_likelihoods_for_each_hypothesis"` A 2d np.array log likelihood of each hypothesis + """ priors = np.array(priors) log_likelihoods_for_each_hypothesis, _, _ = _calculate_log_likelihoods( @@ -339,12 +355,13 @@ def hashsolo( `"Negative"`, or `"Doublet"`. Examples - ------- + -------- >>> import anndata >>> import scanpy.external as sce >>> adata = anndata.read_h5ad("data.h5ad") >>> sce.pp.hashsolo(adata, ["Hash1", "Hash2", "Hash3"]) >>> adata.obs.head() + """ print( "Please cite HashSolo paper:\nhttps://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2" ) diff --git a/src/scanpy/external/pp/_magic.py b/src/scanpy/external/pp/_magic.py index 12e93f1a8e..093ee24187 100644 --- a/src/scanpy/external/pp/_magic.py +++ b/src/scanpy/external/pp/_magic.py @@ -1,6 +1,4 @@ -"""\ -Denoise high-dimensional data using MAGIC -""" +"""Denoise high-dimensional data using MAGIC.""" from __future__ import annotations @@ -42,8 +40,7 @@ def magic( copy: bool | None = None, **kwargs, ) -> AnnData | None: - """\ - Markov Affinity-based Graph Imputation of Cells (MAGIC) API :cite:p:`vanDijk2018`. + """Markov Affinity-based Graph Imputation of Cells (MAGIC) API :cite:p:`vanDijk2018`. MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data @@ -128,17 +125,17 @@ def magic( >>> adata = sc.datasets.paul15() >>> sc.pp.normalize_per_cell(adata) >>> sc.pp.sqrt(adata) # or sc.pp.log1p(adata) - >>> adata_magic = sce.pp.magic(adata, name_list=['Mpo', 'Klf1', 'Ifitm1'], knn=5) + >>> adata_magic = sce.pp.magic(adata, name_list=["Mpo", "Klf1", "Ifitm1"], knn=5) >>> adata_magic.shape (2730, 3) - >>> sce.pp.magic(adata, name_list='pca_only', knn=5) - >>> adata.obsm['X_magic'].shape + >>> sce.pp.magic(adata, name_list="pca_only", knn=5) + >>> adata.obsm["X_magic"].shape (2730, 100) - >>> sce.pp.magic(adata, name_list='all_genes', knn=5) + >>> sce.pp.magic(adata, name_list="all_genes", knn=5) >>> adata.X.shape (2730, 3451) - """ + """ try: from magic import MAGIC, __version__ except ImportError: diff --git a/src/scanpy/external/pp/_mnn_correct.py b/src/scanpy/external/pp/_mnn_correct.py index 518686dc75..2e36e596fc 100644 --- a/src/scanpy/external/pp/_mnn_correct.py +++ b/src/scanpy/external/pp/_mnn_correct.py @@ -38,8 +38,7 @@ def mnn_correct( list[pd.DataFrame], list[tuple[float | None, int]] | None, ]: - """\ - Correct batch effects by matching mutual nearest neighbors :cite:p:`Haghverdi2018` :cite:p:`Kang2018`. + """Correct batch effects by matching mutual nearest neighbors :cite:p:`Haghverdi2018` :cite:p:`Kang2018`. This uses the implementation of mnnpy_ :cite:p:`Kang2018`. @@ -125,6 +124,7 @@ def mnn_correct( A list containing MNN pairing information as DataFrames in each iteration step. angle_list A list containing angles of each batch.
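The `_calculate_bayes_rule` helper above amounts to a numerically stabilized softmax over hypotheses; a minimal sketch, assuming a cells × hypotheses log-likelihood array and a matching `priors` vector (the function name is hypothetical):

.. code:: python

    import numpy as np

    def posterior_probabilities(log_likelihoods, priors):
        log_post = log_likelihoods + np.log(priors)  # unnormalized log posterior
        log_post -= log_post.max(axis=1, keepdims=True)  # guard against overflow
        post = np.exp(log_post)
        return post / post.sum(axis=1, keepdims=True)  # rows sum to 1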
+ """ if len(datas) < 2: return datas, [], [] diff --git a/src/scanpy/external/pp/_scanorama_integrate.py b/src/scanpy/external/pp/_scanorama_integrate.py index c5fb2683b4..56e5413175 100644 --- a/src/scanpy/external/pp/_scanorama_integrate.py +++ b/src/scanpy/external/pp/_scanorama_integrate.py @@ -1,6 +1,4 @@ -""" -Use Scanorama to integrate cells from different experiments. -""" +"""Use Scanorama to integrate cells from different experiments.""" from __future__ import annotations @@ -32,8 +30,7 @@ def scanorama_integrate( batch_size: int = 5000, **kwargs, ) -> None: - """\ - Use Scanorama :cite:p:`Hie2019` to integrate different experiments. + """Use Scanorama :cite:p:`Hie2019` to integrate different experiments. Scanorama :cite:p:`Hie2019` is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This @@ -98,15 +95,16 @@ def scanorama_integrate( be a column in ``adata.obs`` giving the experiment each cell came from. - >>> adata.obs['batch'] = 1350*['a'] + 1350*['b'] + >>> adata.obs["batch"] = 1350 * ["a"] + 1350 * ["b"] Finally, run Scanorama. Afterwards, there will be a new table in ``adata.obsm`` containing the Scanorama embeddings. - >>> sce.pp.scanorama_integrate(adata, 'batch', verbose=1) + >>> sce.pp.scanorama_integrate(adata, "batch", verbose=1) Processing datasets a <=> b - >>> 'X_scanorama' in adata.obsm + >>> "X_scanorama" in adata.obsm True + """ try: import scanorama diff --git a/src/scanpy/external/tl/__init__.py b/src/scanpy/external/tl/__init__.py index 115b309a24..5d262e0b06 100644 --- a/src/scanpy/external/tl/__init__.py +++ b/src/scanpy/external/tl/__init__.py @@ -1,3 +1,5 @@ +"""External analysis tools.""" + from __future__ import annotations from ._harmony_timeseries import harmony_timeseries diff --git a/src/scanpy/external/tl/_harmony_timeseries.py b/src/scanpy/external/tl/_harmony_timeseries.py index 5cbfd2d856..440c90d975 100644 --- a/src/scanpy/external/tl/_harmony_timeseries.py +++ b/src/scanpy/external/tl/_harmony_timeseries.py @@ -1,7 +1,4 @@ -"""\ -Harmony time series for data visualization with augmented affinity matrix at -discrete time points -""" +"""Harmony time series for data visualization with augmented affinity matrix at discrete time points.""" from __future__ import annotations @@ -29,9 +26,7 @@ def harmony_timeseries( n_jobs: int = -2, copy: bool = False, ) -> AnnData | None: - """\ - Harmony time series for data visualization with augmented affinity matrix - at discrete time points :cite:p:`Nowotschin2019`. + """Harmony time series for data visualization with augmented affinity matrix at discrete time points :cite:p:`Nowotschin2019`. Harmony time series is a framework for data visualization, trajectory detection and interpretation for scRNA-seq data measured at discrete @@ -112,7 +107,7 @@ def harmony_timeseries( ... ) >>> time_points = adata.obs["sample"].str.split("_", expand=True)[0] >>> adata.obs["time_points"] = pd.Categorical( - ... time_points, categories=['sa1', 'sa2', 'sa3'] + ... time_points, categories=["sa1", "sa2", "sa3"] ... ) Normalize and filter for highly expressed genes @@ -135,8 +130,8 @@ def harmony_timeseries( Harmony_sample_notebook.ipynb>`_. It provides a comprehensive guide to draw *gene expression trends*, amongst other things. 
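A minimal usage sketch for `mnn_correct` above, assuming `mnnpy` is installed; splitting one dataset into two batches here is purely illustrative:

.. code:: python

    import scanpy as sc
    import scanpy.external as sce

    adata = sc.datasets.pbmc68k_reduced()
    batch_a, batch_b = adata[:350].copy(), adata[350:].copy()
    # returns the corrected data plus MNN pairing info and angles
    corrected, mnn_list, angle_list = sce.pp.mnn_correct(batch_a, batch_b)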
- """ + """ try: import harmony except ImportError: diff --git a/src/scanpy/external/tl/_palantir.py b/src/scanpy/external/tl/_palantir.py index bb232d9d5f..3651c402e0 100644 --- a/src/scanpy/external/tl/_palantir.py +++ b/src/scanpy/external/tl/_palantir.py @@ -1,6 +1,4 @@ -"""\ -Run Diffusion maps using the adaptive anisotropic kernel -""" +"""Run Diffusion maps using the adaptive anisotropic kernel.""" from __future__ import annotations @@ -41,8 +39,7 @@ def palantir( n_steps: int = 3, copy: bool = False, ) -> AnnData | None: - """\ - Run Diffusion maps using the adaptive anisotropic kernel :cite:p:`Setty2019`. + """Run Diffusion maps using the adaptive anisotropic kernel :cite:p:`Setty2019`. Palantir is an algorithm to align cells along differentiation trajectories. Palantir models differentiation as a stochastic process where stem cells @@ -168,7 +165,7 @@ def palantir( Palantir constructs the tSNE map in the embedded space since these maps better represent the differentiation trajectories. - >>> sc.tl.tsne(adata, n_pcs=2, use_rep='X_palantir_multiscale', perplexity=150) + >>> sc.tl.tsne(adata, n_pcs=2, use_rep="X_palantir_multiscale", perplexity=150) *tsne by cell size* @@ -178,9 +175,9 @@ def palantir( >>> sc.pl.tsne( ... adata, - ... gene_symbols=['CD34', 'MPO', 'GATA1', 'IRF8'], - ... layer='palantir_imp', - ... color=['CD34', 'MPO', 'GATA1', 'IRF8'] + ... gene_symbols=["CD34", "MPO", "GATA1", "IRF8"], + ... layer="palantir_imp", + ... color=["CD34", "MPO", "GATA1", "IRF8"], ... ) **Running Palantir** @@ -189,11 +186,11 @@ def palantir( automatically determines the terminal states, they can also be specified using the `termine_states` parameter. - >>> start_cell = 'Run5_164698952452459' + >>> start_cell = "Run5_164698952452459" >>> pr_res = sce.tl.palantir_results( ... adata, ... early_cell=start_cell, - ... ms_data='X_palantir_multiscale', + ... ms_data="X_palantir_multiscale", ... num_waypoints=500, ... ) @@ -207,8 +204,8 @@ def palantir( `_. It provides a comprehensive guide to draw *gene expression trends*, amongst other things. - """ + """ _check_import() from palantir.utils import ( determine_multiscale_space, @@ -280,8 +277,7 @@ def palantir_results( use_early_cell_as_start: bool = False, max_iterations: int = 25, ) -> AnnData | None: - """\ - **Running Palantir** + """Run Palantir. A convenience function that wraps `palantir.core.run_palantir` to compute branch probabilities and waypoints. @@ -314,6 +310,7 @@ def palantir_results( Returns ------- PResults object with pseudotime, entropy, branch probabilities and waypoints. + """ logg.info("Palantir computing waypoints..") diff --git a/src/scanpy/external/tl/_phate.py b/src/scanpy/external/tl/_phate.py index 91d8191e60..3a7b7933b0 100644 --- a/src/scanpy/external/tl/_phate.py +++ b/src/scanpy/external/tl/_phate.py @@ -1,6 +1,4 @@ -"""\ -Embed high-dimensional data using PHATE -""" +"""Embed high-dimensional data using PHATE.""" from __future__ import annotations @@ -54,8 +52,7 @@ def phate( copy: bool = False, **kwargs, ) -> AnnData | None: - """\ - PHATE :cite:p:`Moon2019`. + """PHATE :cite:p:`Moon2019`. 
Potential of Heat-diffusion for Affinity-based Trajectory Embedding (PHATE) embeds high dimensional single-cell data into two or three dimensions for @@ -142,9 +139,10 @@ def phate( (2000, 100) >>> adata = AnnData(tree_data) >>> sce.tl.phate(adata, k=5, a=20, t=150) - >>> adata.obsm['X_phate'].shape + >>> adata.obsm["X_phate"].shape (2000, 2) >>> sce.pl.phate(adata) + """ start = logg.info("computing PHATE") adata = adata.copy() if copy else adata diff --git a/src/scanpy/external/tl/_phenograph.py b/src/scanpy/external/tl/_phenograph.py index fdc3973771..81ff0c4500 100644 --- a/src/scanpy/external/tl/_phenograph.py +++ b/src/scanpy/external/tl/_phenograph.py @@ -1,6 +1,4 @@ -"""\ -Perform clustering using PhenoGraph -""" +"""Perform clustering using PhenoGraph.""" from __future__ import annotations @@ -70,8 +68,7 @@ def phenograph( copy: bool = False, **kargs: Any, ) -> tuple[np.ndarray | None, spmatrix, float | None] | None: - """\ - PhenoGraph clustering :cite:p:`Levine2015`. + """PhenoGraph clustering :cite:p:`Levine2015`. **PhenoGraph** is a clustering method designed for high-dimensional single-cell data. It works by creating a graph ("network") representing phenotypic similarities @@ -201,23 +198,33 @@ def phenograph( Plot phenograph clusters on tSNE: >>> sc.pl.tsne( - ... adata, color = ["pheno_louvain", "pheno_leiden"], s = 100, - ... palette = sc.pl.palettes.vega_20_scanpy, legend_fontsize = 10 + ... adata, + ... color=["pheno_louvain", "pheno_leiden"], + ... s=100, + ... palette=sc.pl.palettes.vega_20_scanpy, + ... legend_fontsize=10, ... ) Cluster and cluster centroids for input Numpy ndarray >>> df = np.random.rand(1000, 40) >>> dframe = pd.DataFrame(df) - >>> dframe.index, dframe.columns = (map(str, dframe.index), map(str, dframe.columns)) + >>> dframe.index, dframe.columns = ( + ... map(str, dframe.index), + ... map(str, dframe.columns), + ... ) >>> adata = AnnData(dframe) >>> sc.pp.pca(adata, n_comps=20) >>> sce.tl.phenograph(adata, clustering_algo="leiden", k=50) >>> sc.tl.tsne(adata, random_state=1) >>> sc.pl.tsne( - ... adata, color=['pheno_leiden'], s=100, - ... palette=sc.pl.palettes.vega_20_scanpy, legend_fontsize=10 + ... adata, + ... color=["pheno_leiden"], + ... s=100, + ... palette=sc.pl.palettes.vega_20_scanpy, + ... legend_fontsize=10, ... ) + """ start = logg.info("PhenoGraph clustering") diff --git a/src/scanpy/external/tl/_pypairs.py b/src/scanpy/external/tl/_pypairs.py index 2db98ff9a7..ca39cf6164 100644 --- a/src/scanpy/external/tl/_pypairs.py +++ b/src/scanpy/external/tl/_pypairs.py @@ -1,6 +1,4 @@ -"""\ -Calculate scores based on relative expression change of maker pairs -""" +"""Calculate scores based on relative expression change of marker pairs.""" from __future__ import annotations @@ -29,8 +27,7 @@ def sandbag( filter_genes: Genes | None = None, filter_samples: Genes | None = None, ) -> dict[str, list[tuple[str, str]]]: - """\ - Calculate marker pairs of genes :cite:p:`Scialdone2015,Fechtner2018`. + """Calculate marker pairs of genes :cite:p:`Scialdone2015,Fechtner2018`. Calculates the pairs of genes serving as marker pairs for each phase, based on a matrix of gene counts and an annotation of known phases.
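The marker-pair idea behind `sandbag` is easy to see in isolation: a pair (g1, g2) marks a phase when g1 is expressed above g2 in (nearly) all training cells of that phase. A toy check with made-up numbers, not the pypairs API:

.. code:: python

    import numpy as np

    g1 = np.array([5.0, 0.2, 3.1])  # hypothetical expression of gene 1 in 3 cells
    g2 = np.array([1.0, 4.0, 0.5])  # hypothetical expression of gene 2
    votes = g1 > g2
    print(votes)  # [ True False  True]: cells 0 and 2 match the marked phase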
@@ -66,6 +63,7 @@ >>> from pypairs import datasets >>> adata = datasets.leng15() >>> marker_pairs = sandbag(adata, fraction=0.5) + """ _check_import() from pypairs import settings as pp_settings @@ -94,8 +92,7 @@ def cyclone( min_iter: int = 100, min_pairs: int = 50, ) -> pd.DataFrame: - """\ - Assigns scores and predicted class to observations :cite:p:`Scialdone2015` :cite:p:`Fechtner2018`. + """Assign scores and predicted class to observations :cite:p:`Scialdone2015` :cite:p:`Fechtner2018`. Calculates scores for each observation and each phase and assigns prediction based on marker pairs identified by :func:`~scanpy.external.tl.sandbag`. @@ -129,6 +126,7 @@ def cyclone( If `marker_pairs` contains only the cell cycle categories G1, S and G2M an additional column `pypairs_cc_prediction` will be added. Where category S is assigned to samples where G1 and G2M scores are < 0.5. + """ _check_import() from pypairs import settings as pp_settings diff --git a/src/scanpy/external/tl/_sam.py b/src/scanpy/external/tl/_sam.py index 8daa2c0091..7e0d5bd4ed 100644 --- a/src/scanpy/external/tl/_sam.py +++ b/src/scanpy/external/tl/_sam.py @@ -1,6 +1,4 @@ -"""\ -Run the Self-Assembling Manifold algorithm -""" +"""Run the Self-Assembling Manifold algorithm.""" from __future__ import annotations @@ -48,8 +46,7 @@ def sam( inplace: bool = True, verbose: bool = True, ) -> SAM | tuple[SAM, AnnData]: - """\ - Self-Assembling Manifolds single-cell RNA sequencing analysis tool :cite:p:`Tarashansky2019`. + """Self-Assembling Manifolds single-cell RNA sequencing analysis tool :cite:p:`Tarashansky2019`. SAM iteratively rescales the input gene expression matrix to emphasize genes that are spatially variable along the intrinsic manifold of the data. @@ -61,7 +58,6 @@ def sam( Parameters ---------- - k The number of nearest neighbors to identify for each cell. @@ -167,7 +163,7 @@ Assuming we are given an AnnData object called `adata`, we can run the SAM algorithm as follows: - >>> sam_obj = sce.tl.sam(adata,inplace=True) + >>> sam_obj = sce.tl.sam(adata, inplace=True) The input AnnData object should contain unstandardized, non-negative expression values. Preferably, the data should be log-normalized and no @@ -182,7 +178,7 @@ To visualize the output, we can use: - >>> sce.pl.sam(adata,projection='X_umap') + >>> sce.pl.sam(adata, projection="X_umap") `sce.pl.sam` accepts all keyword arguments used in the `matplotlib.pyplot.scatter` function. @@ -200,14 +196,13 @@ In a Jupyter notebook, execute the following to launch the interface: >>> from samalg.gui import SAMGUI - >>> sam_gui = SAMGUI(sam_obj) # sam_obj is your SAM object + >>> sam_gui = SAMGUI(sam_obj)  # sam_obj is your SAM object >>> sam_gui.SamPlot This can also be enabled in Jupyter Lab by following the instructions in the self-assembling-manifold README. """ - try: from samalg import SAM except ImportError: diff --git a/src/scanpy/external/tl/_trimap.py b/src/scanpy/external/tl/_trimap.py index 122a4792b7..a81e782aae 100644 --- a/src/scanpy/external/tl/_trimap.py +++ b/src/scanpy/external/tl/_trimap.py @@ -1,6 +1,4 @@ -"""\ -Embed high-dimensional data using TriMap -""" +"""Embed high-dimensional data using TriMap.""" from __future__ import annotations @@ -45,8 +43,7 @@ def trimap( verbose: bool | int | None = None, copy: bool = False, ) -> AnnData | None: - """\ - TriMap: Large-scale Dimensionality Reduction Using Triplets :cite:p:`Amid2019`.
+ """TriMap: Large-scale Dimensionality Reduction Using Triplets :cite:p:`Amid2019`. TriMap is a dimensionality reduction method that uses triplet constraints to form a low-dimensional embedding of a set of points. The triplet @@ -102,9 +99,9 @@ def trimap( >>> import scanpy.external as sce >>> pbmc = sc.datasets.pbmc68k_reduced() >>> pbmc = sce.tl.trimap(pbmc, copy=True) - >>> sce.pl.trimap(pbmc, color=['bulk_labels'], s=10) - """ + >>> sce.pl.trimap(pbmc, color=["bulk_labels"], s=10) + """ try: from trimap import TRIMAP except ImportError: diff --git a/src/scanpy/external/tl/_wishbone.py b/src/scanpy/external/tl/_wishbone.py index 3b85ae14a1..3c57ea317a 100644 --- a/src/scanpy/external/tl/_wishbone.py +++ b/src/scanpy/external/tl/_wishbone.py @@ -27,9 +27,7 @@ def wishbone( components: Iterable[int] = (1, 2, 3), num_waypoints: int | Collection = 250, ): - """\ - Wishbone identifies bifurcating developmental trajectories from single-cell data - :cite:p:`Setty2016`. + """Identify bifurcating developmental trajectories from single-cell data :cite:p:`Setty2016`. Wishbone is an algorithm for positioning single cells along bifurcating developmental trajectories with high resolution. Wishbone uses multi-dimensional @@ -100,6 +98,7 @@ def wishbone( For further demonstration of Wishbone methods and visualization please follow the notebooks in the package `Wishbone_for_single_cell_RNAseq.ipynb `_.\ + """ try: from wishbone.core import wishbone as c_wishbone diff --git a/src/scanpy/get/__init__.py b/src/scanpy/get/__init__.py index 56c0d3c130..18ff834229 100644 --- a/src/scanpy/get/__init__.py +++ b/src/scanpy/get/__init__.py @@ -1,3 +1,5 @@ +"""Get data from AnnData.""" + from __future__ import annotations from ._aggregated import aggregate diff --git a/src/scanpy/get/_aggregated.py b/src/scanpy/get/_aggregated.py index aa7f93d9ce..16f2086ab2 100644 --- a/src/scanpy/get/_aggregated.py +++ b/src/scanpy/get/_aggregated.py @@ -24,8 +24,7 @@ class Aggregate: - """\ - Functionality for generic grouping and aggregating. + """Functionality for generic grouping and aggregating. There is currently support for count_nonzero, sum, mean, and variance. @@ -65,34 +64,34 @@ def __init__( data: Array def count_nonzero(self) -> NDArray[np.integer]: - """\ - Count the number of observations in each group. + """Count the number of observations in each group. Returns ------- Array of counts. + """ # pattern = self.data._with_data(np.broadcast_to(1, len(self.data.data))) # return self.indicator_matrix @ pattern return utils.asarray(self.indicator_matrix @ (self.data != 0)) def sum(self) -> Array: - """\ - Compute the sum per feature per group of observations. + """Compute the sum per feature per group of observations. Returns ------- Array of sum. + """ return utils.asarray(self.indicator_matrix @ self.data) def mean(self) -> Array: - """\ - Compute the mean per feature per group of observations. + """Compute the mean per feature per group of observations. Returns ------- Array of mean. + """ return ( utils.asarray(self.indicator_matrix @ self.data) @@ -100,8 +99,7 @@ def mean(self) -> Array: ) def mean_var(self, dof: int = 1) -> tuple[np.ndarray, np.ndarray]: - """\ - Compute the count, as well as mean and variance per feature, per group of observations. + """Compute the count, as well as mean and variance per feature, per group of observations. The formula `Var(X) = E(X^2) - E(X)^2` suffers loss of precision when the variance is a very small fraction of the squared mean. 
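The precision loss noted here is reproducible with plain NumPy: in float32, the one-pass formula cancels catastrophically near a large squared mean, while a two-pass computation does not (illustrative numbers only, not scanpy's implementation):

.. code:: python

    import numpy as np

    rng = np.random.default_rng(0)
    x = (1e4 + rng.random(1_000)).astype(np.float32)  # huge mean, true variance ~1/12
    one_pass = np.mean(x**2) - np.mean(x) ** 2  # E(X^2) - E(X)^2: cancels badly
    two_pass = np.mean((x - x.mean()) ** 2)
    print(float(one_pass), float(two_pass))  # one-pass is wildly off; two-pass ~0.083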
In particular, when X is constant, the formula may @@ -117,6 +115,7 @@ def mean_var(self, dof: int = 1) -> tuple[np.ndarray, np.ndarray]: Returns ------- Object with `count`, `mean`, and `var` attributes. + """ assert dof >= 0 @@ -139,14 +138,13 @@ def mean_var(self, dof: int = 1) -> tuple[np.ndarray, np.ndarray]: return mean_, var_ def median(self) -> Array: - """\ - Compute the median per feature per group of observations. + """Compute the median per feature per group of observations. Returns ------- Array of median. - """ + """ medians = [] for group in np.unique(self.groupby.codes): group_mask = self.groupby.codes == group @@ -161,8 +159,7 @@ def median(self) -> Array: def _power(X: Array, power: float) -> Array: - """\ - Generate elementwise power of a matrix. + """Generate elementwise power of a matrix. Needed for non-square sparse matrices because they do not support `**` so the `.power` function is used. @@ -176,6 +173,7 @@ def _power(X: Array, power: float) -> Array: Returns ------- Matrix whose power has been raised. + """ return X**power if isinstance(X, np.ndarray) else X.power(power) @@ -192,8 +190,7 @@ def aggregate( obsm: str | None = None, varm: str | None = None, ) -> AnnData: - """\ - Aggregate data matrix based on some categorical grouping. + """Aggregate data matrix based on some categorical grouping. This function is useful for pseudobulking as well as plotting. @@ -230,14 +227,15 @@ def aggregate( Examples -------- - Calculating mean expression and number of nonzero entries per cluster: >>> import scanpy as sc, pandas as pd >>> pbmc = sc.datasets.pbmc3k_processed().raw.to_adata() >>> pbmc.shape (2638, 13714) - >>> aggregated = sc.get.aggregate(pbmc, by="louvain", func=["mean", "count_nonzero"]) + >>> aggregated = sc.get.aggregate( + ... pbmc, by="louvain", func=["mean", "count_nonzero"] + ... ) >>> aggregated AnnData object with n_obs × n_vars = 8 × 13714 obs: 'louvain' @@ -247,13 +245,16 @@ def aggregate( We can group over multiple columns: >>> pbmc.obs["percent_mito_binned"] = pd.cut(pbmc.obs["percent_mito"], bins=5) - >>> sc.get.aggregate(pbmc, by=["louvain", "percent_mito_binned"], func=["mean", "count_nonzero"]) + >>> sc.get.aggregate( + ... pbmc, by=["louvain", "percent_mito_binned"], func=["mean", "count_nonzero"] + ... ) AnnData object with n_obs × n_vars = 40 × 13714 obs: 'louvain', 'percent_mito_binned' var: 'n_cells' layers: 'mean', 'count_nonzero' Note that this filters out any combination of groups that wasn't present in the original data. + """ if not isinstance(adata, AnnData): msg = ( @@ -379,9 +380,7 @@ def aggregate_array( def _combine_categories( label_df: pd.DataFrame, cols: Collection[str] | str ) -> tuple[pd.Categorical, pd.DataFrame]: - """ - Returns both the result categories and a dataframe labelling each row - """ + """Return both the result categories and a dataframe labelling each row.""" from itertools import product if isinstance(cols, str): diff --git a/src/scanpy/get/get.py b/src/scanpy/get/get.py index aea8be9eb2..f644f634c7 100644 --- a/src/scanpy/get/get.py +++ b/src/scanpy/get/get.py @@ -1,4 +1,4 @@ -"""This module contains helper functions for accessing data.""" +"""Helper functions for accessing data.""" from __future__ import annotations @@ -38,9 +38,7 @@ def rank_genes_groups_df( log2fc_max: float | None = None, gene_symbols: str | None = None, ) -> pd.DataFrame: - """\ - :func:`scanpy.tl.rank_genes_groups` results in the form of a - :class:`~pandas.DataFrame`. 
+ """Get :func:`scanpy.tl.rank_genes_groups` results in the form of a :class:`~pandas.DataFrame`. Params ------ @@ -68,6 +66,7 @@ >>> pbmc = sc.datasets.pbmc68k_reduced() >>> sc.tl.rank_genes_groups(pbmc, groupby="louvain", use_raw=True) >>> dedf = sc.get.rank_genes_groups_df(pbmc, group="0") + """ if isinstance(group, str): group = [group] @@ -124,7 +123,7 @@ def _check_indices( alias_index: pd.Index | None = None, use_raw: bool = False, ) -> tuple[list[str], list[str], list[str]]: - """Common logic for checking indices for obs_df and var_df.""" + """Check indices for `obs_df` and `var_df`.""" alt_repr = "adata.raw" if use_raw else "adata" alt_dim = ("obs", "var")[dim == "obs"] @@ -233,8 +232,7 @@ def obs_df( gene_symbols: str | None = None, use_raw: bool = False, ) -> pd.DataFrame: - """\ - Return values for observations in adata. + """Return values for observations in adata. Params ------ @@ -263,9 +261,7 @@ >>> import scanpy as sc >>> pbmc = sc.datasets.pbmc68k_reduced() >>> plotdf = sc.get.obs_df( - ... pbmc, - ... keys=["CD8B", "n_genes"], - ... obsm_keys=[("X_umap", 0), ("X_umap", 1)] + ... pbmc, keys=["CD8B", "n_genes"], obsm_keys=[("X_umap", 0), ("X_umap", 1)] ... ) >>> plotdf.columns Index(['CD8B', 'n_genes', 'X_umap-0', 'X_umap-1'], dtype='object') @@ -275,13 +271,11 @@ Calculating mean expression for marker genes by cluster: >>> pbmc = sc.datasets.pbmc68k_reduced() - >>> marker_genes = ['CD79A', 'MS4A1', 'CD8A', 'CD8B', 'LYZ'] - >>> genedf = sc.get.obs_df( - ... pbmc, - ... keys=["louvain", *marker_genes] - ... ) + >>> marker_genes = ["CD79A", "MS4A1", "CD8A", "CD8B", "LYZ"] + >>> genedf = sc.get.obs_df(pbmc, keys=["louvain", *marker_genes]) >>> grouped = genedf.groupby("louvain", observed=True) >>> mean, var = grouped.mean(), grouped.var() + """ if isinstance(keys, str): keys = [keys] @@ -348,8 +342,7 @@ def var_df( *, layer: str | None = None, ) -> pd.DataFrame: - """\ - Return values for observations in adata. + """Return values for variables in adata. Params ------ @@ -366,6 +359,7 @@ ------- A dataframe with `adata.var_names` as index, and values specified by `keys` and `varm_keys`. + """ # Argument handling if isinstance(keys, str): @@ -425,9 +419,7 @@ def _get_obs_rep( | BaseCompressedSparseDataset | None ): - """ - Choose array aligned with obs annotation. - """ + """Choose array aligned with obs annotation.""" # https://github.com/scverse/scanpy/issues/1546 if not isinstance(use_raw, bool): msg = f"use_raw expected to be bool, was {type(use_raw)}." @@ -465,9 +457,7 @@ def _set_obs_rep( obsm: str | None = None, obsp: str | None = None, ): - """ - Set value for observation rep. - """ + """Set value for observation rep.""" is_layer = layer is not None is_raw = use_raw is not False is_obsm = obsm is not None @@ -502,8 +492,8 @@ def _check_mask( *, allow_probabilities: bool = False, ) -> M: # Could also be a series, but should be one or the other - """ - Validate mask argument + """Validate mask argument. + Params ------ data diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index d12333beae..ea063af75d 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -1,4 +1,4 @@ -"""Logging and Profiling""" +"""Logging and Profiling.""" from __future__ import annotations @@ -135,14 +135,15 @@ def print_header(*, file: None = None) -> SessionInfo: ... @overload def print_header(*, file: IO[str]) -> None: ...
def print_header(*, file: IO[str] | None = None): - """\ - Versions that might influence the numerical results. + """Versions that might influence the numerical results. + Matplotlib and Seaborn are excluded from this. Parameters ---------- file Optional path for dependency output. + """ from session_info2 import session_info @@ -157,8 +158,7 @@ def print_header(*, file: IO[str] | None = None): @deprecated("Use `print_header` instead") def print_versions() -> SessionInfo: - """\ - Alias for `print_header`. + """Alias for `print_header`. .. deprecated:: 1.11.0 @@ -168,13 +168,15 @@ def print_versions() -> SessionInfo: def print_version_and_date(*, file=None): - """\ + """Print small version and date header. + Useful for starting a notebook so you see when you started working. Parameters ---------- file Optional path for output. + """ from . import __version__ @@ -197,8 +199,7 @@ def error( deep: str | None = None, extra: dict | None = None, ) -> datetime: - """\ - Log message with specific level and return current time. + """Log message with specific level and return current time. Parameters ---------- @@ -214,6 +215,7 @@ def error( this gets displayed as well extra Additional values you can specify in `msg` like `{time_passed}`. + """ from ._settings import settings diff --git a/src/scanpy/metrics/__init__.py b/src/scanpy/metrics/__init__.py index 526ac56a80..91e850bab0 100644 --- a/src/scanpy/metrics/__init__.py +++ b/src/scanpy/metrics/__init__.py @@ -1,3 +1,5 @@ +"""Metrics.""" + from __future__ import annotations from ._gearys_c import gearys_c diff --git a/src/scanpy/metrics/_common.py b/src/scanpy/metrics/_common.py index 804fd8eab2..a8c3013426 100644 --- a/src/scanpy/metrics/_common.py +++ b/src/scanpy/metrics/_common.py @@ -54,8 +54,7 @@ def _(val: pd.DataFrame | pd.Series) -> NDArray: def _check_vals(vals: V) -> tuple[V, NDArray[np.bool_] | slice, NDArray[np.float64]]: - """\ - Checks that values wont cause issues in computation. + """Check that values won't cause issues in computation. Returns new set of vals, and indexer to put values back into result. diff --git a/src/scanpy/metrics/_gearys_c.py b/src/scanpy/metrics/_gearys_c.py index 33b77b4c63..5b196476b3 100644 --- a/src/scanpy/metrics/_gearys_c.py +++ b/src/scanpy/metrics/_gearys_c.py @@ -31,9 +31,9 @@ def gearys_c( obsp: str | None = None, use_raw: bool = False, ) -> np.ndarray | float: - r""" - Calculate `Geary's C `_, as used - by `VISION `_. + r"""Calculate `Geary's C `_. + + Specifically as used by `VISION `_. Geary's C is a measure of autocorrelation for some measure on a graph. This can be used to assess whether measures are correlated between neighboring cells. Lower @@ -91,7 +91,6 @@ Examples -------- - Calculate Geary’s C for each component of a dimensionality reduction: .. code:: python import scanpy as sc, numpy as np pbmc = sc.datasets.pbmc68k_reduced() sc.pp.pca(pbmc) pc_c = sc.metrics.gearys_c(pbmc, obsm="X_pca") Equivalent to: .. code:: python alt = sc.metrics.gearys_c(pbmc.obsp["connectivities"], pbmc.obsm["X_pca"].T) np.testing.assert_array_equal(pc_c, alt) + """ if use_graph is None: # Fix for anndata<0.7 diff --git a/src/scanpy/metrics/_metrics.py b/src/scanpy/metrics/_metrics.py index 90eb42a7af..c24cfe9fd1 100644 --- a/src/scanpy/metrics/_metrics.py +++ b/src/scanpy/metrics/_metrics.py @@ -1,6 +1,4 @@ -""" -Metrics which don't quite deserve their own file. -""" +"""Metrics which don't quite deserve their own file.""" from __future__ import annotations @@ -22,8 +20,7 @@ def confusion_matrix( *, normalize: bool = True, ) -> pd.DataFrame: - """\ - Given an original and new set of labels, create a labelled confusion matrix.
+ """Given an original and new set of labels, create a labelled confusion matrix. Parameters `orig` and `new` can either be entries in data or categorical arrays of the same size. diff --git a/src/scanpy/metrics/_morans_i.py b/src/scanpy/metrics/_morans_i.py index 516b1ce616..99224e1f4b 100644 --- a/src/scanpy/metrics/_morans_i.py +++ b/src/scanpy/metrics/_morans_i.py @@ -31,8 +31,7 @@ def morans_i( obsp: str | None = None, use_raw: bool = False, ) -> np.ndarray | float: - r""" - Calculate Moran’s I Global Autocorrelation Statistic. + r"""Calculate Moran’s I Global Autocorrelation Statistic. Moran’s I is a global autocorrelation statistic for some measure on a graph. It is commonly used in spatial data analysis to assess autocorrelation on a 2D grid. It is closely related to Geary's C, @@ -90,7 +89,6 @@ def morans_i( Examples -------- - Calculate Moran’s I for each components of a dimensionality reduction: .. code:: python @@ -107,6 +105,7 @@ def morans_i( alt = sc.metrics.morans_i(pbmc.obsp["connectivities"], pbmc.obsm["X_pca"].T) np.testing.assert_array_equal(pc_c, alt) + """ if use_graph is None: # Fix for anndata<0.7 diff --git a/src/scanpy/neighbors/__init__.py b/src/scanpy/neighbors/__init__.py index 214043727b..7edca53284 100644 --- a/src/scanpy/neighbors/__init__.py +++ b/src/scanpy/neighbors/__init__.py @@ -1,3 +1,5 @@ +"""Functions and classes for computing nearest neighbors.""" + from __future__ import annotations import contextlib @@ -57,7 +59,7 @@ class KwdsForTransformer(TypedDict): random_state: _LegacyRandom -class NeighborsParams(TypedDict): +class NeighborsParams(TypedDict): # noqa: D101 n_neighbors: int method: _Method random_state: _LegacyRandom @@ -83,8 +85,7 @@ def neighbors( key_added: str | None = None, copy: bool = False, ) -> AnnData | None: - """\ - Computes the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`. + """Compute the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`. The neighbor search efficiency of this heavily relies on UMAP :cite:p:`McInnes2018`, which also provides a method for estimating connectivities of data points - @@ -172,16 +173,19 @@ def neighbors( >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> # Basic usage - >>> sc.pp.neighbors(adata, 20, metric='cosine') + >>> sc.pp.neighbors(adata, 20, metric="cosine") >>> # Provide your own transformer for more control and flexibility >>> from sklearn.neighbors import KNeighborsTransformer - >>> transformer = KNeighborsTransformer(n_neighbors=10, metric='manhattan', algorithm='kd_tree') + >>> transformer = KNeighborsTransformer( + ... n_neighbors=10, metric="manhattan", algorithm="kd_tree" + ... ) >>> sc.pp.neighbors(adata, transformer=transformer) >>> # now you can e.g. 
access the index: `transformer._tree` - See also + See Also -------- :doc:`/how-to/knn-transformers` + """ start = logg.info("computing neighbors") adata = adata.copy() if copy else adata @@ -245,7 +249,7 @@ def neighbors( return adata if copy else None -class FlatTree(NamedTuple): +class FlatTree(NamedTuple): # noqa: D101 hyperplanes: None offsets: None children: None @@ -294,7 +298,7 @@ def _make_forest_dict(forest): class OnFlySymMatrix: """Emulate a matrix where elements are calculated on the fly.""" - def __init__( + def __init__( # noqa: D107 self, get_row: Callable[[Any], np.ndarray], shape: tuple[int, int], @@ -311,7 +315,7 @@ def __init__( self.rows = {} if rows is None else rows self.restrict_array = restrict_array # restrict the array to a subset - def __getitem__(self, index): + def __getitem__(self, index): # noqa: D105 if isinstance(index, int | np.integer): if self.restrict_array is None: glob_index = index @@ -349,8 +353,7 @@ def restrict(self, index_array): class Neighbors: - """\ - Data represented as graph of nearest neighbors. + """Data represented as graph of nearest neighbors. Represent a data matrix as a graph of nearest neighbor relations (edges) among data points (nodes). @@ -363,10 +366,11 @@ class Neighbors: Number of diffusion components to use. neighbors_key Where to look in `.uns` and `.obsp` for neighbors data + """ @old_positionals("n_dcs", "neighbors_key") - def __init__( + def __init__( # noqa: D107 self, adata: AnnData, *, @@ -443,6 +447,7 @@ def count_nonzero(a: np.ndarray | csr_matrix) -> int: @property def rp_forest(self) -> RPForestDict | None: + """PyNNDescent index.""" return self._rp_forest @property @@ -469,6 +474,7 @@ def transitions(self) -> np.ndarray | csr_matrix: Notes ----- This has not been tested, in contrast to `transitions_sym`. + """ Zinv = self.Z.power(-1) if issparse(self.Z) else np.diag(1.0 / np.diag(self.Z)) return self.Z @ self.transitions_sym @ Zinv @@ -524,8 +530,7 @@ def compute_neighbors( metric_kwds: Mapping[str, Any] = MappingProxyType({}), random_state: _LegacyRandom = 0, ) -> None: - """\ - Compute distances and connectivities of neighbors. + """Compute distances and connectivities of neighbors. Parameters ---------- @@ -543,6 +548,7 @@ def compute_neighbors( ------- Writes sparse graph attributes `.distances` and, if `method` is not `None`, `.connectivities`. + """ from ..tools._utils import _choose_representation @@ -715,8 +721,7 @@ def _handle_transformer( @old_positionals("density_normalize") def compute_transitions(self, *, density_normalize: bool = True): - """\ - Compute transition matrix. + """Compute transition matrix. Parameters ---------- @@ -727,6 +732,7 @@ def compute_transitions(self, *, density_normalize: bool = True): Returns ------- Makes attributes `.transitions_sym` and `.transitions` available. + """ start = logg.info("computing transitions") W = self._connectivities @@ -760,8 +766,7 @@ def compute_eigen( sort: Literal["decrease", "increase"] = "decrease", random_state: _LegacyRandom = 0, ): - """\ - Compute eigen decomposition of transition matrix. + """Compute eigen decomposition of transition matrix. Parameters ---------- @@ -787,6 +792,7 @@ def compute_eigen( projection on the diffusion components. these are simply the components of the right eigenvectors and can directly be used for plotting. 
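Conceptually, `compute_eigen` reduces to a symmetric eigendecomposition of the transition matrix; a minimal SciPy sketch of that step (not the method's actual code, which also handles sorting modes and sparse-input details):

.. code:: python

    import numpy as np
    from scipy.sparse.linalg import eigsh

    def top_diffusion_components(transitions_sym, n_comps=15):
        evals, evecs = eigsh(transitions_sym, k=n_comps, which="LM")
        order = np.argsort(-evals)  # sort="decrease"
        return evals[order], evecs[:, order]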
+ """ np.set_printoptions(precision=10) if self._transitions_sym is None: @@ -889,6 +895,7 @@ def _set_iroot_via_xroot(self, xroot: np.ndarray): xroot Vector that marks the root cell, the vector storing the initial condition, only relevant for computing pseudotime. + """ if self._adata.shape[1] != xroot.size: msg = "The root vector you provided does not have the correct dimension." diff --git a/src/scanpy/neighbors/_backends/rapids.py b/src/scanpy/neighbors/_backends/rapids.py index ed847a0674..dde9578b5f 100644 --- a/src/scanpy/neighbors/_backends/rapids.py +++ b/src/scanpy/neighbors/_backends/rapids.py @@ -92,7 +92,7 @@ def transform(self, X: ArrayLike) -> csr_matrix: return self.nn.kneighbors_graph(X_contiguous, mode="distance") def _more_tags(self) -> dict[str, Any]: - """See :label:`sklearn:estimator_tags`""" + """See :label:`sklearn:estimator_tags`.""" return { "requires_y": False, "preserves_dtype": [np.float32], diff --git a/src/scanpy/neighbors/_common.py b/src/scanpy/neighbors/_common.py index 5a329a8a55..a2da205eb7 100644 --- a/src/scanpy/neighbors/_common.py +++ b/src/scanpy/neighbors/_common.py @@ -36,8 +36,7 @@ def _get_sparse_matrix_from_indices_distances( *, keep_self: bool, ) -> csr_matrix: - """\ - Create a sparse matrix from a pair of indices and distances. + """Create a sparse matrix from a pair of indices and distances. If keep_self=False, it verifies that the first column is the cell itself, then removes it from the explicitly stored zeroes. @@ -73,8 +72,7 @@ def _get_indices_distances_from_dense_matrix( def _get_indices_distances_from_sparse_matrix( D: csr_matrix, n_neighbors: int ) -> tuple[NDArray[np.int32 | np.int64], NDArray[np.float32 | np.float64]]: - """\ - Get indices and distances from a sparse matrix. + """Get indices and distances from a sparse matrix. Makes sure that for both of the returned matrices: 1. the first column corresponds to the cell itself as nearest neighbor. diff --git a/src/scanpy/neighbors/_connectivity.py b/src/scanpy/neighbors/_connectivity.py index a4b7ff3fc3..31d12648bb 100644 --- a/src/scanpy/neighbors/_connectivity.py +++ b/src/scanpy/neighbors/_connectivity.py @@ -16,8 +16,7 @@ def gauss(distances: D, n_neighbors: int, *, knn: bool) -> D: - """ - Derive gaussian connectivities between data points from their distances. + """Derive gaussian connectivities between data points from their distances. Parameters ---------- @@ -27,6 +26,7 @@ def gauss(distances: D, n_neighbors: int, *, knn: bool) -> D: The number of nearest neighbors to consider. knn Specify if the distances have been restricted to k nearest neighbors. + """ # init distances if isinstance(distances, csr_matrix): @@ -107,8 +107,7 @@ def umap( set_op_mix_ratio: float = 1.0, local_connectivity: float = 1.0, ) -> csr_matrix: - """\ - This is from umap.fuzzy_simplicial_set :cite:p:`McInnes2018`. + """Wrap for `umap.fuzzy_simplicial_set` :cite:p:`McInnes2018`. Given a set of data X, a neighborhood size, and a measure of distance compute the fuzzy simplicial set (here represented as a fuzzy graph in diff --git a/src/scanpy/plotting/__init__.py b/src/scanpy/plotting/__init__.py index 53d6e4531b..254ccd03e0 100644 --- a/src/scanpy/plotting/__init__.py +++ b/src/scanpy/plotting/__init__.py @@ -1,3 +1,5 @@ +"""Plotting functions and classes.""" + from __future__ import annotations from . 
import palettes diff --git a/src/scanpy/plotting/_anndata.py b/src/scanpy/plotting/_anndata.py index 75dd210c0b..754c2df801 100755 --- a/src/scanpy/plotting/_anndata.py +++ b/src/scanpy/plotting/_anndata.py @@ -119,8 +119,7 @@ def scatter( save: str | bool | None = None, ax: Axes | None = None, ) -> Axes | list[Axes] | None: - """\ - Scatter plot along observations or variables axes. + """Scatter plot along observations or variables axes. Color the plot using annotations of observations (`.obs`), variables (`.var`) or expression of genes (`.var_names`). @@ -151,6 +150,7 @@ def scatter( Returns ------- If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it. + """ # color can be a obs column name or a matplotlib color specification (or a collection thereof) if color is not None: @@ -189,7 +189,8 @@ def _check_if_annotations( colors: Collection[str | ColorLike] | None = None, use_raw: bool | None = None, ) -> bool: - """Checks if `x`, `y`, and `colors` are annotations of `adata`. + """Check if `x`, `y`, and `colors` are annotations of `adata`. + In the case of `colors`, valid matplotlib colors are also accepted. If `axis_name` is `obs`, checks in `adata.obs.columns` and `adata.var_names`, @@ -587,8 +588,7 @@ def ranking( include_lowest: bool = False, show: bool | None = None, ) -> gridspec.GridSpec | None: - """\ - Plot rankings. + """Plot rankings. See, for example, how this is used in pl.pca_loadings. @@ -604,6 +604,7 @@ def ranking( Returns ------- Returns matplotlib gridspec with access to the axes. + """ if isinstance(keys, str) and indices is not None: scores = getattr(adata, attr)[keys][:, indices] @@ -733,8 +734,7 @@ def violin( scale: DensityNorm | Empty = _empty, **kwds, ) -> Axes | FacetGrid | None: - """\ - Violin plot. + """Violin plot. Wraps :func:`seaborn.violinplot` for :class:`~anndata.AnnData`. @@ -831,9 +831,10 @@ def violin( .. currentmodule:: scanpy - See also + See Also -------- pl.stacked_violin + """ import seaborn as sns # Slow import, only import if called @@ -984,8 +985,7 @@ def clustermap( save: bool | str | None = None, **kwds, ) -> ClusterGrid | None: - """\ - Hierarchically-clustered heatmap. + """Hierarchically-clustered heatmap. Wraps :func:`seaborn.clustermap` for :class:`~anndata.AnnData`. @@ -1021,6 +1021,7 @@ def clustermap( :context: close-figs sc.pl.clustermap(adata, obs_keys='cell_type') + """ import seaborn as sns # Slow import, only import if called @@ -1102,8 +1103,7 @@ def heatmap( norm: Normalize | None = None, **kwds, ) -> dict[str, Axes] | None: - """\ - Heatmap of the expression values of genes. + """Heatmap of the expression values of genes. If `groupby` is given, the heatmap is ordered by the respective group. For example, a list of marker genes can be plotted, ordered by clustering. If @@ -1132,7 +1132,7 @@ def heatmap( Dict of :class:`~matplotlib.axes.Axes` Examples - ------- + -------- .. plot:: :context: close-figs @@ -1143,10 +1143,11 @@ def heatmap( .. currentmodule:: scanpy - See also + See Also -------- pl.rank_genes_groups_heatmap tl.rank_genes_groups + """ var_names, var_group_labels, var_group_positions = _check_var_names_type( var_names, var_group_labels, var_group_positions @@ -1511,7 +1512,8 @@ def tracksplot( figsize: tuple[float, float] | None = None, **kwds, ) -> dict[str, Axes] | None: - """\ + """Compact plot of expression of a list of genes. + In this type of plot each var_name is plotted as a filled line plot where the y values correspond to the var_name values and x is each of the cells. 
Best results are obtained when using raw counts that are not log-transformed. @@ -1532,7 +1534,6 @@ Examples -------- - Using var_names as list: .. plot:: :context: close-figs import scanpy as sc adata = sc.datasets.pbmc68k_reduced() markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] sc.pl.tracksplot(adata, markers, 'bulk_labels') .. currentmodule:: scanpy - See also + See Also -------- pl.rank_genes_groups_tracksplot: to plot marker genes identified using the :func:`~scanpy.tl.rank_genes_groups` function. - """ + """ if groupby not in adata.obs_keys() or adata.obs[groupby].dtype.name != "category": msg = ( "groupby has to be a valid categorical observation. " @@ -1767,8 +1768,7 @@ def dendrogram( save: str | bool | None = None, ax: Axes | None = None, ) -> Axes: - """\ - Plots a dendrogram of the categories defined in `groupby`. + """Plot a dendrogram of the categories defined in `groupby`. See :func:`~scanpy.tl.dendrogram`. @@ -1849,8 +1849,7 @@ def correlation_matrix( norm: Normalize | None = None, **kwds, ) -> list[Axes] | None: - """\ - Plots the correlation matrix computed as part of `sc.tl.dendrogram`. + """Plot the correlation matrix computed as part of `sc.tl.dendrogram`. Parameters ---------- @@ -1883,10 +1882,10 @@ -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.tl.dendrogram(adata, 'bulk_labels') - >>> sc.pl.correlation_matrix(adata, 'bulk_labels') - """ + >>> sc.tl.dendrogram(adata, "bulk_labels") + >>> sc.pl.correlation_matrix(adata, "bulk_labels") + """ dendrogram_key = _get_dendrogram_key(adata, _dk(dendrogram), groupby) index = adata.uns[dendrogram_key]["categories_idx_ordered"] @@ -2012,9 +2011,7 @@ def _prepare_dataframe( layer: str | None = None, gene_symbols: str | None = None, ) -> tuple[Sequence[str], pd.DataFrame]: - """ - Given the anndata object, prepares a data frame in which the row index are the categories - defined by group by and the columns correspond to var_names. + """Prepare a data frame of categories (`groupby`) × `var_names`. Parameters ---------- @@ -2042,8 +2039,8 @@ Returns ------- Tuple of `pandas.DataFrame` and list of categories. - """ + """ sanitize_anndata(adata) use_raw = _check_use_raw(adata, use_raw, layer=layer) if isinstance(var_names, str): @@ -2133,8 +2130,8 @@ def _plot_gene_groups_brackets( rotation: float | None = None, orientation: Literal["top", "right"] = "top", ): - """\ - Draws brackets that represent groups of genes on the give axis. + """Draw brackets that represent groups of genes on the given axis. + For best results, this axis is located on top of an image whose x axis contains gene names. @@ -2166,9 +2163,11 @@ rotated, otherwise, they are rotated 90 degrees orientation location of the brackets. Either `top` or `right` + Returns ------- None + """ import matplotlib.patches as patches from matplotlib.path import Path @@ -2266,9 +2265,7 @@ def _reorder_categories_after_dendrogram( var_group_positions: Sequence[tuple[int, int]] | None, categories: Sequence[str], ): - """\ - Function used by plotting functions that need to reorder the the groupby - observations based on the dendrogram results. + """Reorder the groupby observations based on the dendrogram results. The function checks if a dendrogram has already been precomputed. If not, `sc.tl.dendrogram` is run with default parameters.
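The behavior described here can be exercised from the public API: precompute the dendrogram, then any grouped plot that requests it reorders its categories to match. A small sketch:

.. code:: python

    import scanpy as sc

    adata = sc.datasets.pbmc68k_reduced()
    sc.tl.dendrogram(adata, groupby="bulk_labels")  # precompute with chosen settings
    sc.pl.dotplot(
        adata, ["CD79A", "CST3", "LYZ"], groupby="bulk_labels", dendrogram=True
    )  # categories now follow the tree order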
@@ -2282,8 +2279,8 @@ def _reorder_categories_after_dendrogram( dictionary with keys: 'categories_idx_ordered', 'var_group_names_idx_ordered', 'var_group_labels', and 'var_group_positions' - """ + """ if isinstance(groupby, str): groupby = [groupby] @@ -2421,18 +2418,16 @@ def _plot_dendrogram( remove_labels: bool = True, ticks: Collection[float] | None = None, ): - """\ - Plots a dendrogram on the given ax using the precomputed dendrogram - information stored in `.uns[dendrogram_key]` - """ + """Plot a dendrogram on the given ax. + Uses the precomputed dendrogram information stored in `.uns[dendrogram_key]`. + """ dendrogram_key = _get_dendrogram_key(adata, dendrogram_key, groupby) def translate_pos(pos_list, new_ticks, old_ticks): - """\ - transforms the dendrogram coordinates to a given new position. - The xlabel_pos and orig_ticks should be of the same - length. + """Transform the dendrogram coordinates to a given new position. + + The xlabel_pos and orig_ticks should be of the same length. This is mostly done for the heatmap case, where the position of the dendrogram leaves needs to be adjusted depending on the category size. @@ -2456,10 +2451,11 @@ def translate_pos(pos_list, new_ticks, old_ticks): -------- >>> translate_pos( ... [5, 15, 20, 21], - ... [0, 1, 2, 3 ], + ... [0, 1, 2, 3], ... [5, 15, 25, 35], ... ) [0, 1, 1.5, 1.6] + """ # of given coordinates. @@ -2544,9 +2540,10 @@ def _plot_categories_as_colorblocks( orientation: Literal["top", "bottom", "left", "right"] = "left", cmap_name: str = "tab20", ): - """\ - Plots categories as colored blocks. If orientation is 'left', the categories - are plotted vertically, otherwise they are plotted horizontally. + """Plot categories as colored blocks. + + If orientation is 'left', the categories are plotted vertically, + otherwise they are plotted horizontally. Parameters ---------- @@ -2561,8 +2558,8 @@ Returns ------- ticks position, labels, colormap - """ + """ groupby = obs_tidy.index.name from matplotlib.colors import BoundaryNorm, ListedColormap @@ -2641,9 +2638,9 @@ def _plot_categories_as_colorblocks( def _plot_colorbar(mappable, fig, subplot_spec, max_cbar_height: float = 4.0): - """ - Plots a vertical color bar based on mappable. - The height of the colorbar is min(figure-height, max_cmap_height) + """Plot a vertical color bar based on mappable. + + The height of the colorbar is min(figure-height, max_cbar_height). Parameters ---------- @@ -2659,6 +2656,7 @@ def _plot_colorbar(mappable, fig, subplot_spec, max_cbar_height: float = 4.0): Returns ------- color bar ax + """ width, height = fig.get_size_inches() if height > max_cbar_height: @@ -2678,9 +2676,7 @@ def _plot_colorbar(mappable, fig, subplot_spec, max_cbar_height: float = 4.0): def _check_var_names_type(var_names, var_group_labels, var_group_positions): - """ - checks if var_names is a dict. Is this is the cases, then set the - correct values for var_group_labels and var_group_positions + """If var_names is a dict, set the `var_group_labels` and `var_group_positions`.
Returns ------- diff --git a/src/scanpy/plotting/_baseplot_class.py b/src/scanpy/plotting/_baseplot_class.py index 859a6f93cd..fe4a2af52c 100644 --- a/src/scanpy/plotting/_baseplot_class.py +++ b/src/scanpy/plotting/_baseplot_class.py @@ -1,4 +1,4 @@ -"""BasePlot for dotplot, matrixplot and stacked_violin""" +"""BasePlot for dotplot, matrixplot and stacked_violin.""" from __future__ import annotations @@ -64,9 +64,7 @@ class VarGroups(NamedTuple): class BasePlot: - """\ - Generic class for the visualization of AnnData categories and - selected `var` (features or genes). + """Generic class for the visualization of AnnData categories and selected `var` (features or genes). Takes care of the visual location of a main plot, additional plots in the margins (e.g. dendrogram, margin totals) and legends. Also @@ -220,8 +218,7 @@ def __init__( @old_positionals("swap_axes") def swap_axes(self, *, swap_axes: bool | None = True) -> Self: - """ - Plots a transposed image. + """Plot a transposed image. By default, the x axis contains `var_names` (e.g. genes) and the y axis the `groupby` categories. By setting `swap_axes` then x are @@ -254,9 +251,9 @@ def add_dendrogram( dendrogram_key: str | None = None, size: float | None = 0.8, ) -> Self: - r"""\ - Show dendrogram based on the hierarchical clustering between the `groupby` - categories. Categories are reordered to match the dendrogram order. + r"""Show dendrogram based on the hierarchical clustering between the `groupby` categories. + + Categories are reordered to match the dendrogram order. The dendrogram information is computed using :func:`scanpy.tl.dendrogram`. If `sc.tl.dendrogram` has not been called previously the function is called @@ -295,15 +292,17 @@ -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> markers = {'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'} - >>> plot = sc.pl._baseplot_class.BasePlot(adata, markers, groupby='bulk_labels').add_dendrogram() + >>> markers = {"T-cell": "CD3D", "B-cell": "CD79A", "myeloid": "CST3"} + >>> plot = sc.pl._baseplot_class.BasePlot( ... adata, markers, groupby="bulk_labels" ... ).add_dendrogram() >>> plot.plot_group_extra # doctest: +NORMALIZE_WHITESPACE {'kind': 'dendrogram', 'width': 0.8, 'dendrogram_key': None, 'dendrogram_ticks': array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5])} - """ + """ if not show: self.plot_group_extra = None return self @@ -342,8 +341,7 @@ def add_totals( size: float | None = 0.8, color: ColorLike | Sequence[ColorLike] | None = None, ) -> Self: - r"""\ - Show barplot for the number of cells in in `groupby` category. + r"""Show barplot for the number of cells in each `groupby` category. The barplot is by default shown on the right side of the plot or on top if the axes are swapped. @@ -375,9 +373,11 @@ -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> markers = {'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'} - >>> plot = sc.pl._baseplot_class.BasePlot(adata, markers, groupby='bulk_labels').add_totals() - >>> plot.plot_group_extra['counts_df'] # doctest: +SKIP + >>> markers = {"T-cell": "CD3D", "B-cell": "CD79A", "myeloid": "CST3"} + >>> plot = sc.pl._baseplot_class.BasePlot( ... adata, markers, groupby="bulk_labels" ...
).add_totals() + >>> plot.plot_group_extra["counts_df"] # doctest: +SKIP bulk_labels CD4+/CD25 T Reg 68 CD4+/CD45RA+/CD25- Naive T 8 @@ -390,6 +390,7 @@ def add_totals( CD56+ NK 31 Dendritic 240 Name: count, dtype: int64 + """ self.group_extra_size = size @@ -417,20 +418,19 @@ def add_totals( @old_positionals("cmap") def style(self, *, cmap: Colormap | str | None | Empty = _empty) -> Self: - """\ - Set visual style parameters + r"""Set visual style parameters. Parameters ---------- cmap Matplotlib color map, specified by name or directly. - If ``None``, use :obj:`matplotlib.rcParams`\\ ``["image.cmap"]`` + If ``None``, use :obj:`matplotlib.rcParams`\ ``["image.cmap"]`` Returns ------- Returns `self` for method chaining. - """ + """ if cmap is not _empty: self.cmap = cmap return self @@ -443,8 +443,7 @@ def legend( title: str | None = DEFAULT_COLOR_LEGEND_TITLE, width: float | None = DEFAULT_LEGENDS_WIDTH, ) -> Self: - r"""\ - Configure legend parameters + r"""Configure legend parameters. Parameters ---------- @@ -452,7 +451,7 @@ show Set to 'False' to hide the default plot of the legend. This sets the legend width to zero which will result in a wider main plot. title - Legend title. Appears on top of the color bar. Use '\\n' to add line breaks. + Legend title. Appears on top of the color bar. Use ``\n`` to add line breaks. width Width of the legend. The unit is the same as in matplotlib (inches) @@ -463,7 +462,6 @@ Examples -------- - Set legend title: >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] >>> dp = sc.pl.DotPlot(adata, markers, groupby='bulk_labels') \ ... .legend(title='log(UMI counts + 1)') >>> dp.color_legend_title 'log(UMI counts + 1)' - """ + """ if not show: # turn off legends by setting width to 0 self.legends_width = 0 @@ -492,9 +490,7 @@ def get_axes(self) -> dict[str, Axes]: def _plot_totals( self, total_barplot_ax: Axes, orientation: Literal["top", "right"] ): - """ - Makes the bar plot for totals - """ + """Make the bar plot for totals.""" params = self.plot_group_extra counts_df: pd.DataFrame = params["counts_df"] if self.categories_order is not None: @@ -567,8 +563,7 @@ def _plot_totals( total_barplot_ax.axis("off") def _plot_colorbar(self, color_legend_ax: Axes, normalize) -> None: - """ - Plots a horizontal colorbar given the ax an normalize values + """Plot a horizontal colorbar given the ax and normalize values. Parameters ---------- @@ -578,6 +573,7 @@ def _plot_colorbar(self, color_legend_ax: Axes, normalize) -> None: Returns ------- `None`, updates color_legend_ax + """ cmap = plt.get_cmap(self.cmap) @@ -656,18 +652,17 @@ def _mainplot(self, ax: Axes): ) def make_figure(self): - r""" - Renders the image but does not call :func:`matplotlib.pyplot.show`. Useful - when several plots are put together into one figure. + r"""Render the image without calling :func:`matplotlib.pyplot.show`. + + Useful when several plots are put together into one figure. - See also + See Also -------- `show()`: Renders and shows the plot. `savefig()`: Saves the plot. Examples -------- - >>> import scanpy as sc >>> import matplotlib.pyplot as plt >>> adata = sc.datasets.pbmc68k_reduced() @@ ... >>> sc.pl.MatrixPlot(adata, markers, groupby='bulk_labels', ax=ax0) \ ...
.style(cmap='Blues', edge_color='none').make_figure() >>> sc.pl.DotPlot(adata, markers, groupby='bulk_labels', ax=ax1).make_figure() - """ + """ category_height = self.DEFAULT_CATEGORY_HEIGHT category_width = self.DEFAULT_CATEGORY_WIDTH @@ -821,8 +816,7 @@ def make_figure(self): self.ax_dict = return_ax_dict def show(self, return_axes: bool | None = None) -> dict[str, Axes] | None: - """ - Show the figure + """Show the figure. Parameters ---------- @@ -835,19 +829,19 @@ If `return_axes=True`: Dict of :class:`matplotlib.axes.Axes`. The dict key indicates the type of ax (e.g. `mainplot_ax`) - See also + See Also -------- `render()`: Renders the plot but does not call :func:`matplotlib.pyplot.show` `savefig()`: Saves the plot. Examples - ------- + -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"] >>> sc.pl._baseplot_class.BasePlot(adata, markers, groupby="bulk_labels").show() - """ + """ self.make_figure() if return_axes: @@ -856,8 +850,7 @@ def show(self, return_axes: bool | None = None) -> dict[str, Axes] | None: plt.show() def savefig(self, filename: str, bbox_inches: str | None = "tight", **kwargs): - """ - Save the current figure + """Save the current figure. Parameters ---------- @@ -869,27 +862,26 @@ def savefig(self, filename: str, bbox_inches: str | None = "tight", **kwargs): kwargs Passed to :func:`matplotlib.pyplot.savefig` - See also + See Also -------- `render()`: Renders the plot but does not call :func:`matplotlib.pyplot.show` `show()`: Renders and shows the plot Examples - ------- + -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"] >>> sc.pl._baseplot_class.BasePlot( ... adata, markers, groupby="bulk_labels" ... ).savefig("plot.pdf") + """ self.make_figure() plt.savefig(filename, bbox_inches=bbox_inches, **kwargs) def _reorder_categories_after_dendrogram(self, dendrogram_key: str | None) -> None: - """\ - Function used by plotting functions that need to reorder the the groupby - observations based on the dendrogram results. + """Reorder the groupby observations based on the dendrogram results. The function checks if a dendrogram has already been precomputed. If not, `sc.tl.dendrogram` is run with default parameters. @@ -903,10 +895,11 @@ def _reorder_categories_after_dendrogram(self, dendrogram_key: str | None) -> No `None`, internally updates 'categories_idx_ordered', 'var_group_names_idx_ordered', 'var_group_labels' and 'var_group_positions' + """ def _format_first_three_categories(_categories): - """used to clean up warning message""" + """Clean up warning message.""" _categories = list(_categories) if len(_categories) > 3: _categories = _categories[:3] + ["etc."] @@ -984,8 +977,8 @@ def _plot_var_groups_brackets( rotation: float | None = None, orientation: Literal["top", "right"] = "top", ) -> None: - """\ - Draws brackets that represent groups of genes on the give axis. + """Draw brackets that represent groups of genes on the given axis. + For best results, this axis is located on top of an image whose x axis contains gene names. @@ -1015,6 +1008,7 @@ rotated, otherwise, they are rotated 90 degrees orientation location of the brackets.
Either `top` or `right` + """ import matplotlib.patches as patches from matplotlib.path import Path @@ -1098,11 +1092,10 @@ def _plot_var_groups_brackets( def _var_groups( var_names: _VarNames | Mapping[str, _VarNames], *, ref: pd.Index[str] ) -> tuple[Sequence[str], VarGroups | None]: - """ - Normalize var_names. + """Normalize var_names. + If it’s a mapping, also return var_group_labels and var_group_positions. """ - if not isinstance(var_names, Mapping): var_names = [var_names] if isinstance(var_names, str) else var_names return var_names, None diff --git a/src/scanpy/plotting/_docs.py b/src/scanpy/plotting/_docs.py index 61f0baa420..bffa097f90 100644 --- a/src/scanpy/plotting/_docs.py +++ b/src/scanpy/plotting/_docs.py @@ -1,6 +1,4 @@ -"""\ -Shared docstrings for plotting function parameters. -""" +"""Shared docstrings for plotting function parameters.""" from __future__ import annotations @@ -130,7 +128,7 @@ vcenter The value representing the center of the color scale. Useful for diverging colormaps. The format is the same as for `vmin`. - Example: sc.pl.umap(adata, color='TREM2', vcenter='p50', cmap='RdBu_r')\ + Example: ``sc.pl.umap(adata, color='TREM2', vcenter='p50', cmap='RdBu_r')``\ """ doc_vboundnorm = """\ diff --git a/src/scanpy/plotting/_dotplot.py b/src/scanpy/plotting/_dotplot.py index da3d16379b..19a8d11c72 100644 --- a/src/scanpy/plotting/_dotplot.py +++ b/src/scanpy/plotting/_dotplot.py @@ -35,10 +35,10 @@ @_doc_params(common_plot_args=doc_common_plot_args) class DotPlot(BasePlot): - """\ - Allows the visualization of two values that are encoded as - dot size and color. The size usually represents the fraction - of cells (obs) that have a non-zero value for genes (var). + """Allows the visualization of two values that are encoded as dot size and color. + + The size usually represents the fraction of cells (obs) + that have a non-zero value for genes (var). For each var_name and each `groupby` category a dot is plotted. Each dot represents two values: mean expression within each category @@ -73,7 +73,7 @@ class DotPlot(BasePlot): kwds Are passed to :func:`matplotlib.pyplot.scatter`. - See also + See Also -------- :func:`~scanpy.pl.dotplot`: Simpler way to call DotPlot but with less options. :func:`~scanpy.pl.rank_genes_groups_dotplot`: to plot marker @@ -81,16 +81,15 @@ class DotPlot(BasePlot): Examples -------- - >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] - >>> sc.pl.DotPlot(adata, markers, groupby='bulk_labels').show() + >>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"] + >>> sc.pl.DotPlot(adata, markers, groupby="bulk_labels").show() Using var_names as dict: - >>> markers = {{'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'}} - >>> sc.pl.DotPlot(adata, markers, groupby='bulk_labels').show() + >>> markers = {{"T-cell": "CD3D", "B-cell": "CD79A", "myeloid": "CST3"}} + >>> sc.pl.DotPlot(adata, markers, groupby="bulk_labels").show() """ @@ -316,8 +315,7 @@ def style( x_padding: float | Empty = _empty, y_padding: float | Empty = _empty, ) -> Self: - r"""\ - Modifies plot visual parameters + r"""Modify plot visual parameters. Parameters ---------- @@ -370,8 +368,7 @@ def style( :class:`~scanpy.pl.DotPlot` Examples - ------- - + -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] @@ -386,6 +383,7 @@ def style( >>> sc.pl.DotPlot(adata, markers, groupby='bulk_labels') \ ... 
.style(dot_edge_color='black', dot_edge_lw=1, grid=True) \ ... .show() + """ super().style(cmap=cmap) @@ -432,8 +430,7 @@ def legend( colorbar_title: str | None = DEFAULT_COLOR_LEGEND_TITLE, width: float | None = DEFAULT_LEGENDS_WIDTH, ) -> Self: - """\ - Configures dot size and the colorbar legends + r"""Configure dot size and the colorbar legends. Parameters ---------- @@ -445,10 +442,10 @@ def legend( show_colorbar Set to `False` to hide the colorbar legend size_title - Title for the dot size legend. Use '\\n' to add line breaks. Appears on top + Title for the dot size legend. Use ``\n`` to add line breaks. Appears on top of dot sizes colorbar_title - Title for the color bar. Use '\\n' to add line breaks. Appears on top of the + Title for the color bar. Use ``\n`` to add line breaks. Appears on top of the color bar width Width of the legends area. The unit is the same as in matplotlib (inches). @@ -459,16 +456,15 @@ def legend( Examples -------- - Set color bar title: >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> markers = {'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'} - >>> dp = sc.pl.DotPlot(adata, markers, groupby='bulk_labels') - >>> dp.legend(colorbar_title='log(UMI counts + 1)').show() - """ + >>> markers = {"T-cell": "CD3D", "B-cell": "CD79A", "myeloid": "CST3"} + >>> dp = sc.pl.DotPlot(adata, markers, groupby="bulk_labels") + >>> dp.legend(colorbar_title="log(UMI counts + 1)").show() + """ if not show: # turn of legends by setting width to 0 self.legends_width = 0 @@ -643,10 +639,10 @@ def _dotplot( norm: Normalize | None, **kwds, ): - """\ - Makes a *dot plot* given two data frames, one containing - the doc size and other containing the dot color. The indices and - columns of the data frame are used to label the output image + """Make a *dot plot* given two data frames. + + One containing the dot size and the other containing the dot color. + The indices and columns of the data frame are used to label the output image. The dots are plotted using :func:`matplotlib.pyplot.scatter`. Thus, additional arguments can be passed. @@ -886,8 +882,7 @@ def dotplot( smallest_dot: float = DotPlot.DEFAULT_SMALLEST_DOT, **kwds, ) -> DotPlot | dict | None: - """\ - Makes a *dot plot* of the expression values of `var_names`. + r"""Make a *dot plot* of the expression values of `var_names`. For each var_name and each `groupby` category a dot is plotted. Each dot represents two values: mean expression within each category @@ -912,7 +907,7 @@ {common_plot_args} {groupby_plots_args} size_title - Title for the size legend. New line character (\\n) can be used. + Title for the size legend. New line character (\n) can be used. expression_cutoff Expression cutoff that is used for binarizing the gene expression and determining the fraction of cells expressing given genes. A gene is @@ -940,7 +935,7 @@ def dotplot( If `return_fig` is `True`, returns a :class:`~scanpy.pl.DotPlot` object, else if `show` is false, return axes dict - See also + See Also -------- :class:`~scanpy.pl.DotPlot`: The DotPlot class can be used to to control several visual parameters not available in this function. :func:`~scanpy.pl.rank_genes_groups_dotplot`: to plot marker genes identified using the :func:`~scanpy.tl.rank_genes_groups` function. Examples -------- - Create a dot plot using the given markers and the PBMC example dataset grouped by the category 'bulk_labels'.
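The example call referenced in the sentence above lies outside this hunk; as a hedged sketch, it would look roughly like the surrounding doctests (same PBMC dataset and marker list):

>>> import scanpy as sc
>>> adata = sc.datasets.pbmc68k_reduced()
>>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"]
>>> sc.pl.dotplot(adata, markers, groupby="bulk_labels")  # doctest: +SKIP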
@@ -985,7 +979,6 @@ def dotplot( print(axes_dict) """ - # backwards compatibility: previous version of dotplot used `color_map` # instead of `cmap` cmap = kwds.pop("color_map", cmap) diff --git a/src/scanpy/plotting/_matrixplot.py b/src/scanpy/plotting/_matrixplot.py index 9184f2455b..242d758841 100644 --- a/src/scanpy/plotting/_matrixplot.py +++ b/src/scanpy/plotting/_matrixplot.py @@ -34,8 +34,7 @@ @_doc_params(common_plot_args=doc_common_plot_args) class MatrixPlot(BasePlot): - """\ - Allows the visualization of values using a color map. + """Allows the visualization of values using a color map. Parameters ---------- @@ -60,7 +59,7 @@ class MatrixPlot(BasePlot): kwds Are passed to :func:`matplotlib.pyplot.scatter`. - See also + See Also -------- :func:`~scanpy.pl.matrixplot`: Simpler way to call MatrixPlot but with less options. :func:`~scanpy.pl.rank_genes_groups_matrixplot`: to plot marker genes identified @@ -68,7 +67,6 @@ class MatrixPlot(BasePlot): Examples -------- - Simple visualization of the average expression of a few genes grouped by the category 'bulk_labels'. @@ -88,6 +86,7 @@ class MatrixPlot(BasePlot): markers = {{'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'}} sc.pl.MatrixPlot(adata, markers, groupby='bulk_labels').show() + """ DEFAULT_SAVE_PREFIX = "matrixplot_" @@ -203,27 +202,26 @@ def style( edge_color: ColorLike | None | Empty = _empty, edge_lw: float | None | Empty = _empty, ) -> Self: - """\ - Modifies plot visual parameters. + r"""Modify plot visual parameters. Parameters ---------- cmap Matplotlib color map, specified by name or directly. - If ``None``, use :obj:`matplotlib.rcParams`\\ ``["image.cmap"]`` + If ``None``, use :obj:`matplotlib.rcParams`\ ``["image.cmap"]`` edge_color Edge color between the squares of matrix plot. - If ``None``, use :obj:`matplotlib.rcParams`\\ ``["patch.edgecolor"]`` + If ``None``, use :obj:`matplotlib.rcParams`\ ``["patch.edgecolor"]`` edge_lw Edge line width. - If ``None``, use :obj:`matplotlib.rcParams`\\ ``["lines.linewidth"]`` + If ``None``, use :obj:`matplotlib.rcParams`\ ``["lines.linewidth"]`` Returns ------- :class:`~scanpy.pl.MatrixPlot` Examples - ------- + -------- .. plot:: :context: close-figs @@ -369,8 +367,7 @@ def matrixplot( norm: Normalize | None = None, **kwds, ) -> MatrixPlot | dict[str, Axes] | None: - """\ - Creates a heatmap of the mean expression values per group of each var_names. + """Create a heatmap of the mean expression values per group of each of the `var_names`. This function provides a convenient interface to the :class:`~scanpy.pl.MatrixPlot` class. If you need more flexibility, you should use :class:`~scanpy.pl.MatrixPlot` @@ -390,7 +387,7 @@ def matrixplot( If `return_fig` is `True`, returns a :class:`~scanpy.pl.MatrixPlot` object, else if `show` is false, return axes dict - See also + See Also -------- :class:`~scanpy.pl.MatrixPlot`: The MatrixPlot class can be used to to control several visual parameters not available in this function. @@ -430,8 +427,8 @@ def matrixplot( :context: close-figs axes_dict = mp.get_axes() - """ + """ mp = MatrixPlot( adata, var_names, diff --git a/src/scanpy/plotting/_preprocessing.py b/src/scanpy/plotting/_preprocessing.py index b51688082e..a1f6d59916 100644 --- a/src/scanpy/plotting/_preprocessing.py +++ b/src/scanpy/plotting/_preprocessing.py @@ -41,6 +41,7 @@ def highly_variable_genes( If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {{`'.pdf'`, `'.png'`, `'.svg'`}}.
+ """ if isinstance(adata_or_result, AnnData): result = adata_or_result.var @@ -112,8 +113,7 @@ def filter_genes_dispersion( show: bool | None = None, save: bool | str | None = None, ) -> None: - """\ - Plot dispersions versus means for genes. + """Plot dispersions versus means for genes. Produces Supp. Fig. 5c of Zheng et al. (2017) and MeanVarPlot() of Seurat. @@ -129,6 +129,7 @@ def filter_genes_dispersion( If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {{`'.pdf'`, `'.png'`, `'.svg'`}}. + """ highly_variable_genes( result, log=log, show=show, save=save, highly_variable_genes=False diff --git a/src/scanpy/plotting/_qc.py b/src/scanpy/plotting/_qc.py index cd3f764468..602fae580f 100644 --- a/src/scanpy/plotting/_qc.py +++ b/src/scanpy/plotting/_qc.py @@ -32,8 +32,7 @@ def highest_expr_genes( ax: Axes | None = None, **kwds, ): - """\ - Fraction of counts assigned to each gene over all cells. + """Fraction of counts assigned to each gene over all cells. Computes, for each gene, the fraction of counts assigned to that gene within a cell. The `n_top` genes with the highest mean fraction over all cells are @@ -70,6 +69,7 @@ def highest_expr_genes( Returns ------- If `show==False` a :class:`~matplotlib.axes.Axes`. + """ import seaborn as sns # Slow import, only import if called from scipy.sparse import issparse diff --git a/src/scanpy/plotting/_rcmod.py b/src/scanpy/plotting/_rcmod.py index 525f3e97da..cfd672c9ed 100644 --- a/src/scanpy/plotting/_rcmod.py +++ b/src/scanpy/plotting/_rcmod.py @@ -14,7 +14,6 @@ def set_rcParams_scanpy(fontsize=14, color_map=None): Call this through `settings.set_figure_params`. """ - # figure rcParams["figure.figsize"] = (4, 4) rcParams["figure.subplot.left"] = 0.18 diff --git a/src/scanpy/plotting/_scrublet.py b/src/scanpy/plotting/_scrublet.py index 050aec6f53..3459986d56 100644 --- a/src/scanpy/plotting/_scrublet.py +++ b/src/scanpy/plotting/_scrublet.py @@ -33,8 +33,7 @@ def scrublet_score_distribution( show: bool = True, save: str | bool | None = None, ) -> Figure | Sequence[tuple[Axes, Axes]] | tuple[Axes, Axes] | None: - """\ - Plot histogram of doublet scores for observed transcriptomes and simulated doublets. + """Plot histogram of doublet scores for observed transcriptomes and simulated doublets. The histogram for simulated doublets is useful for determining the correct doublet score threshold. @@ -63,14 +62,14 @@ def scrublet_score_distribution( If ``return_fig`` is True, a :class:`~matplotlib.figure.Figure`. If ``show==False`` a list of :class:`~matplotlib.axes.Axes`. - See also + See Also -------- :func:`~scanpy.pp.scrublet`: Main way of running Scrublet, runs preprocessing, doublet simulation and calling. :func:`~scanpy.pp.scrublet_simulate_doublets`: Run Scrublet's doublet simulation separately for advanced usage. - """ + """ if "scrublet" not in adata.uns: msg = "Please run scrublet before trying to generate the scrublet plot." raise ValueError(msg) diff --git a/src/scanpy/plotting/_stacked_violin.py b/src/scanpy/plotting/_stacked_violin.py index 3c58ead35f..0e883c07f4 100644 --- a/src/scanpy/plotting/_stacked_violin.py +++ b/src/scanpy/plotting/_stacked_violin.py @@ -38,8 +38,7 @@ @_doc_params(common_plot_args=doc_common_plot_args) class StackedViolin(BasePlot): - """\ - Stacked violin plots. + """Stacked violin plots. Makes a compact image composed of individual violin plots (from :func:`~seaborn.violinplot`) stacked on top of each other. 
@@ -88,7 +87,7 @@ class StackedViolin(BasePlot): Are passed to :func:`~seaborn.violinplot`. - See also + See Also -------- :func:`~scanpy.pl.stacked_violin`: simpler way to call StackedViolin but with less options. @@ -96,19 +95,23 @@ class StackedViolin(BasePlot): to plot marker genes identified using :func:`~scanpy.tl.rank_genes_groups` Examples - ------- - + -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] - >>> sc.pl.StackedViolin(adata, markers, groupby='bulk_labels', dendrogram=True) # doctest: +ELLIPSIS + >>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"] + >>> sc.pl.StackedViolin( + ... adata, markers, groupby="bulk_labels", dendrogram=True + ... ) # doctest: +ELLIPSIS Using var_names as dict: - >>> markers = {{'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'}} - >>> sc.pl.StackedViolin(adata, markers, groupby='bulk_labels', dendrogram=True) # doctest: +ELLIPSIS + >>> markers = {{"T-cell": "CD3D", "B-cell": "CD79A", "myeloid": "CST3"}} + >>> sc.pl.StackedViolin( + ... adata, markers, groupby="bulk_labels", dendrogram=True + ... ) # doctest: +ELLIPSIS + """ DEFAULT_SAVE_PREFIX = "stacked_violin_" @@ -143,8 +146,8 @@ class StackedViolin(BasePlot): # None will draw unadorned violins. DEFAULT_INNER = None + # Called unconditionally when accessing an instance attribute: def __getattribute__(self, name: str) -> object: - """Called unconditionally when accessing an instance attribute""" # If the user has set the deprecated version on the class, # and our code accesses the new version from the instance, # return the user-specified version instead and warn. @@ -284,8 +287,7 @@ def style( # deprecated scale: DensityNorm | Empty = _empty, ) -> Self: - r"""\ - Modifies plot visual parameters + r"""Modify plot visual parameters. Parameters ---------- @@ -333,7 +335,7 @@ def style( :class:`~scanpy.pl.StackedViolin` Examples - ------- + -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] @@ -342,6 +344,7 @@ def style( >>> sc.pl.StackedViolin(adata, markers, groupby='bulk_labels') \ ... .style(row_palette='Blues', linewidth=0).show() + """ super().style(cmap=cmap) @@ -583,10 +586,7 @@ def _make_rows_of_violinplots( self._setup_violin_axes_ticks(row_ax, num_cols) def _setup_violin_axes_ticks(self, row_ax: Axes, num_cols: int): - """ - Configures each of the violin plot axes ticks like remove or add labels etc. - - """ + """Configure the ticks of each violin plot axis, e.g. removing or adding labels.""" # remove the default seaborn grids because in such a compact # plot are unnecessary @@ -708,8 +708,7 @@ def stacked_violin( scale: DensityNorm | Empty = _empty, **kwds, ) -> StackedViolin | dict | None: - """\ - Stacked violin plots. + """Stacked violin plots. Makes a compact image composed of individual violin plots (from :func:`~seaborn.violinplot`) stacked on top of each other. @@ -758,7 +757,7 @@ def stacked_violin( If `return_fig` is `True`, returns a :class:`~scanpy.pl.StackedViolin` object, else if `show` is false, return axes dict - See also + See Also -------- :class:`~scanpy.pl.StackedViolin`: The StackedViolin class can be used to to control several visual parameters not available in this function. :func:`~scanpy.pl.rank_genes_groups_stacked_violin`: to plot marker genes identified using the :func:`~scanpy.tl.rank_genes_groups` function.
Examples - ------- - + -------- Visualization of violin plots of a few genes grouped by the category `bulk_labels`: .. plot:: diff --git a/src/scanpy/plotting/_tools/__init__.py b/src/scanpy/plotting/_tools/__init__.py index 8c89b34fb4..d219a64fa8 100644 --- a/src/scanpy/plotting/_tools/__init__.py +++ b/src/scanpy/plotting/_tools/__init__.py @@ -57,8 +57,7 @@ @_doc_params(scatter_bulk=doc_scatter_embedding, show_save_ax=doc_show_save_ax) def pca_overview(adata: AnnData, **params): - """\ - Plot PCA results. + """Plot PCA results. The parameters are the ones of the scatter plot. Call pca_ranking separately if you want to change the default settings. @@ -79,6 +78,7 @@ def pca_overview(adata: AnnData, **params): If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {{`'.pdf'`, `'.png'`, `'.svg'`}}. + Examples -------- .. plot:: @@ -90,9 +90,10 @@ .. currentmodule:: scanpy - See also + See Also -------- pp.pca + """ show = params.pop("show", None) pca(adata, **params, show=False) @@ -114,8 +115,7 @@ def pca_loadings( show: bool | None = None, save: str | bool | None = None, ): - """\ - Rank genes according to contributions to PCs. + """Rank genes according to contributions to PCs. Parameters ---------- @@ -188,8 +188,7 @@ def pca_variance_ratio( show: bool | None = None, save: bool | str | None = None, ): - """\ - Plot the variance ratio. + """Plot the variance ratio. Parameters ---------- @@ -203,6 +202,7 @@ def pca_variance_ratio( If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}. + """ ranking( adata, @@ -232,13 +232,13 @@ def dpt_timeseries( as_heatmap: bool = True, marker: str | Sequence[str] = ".", ): - """\ - Heatmap of pseudotime series. + """Heatmap of pseudotime series. Parameters ---------- as_heatmap Plot the timeseries as heatmap. + """ if adata.n_vars > 100: logg.warning( @@ -278,8 +278,7 @@ def dpt_groups_pseudotime( save: bool | str | None = None, marker: str | Sequence[str] = ".", ): - """\ - Plot groups and pseudotime. + """Plot groups and pseudotime. Parameters ---------- @@ -289,6 +288,7 @@ def dpt_groups_pseudotime( {show_save} marker Marker style. See :mod:`~matplotlib.markers` for details. + """ _, (ax_grp, ax_ord) = plt.subplots(2, 1) timeseries_subplot( @@ -348,8 +348,7 @@ def rank_genes_groups( ax: Axes | None = None, **kwds, ) -> list[Axes] | None: - """\ - Plot ranking of genes. + """Plot ranking of genes. Parameters ---------- @@ -395,7 +394,7 @@ def rank_genes_groups( .. currentmodule:: scanpy - See also + See Also -------- tl.rank_genes_groups @@ -501,9 +500,7 @@ def rank_genes_groups( def _fig_show_save_or_axes( plot_obj: BasePlot, *, return_fig: bool, show: bool | None, save: bool | None ): - """ - Decides what to return - """ + """Decide what to return.""" if return_fig: return plot_obj plot_obj.make_figure() @@ -531,9 +528,7 @@ def _rank_genes_groups_plot( gene_symbols: str | None = None, **kwds, ): - """\ - Common function to call the different rank_genes_groups_* plots - """ + """Call the different `rank_genes_groups_*` plots.""" if var_names is not None and n_genes is not None: msg = ( "The arguments n_genes and var_names are mutually exclusive.
Please " @@ -704,8 +699,7 @@ def rank_genes_groups_heatmap( save: bool | None = None, **kwds, ): - """\ - Plot ranking of genes using heatmap plot (see :func:`~scanpy.pl.heatmap`) + """Plot ranking of genes using heatmap plot (see :func:`~scanpy.pl.heatmap`). Parameters ---------- @@ -741,10 +735,11 @@ def rank_genes_groups_heatmap( .. currentmodule:: scanpy - See also + See Also -------- tl.rank_genes_groups tl.dendrogram + """ return _rank_genes_groups_plot( adata, @@ -787,8 +782,7 @@ def rank_genes_groups_tracksplot( save: bool | None = None, **kwds, ): - """\ - Plot ranking of genes using heatmap plot (see :func:`~scanpy.pl.heatmap`) + """Plot ranking of genes using heatmap plot (see :func:`~scanpy.pl.heatmap`). Parameters ---------- @@ -807,8 +801,8 @@ def rank_genes_groups_tracksplot( adata = sc.datasets.pbmc68k_reduced() sc.tl.rank_genes_groups(adata, 'bulk_labels') sc.pl.rank_genes_groups_tracksplot(adata) - """ + """ return _rank_genes_groups_plot( adata, plot_type="tracksplot", @@ -866,8 +860,7 @@ def rank_genes_groups_dotplot( return_fig: bool = False, **kwds, ): - """\ - Plot ranking of genes using dotplot plot (see :func:`~scanpy.pl.dotplot`) + """Plot ranking of genes using dotplot plot (see :func:`~scanpy.pl.dotplot`). Parameters ---------- @@ -966,9 +959,10 @@ def rank_genes_groups_dotplot( .. currentmodule:: scanpy - See also + See Also -------- tl.rank_genes_groups + """ return _rank_genes_groups_plot( adata, @@ -1005,9 +999,9 @@ def rank_genes_groups_stacked_violin( return_fig: bool = False, **kwds, ): - """\ - Plot ranking of genes using stacked_violin plot - (see :func:`~scanpy.pl.stacked_violin`) + """Plot ranking of genes using stacked_violin plot. + + (See :func:`~scanpy.pl.stacked_violin`) Parameters ---------- @@ -1028,13 +1022,13 @@ def rank_genes_groups_stacked_violin( -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.tl.rank_genes_groups(adata, 'bulk_labels') + >>> sc.tl.rank_genes_groups(adata, "bulk_labels") - >>> sc.pl.rank_genes_groups_stacked_violin(adata, n_genes=4, - ... min_logfoldchange=4, figsize=(8,6)) + >>> sc.pl.rank_genes_groups_stacked_violin( + ... adata, n_genes=4, min_logfoldchange=4, figsize=(8, 6) + ... ) """ - return _rank_genes_groups_plot( adata, plot_type="stacked_violin", @@ -1093,8 +1087,7 @@ def rank_genes_groups_matrixplot( return_fig: bool = False, **kwds, ): - """\ - Plot ranking of genes using matrixplot plot (see :func:`~scanpy.pl.matrixplot`) + """Plot ranking of genes using matrixplot plot (see :func:`~scanpy.pl.matrixplot`). Parameters ---------- @@ -1178,8 +1171,8 @@ def rank_genes_groups_matrixplot( min_logfoldchange=3, colorbar_title='log fold change', ) - """ + """ return _rank_genes_groups_plot( adata, plot_type="matrixplot", @@ -1234,8 +1227,7 @@ def rank_genes_groups_violin( # deprecated scale: DensityNorm | Empty = _empty, ): - """\ - Plot ranking of genes for all tested comparisons. + """Plot ranking of genes for all tested comparisons. Parameters ---------- @@ -1265,6 +1257,7 @@ def rank_genes_groups_violin( size Size of the jitter points. {show_save_ax} + """ if key is None: key = "rank_genes_groups" @@ -1351,8 +1344,7 @@ def sim( save: bool | str | None = None, marker: str | Sequence[str] = ".", ) -> None: - """\ - Plot results of simulation. + """Plot results of simulation. Parameters ---------- @@ -1369,6 +1361,7 @@ def sim( If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {{`'.pdf'`, `'.png'`, `'.svg'`}}. 
+ """ if tmax_realization is not None: tmax = tmax_realization @@ -1461,8 +1454,7 @@ def embedding_density( return_fig: bool | None = None, **kwargs, ) -> Figure | Axes | None: - """\ - Plot the density of cells in an embedding (per condition). + """Plot the density of cells in an embedding (per condition). Plots the gaussian kernel density estimates (over condition) from the `sc.tl.embedding_density()` output. @@ -1529,9 +1521,10 @@ def embedding_density( .. currentmodule:: scanpy - See also + See Also -------- tl.embedding_density + """ sanitize_anndata(adata) @@ -1727,14 +1720,13 @@ def _get_values_to_plot( key: str | None = "rank_genes_groups", gene_symbols: str | None = None, ): - """ - If rank_genes_groups has been called, this function - prepares a dataframe containing scores, pvalues, logfoldchange etc to be plotted - as dotplot or matrixplot. + """Prepare a dataframe to be plotted as dotplot or matrixplot. - The dataframe index are the given groups and the columns are the gene_names + The specified `values_to_plot` stem from `rank_genes_groups`. - used by rank_genes_groups_dotplot + The dataframe `index` are the given groups and the `columns` are the `gene_names`. + + (used by `rank_genes_groups_dotplot`) Parameters ---------- @@ -1750,6 +1742,7 @@ def _get_values_to_plot( By default 'rank_genes_groups' gene_symbols Key for field in .var that stores gene symbols. + Returns ------- pandas DataFrame index=groups, columns=gene_names diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py index a4b2de3441..497df3277a 100644 --- a/src/scanpy/plotting/_tools/paga.py +++ b/src/scanpy/plotting/_tools/paga.py @@ -91,8 +91,7 @@ def paga_compare( pos=None, **paga_graph_params, ): - """\ - Scatter and PAGA graph side-by-side. + """Scatter and PAGA graph side-by-side. Consists in a scatter plot and the abstracted graph. See :func:`~scanpy.pl.paga` for all related parameters. @@ -114,6 +113,7 @@ def paga_compare( Returns ------- A list of :class:`~matplotlib.axes.Axes` if `show` is `False`. + """ axs, _, _, _ = _utils.setup_axes(panels=[0, 1], right_margin=right_margin) if color is None: @@ -344,8 +344,7 @@ def paga( save: bool | str | None = None, ax: Axes | None = None, ) -> Axes | list[Axes] | None: - """\ - Plot the PAGA graph through thresholding low-connectivity edges. + r"""Plot the PAGA graph through thresholding low-connectivity edges. Compute a coarse-grained layout of the data. Reuse this by passing `init_pos='paga'` to :func:`~scanpy.tl.umap` or @@ -456,7 +455,7 @@ def paga( save If `True` or a `str`, save the figure. A string is appended to the default filename. - Infer the filetype if ending on \\{`'.pdf'`, `'.png'`, `'.svg'`\\}. + Infer the filetype if ending on \{`'.pdf'`, `'.png'`, `'.svg'`\}. ax A matplotlib axes object. @@ -491,13 +490,13 @@ def paga( .. currentmodule:: scanpy - See also + See Also -------- tl.paga pl.paga_compare pl.paga_path - """ + """ if groups is not None: # backwards compat labels = groups logg.warning("`groups` is deprecated in `pl.paga`: use `labels` instead") @@ -1068,8 +1067,7 @@ def paga_path( save: bool | str | None = None, ax: Axes | None = None, ) -> tuple[Axes, pd.DataFrame] | Axes | pd.DataFrame | None: - """\ - Gene expression and annotation changes along paths in the abstracted graph. + r"""Gene expression and annotation changes along paths in the abstracted graph. Parameters ---------- @@ -1117,7 +1115,7 @@ def paga_path( save If `True` or a `str`, save the figure. A string is appended to the default filename. 
- Infer the filetype if ending on \\{`'.pdf'`, `'.png'`, `'.svg'`\\}. + Infer the filetype if ending on \{`'.pdf'`, `'.png'`, `'.svg'`\}. ax A matplotlib axes object. @@ -1125,6 +1123,7 @@ ------- A :class:`~matplotlib.axes.Axes` object, if `ax` is `None`, else `None`. If `return_data`, return the timeseries data in addition to an axes. + """ ax_was_none = ax is None @@ -1370,7 +1369,7 @@ def paga_adjacency( show: bool | None = None, save: bool | str | None = None, ) -> None: - """Connectivity of paga groups.""" + """Plot connectivity of paga groups.""" connectivity = adata.uns[adjacency].toarray() connectivity_select = adata.uns[adjacency_tree] if as_heatmap: diff --git a/src/scanpy/plotting/_tools/scatterplots.py b/src/scanpy/plotting/_tools/scatterplots.py index cb3c9d7c66..74e04f13c2 100644 --- a/src/scanpy/plotting/_tools/scatterplots.py +++ b/src/scanpy/plotting/_tools/scatterplots.py @@ -118,8 +118,7 @@ def embedding( marker: str | Sequence[str] = ".", **kwargs, ) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot for user specified embedding basis (e.g. umap, pca, etc) + """Scatter plot for a user-specified embedding basis (e.g. umap, pca, etc.). Parameters ---------- @@ -133,6 +132,7 @@ def embedding( Returns ------- If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it. + """ ##################### # Argument handling # @@ -515,18 +515,18 @@ def _get_vboundnorm( index: int, colors: Sequence[float], ) -> tuple[float | None, float | None]: - """ - Evaluates the value of vmin, vmax and vcenter, which could be a - str in which case is interpreted as a percentile and should - be specified in the form 'pN' where N is the percentile. - Eg. for a percentile of 85 the format would be 'p85'. - Floats are accepted as p99.9 + """Evaluate the value of `vmin`, `vmax` and `vcenter`. - Alternatively, vmin/vmax could be a function that is applied to - the list of color values (`colors`). E.g. + Each could be a str, in which case it is interpreted as a percentile and should + be specified in the form `pN` where `N` is the percentile. + E.g. for a percentile of 85 the format would be `p85`. + Floats are accepted as `p99.9`. - def my_vmax(colors): np.percentile(colors, p=80) + Alternatively, `vmin`/`vmax` could be a function that is applied to + the list of color values (`colors`). E.g. + >>> def my_vmax(colors): + ... return np.percentile(colors, q=80) Parameters ---------- @@ -537,7 +537,6 @@ def my_vmax(colors): np.percentile(colors, p=80) Returns ------- - (vmin, vmax, vcenter, norm) containing None or float values for vmin, vmax, vcenter and matplotlib.colors.Normalize or None for norm. @@ -597,7 +596,6 @@ def my_vmax(colors): np.percentile(colors, p=80) def _wraps_plot_scatter(wrapper): """Update the wrapper function to use the correct signature.""" - params = inspect.signature(embedding, eval_str=True).parameters.copy() wrapper_sig = inspect.signature(wrapper, eval_str=True) wrapper_params = wrapper_sig.parameters.copy() @@ -634,8 +632,7 @@ def _wraps_plot_scatter(wrapper): show_save_ax=doc_show_save_ax, ) def umap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in UMAP basis. + """Scatter plot in UMAP basis. Parameters ---------- @@ -681,9 +678,10 @@ def umap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: ..
currentmodule:: scanpy - See also + See Also -------- tl.umap + """ return embedding(adata, "umap", **kwargs) @@ -696,8 +694,7 @@ def umap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: show_save_ax=doc_show_save_ax, ) def tsne(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in tSNE basis. + """Scatter plot in tSNE basis. Parameters ---------- @@ -722,9 +719,10 @@ def tsne(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: .. currentmodule:: scanpy - See also + See Also -------- tl.tsne + """ return embedding(adata, "tsne", **kwargs) @@ -736,8 +734,7 @@ def tsne(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: show_save_ax=doc_show_save_ax, ) def diffmap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in Diffusion Map basis. + """Scatter plot in Diffusion Map basis. Parameters ---------- @@ -761,9 +758,10 @@ def diffmap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: .. currentmodule:: scanpy - See also + See Also -------- tl.diffmap + """ return embedding(adata, "diffmap", **kwargs) @@ -778,8 +776,7 @@ def diffmap(adata: AnnData, **kwargs) -> Figure | Axes | list[Axes] | None: def draw_graph( adata: AnnData, *, layout: _Layout | None = None, **kwargs ) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in graph-drawing basis. + """Scatter plot in graph-drawing basis. Parameters ---------- @@ -807,9 +804,10 @@ def draw_graph( .. currentmodule:: scanpy - See also + See Also -------- tl.draw_graph + """ if layout is None: layout = str(adata.uns["draw_graph"]["params"]["layout"]) @@ -836,8 +834,7 @@ def pca( save: bool | str | None = None, **kwargs, ) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in PCA coordinates. + """Scatter plot in PCA coordinates. Use the parameter `annotate_var_explained` to annotate the explained variance. @@ -878,9 +875,10 @@ def pca( .. currentmodule:: scanpy - See also + See Also -------- pp.pca + """ if not annotate_var_explained: return embedding( @@ -951,8 +949,7 @@ def spatial( save: bool | str | None = None, **kwargs, ) -> Figure | Axes | list[Axes] | None: - """\ - Scatter plot in spatial coordinates. + """Scatter plot in spatial coordinates. .. deprecated:: 1.11.0 Use :func:`squidpy.pl.spatial_scatter` instead. @@ -1003,6 +1000,7 @@ def spatial( -------- :func:`scanpy.datasets.visium_sge` Example visium data. + """ # get default image params if available library_id, spatial_data = _check_spatial_data(adata.uns, library_id) @@ -1181,9 +1179,7 @@ def _get_color_source_vector( layer: str | None = None, groups: Sequence[str] | None = None, ) -> np.ndarray | pd.api.extensions.ExtensionArray: - """ - Get array from adata that colors will be based on. - """ + """Get array from adata that colors will be based on.""" if value_to_plot is None: # Points will be plotted with `na_color`. Ideally this would work # with the "bad color" in a color map but that throws a warning. Instead @@ -1235,8 +1231,7 @@ def _color_vector( palette: str | Sequence[str] | Cycler | None, na_color: ColorLike = "lightgray", ) -> tuple[np.ndarray | pd.api.extensions.ExtensionArray, Literal["cat", "na", "cont"]]: - """ - Map array of values to array of hex (plus alpha) codes. + """Map array of values to array of hex (plus alpha) codes. For categorical data, the return value is list of colors taken from the category palette or from the given `palette` value. 
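To make the palette-to-hex mapping described here concrete, a minimal standalone sketch (not the internal implementation; the category names are invented for illustration):

>>> from matplotlib import colormaps
>>> from matplotlib.colors import to_hex
>>> cats = ["B-cell", "T-cell", "myeloid"]  # hypothetical categories
>>> hexes = [to_hex(c) for c in colormaps["tab10"].colors[: len(cats)]]
>>> dict(zip(cats, hexes))
{'B-cell': '#1f77b4', 'T-cell': '#ff7f0e', 'myeloid': '#2ca02c'}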
@@ -1273,10 +1268,7 @@ def _color_vector( def _basis2name(basis): - """ - converts the 'basis' into the proper name. - """ - + """Convert the 'basis' into the proper name.""" component_name = ( "DC" if basis == "diffmap" @@ -1294,8 +1286,7 @@ def _basis2name(basis): def _check_spot_size(spatial_data: Mapping | None, spot_size: float | None) -> float: - """ - Resolve spot_size value. + """Resolve spot_size value. This is a required argument for spatial plots. """ @@ -1328,8 +1319,7 @@ def _check_scale_factor( def _check_spatial_data( uns: Mapping, library_id: str | None | Empty ) -> tuple[str | None, Mapping | None]: - """ - Given a mapping, try and extract a library id/ mapping with spatial data. + """Given a mapping, try to extract a library id / mapping with spatial data. Assumes this is `.uns` from how we parse visium data. """ @@ -1356,9 +1346,7 @@ def _check_img( *, bw: bool = False, ) -> tuple[np.ndarray | None, str | None]: - """ - Resolve image for spatial plots. - """ + """Resolve image for spatial plots.""" if img is None and spatial_data is not None and img_key is _empty: img_key = next( (k for k in ["hires", "lowres"] if k in spatial_data["images"]), @@ -1394,7 +1382,6 @@ def _check_na_color( def _broadcast_args(*args): """Broadcasts arguments to a common length.""" - lens = [len(arg) for arg in args] longest = max(lens) if not (set(lens) == {1, longest} or set(lens) == {longest}): diff --git a/src/scanpy/plotting/_utils.py b/src/scanpy/plotting/_utils.py index b6cd920039..1586c5bfb6 100644 --- a/src/scanpy/plotting/_utils.py +++ b/src/scanpy/plotting/_utils.py @@ -63,7 +63,7 @@ class _AxesSubplot(Axes, axes.SubplotBase): - """Intersection between Axes and SubplotBase: Has methods of both""" + """Intersection between Axes and SubplotBase: has methods of both.""" # ------------------------------------------------------------------------------- @@ -143,8 +143,7 @@ def timeseries_subplot( ax: Axes | None = None, marker: str | Sequence[str] = ".", ): - """\ - Plot X. + """Plot X. Parameters ---------- @@ -153,8 +152,8 @@ def timeseries_subplot( X with one column, color categorical. X with one column, color continuous. X with n columns, color is of length n. - """ + """ if color is not None: use_color_map = isinstance(color[0], float | np.floating) palette = default_palette(palette) @@ -208,8 +207,7 @@ def timeseries_subplot( def timeseries_as_heatmap( X: np.ndarray, *, var_names: Collection[str] = (), highlights_x=(), color_map=None ): - """\ - Plot timeseries as heatmap. + """Plot timeseries as heatmap. Parameters ---------- @@ -217,6 +215,7 @@ def timeseries_as_heatmap( X Data array. var_names Array of strings naming variables stored in columns of X. + """ if len(var_names) == 0: var_names = np.arange(X.shape[1]) @@ -378,14 +377,11 @@ def default_palette( def _validate_palette(adata: AnnData, key: str) -> None: - """ - checks if the list of colors in adata.uns[f'{key}_colors'] is valid - and updates the color list in adata.uns[f'{key}_colors'] if needed. + """Validate and update the list of colors in `adata.uns[f'{key}_colors']`.
Not only valid matplotlib colors are checked but also if the color name is a valid R color name, in which case it will be translated to a valid name """ - _palette = [] color_key = f"{key}_colors" @@ -413,8 +409,7 @@ def _validate_palette(adata: AnnData, key: str) -> None: def _set_colors_for_categorical_obs( adata, value_to_plot, palette: str | Sequence[str] | Cycler ): - """ - Sets the adata.uns[value_to_plot + '_colors'] according to the given palette + """Set `adata.uns[f'{value_to_plot}_colors']` according to the given palette. Parameters ---------- @@ -430,6 +425,7 @@ def _set_colors_for_categorical_obs( Returns ------- None + """ from matplotlib.colors import to_hex @@ -492,8 +488,7 @@ def _set_colors_for_categorical_obs( def _set_default_colors_for_categorical_obs(adata, value_to_plot): - """ - Sets the adata.uns[value_to_plot + '_colors'] using default color palettes + """Set `adata.uns[f'{value_to_plot}_colors']` using default color palettes. Parameters ---------- @@ -505,6 +500,7 @@ def _set_default_colors_for_categorical_obs(adata, value_to_plot): Returns ------- None + """ if adata.obs[value_to_plot].dtype == bool: categories = ( @@ -768,6 +764,7 @@ def scatter_base( ------- Depending on whether supplying a single array or a list of arrays, return a single axis or a list of axes. + """ if isinstance(highlights, Mapping): highlights_indices = sorted(highlights) @@ -897,6 +894,7 @@ def scatter_single(ax: Axes, Y: np.ndarray, *args, **kwargs): Axis to plot on. Y Data array, data to be plotted needs to be in the first two columns. + """ if "s" not in kwargs: kwargs["s"] = 2 if Y.shape[0] > 500 else 10 @@ -908,8 +906,7 @@ def scatter_single(ax: Axes, Y: np.ndarray, *args, **kwargs): def arrows_transitions(ax: Axes, X: np.ndarray, indices: Sequence[int], weight=None): - """ - Plot arrows of transitions in data matrix. + """Plot arrows of transitions in data matrix. Parameters ---------- @@ -919,6 +916,7 @@ def arrows_transitions(ax: Axes, X: np.ndarray, indices: Sequence[int], weight=N Data array, any representation wished (X, psi, phi, etc). indices Indices storing the transitions. + """ step = 1 width = axis_to_data(ax, 0.001) @@ -1003,6 +1001,7 @@ def hierarchy_pos( value: number of nodes in this level width: horizontal space allocated for drawing height: vertical space allocated for drawing + """ def make_levels( @@ -1011,7 +1010,7 @@ def make_levels( current_level: int = 0, parent: int | None = None, ) -> dict[int, _Level]: - """Compute the number of nodes for each level""" + """Compute the number of nodes for each level.""" if current_level not in levels: levels[current_level] = _Level(total=0, current=0) levels[current_level]["total"] += 1 @@ -1072,11 +1071,7 @@ def make_sc_tree(sc_G, node=root, parent=None): def zoom(ax, xy="x", factor=1): - """Zoom into axis. - - Parameters - ---------- - """ + """Zoom into axis.""" limits = ax.get_xlim() if xy == "x" else ax.get_ylim() new_limits = 0.5 * (limits[0] + limits[1]) + 1.0 / factor * np.array( (-0.5, 0.5) @@ -1088,7 +1083,7 @@ def zoom(ax, xy="x", factor=1): def get_ax_size(ax: Axes, fig: Figure): - """Get axis size + """Get axis size. Parameters ---------- @@ -1096,6 +1091,7 @@ def get_ax_size(ax: Axes, fig: Figure): Axis object from matplotlib. fig Figure. 
+ """ bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) width, height = bbox.width, bbox.height @@ -1104,8 +1100,7 @@ def get_ax_size(ax: Axes, fig: Figure): def axis_to_data(ax: Axes, width: float): - """For a width in axis coordinates, return the corresponding in data - coordinates. + """For a width in axis coordinates, return the corresponding in data coordinates. Parameters ---------- @@ -1113,6 +1108,7 @@ def axis_to_data(ax: Axes, width: float): Axis object from matplotlib. width Width in xaxis coordinates. + """ xlim = ax.get_xlim() widthx = width * (xlim[1] - xlim[0]) @@ -1132,6 +1128,7 @@ def axis_to_data_points(ax: Axes, points_axis: np.ndarray): Axis object from matplotlib. points_axis Points in axis coordinates. + """ axis_to_data = ax.transAxes + ax.transData.inverted() return axis_to_data.transform(points_axis) @@ -1148,13 +1145,14 @@ def data_to_axis_points(ax: Axes, points_data: np.ndarray): Axis object from matplotlib. points_data Points in data coordinates. + """ data_to_axis = axis_to_data.inverted() return data_to_axis(points_data) def check_projection(projection): - """Validation for projection argument.""" + """Validate projection argument.""" if projection not in {"2d", "3d"}: msg = f"Projection must be '2d' or '3d', was '{projection}'." raise ValueError(msg) @@ -1170,10 +1168,12 @@ def check_projection(projection): def circles( x, y, *, s, ax, marker=None, c="b", vmin=None, vmax=None, scale_factor=1.0, **kwargs ): - """ - Taken from here: https://gist.github.com/syrte/592a062c562cd2a98a83 - Make a scatter plot of circles. + """Make a scatter plot of circles. + Similar to pl.scatter, but the size of circles are in data scale. + + Taken from here: + Parameters ---------- x, y : scalar or array_like, shape (n, ) @@ -1195,9 +1195,11 @@ def circles( kwargs : `~matplotlib.collections.Collection` properties Eg. alpha, edgecolor(ec), facecolor(fc), linewidth(lw), linestyle(ls), norm, cmap, transform, etc. + Returns ------- paths : `~matplotlib.collections.PathCollection` + Examples -------- a = np.arange(11) @@ -1207,8 +1209,8 @@ def circles( -------- This code is under [The BSD 3-Clause License] (https://opensource.org/license/bsd-3-clause/) - """ + """ # You can set `facecolor` with an array for each patch, # while you can only set `facecolors` with a value for all. if scale_factor != 1.0: @@ -1257,31 +1259,31 @@ def make_grid_spec( def fix_kwds(kwds_dict, **kwargs): - """ - Given a dictionary of plot parameters (kwds_dict) and a dict of kwds, - merge the parameters into a single consolidated dictionary to avoid - argument duplication errors. + """Merge the parameters into a single consolidated dictionary. - If kwds_dict an kwargs have the same key, only the value in kwds_dict is kept. + Given a dictionary of plot parameters (`kwds_dict`) and a dict of `kwds`, + this function prevents argument duplication errors. + + If `kwds_dict` an kwargs have the same key, only the value in `kwds_dict` is kept. Parameters ---------- - kwds_dict kwds_dictionary + kwds_dict + kwds dictionary kwargs Returns ------- - kwds_dict merged with kwargs + `kwds_dict` merged with `kwargs` Examples -------- - >>> def _example(**kwds): ... 
return fix_kwds(kwds, key1="value1", key2="value2") >>> _example(key1="value10", key3="value3") {'key1': 'value10', 'key2': 'value2', 'key3': 'value3'} - """ + """ kwargs.update(kwds_dict) return kwargs @@ -1354,5 +1356,5 @@ def _deprecated_scale( def _dk(dendrogram: bool | str | None) -> str | None: - """Helper to convert the `dendrogram` parameter to a `dendrogram_key` parameter.""" + """Convert the `dendrogram` parameter to a `dendrogram_key` parameter.""" return None if isinstance(dendrogram, bool) else dendrogram diff --git a/src/scanpy/preprocessing/__init__.py b/src/scanpy/preprocessing/__init__.py index 4307cbb6c9..24ef84e5e7 100644 --- a/src/scanpy/preprocessing/__init__.py +++ b/src/scanpy/preprocessing/__init__.py @@ -1,3 +1,5 @@ +"""Preprocessing functions.""" + from __future__ import annotations from ..neighbors import neighbors diff --git a/src/scanpy/preprocessing/_combat.py b/src/scanpy/preprocessing/_combat.py index 93052f356c..3dc2331136 100644 --- a/src/scanpy/preprocessing/_combat.py +++ b/src/scanpy/preprocessing/_combat.py @@ -20,11 +20,10 @@ def _design_matrix( model: pd.DataFrame, batch_key: str, batch_levels: Collection[str] ) -> pd.DataFrame: - """\ - Computes a simple design matrix. + """Compute a simple design matrix. Parameters - -------- + ---------- model Contains the batch annotation batch_key @@ -33,8 +32,9 @@ def _design_matrix( Levels of the batch annotation Returns - -------- + ------- The design matrix for the regression problem + """ import patsy @@ -72,13 +72,12 @@ def _design_matrix( def _standardize_data( model: pd.DataFrame, data: pd.DataFrame, batch_key: str ) -> tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: - """\ - Standardizes the data per gene. + """Standardize the data per gene. The aim here is to make mean and variance be comparable across batches. Parameters - -------- + ---------- model Contains the batch annotation data @@ -87,7 +86,7 @@ def _standardize_data( Name of the batch column in the model matrix Returns - -------- + ------- s_data Standardized Data design @@ -96,8 +95,8 @@ def _standardize_data( Pooled variance per gene stand_mean Gene-wise mean - """ + """ # compute the design matrix batch_items = model.groupby(batch_key, observed=True).groups.items() batch_levels, batch_info = zip(*batch_items) @@ -143,8 +142,7 @@ def combat( covariates: Collection[str] | None = None, inplace: bool = True, ) -> np.ndarray | None: - """\ - ComBat function for batch effect correction :cite:p:`Johnson2006,Leek2012,Pedersen2012`. + """ComBat function for batch effect correction :cite:p:`Johnson2006,Leek2012,Pedersen2012`. Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. @@ -175,8 +173,8 @@ def combat( `adata.X` : :class:`numpy.ndarray` (dtype `float`) Corrected data matrix. - """ + """ # check the input if key not in adata.obs_keys(): msg = f"Could not find the key {key!r} in adata.obs" @@ -296,8 +294,7 @@ def _it_sol( b: float, conv: float = 0.0001, ) -> tuple[np.ndarray, np.ndarray]: - """\ - Iteratively compute the conditional posterior means for gamma and delta. + """Iteratively compute the conditional posterior means for gamma and delta. gamma is an estimator for the additive batch effect, deltat is an estimator for the multiplicative batch effect. We use an EB framework to estimate these @@ -305,7 +302,7 @@ def _it_sol( We therefore iteratively evalutate these two expressions until convergence is reached. 
Parameters - -------- + ---------- s_data Contains the standardized Data g_hat @@ -317,14 +314,14 @@ def _it_sol( conv: float, optional (default: `0.0001`) convergence criterium - Returns: - -------- + Returns + ------- gamma estimated value for gamma delta estimated value for delta - """ + """ # noqa: D401 n = (1 - np.isnan(s_data)).sum(axis=1) g_old = g_hat.copy() d_old = d_hat.copy() diff --git a/src/scanpy/preprocessing/_deprecated/__init__.py b/src/scanpy/preprocessing/_deprecated/__init__.py index b821417c0b..a3902a83ba 100644 --- a/src/scanpy/preprocessing/_deprecated/__init__.py +++ b/src/scanpy/preprocessing/_deprecated/__init__.py @@ -13,8 +13,7 @@ def normalize_per_cell_weinreb16_deprecated( max_fraction: float = 1, mult_with_mean: bool = False, ) -> np.ndarray: - """\ - Normalize each cell :cite:p:`Weinreb2017`. + """Normalize each cell :cite:p:`Weinreb2017`. This is a deprecated version. See `normalize_per_cell` instead. @@ -34,6 +33,7 @@ def normalize_per_cell_weinreb16_deprecated( Returns ------- Normalized version of the original expression matrix. + """ if max_fraction < 0 or max_fraction > 1: msg = "Choose max_fraction between 0 and 1." @@ -59,8 +59,7 @@ def normalize_per_cell_weinreb16_deprecated( def zscore_deprecated(X: np.ndarray) -> np.ndarray: - """\ - Z-score standardize each variable/gene in X :cite:p:`Weinreb2017`. + """Z-score standardize each variable/gene in X :cite:p:`Weinreb2017`. Use `scale` instead. @@ -72,6 +71,7 @@ def zscore_deprecated(X: np.ndarray) -> np.ndarray: Returns ------- Z-score standardized version of the data matrix. + """ means = np.tile(np.mean(X, axis=0)[None, :], (X.shape[0], 1)) stds = np.tile(np.std(X, axis=0)[None, :], (X.shape[0], 1)) diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py index f841e24da3..c9bc79b82b 100644 --- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py +++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py @@ -46,8 +46,7 @@ def filter_genes_dispersion( subset: bool = True, copy: bool = False, ) -> AnnData | np.recarray | None: - """\ - Extract highly variable genes :cite:p:`Satija2015,Zheng2017`. + """Extract highly variable genes :cite:p:`Satija2015,Zheng2017`. .. deprecated:: 1.3.6 @@ -123,6 +122,7 @@ def filter_genes_dispersion( If a data matrix `X` is passed, the annotation is returned as `np.recarray` with the same information stored in fields: `gene_subset`, `means`, `dispersions`, `dispersion_norm`. + """ if n_top_genes is not None and not all( x is None for x in [min_disp, max_disp, min_mean, max_mean] diff --git a/src/scanpy/preprocessing/_deprecated/sampling.py b/src/scanpy/preprocessing/_deprecated/sampling.py index 4be071fc02..b6ca424c3b 100644 --- a/src/scanpy/preprocessing/_deprecated/sampling.py +++ b/src/scanpy/preprocessing/_deprecated/sampling.py @@ -23,8 +23,7 @@ def subsample( random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | tuple[np.ndarray | _CSMatrix, NDArray[np.int64]] | None: - """\ - Subsample to a fraction of the number of observations. + """Subsample to a fraction of the number of observations. .. deprecated:: 1.11.0 @@ -50,8 +49,8 @@ def subsample( Returns `X[obs_indices], obs_indices` if data is array-like, otherwise subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or returns a subsampled copy of it (`copy == True`). 
- """ + """ rng = _legacy_numpy_gen(random_state) return sample( data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0 diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index c5dc3a27a2..49291f5da1 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -38,8 +38,7 @@ def _highly_variable_genes_seurat_v3( subset: bool = False, inplace: bool = True, ) -> pd.DataFrame | None: - """\ - See `highly_variable_genes`. + """See `highly_variable_genes`. For further implementation details see https://www.overleaf.com/read/ckptrbgzzzpg @@ -60,8 +59,8 @@ def _highly_variable_genes_seurat_v3( Rank of the gene according to normalized variance, median rank in the case of multiple batches. highly_variable_nbatches : :class:`int` If batch_key is given, this denotes in how many batches genes are detected as HVG. - """ + """ try: from skmisc.loess import loess except ImportError: @@ -273,13 +272,13 @@ def _highly_variable_genes_single_batch( n_bins: int = 20, flavor: Literal["seurat", "cell_ranger"] = "seurat", ) -> pd.DataFrame: - """\ - See `highly_variable_genes`. + """See `highly_variable_genes`. Returns ------- A DataFrame that contains the columns `highly_variable`, `means`, `dispersions`, and `dispersions_norm`. + """ X = _get_obs_rep(adata, layer=layer) @@ -533,8 +532,7 @@ def highly_variable_genes( batch_key: str | None = None, check_values: bool = True, ) -> pd.DataFrame | None: - """\ - Annotate highly variable genes :cite:p:`Satija2015,Zheng2017,Stuart2019`. + """Annotate highly variable genes :cite:p:`Satija2015,Zheng2017,Stuart2019`. Expects logarithmized data, except when `flavor='seurat_v3'`/`'seurat_v3_paper'`, in which count data is expected. @@ -644,8 +642,8 @@ def highly_variable_genes( Notes ----- This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. - """ + """ start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): diff --git a/src/scanpy/preprocessing/_normalization.py b/src/scanpy/preprocessing/_normalization.py index e1ee3d4822..5997899d53 100644 --- a/src/scanpy/preprocessing/_normalization.py +++ b/src/scanpy/preprocessing/_normalization.py @@ -80,8 +80,7 @@ def normalize_total( inplace: bool = True, copy: bool = False, ) -> AnnData | dict[str, np.ndarray] | None: - """\ - Normalize counts per cell. + """Normalize counts per cell. Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization. @@ -138,23 +137,28 @@ def normalize_total( `adata.X` and `adata.layers`, depending on `inplace`. Example - -------- + ------- >>> import sys >>> from anndata import AnnData >>> import scanpy as sc - >>> sc.settings.verbosity = 'info' + >>> sc.settings.verbosity = "info" >>> sc.settings.logfile = sys.stdout # for doctests >>> np.set_printoptions(precision=2) - >>> adata = AnnData(np.array([ - ... [3, 3, 3, 6, 6], - ... [1, 1, 1, 2, 2], - ... [1, 22, 1, 2, 2], - ... ], dtype='float32')) + >>> adata = AnnData( + ... np.array( + ... [ + ... [3, 3, 3, 6, 6], + ... [1, 1, 1, 2, 2], + ... [1, 22, 1, 2, 2], + ... ], + ... dtype="float32", + ... ) + ... 
) >>> adata.X array([[ 3., 3., 3., 6., 6.], [ 1., 1., 1., 2., 2.], [ 1., 22., 1., 2., 2.]], dtype=float32) - >>> X_norm = sc.pp.normalize_total(adata, target_sum=1, inplace=False)['X'] + >>> X_norm = sc.pp.normalize_total(adata, target_sum=1, inplace=False)["X"] normalizing counts per cell finished (0:00:00) >>> X_norm @@ -162,9 +166,12 @@ [0.14, 0.14, 0.14, 0.29, 0.29], [0.04, 0.79, 0.04, 0.07, 0.07]], dtype=float32) >>> X_norm = sc.pp.normalize_total( - ... adata, target_sum=1, exclude_highly_expressed=True, - ... max_fraction=0.2, inplace=False - ... )['X'] + ... adata, + ... target_sum=1, + ... exclude_highly_expressed=True, + ... max_fraction=0.2, + ... inplace=False, + ... )["X"] normalizing counts per cell. The following highly-expressed genes are not considered during normalization factor computation: ['1', '3', '4'] finished (0:00:00) @@ -172,6 +179,7 @@ array([[ 0.5, 0.5, 0.5, 1. , 1. ], [ 0.5, 0.5, 0.5, 1. , 1. ], [ 0.5, 11. , 0.5, 1. , 1. ]], dtype=float32) + """ if copy: if not inplace: diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 77c5c6b3fe..223393e0bd 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -77,8 +77,7 @@ def pca( key_added: str | None = None, copy: bool = False, ) -> AnnData | np.ndarray | _CSMatrix | None: - """\ - Principal component analysis :cite:p:`Pedregosa2011`. + r"""Principal component analysis :cite:p:`Pedregosa2011`. Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of *scikit-learn* :cite:p:`Pedregosa2011`. @@ -171,13 +170,13 @@ def pca( Required if `chunked=True` was passed. key_added If not specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ `['X_pca']`, the loadings as - :attr:`~anndata.AnnData.varm`\\ `['PCs']`, and the the parameters in - :attr:`~anndata.AnnData.uns`\\ `['pca']`. + :attr:`~anndata.AnnData.obsm`\ `['X_pca']`, the loadings as + :attr:`~anndata.AnnData.varm`\ `['PCs']`, and the parameters in + :attr:`~anndata.AnnData.uns`\ `['pca']`. If specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ ``[key_added]``, the loadings as - :attr:`~anndata.AnnData.varm`\\ ``[key_added]``, and the the parameters in - :attr:`~anndata.AnnData.uns`\\ ``[key_added]``. + :attr:`~anndata.AnnData.obsm`\ ``[key_added]``, the loadings as + :attr:`~anndata.AnnData.varm`\ ``[key_added]``, and the parameters in + :attr:`~anndata.AnnData.uns`\ ``[key_added]``. copy If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned. Is ignored otherwise. @@ -200,6 +199,7 @@ def pca( `.uns['pca' | key_added]['variance']` : :class:`~numpy.ndarray` (shape `(n_comps,)`) Explained variance, equivalent to the eigenvalues of the covariance matrix. + """ logg_start = logg.info("computing PCA") if layer is not None and chunked: @@ -424,8 +424,7 @@ def _handle_mask_var( mask_var: NDArray[np.bool_] | str | Empty | None, use_highly_variable: bool | None, ) -> tuple[np.ndarray | str | None, np.ndarray | None]: - """\ - Unify new mask argument and deprecated use_highly_varible argument. + """Unify new mask argument and deprecated use_highly_variable argument. Returns both the normalized mask parameter and the validated mask array.
""" diff --git a/src/scanpy/preprocessing/_pca/_compat.py b/src/scanpy/preprocessing/_pca/_compat.py index 056650fd15..e6665d7049 100644 --- a/src/scanpy/preprocessing/_pca/_compat.py +++ b/src/scanpy/preprocessing/_pca/_compat.py @@ -29,7 +29,7 @@ def _pca_compat_sparse( mu: NDArray[np.floating] | None = None, random_state: _LegacyRandom = None, ) -> tuple[NDArray[np.floating], PCA]: - """Sparse PCA for scikit-learn <1.4""" + """Sparse PCA for scikit-learn <1.4.""" random_state = check_random_state(random_state) np.random.set_state(random_state.get_state()) random_init = np.random.rand(np.min(x.shape)) diff --git a/src/scanpy/preprocessing/_pca/_dask_sparse.py b/src/scanpy/preprocessing/_pca/_dask_sparse.py index 7f53bda992..cef5147668 100644 --- a/src/scanpy/preprocessing/_pca/_dask_sparse.py +++ b/src/scanpy/preprocessing/_pca/_dask_sparse.py @@ -45,6 +45,7 @@ def fit(self, x: DaskArray) -> PCASparseDaskFit: >>> assert isinstance(pca_fit, PCASparseDaskFit) >>> pca_fit.transform(x) dask.array + """ if x._meta.format != "csr": msg = ( @@ -151,12 +152,10 @@ def _cov_sparse_dask( tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]] | tuple[NDArray[np.floating], NDArray[np.floating]] ): - """\ - Computes the covariance matrix and row/col means of matrix `x`. + r"""Compute the covariance matrix and row/col means of matrix `x`. Parameters ---------- - x A sparse matrix return_gram @@ -169,13 +168,13 @@ def _cov_sparse_dask( Returns ------- - - :math:`\\cov(X, X)` - The covariance matrix of `x` in the form :math:`\\cov(X, X) = \\E(XX) - \\E(X)\\E(X)`. - :math:`\\gram(X, X)` - When return_gram is `True`, the gram matrix of `x` in the form :math:`\\frac{1}{n} X.T \\dot X`. - :math:`\\mean(X)` + :math:`\cov(X, X)` + The covariance matrix of `x` in the form :math:`\cov(X, X) = \E(XX) - \E(X)\E(X)`. + :math:`\gram(X, X)` + When return_gram is `True`, the gram matrix of `x` in the form :math:`\frac{1}{n} X.T \dot X`. + :math:`\mean(X)` The row means of `x`. + """ if TYPE_CHECKING: import dask.array.core as da diff --git a/src/scanpy/preprocessing/_qc.py b/src/scanpy/preprocessing/_qc.py index e57719a20d..a26fab8cdf 100644 --- a/src/scanpy/preprocessing/_qc.py +++ b/src/scanpy/preprocessing/_qc.py @@ -66,8 +66,7 @@ def describe_obs( X=None, parallel=None, ) -> pd.DataFrame | None: - """\ - Describe observations of anndata. + """Describe observations of anndata. Calculates a number of qc metrics for observations in AnnData object. See section `Returns` for a description of those metrics. @@ -94,6 +93,7 @@ def describe_obs( the AnnData's `.obs` dataframe. {doc_obs_qc_returns} + """ if parallel is not None: warn( @@ -164,8 +164,7 @@ def describe_var( log1p: bool = True, X: _CSMatrix | coo_matrix | np.ndarray | None = None, ) -> pd.DataFrame | None: - """\ - Describe variables of anndata. + """Describe variables of anndata. Calculates a number of qc metrics for variables in AnnData object. See section `Returns` for a description of those metrics. @@ -186,6 +185,7 @@ def describe_var( AnnData's `.var` dataframe. {doc_var_qc_returns} + """ # Handle whether X is passed if X is None: @@ -237,8 +237,7 @@ def calculate_qc_metrics( log1p: bool = True, parallel: bool | None = None, ) -> tuple[pd.DataFrame, pd.DataFrame] | None: - """\ - Calculate quality control metrics. + """Calculate quality control metrics. Calculates a number of qc metrics for an AnnData object, see section `Returns` for specifics. 
Largely based on `calculateQCMetrics` from scater @@ -291,6 +290,7 @@ def calculate_qc_metrics( :context: close-figs sns.histplot(pbmc.obs["pct_counts_mito"]) + """ if parallel is not None: warn( @@ -332,8 +332,7 @@ def calculate_qc_metrics( def top_proportions(mtx: np.ndarray | _CSMatrix | coo_matrix, n: int): - """\ - Calculates cumulative proportions of top expressed genes + """Calculate cumulative proportions of top expressed genes. Parameters ---------- @@ -343,6 +342,7 @@ def top_proportions(mtx: np.ndarray | _CSMatrix | coo_matrix, n: int): Rank to calculate proportions up to. Value is treated as 1-indexed, `n=50` will calculate cumulative proportions up to the 50th most expressed gene. + """ if issparse(mtx): if not isinstance(mtx, csr_matrix): @@ -398,8 +398,7 @@ def check_ns_inner( @singledispatch @check_ns def top_segment_proportions(mtx: np.ndarray, ns: Collection[int]) -> np.ndarray: - """ - Calculates total percentage of counts in top ns genes. + """Calculate total percentage of counts in top ns genes. Parameters ---------- @@ -409,6 +408,7 @@ def top_segment_proportions(mtx: np.ndarray, ns: Collection[int]) -> np.ndarray: Positions to calculate cumulative proportion at. Values are considered 1-indexed, e.g. `ns=[50]` will calculate cumulative proportion up to the 50th most expressed gene. + """ # Currently ns is considered to be 1 indexed ns = np.sort(ns) diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 4748d75e5c..cbc4dac8d6 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -1,4 +1,4 @@ -"""Preprocessing recipes from the literature""" +"""Preprocessing recipes from the literature.""" from __future__ import annotations @@ -39,8 +39,7 @@ def recipe_weinreb17( random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | None: - """\ - Normalization and filtering as of :cite:p:`Weinreb2017`. + """Normalize and filter as of :cite:p:`Weinreb2017`. Expects non-logarithmized data. If using logarithmized data, pass `log=False`. @@ -53,6 +52,7 @@ def recipe_weinreb17( Logarithmize data? copy Return a copy if true. + """ from scipy.sparse import issparse @@ -85,8 +85,7 @@ def recipe_weinreb17( def recipe_seurat( adata: AnnData, *, log: bool = True, plot: bool = False, copy: bool = False ) -> AnnData | None: - """\ - Normalization and filtering as of Seurat :cite:p:`Satija2015`. + """Normalize and filter as of Seurat :cite:p:`Satija2015`. This uses a particular preprocessing. @@ -103,6 +102,7 @@ def recipe_seurat( Show a plot of the gene dispersion vs. mean relation. copy Return a copy if true. + """ if copy: adata = adata.copy() @@ -134,8 +134,7 @@ def recipe_zheng17( plot: bool = False, copy: bool = False, ) -> AnnData | None: - """\ - Normalization and filtering as of :cite:t:`Zheng2017`. + """Normalize and filter as of :cite:t:`Zheng2017`. Reproduces the preprocessing of :cite:t:`Zheng2017` – the Cell Ranger R Kit of 10x Genomics. @@ -176,6 +175,7 @@ def recipe_zheng17( Returns ------- Returns or updates `adata` depending on `copy`. 
+ """ start = logg.info("running recipe zheng17") if copy: diff --git a/src/scanpy/preprocessing/_scale.py b/src/scanpy/preprocessing/_scale.py index 2a8ff2140e..61a0dafe02 100644 --- a/src/scanpy/preprocessing/_scale.py +++ b/src/scanpy/preprocessing/_scale.py @@ -86,8 +86,7 @@ def scale( obsm: str | None = None, mask_obs: NDArray[np.bool_] | str | None = None, ) -> AnnData | _CSMatrix | np.ndarray | DaskArray | None: - """\ - Scale data to unit variance and zero mean. + """Scale data to unit variance and zero mean. .. note:: Variables (genes) that do not display any variation (are constant across @@ -129,6 +128,7 @@ def scale( Standard deviations per gene before scaling. `adata.var['var']` : :class:`pandas.Series` (dtype `float`) Variances per gene before scaling. + """ _check_array_function_arguments(layer=layer, obsm=obsm) if layer is not None: diff --git a/src/scanpy/preprocessing/_scrublet/__init__.py b/src/scanpy/preprocessing/_scrublet/__init__.py index 99c0c28522..c084701b5c 100644 --- a/src/scanpy/preprocessing/_scrublet/__init__.py +++ b/src/scanpy/preprocessing/_scrublet/__init__.py @@ -61,8 +61,7 @@ def scrublet( copy: bool = False, random_state: _LegacyRandom = 0, ) -> AnnData | None: - """\ - Predict doublets using Scrublet :cite:p:`Wolock2019`. + """Predict doublets using Scrublet :cite:p:`Wolock2019`. Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets. Works best if the input is a raw @@ -169,14 +168,14 @@ def scrublet( ``.uns['scrublet']['parameters']`` Dictionary of Scrublet parameters - See also + See Also -------- :func:`~scanpy.pp.scrublet_simulate_doublets`: Run Scrublet's doublet simulation separately for advanced usage. :func:`~scanpy.pl.scrublet_score_distribution`: Plot histogram of doublet scores for observed transcriptomes and simulated doublets. - """ + """ if threshold is None and not find_spec("skimage"): # pragma: no cover # Scrublet.call_doublets requires `skimage` with `threshold=None` but PCA # is called early, which is wasteful if there is not `skimage` @@ -319,8 +318,7 @@ def _scrublet_call_doublets( random_state: _LegacyRandom = 0, verbose: bool = True, ) -> AnnData: - """\ - Core function for predicting doublets using Scrublet :cite:p:`Wolock2019`. + """Core function for predicting doublets using Scrublet :cite:p:`Wolock2019`. Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets. @@ -399,8 +397,8 @@ def _scrublet_call_doublets( ``.uns['scrublet']['parameters']`` Dictionary of Scrublet parameters - """ + """ # Estimate n_neighbors if not provided, and create scrublet object. if n_neighbors is None: @@ -512,8 +510,7 @@ def scrublet_simulate_doublets( synthetic_doublet_umi_subsampling: float = 1.0, random_seed: _LegacyRandom = 0, ) -> AnnData: - """\ - Simulate doublets by adding the counts of random observed transcriptome pairs. + """Simulate doublets by adding the counts of random observed transcriptome pairs. Parameters ---------- @@ -545,14 +542,14 @@ def scrublet_simulate_doublets( ``.uns['scrublet']['parameters']`` Dictionary of Scrublet parameters - See also + See Also -------- :func:`~scanpy.pp.scrublet`: Main way of running Scrublet, runs preprocessing, doublet simulation (this function) and calling. :func:`~scanpy.pl.scrublet_score_distribution`: Plot histogram of doublet scores for observed transcriptomes and simulated doublets. 
- """ + """ X = _get_obs_rep(adata, layer=layer) scrub = Scrublet(X, random_state=random_seed) diff --git a/src/scanpy/preprocessing/_scrublet/core.py b/src/scanpy/preprocessing/_scrublet/core.py index 8130f6bd3c..b9787a1668 100644 --- a/src/scanpy/preprocessing/_scrublet/core.py +++ b/src/scanpy/preprocessing/_scrublet/core.py @@ -30,8 +30,7 @@ @dataclass(kw_only=True) class Scrublet: - """\ - Initialize Scrublet object with counts matrix and doublet prediction parameters + """Initialize Scrublet object with counts matrix and doublet prediction parameters. Parameters ---------- @@ -62,6 +61,7 @@ class Scrublet: random_state Random state for doublet simulation, approximate nearest neighbor search, and PCA/TruncatedSVD. + """ # init fields @@ -192,8 +192,8 @@ def simulate_doublets( ) -> None: """Simulate doublets by adding the counts of random observed transcriptome pairs. - Arguments - --------- + Parameters + ---------- sim_doublet_ratio Number of doublets to simulate relative to the number of observed transcriptomes. If `None`, self.sim_doublet_ratio is used. @@ -208,8 +208,8 @@ def simulate_doublets( Sets ---- doublet_parents_ - """ + """ if sim_doublet_ratio is None: sim_doublet_ratio = self.sim_doublet_ratio else: @@ -239,11 +239,10 @@ def simulate_doublets( def set_manifold( self, manifold_obs: NDArray[np.float64], manifold_sim: NDArray[np.float64] ) -> None: - """\ - Set the manifold coordinates used in k-nearest-neighbor graph construction + """Set the manifold coordinates used in k-nearest-neighbor graph construction. - Arguments - --------- + Parameters + ---------- manifold_obs (shape: n_cells × n_features) The single-cell "manifold" coordinates (e.g., PCA coordinates) @@ -259,8 +258,8 @@ def set_manifold( Sets ---- manifold_obs_, manifold_sim_, - """ + """ self.manifold_obs_ = manifold_obs self.manifold_sim_ = manifold_sim @@ -271,13 +270,12 @@ def calculate_doublet_scores( distance_metric: _Metric | _MetricFn = "euclidean", get_doublet_neighbor_parents: bool = False, ) -> NDArray[np.float64]: - """\ - Calculate doublet scores for observed transcriptomes and simulated doublets + """Calculate doublet scores for observed transcriptomes and simulated doublets. Requires that manifold_obs_ and manifold_sim_ have already been set. - Arguments - --------- + Parameters + ---------- use_approx_neighbors Use approximate nearest neighbor method (annoy) for the KNN classifier. @@ -299,8 +297,8 @@ def calculate_doublet_scores( doublet_scores_obs_, doublet_scores_sim_, doublet_errors_obs_, doublet_errors_sim_, doublet_neighbor_parents_ - """ + """ self._nearest_neighbor_classifier( k=self._n_neighbors, exp_doub_rate=self.expected_doublet_rate, @@ -406,11 +404,10 @@ def _nearest_neighbor_classifier( def call_doublets( self, *, threshold: float | None = None, verbose: bool = True ) -> NDArray[np.bool_] | None: - """\ - Call trancriptomes as doublets or singlets + """Call trancriptomes as doublets or singlets. - Arguments - --------- + Parameters + ---------- threshold Doublet score threshold for calling a transcriptome a doublet. 
If `None`, this is set automatically by looking @@ -427,8 +424,8 @@ def call_doublets( predicted_doublets_, z_scores_, threshold_, detected_doublet_rate_, detectable_doublet_fraction, overall_doublet_rate_ - """ + """ if threshold is None: # automatic threshold detection # http://scikit-image.org/docs/dev/api/skimage.filters.html diff --git a/src/scanpy/preprocessing/_scrublet/sparse_utils.py b/src/scanpy/preprocessing/_scrublet/sparse_utils.py index c570612c0a..84e76c13db 100644 --- a/src/scanpy/preprocessing/_scrublet/sparse_utils.py +++ b/src/scanpy/preprocessing/_scrublet/sparse_utils.py @@ -20,8 +20,7 @@ def sparse_multiply( E: _CSMatrix | NDArray[np.float64], a: float | NDArray[np.float64], ) -> _CSMatrix: - """multiply each row of E by a scalar""" - + """Multiply each row of E by a scalar.""" nrow = E.shape[0] w = sparse.dia_matrix((a, 0), shape=(nrow, nrow), dtype=a.dtype) r = w @ E @@ -36,7 +35,7 @@ def sparse_zscore( gene_mean: NDArray[np.float64] | None = None, gene_stdev: NDArray[np.float64] | None = None, ) -> _CSMatrix: - """z-score normalize each column of E""" + """z-score normalize each column of E.""" if gene_mean is None or gene_stdev is None: gene_means, gene_stdevs = _get_mean_var(E, axis=0) gene_stdevs = np.sqrt(gene_stdevs) diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 986a2cd386..7e80d6c8e3 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -1,4 +1,4 @@ -"""Simple Preprocessing Functions +"""Simple Preprocessing Functions. Compositions of these functions are found in sc.preprocess.recipes. """ @@ -68,8 +68,7 @@ def filter_cells( inplace: bool = True, copy: bool = False, ) -> AnnData | tuple[np.ndarray, np.ndarray] | None: - """\ - Filter cell outliers based on counts and numbers of genes expressed. + """Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_genes` genes expressed. This is to filter measurement outliers, @@ -124,20 +123,21 @@ def filter_cells( >>> sc.pp.filter_cells(adata, min_genes=0) >>> adata.n_obs 640 - >>> int(adata.obs['n_genes'].min()) + >>> int(adata.obs["n_genes"].min()) 1 >>> # filter manually - >>> adata_copy = adata[adata.obs['n_genes'] >= 3] + >>> adata_copy = adata[adata.obs["n_genes"] >= 3] >>> adata_copy.n_obs 554 - >>> int(adata_copy.obs['n_genes'].min()) + >>> int(adata_copy.obs["n_genes"].min()) 3 >>> # actually do some filtering >>> sc.pp.filter_cells(adata, min_genes=3) >>> adata.n_obs 554 - >>> int(adata.obs['n_genes'].min()) + >>> int(adata.obs["n_genes"].min()) 3 + """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") @@ -217,8 +217,7 @@ def filter_genes( inplace: bool = True, copy: bool = False, ) -> AnnData | tuple[np.ndarray, np.ndarray] | None: - """\ - Filter genes based on number of cells or counts. + """Filter genes based on number of cells or counts. Keep genes that have at least `min_counts` counts or are expressed in at least `min_cells` cells or have at most `max_counts` counts or are expressed @@ -254,6 +253,7 @@ def filter_genes( number_per_gene Depending on what was thresholded (`counts` or `cells`), the array stores `n_counts` or `n_cells` per gene. + """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") @@ -330,10 +330,9 @@ def log1p( layer: str | None = None, obsm: str | None = None, ) -> AnnData | np.ndarray | _CSMatrix | None: - """\ - Logarithmize the data matrix. 
+ r"""Logarithmize the data matrix. - Computes :math:`X = \\log(X + 1)`, + Computes :math:`X = \log(X + 1)`, where :math:`log` denotes the natural logarithm unless a different base is given. Parameters @@ -359,6 +358,7 @@ def log1p( Returns ------- Returns or updates `data`, depending on `copy`. + """ _check_array_function_arguments( chunked=chunked, chunk_size=chunk_size, layer=layer, obsm=obsm @@ -443,10 +443,9 @@ def sqrt( chunked: bool = False, chunk_size: int | None = None, ) -> AnnData | _CSMatrix | np.ndarray | None: - """\ - Square root the data matrix. + r"""Take square root of the data matrix. - Computes :math:`X = \\sqrt(X)`. + Computes :math:`X = \sqrt(X)`. Parameters ---------- @@ -465,6 +464,7 @@ def sqrt( Returns ------- Returns or updates `data`, depending on `copy`. + """ if isinstance(data, AnnData): adata = data.copy() if copy else data @@ -502,8 +502,7 @@ def normalize_per_cell( use_rep: Literal["after", "X"] | None = None, min_counts: int = 1, ) -> AnnData | np.ndarray | _CSMatrix | None: - """\ - Normalize total counts per cell. + """Normalize total counts per cell. .. deprecated:: 1.3.7 @@ -564,8 +563,9 @@ def normalize_per_cell( >>> print(adata.X.sum(axis=1)) [3. 3. 3.] >>> sc.pp.normalize_per_cell( - ... adata, counts_per_cell_after=1, - ... key_n_counts='n_counts2', + ... adata, + ... counts_per_cell_after=1, + ... key_n_counts="n_counts2", ... ) >>> print(adata.obs) n_counts n_counts2 @@ -574,6 +574,7 @@ def normalize_per_cell( 2 11.0 3.0 >>> print(adata.X.sum(axis=1)) [1. 1. 1.] + """ if isinstance(data, AnnData): start = logg.info("normalizing by total count per cell") @@ -652,8 +653,8 @@ def numpy_regress_out( data: np.ndarray, regressor: np.ndarray, ) -> np.ndarray: - """\ - Numba kernel for regress out unwanted sorces of variantion. + """Numba kernel for regress out unwanted sorces of variantion. + Finding coefficient using Linear regression (Linear Least Squares). """ inv_gram_matrix = np.linalg.inv(regressor.T @ regressor) @@ -671,8 +672,7 @@ def regress_out( n_jobs: int | None = None, copy: bool = False, ) -> AnnData | None: - """\ - Regress out (mostly) unwanted sources of variation. + """Regress out (mostly) unwanted sources of variation. Uses simple linear regression. This is inspired by Seurat's `regressOut` function in R :cite:p:`Satija2015`. Note that this function tends to overcorrect @@ -698,6 +698,7 @@ def regress_out( `adata.X` | `adata.layers[layer]` : :class:`numpy.ndarray` | :class:`scipy.sparse._csr.csr_matrix` (dtype `float`) Corrected count data matrix. + """ from joblib import Parallel, delayed @@ -882,8 +883,7 @@ def sample( axis: Literal["obs", 0, "var", 1] = "obs", p: str | NDArray[np.bool_] | NDArray[np.floating] | None = None, ) -> AnnData | None | tuple[np.ndarray | _CSMatrix | DaskArray, NDArray[np.int64]]: - """\ - Sample observations or variables with or without replacement. + r"""Sample observations or variables with or without replacement. Parameters ---------- @@ -892,7 +892,7 @@ def sample( Rows correspond to cells and columns to genes. fraction Sample to this `fraction` of the number of observations or variables. - (All of them, even if there are `0`\\ s/`False`\\ s in `p`.) + (All of them, even if there are `0`\ s/`False`\ s in `p`.) This can be larger than 1.0, if `replace=True`. See `axis` and `replace`. n @@ -905,7 +905,7 @@ def sample( replace If True, samples are drawn with replacement. axis - Sample `obs`\\ ervations (axis 0) or `var`\\ iables (axis 1). + Sample `obs`\ ervations (axis 0) or `var`\ iables (axis 1). 
p Drawing probabilities (floats) or mask (bools). Either an `axis`-sized array, or the name of a column. @@ -920,6 +920,7 @@ If `data` is array-like or `copy=True`, returns the subset. `indices` : numpy.ndarray If `data` is array-like, also returns the indices into the original. + """ # parameter validation if not copy and isinstance(data, AnnData) and data.isbacked: @@ -986,8 +987,7 @@ def downsample_counts( replace: bool = False, copy: bool = False, ) -> AnnData | None: - """\ - Downsample counts from count matrix. + """Downsample counts from count matrix. If `counts_per_cell` is specified, each cell will downsampled. If `total_counts` is specified, expression matrix will be downsampled to @@ -1018,6 +1018,7 @@ `adata.X` : :class:`~numpy.ndarray` | :class:`~scipy.sparse.csr_matrix` | :class:`~scipy.sparse.csc_matrix` (dtype `float`) Downsampled counts matrix. + """ raise_not_implemented_error_if_backed_type(adata.X, "downsample_counts") # This logic is all dispatch @@ -1138,8 +1139,7 @@ def _downsample_array( replace: bool = True, inplace: bool = False, ): - """\ - Evenly reduce counts in cell to target amount. + """Evenly reduce counts in a cell to a target amount. This is an internal function and has some restrictions: diff --git a/src/scanpy/preprocessing/_utils.py b/src/scanpy/preprocessing/_utils.py index 755a687d67..103c2e2b0e 100644 --- a/src/scanpy/preprocessing/_utils.py +++ b/src/scanpy/preprocessing/_utils.py @@ -52,9 +52,9 @@ def _get_mean_var( def sparse_mean_variance_axis( mtx: _CSMatrix, axis: Literal[0, 1] ) -> tuple[NDArray[np.float64], NDArray[np.float64]]: - """ - This code and internal functions are based on sklearns - `sparsefuncs.mean_variance_axis`. + """Compute mean and variance along one axis of a sparse matrix. + + This code and internal functions are based on sklearn's `sparsefuncs.mean_variance_axis`. Modifications: * allow deciding on the output type, which can increase accuracy when calculating the mean and variance of 32bit floats. @@ -94,8 +93,7 @@ def sparse_mean_variance_axis( def sparse_mean_var_minor_axis( data, indices, indptr, *, major_len, minor_len, n_threads ): - """ - Computes mean and variance for a sparse matrix for the minor axis. + """Compute mean and variance for a sparse matrix for the minor axis. Given arrays for a csr matrix, returns the means and variances for each column back. @@ -125,8 +124,7 @@ def sparse_mean_var_minor_axis( @njit def sparse_mean_var_major_axis(data, indptr, *, major_len, minor_len, n_threads): - """ - Computes mean and variance for a sparse array for the major axis. + """Compute mean and variance for a sparse array for the major axis. Given arrays for a csr matrix, returns the means and variances for each row back.
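Aside for reviewers: the mean/variance helpers renamed above all compute the same moment identity, var(X) = E[X^2] - E[X]^2, along the chosen axis. A minimal sketch of that identity in plain numpy/scipy (illustrative only, not scanpy's numba kernels; the variable names are made up):

>>> import numpy as np
>>> from scipy import sparse
>>> mtx = sparse.random(100, 20, density=0.3, format="csr", random_state=0)
>>> mean = np.asarray(mtx.mean(axis=0)).ravel()  # column means
>>> var = np.asarray(mtx.multiply(mtx).mean(axis=0)).ravel() - mean**2  # E[X^2] - E[X]^2
>>> bool(np.allclose(mean, mtx.toarray().mean(axis=0)))
True
>>> bool(np.allclose(var, mtx.toarray().var(axis=0)))
True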
@@ -169,9 +167,7 @@ def sample_comb( def _to_dense(X: _CSMatrix, order: Literal["C", "F"] = "C") -> NDArray: - """\ - Numba kernel for np.toarray() function - """ + """Numba kernel for np.toarray() function.""" out = np.zeros(X.shape, dtype=X.dtype, order=order) if X.format == "csr": _to_dense_csr_numba(X.indptr, X.indices, X.data, out, X.shape) diff --git a/src/scanpy/queries/__init__.py b/src/scanpy/queries/__init__.py index 6080f88ef8..6e618d3782 100644 --- a/src/scanpy/queries/__init__.py +++ b/src/scanpy/queries/__init__.py @@ -1,4 +1,5 @@ -# Biomart queries +"""Biomart queries.""" + from __future__ import annotations from ._queries import ( diff --git a/src/scanpy/queries/_queries.py b/src/scanpy/queries/_queries.py index e992f937e3..29f4955dc9 100644 --- a/src/scanpy/queries/_queries.py +++ b/src/scanpy/queries/_queries.py @@ -45,8 +45,7 @@ def simple_query( host: str = "www.ensembl.org", use_cache: bool = False, ) -> pd.DataFrame: - """\ - A simple interface to biomart. + """Interface with biomart. Params ------ @@ -85,8 +84,7 @@ def biomart_annotations( host: str = "www.ensembl.org", use_cache: bool = False, ) -> pd.DataFrame: - """\ - Retrieve gene annotations from ensembl biomart. + """Retrieve gene annotations from ensembl biomart. Parameters ---------- @@ -110,6 +108,7 @@ def biomart_annotations( ... ["ensembl_gene_id", "start_position", "end_position", "chromosome_name"], ... ).set_index("ensembl_gene_id") >>> adata.var[annot.columns] = annot + """ return simple_query(org=org, attrs=attrs, host=host, use_cache=use_cache) @@ -125,8 +124,7 @@ def gene_coordinates( host: str = "www.ensembl.org", use_cache: bool = False, ) -> pd.DataFrame: - """\ - Retrieve gene coordinates for specific organism through BioMart. + """Retrieve gene coordinates for specific organism through BioMart. Parameters ---------- @@ -149,6 +147,7 @@ def gene_coordinates( -------- >>> import scanpy as sc >>> sc.queries.gene_coordinates("hsapiens", "MT-TF") + """ res = simple_query( org=org, @@ -170,8 +169,7 @@ def mitochondrial_genes( use_cache: bool = False, chromosome: str = "MT", ) -> pd.DataFrame: - """\ - Mitochondrial gene symbols for specific organism through BioMart. + """Mitochondrial gene symbols for specific organism through BioMart. Parameters ---------- @@ -193,8 +191,13 @@ def mitochondrial_genes( -------- >>> import scanpy as sc >>> mito_gene_names = sc.queries.mitochondrial_genes("hsapiens") - >>> mito_ensembl_ids = sc.queries.mitochondrial_genes("hsapiens", attrname="ensembl_gene_id") - >>> mito_gene_names_fly = sc.queries.mitochondrial_genes("dmelanogaster", chromosome="mitochondrion_genome") + >>> mito_ensembl_ids = sc.queries.mitochondrial_genes( + ... "hsapiens", attrname="ensembl_gene_id" + ... ) + >>> mito_gene_names_fly = sc.queries.mitochondrial_genes( + ... "dmelanogaster", chromosome="mitochondrion_genome" + ... ) + """ return simple_query( org, @@ -214,8 +217,7 @@ def enrich( org: str = "hsapiens", gprofiler_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> pd.DataFrame: - """\ - Get enrichment for DE results. + """Get enrichment for DE results. This is a thin convenience wrapper around the very useful gprofiler_. 
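A hedged usage sketch tying the BioMart queries above to the QC metrics from `_qc.py` earlier in this patch (requires network access to Ensembl; the `mt` column name is illustrative, and `pbmc68k_reduced` is already processed, so the resulting metrics are for demonstration only):

>>> import scanpy as sc
>>> adata = sc.datasets.pbmc68k_reduced()
>>> mito = sc.queries.mitochondrial_genes("hsapiens")  # DataFrame of mito gene symbols
>>> adata.var["mt"] = adata.var_names.isin(mito.iloc[:, 0])  # flag mito genes
>>> sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)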
@@ -261,14 +263,17 @@ Using `sc.queries.enrich` on a list of genes: >>> import scanpy as sc - >>> sc.queries.enrich(['KLF4', 'PAX5', 'SOX2', 'NANOG'], org="hsapiens") - >>> sc.queries.enrich({{'set1':['KLF4', 'PAX5'], 'set2':['SOX2', 'NANOG']}}, org="hsapiens") + >>> sc.queries.enrich(["KLF4", "PAX5", "SOX2", "NANOG"], org="hsapiens") + >>> sc.queries.enrich( + ... {{"set1": ["KLF4", "PAX5"], "set2": ["SOX2", "NANOG"]}}, org="hsapiens" + ... ) Using `sc.queries.enrich` on an :class:`anndata.AnnData` object: >>> pbmcs = sc.datasets.pbmc68k_reduced() >>> sc.tl.rank_genes_groups(pbmcs, "bulk_labels") >>> sc.queries.enrich(pbmcs, "CD34+") + """ try: from gprofiler import GProfiler diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py index 1910d9393e..aaceb490fb 100644 --- a/src/scanpy/readwrite.py +++ b/src/scanpy/readwrite.py @@ -1,4 +1,4 @@ -"""Reading and Writing""" +"""Reading and Writing.""" from __future__ import annotations @@ -94,8 +94,7 @@ def read( cache_compression: Literal["gzip", "lzf"] | None | Empty = _empty, **kwargs, ) -> AnnData: - """\ - Read file and return :class:`~anndata.AnnData` object. + """Read file and return :class:`~anndata.AnnData` object. To speed up reading, consider passing ``cache=True``, which creates an hdf5 cache file. @@ -137,6 +136,7 @@ def read( Returns ------- An :class:`~anndata.AnnData` object + """ filename = Path(filename) # allow passing strings if is_valid_filename(filename): @@ -175,8 +175,7 @@ def read_10x_h5( gex_only: bool = True, backup_url: str | None = None, ) -> AnnData: - """\ - Read 10x-Genomics-formatted hdf5 file. + r"""Read 10x-Genomics-formatted hdf5 file. Parameters ---------- @@ -202,14 +201,15 @@ def read_10x_h5( Cell names :attr:`~anndata.AnnData.var_names` Gene names for a feature barcode matrix, probe names for a probe bc matrix - :attr:`~anndata.AnnData.var`\\ `['gene_ids']` Gene IDs - :attr:`~anndata.AnnData.var`\\ `['feature_types']` Feature types - :attr:`~anndata.AnnData.obs`\\ `[filtered_barcodes]` filtered barcodes if present in the matrix :attr:`~anndata.AnnData.var` Any additional metadata present in /matrix/features is read in. + :attr:`~anndata.AnnData.var`\ `['gene_ids']` Gene IDs + :attr:`~anndata.AnnData.var`\ `['feature_types']` Feature types + :attr:`~anndata.AnnData.obs`\ `[filtered_barcodes]` filtered barcodes if present in the matrix :attr:`~anndata.AnnData.var` Any additional metadata present in /matrix/features is read in. + """ start = logg.info(f"reading {filename}") is_present = _check_datafile_present_and_download(filename, backup_url=backup_url) @@ -239,9 +239,7 @@ def read_10x_h5( def _read_legacy_10x_h5( path: Path, *, genome: str | None = None, start: datetime | None = None ): - """ - Read hdf5 file from Cell Ranger v2 or earlier versions. - """ + """Read hdf5 file from Cell Ranger v2 or earlier versions.""" with h5py.File(str(path), "r") as f: try: children = list(f.keys()) @@ -304,9 +302,7 @@ def _collect_datasets(dsets: dict, group: h5py.Group): def _read_v3_10x_h5(filename, *, start=None): - """ - Read hdf5 file from Cell Ranger v3 or later versions. - """ + """Read hdf5 file from Cell Ranger v3 or later versions.""" with h5py.File(str(filename), "r") as f: try: dsets = {} @@ -387,8 +383,7 @@ def read_visium( load_images: bool | None = True, source_image_path: Path | str | None = None, ) -> AnnData: - """\ - Read 10x-Genomics-formatted visum dataset. + r"""Read 10x-Genomics-formatted Visium dataset. .. deprecated:: 1.11.0 Use :func:`squidpy.read.visium` instead. @@ -400,7 +395,7 @@ See :func:`~scanpy.pl.spatial` for a compatible plotting function. ..
_Space Ranger output docs: https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview + .. _Space Ranger output docs: https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview Parameters ---------- @@ -428,24 +423,25 @@ Cell names :attr:`~anndata.AnnData.var_names` Gene names for a feature barcode matrix, probe names for a probe bc matrix - :attr:`~anndata.AnnData.var`\\ `['gene_ids']` Gene IDs - :attr:`~anndata.AnnData.var`\\ `['feature_types']` Feature types - :attr:`~anndata.AnnData.obs`\\ `[filtered_barcodes]` filtered barcodes if present in the matrix :attr:`~anndata.AnnData.var` Any additional metadata present in /matrix/features is read in. - :attr:`~anndata.AnnData.uns`\\ `['spatial']` Dict of spaceranger output files with 'library_id' as key - :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['images']` Dict of images (`'hires'` and `'lowres'`) - :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['scalefactors']` Scale factors for the spots - :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['metadata']` Files metadata: 'chemistry_description', 'software_version', 'source_image_path' - :attr:`~anndata.AnnData.obsm`\\ `['spatial']` + :attr:`~anndata.AnnData.var`\ `['gene_ids']` Gene IDs + :attr:`~anndata.AnnData.var`\ `['feature_types']` Feature types + :attr:`~anndata.AnnData.obs`\ `[filtered_barcodes]` filtered barcodes if present in the matrix :attr:`~anndata.AnnData.var` Any additional metadata present in /matrix/features is read in. + :attr:`~anndata.AnnData.uns`\ `['spatial']` Dict of spaceranger output files with 'library_id' as key + :attr:`~anndata.AnnData.uns`\ `['spatial'][library_id]['images']` Dict of images (`'hires'` and `'lowres'`) + :attr:`~anndata.AnnData.uns`\ `['spatial'][library_id]['scalefactors']` Scale factors for the spots + :attr:`~anndata.AnnData.uns`\ `['spatial'][library_id]['metadata']` Files metadata: 'chemistry_description', 'software_version', 'source_image_path' + :attr:`~anndata.AnnData.obsm`\ `['spatial']` Spatial spot coordinates, usable as `basis` by :func:`~scanpy.pl.embedding`. + """ path = Path(path) adata = read_10x_h5(path / count_file, genome=genome) @@ -552,8 +548,7 @@ def read_10x_mtx( gex_only: bool = True, prefix: str | None = None, ) -> AnnData: - """\ - Read 10x-Genomics-formatted mtx directory. + """Read 10x-Genomics-formatted mtx directory. Parameters ---------- @@ -582,6 +577,7 @@ Returns ------- An :class:`~anndata.AnnData` object + """ path = Path(path) prefix = "" if prefix is None else prefix @@ -611,9 +607,7 @@ def _read_10x_mtx( prefix: str = "", is_legacy: bool, ) -> AnnData: - """ - Read mex from output from Cell Ranger v2- or v3+ - """ + """Read MEX output from Cell Ranger v2- or v3+.""" suffix = "" if is_legacy else ".gz" adata = read( path / f"{prefix}matrix.mtx{suffix}", @@ -653,8 +647,7 @@ def write( compression: Literal["gzip", "lzf"] | None = "gzip", compression_opts: int | None = None, ): - """\ - Write :class:`~anndata.AnnData` objects to file. + """Write :class:`~anndata.AnnData` objects to file. Parameters ---------- @@ -672,6 +665,7 @@ See https://docs.h5py.org/en/latest/high/dataset.html. compression_opts See https://docs.h5py.org/en/latest/high/dataset.html. + """ filename = Path(filename) # allow passing strings if is_valid_filename(filename): @@ -707,8 +701,7 @@ def write( def read_params( filename: Path | str, *, as_header: bool = False ) -> dict[str, int | float | bool | str | None]: - """\ - Read parameter dictionary from text file. + """Read parameter dictionary from text file. Assumes that parameters are specified in the format:: @@ -727,6 +720,7 @@ Returns ------- Dictionary that stores parameters.
+ """ filename = Path(filename) # allow passing str objects from collections import OrderedDict @@ -743,8 +737,7 @@ def read_params( def write_params(path: Path | str, *args, **maps): - """\ - Write parameters to file, so that it's readable by read_params. + """Write parameters to file, so that it's readable by read_params. Uses INI file format. """ @@ -873,8 +866,7 @@ def _slugify(path: str | PurePath) -> str: def _read_softgz(filename: str | bytes | Path | BinaryIO) -> AnnData: - """\ - Read a SOFT format data file. + """Read a SOFT format data file. The SOFT format is documented here https://www.ncbi.nlm.nih.gov/geo/info/soft.html. @@ -883,6 +875,7 @@ def _read_softgz(filename: str | bytes | Path | BinaryIO) -> AnnData: ----- The function is based on a script by Kerby Shedden. https://dept.stat.lsa.umich.edu/~kshedden/Python-Workshop/gene_expression_comparison.html + """ import gzip @@ -938,9 +931,10 @@ def _read_softgz(filename: str | bytes | Path | BinaryIO) -> AnnData: def is_float(string: str) -> float: """Check whether string is float. - See also + See Also -------- https://stackoverflow.com/questions/736043/checking-if-a-string-can-be-converted-to-float-in-python + """ try: float(string) diff --git a/src/scanpy/sim_models/__init__.py b/src/scanpy/sim_models/__init__.py index e69de29bb2..ea6d2ba410 100644 --- a/src/scanpy/sim_models/__init__.py +++ b/src/scanpy/sim_models/__init__.py @@ -0,0 +1 @@ +"""Package containing the models for simulating scRNA-seq data.""" diff --git a/src/scanpy/tools/__init__.py b/src/scanpy/tools/__init__.py index ce0904ec66..e8ebd06328 100644 --- a/src/scanpy/tools/__init__.py +++ b/src/scanpy/tools/__init__.py @@ -1,3 +1,5 @@ +"""Analysis tools.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/src/scanpy/tools/_dendrogram.py b/src/scanpy/tools/_dendrogram.py index f33aca1ff7..2b32cdd61b 100644 --- a/src/scanpy/tools/_dendrogram.py +++ b/src/scanpy/tools/_dendrogram.py @@ -1,6 +1,4 @@ -""" -Computes a dendrogram based on a given categorical observation. -""" +"""Computes a dendrogram based on a given categorical observation.""" from __future__ import annotations @@ -48,8 +46,7 @@ def dendrogram( key_added: str | None = None, inplace: bool = True, ) -> dict[str, Any] | None: - """\ - Computes a hierarchical clustering for the given `groupby` categories. + """Compute a hierarchical clustering for the given `groupby` categories. By default, the PCA representation is used unless `.X` has less than 50 variables. 
@@ -111,13 +108,13 @@ def dendrogram( -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.tl.dendrogram(adata, groupby='bulk_labels') - >>> sc.pl.dendrogram(adata, groupby='bulk_labels') # doctest: +SKIP + >>> sc.tl.dendrogram(adata, groupby="bulk_labels") + >>> sc.pl.dendrogram(adata, groupby="bulk_labels") # doctest: +SKIP - >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ'] - >>> sc.pl.dotplot(adata, markers, groupby='bulk_labels', dendrogram=True) - """ + >>> markers = ["C1QA", "PSAP", "CD79A", "CD79B", "CST3", "LYZ"] + >>> sc.pl.dotplot(adata, markers, groupby="bulk_labels", dendrogram=True) + """ raise_not_implemented_error_if_backed_type(adata.X, "dendrogram") if isinstance(groupby, str): # if not a list, turn into a list diff --git a/src/scanpy/tools/_diffmap.py b/src/scanpy/tools/_diffmap.py index b69c2ef18f..72f3c1759f 100644 --- a/src/scanpy/tools/_diffmap.py +++ b/src/scanpy/tools/_diffmap.py @@ -20,8 +20,7 @@ def diffmap( random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | None: - """\ - Diffusion Maps :cite:p:`Coifman2005,Haghverdi2015,Wolf2018`. + """Diffusion Maps :cite:p:`Coifman2005,Haghverdi2015,Wolf2018`. Diffusion maps :cite:p:`Coifman2005` have been proposed for visualizing single-cell data by :cite:t:`Haghverdi2015`. This tool uses the adapted Gaussian kernel suggested @@ -72,6 +71,7 @@ def diffmap( which is non-informative in diffusion maps. Therefore, the first diffusion component is at index 1, e.g. `adata.obsm["X_diffmap"][:,1]` + """ if neighbors_key is None: neighbors_key = "neighbors" diff --git a/src/scanpy/tools/_dpt.py b/src/scanpy/tools/_dpt.py index a9adc2a112..0f9ebe9f9e 100644 --- a/src/scanpy/tools/_dpt.py +++ b/src/scanpy/tools/_dpt.py @@ -48,9 +48,7 @@ def dpt( neighbors_key: str | None = None, copy: bool = False, ) -> AnnData | None: - """\ - Infer progression of cells through geodesic distance along the graph - :cite:p:`Haghverdi2016,Wolf2019`. + """Infer progression of cells through geodesic distance along the graph :cite:p:`Haghverdi2016,Wolf2019`. Reconstruct the progression of a biological process from snapshot data. `Diffusion Pseudotime` was introduced by :cite:t:`Haghverdi2016` and @@ -62,7 +60,7 @@ def dpt( to detect branchings via :func:`~scanpy.tl.paga`. For pseudotime, you need to annotate your data with a root cell. For instance:: - adata.uns['iroot'] = np.flatnonzero(adata.obs['cell_types'] == 'Stem')[0] + adata.uns["iroot"] = np.flatnonzero(adata.obs["cell_types"] == "Stem")[0] This requires running :func:`~scanpy.pp.neighbors`, first. In order to reproduce the original implementation of DPT, use `method=='gauss'`. @@ -122,6 +120,7 @@ def dpt( Notes ----- The tool is similar to the R package `destiny` of :cite:t:`Angerer2015`. + """ # standard errors, warnings etc. adata = adata.copy() if copy else adata @@ -201,9 +200,7 @@ def dpt( class DPT(Neighbors): - """\ - Hierarchical Diffusion Pseudotime. - """ + """Hierarchical Diffusion Pseudotime.""" def __init__( self, @@ -228,8 +225,7 @@ def __init__( self.allow_kendall_tau_shift = allow_kendall_tau_shift def branchings_segments(self): - """\ - Detect branchings and partition the data into corresponding segments. + """Detect branchings and partition the data into corresponding segments. Detect all branchings up to `n_branchings`. @@ -251,8 +247,7 @@ def branchings_segments(self): self.order_pseudotime() def detect_branchings(self): - """\ - Detect all branchings up to `n_branchings`. 
+ """Detect all branchings up to `n_branchings`. Writes Attributes ----------------- @@ -382,9 +377,7 @@ def check_adjacency(self): # self.segs_adjacency.eliminate_zeros() def select_segment(self, segs, segs_tips, segs_undecided) -> tuple[int, int]: - """\ - Out of a list of line segments, choose segment that has the most - distant second data point. + """Out of a list of line segments, choose segment that has the most distant second data point. Assume the distance matrix Ddiff is sorted according to seg_idcs. Compute all the distances. @@ -395,6 +388,7 @@ def select_segment(self, segs, segs_tips, segs_undecided) -> tuple[int, int]: Index identifying the position within the list of line segments. tips3 Positions of tips within chosen segment. + """ scores_tips = np.zeros((len(segs), 4)) allindices = np.arange(self._adata.shape[0], dtype=int) @@ -497,8 +491,7 @@ def set_segs_names(self): self.segs_names = segs_names def order_pseudotime(self): - """\ - Define indices that reflect segment and pseudotime order. + """Define indices that reflect segment and pseudotime order. Writes ------ @@ -547,8 +540,7 @@ def detect_branching( iseg: int, tips3: np.ndarray, ): - """\ - Detect branching on given segment. + """Detect branching on given segment. Updates all list parameters inplace. @@ -565,6 +557,7 @@ def detect_branching( Position of segment under study in segs. tips3 The three tip points. They form a 'triangle' that contains the data. + """ seg = segs[iseg] # restrict distance matrix to points in segment @@ -764,8 +757,7 @@ def _detect_branching( list[list[int]], int, ]: - """\ - Detect branching on given segment. + """Detect branching on given segment. Call function __detect_branching three times for all three orderings of tips. Points that do not belong to the same segment in all three @@ -792,6 +784,7 @@ def _detect_branching( ? trunk ? + """ if self.flavor == "haghverdi16": ssegs = self._detect_branching_single_haghverdi16(Dseg, tips) @@ -957,8 +950,7 @@ def _detect_branching_single_wolf17_bi(self, Dseg, tips): def __detect_branching_haghverdi16( self, Dseg: np.ndarray, tips: np.ndarray ) -> np.ndarray: - """\ - Detect branching on given segment. + """Detect branching on given segment. Compute point that maximizes kendall tau correlation of the sequences of distances to the second and the third tip, respectively, when 'moving @@ -975,6 +967,7 @@ def __detect_branching_haghverdi16( Returns ------- Segments obtained from "splitting away the first tip cell". + """ # sort distance from first tip point # then the sequence of distances Dseg[tips[0]][idcs] increases @@ -1037,6 +1030,7 @@ def kendall_tau_split(self, a: np.ndarray, b: np.ndarray) -> int: Returns ------- Splitting index according to above description. + """ if a.size != b.size: msg = "a and b need to have the same size" @@ -1096,6 +1090,7 @@ def _kendall_tau_add(self, len_old: int, diff_pos: int, tau_old: float): Difference between concordant and non-concordant pairs. tau_old Kendall rank correlation of the old sequence. + """ return 2.0 / (len_old + 1) * (float(diff_pos) / len_old - tau_old) @@ -1112,6 +1107,7 @@ def _kendall_tau_subtract(self, len_old: int, diff_neg: int, tau_old: float): Difference between concordant and non-concordant pairs. tau_old Kendall rank correlation of the old sequence. 
+ """ return 2.0 / (len_old - 2) * (-float(diff_neg) / (len_old - 1) + tau_old) @@ -1133,6 +1129,7 @@ def _kendall_tau_diff(self, a: np.ndarray, b: np.ndarray, i) -> tuple[int, int]: Difference between concordant pairs for both subsequences. diff_neg Difference between non-concordant pairs for both subsequences. + """ # compute ordering relation of the single points a[i] and b[i] # with all previous points of the sequences a and b, respectively diff --git a/src/scanpy/tools/_draw_graph.py b/src/scanpy/tools/_draw_graph.py index 0727715671..42c871f17b 100644 --- a/src/scanpy/tools/_draw_graph.py +++ b/src/scanpy/tools/_draw_graph.py @@ -52,8 +52,7 @@ def draw_graph( copy: bool = False, **kwds, ) -> AnnData | None: - """\ - Force-directed graph drawing :cite:p:`Islam2011,Jacomy2014,Chippada2018`. + """Force-directed graph drawing :cite:p:`Islam2011,Jacomy2014,Chippada2018`. An alternative to tSNE that often preserves the topology of the data better. This requires running :func:`~scanpy.pp.neighbors`, first. @@ -121,6 +120,7 @@ def draw_graph( the field is called `'X_draw_graph_fa'`. `key_added_ext` overwrites `layout`. `adata.uns['draw_graph']`: :class:`dict` `draw_graph` parameters. + """ start = logg.info(f"drawing single-cell graph using layout {layout!r}") if layout not in (layouts := get_literal_vals(_Layout)): diff --git a/src/scanpy/tools/_embedding_density.py b/src/scanpy/tools/_embedding_density.py index b930cc0aeb..f080783932 100644 --- a/src/scanpy/tools/_embedding_density.py +++ b/src/scanpy/tools/_embedding_density.py @@ -1,6 +1,4 @@ -"""\ -Calculate density of cells in embeddings -""" +"""Calculate density of cells in embeddings.""" from __future__ import annotations @@ -19,9 +17,7 @@ def _calc_density(x: np.ndarray, y: np.ndarray): - """\ - Calculates the density of points in 2 dimensions. - """ + """Calculate the density of points in 2 dimensions.""" from scipy.stats import gaussian_kde # Calculate the point density @@ -46,8 +42,7 @@ def embedding_density( key_added: str | None = None, components: str | Sequence[str] | None = None, ) -> None: - """\ - Calculate the density of cells in an embedding (per condition). + """Calculate the density of cells in an embedding (per condition). Gaussian kernel density estimation is used to calculate the density of cells in an embedded space. This can be performed per category over a @@ -113,9 +108,10 @@ def embedding_density( .. currentmodule:: scanpy - See also + See Also -------- pl.embedding_density + """ # to ensure that newly created covariates are categorical # to test for category numbers diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py index 256e1a97c6..8bc36143ae 100644 --- a/src/scanpy/tools/_ingest.py +++ b/src/scanpy/tools/_ingest.py @@ -46,8 +46,7 @@ def ingest( inplace: bool = True, **kwargs, ): - """\ - Map labels and embeddings from reference data to new data. + """Map labels and embeddings from reference data to new data. :doc:`/tutorials/basics/integrating-data-using-ingest` @@ -118,7 +117,8 @@ def ingest( >>> import scanpy as sc >>> sc.pp.neighbors(adata_ref) >>> sc.tl.umap(adata_ref) - >>> sc.tl.ingest(adata, adata_ref, obs='cell_type') + >>> sc.tl.ingest(adata, adata_ref, obs="cell_type") + """ # anndata version check anndata_version = pkg_version("anndata") @@ -214,8 +214,7 @@ def __repr__(self): class Ingest: - """\ - Class to map labels and embeddings from existing data to new data. + """Class to map labels and embeddings from existing data to new data. 
You need to run :func:`~scanpy.pp.neighbors` on `adata` before initializing Ingest with it. @@ -225,6 +224,7 @@ class Ingest: adata : :class:`~anndata.AnnData` The annotated data matrix of shape `n_obs` × `n_vars` with embeddings and labels. + """ def _init_umap(self, adata): @@ -379,8 +379,7 @@ def _same_rep(self): return adata.X def fit(self, adata_new): - """\ - Map `adata_new` to the same representation as `adata`. + """Map `adata_new` to the same representation as `adata`. This function identifies the representation which was used to calculate neighbors in 'adata' and maps `adata_new` to @@ -409,8 +408,7 @@ def fit(self, adata_new): self._obsm["rep"] = self._same_rep() def neighbors(self, k=None, queue_size=5, epsilon=0.1, random_state=0): - """\ - Calculate neighbors of `adata_new` observations in `adata`. + """Calculate neighbors of `adata_new` observations in `adata`. This function calculates `k` neighbors in `adata` for each observation of `adata_new`. @@ -432,8 +430,7 @@ def _umap_transform(self): return self._umap.transform(self._obsm["rep"]) def map_embedding(self, method): - """\ - Map embeddings of `adata` to `adata_new`. + """Map embeddings of `adata` to `adata_new`. This function infers embeddings, specified by `method`, for `adata_new` from existing embeddings in `adata`. @@ -454,8 +451,7 @@ def _knn_classify(self, labels): return pd.Categorical(values=values, categories=cat_array.cat.categories) def map_labels(self, labels, method): - """\ - Map labels of `adata` to `adata_new`. + """Map labels of `adata` to `adata_new`. This function infers `labels` for `adata_new.obs` from existing labels in `adata.obs`. @@ -469,8 +465,7 @@ def map_labels(self, labels, method): @old_positionals("inplace") def to_adata(self, *, inplace: bool = False) -> AnnData | None: - """\ - Returns `adata_new` with mapped embeddings and labels. + """Return `adata_new` with mapped embeddings and labels. If `inplace=False` returns a copy of `adata_new` with mapped embeddings and labels in `obsm` and `obs` correspondingly. @@ -490,8 +485,7 @@ def to_adata(self, *, inplace: bool = False) -> AnnData | None: def to_adata_joint( self, batch_key="batch", batch_categories=None, index_unique="-" ): - """\ - Returns concatenated object. + """Return concatenated object. This function returns the new :class:`~anndata.AnnData` object with concatenated existing embeddings and labels of 'adata' diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index 97d1cea1b5..19984d8736 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -48,8 +48,7 @@ def leiden( flavor: Literal["leidenalg", "igraph"] = "leidenalg", **clustering_args, ) -> AnnData | None: - """\ - Cluster cells into subgroups :cite:p:`Traag2019`. + """Cluster cells into subgroups :cite:p:`Traag2019`. Cluster cells using the Leiden algorithm :cite:p:`Traag2019`, an improved version of the Louvain algorithm :cite:p:`Blondel2008`. @@ -119,6 +118,7 @@ def leiden( `adata.uns['leiden' | key_added]['params']` : :class:`dict` A dict with the values for the parameters `resolution`, `random_state`, and `n_iterations`. + """ if flavor not in {"igraph", "leidenalg"}: msg = ( diff --git a/src/scanpy/tools/_louvain.py b/src/scanpy/tools/_louvain.py index 88269a21d4..a00b7d1db1 100644 --- a/src/scanpy/tools/_louvain.py +++ b/src/scanpy/tools/_louvain.py @@ -65,8 +65,7 @@ def louvain( obsp: str | None = None, copy: bool = False, ) -> AnnData | None: - """\ - Cluster cells into subgroups :cite:p:`Blondel2008,Levine2015,Traag2017`. 
+ """Cluster cells into subgroups :cite:p:`Blondel2008,Levine2015,Traag2017`. Cluster cells using the Louvain algorithm :cite:p:`Blondel2008` in the implementation of :cite:t:`Traag2017`. The Louvain algorithm was proposed for single-cell @@ -139,6 +138,7 @@ def louvain( `adata.uns['louvain' | key_added]['params']` : :class:`dict` A dict with the values for the parameters `resolution`, `random_state`, and `n_iterations`. + """ partition_kwargs = dict(partition_kwargs) start = logg.info("running Louvain clustering") diff --git a/src/scanpy/tools/_marker_gene_overlap.py b/src/scanpy/tools/_marker_gene_overlap.py index 43408ff2c3..ca3e63ea77 100644 --- a/src/scanpy/tools/_marker_gene_overlap.py +++ b/src/scanpy/tools/_marker_gene_overlap.py @@ -1,6 +1,4 @@ -"""\ -Calculate overlaps of rank_genes_groups marker genes with marker gene dictionaries -""" +"""Calculate overlaps of rank_genes_groups marker genes with marker gene dictionaries.""" from __future__ import annotations @@ -22,8 +20,7 @@ def _calc_overlap_count(markers1: dict, markers2: dict): - """\ - Calculate overlap count between the values of two dictionaries + """Calculate overlap count between the values of two dictionaries. Note: dict values must be sets """ @@ -37,8 +34,7 @@ def _calc_overlap_count(markers1: dict, markers2: dict): def _calc_overlap_coef(markers1: dict, markers2: dict): - """\ - Calculate overlap coefficient between the values of two dictionaries + """Calculate overlap coefficient between the values of two dictionaries. Note: dict values must be sets """ @@ -56,8 +52,7 @@ def _calc_overlap_coef(markers1: dict, markers2: dict): def _calc_jaccard(markers1: dict, markers2: dict): - """\ - Calculate jaccard index between the values of two dictionaries + """Calculate jaccard index between the values of two dictionaries. Note: dict values must be sets """ @@ -87,9 +82,7 @@ def marker_gene_overlap( key_added: str = "marker_gene_overlap", inplace: bool = False, ): - """\ - Calculate an overlap score between data-derived marker genes and - provided markers + """Calculate an overlap score between data-derived marker genes and provided markers. Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe @@ -144,21 +137,22 @@ def marker_gene_overlap( -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.pp.pca(adata, svd_solver='arpack') + >>> sc.pp.pca(adata, svd_solver="arpack") >>> sc.pp.neighbors(adata) >>> sc.tl.leiden(adata) - >>> sc.tl.rank_genes_groups(adata, groupby='leiden') + >>> sc.tl.rank_genes_groups(adata, groupby="leiden") >>> marker_genes = { - ... 'CD4 T cells': {'IL7R'}, - ... 'CD14+ Monocytes': {'CD14', 'LYZ'}, - ... 'B cells': {'MS4A1'}, - ... 'CD8 T cells': {'CD8A'}, - ... 'NK cells': {'GNLY', 'NKG7'}, - ... 'FCGR3A+ Monocytes': {'FCGR3A', 'MS4A7'}, - ... 'Dendritic Cells': {'FCER1A', 'CST3'}, - ... 'Megakaryocytes': {'PPBP'} + ... "CD4 T cells": {"IL7R"}, + ... "CD14+ Monocytes": {"CD14", "LYZ"}, + ... "B cells": {"MS4A1"}, + ... "CD8 T cells": {"CD8A"}, + ... "NK cells": {"GNLY", "NKG7"}, + ... "FCGR3A+ Monocytes": {"FCGR3A", "MS4A7"}, + ... "Dendritic Cells": {"FCER1A", "CST3"}, + ... "Megakaryocytes": {"PPBP"}, ... 
} >>> marker_matches = sc.tl.marker_gene_overlap(adata, marker_genes) + """ # Test user inputs if inplace: diff --git a/src/scanpy/tools/_paga.py b/src/scanpy/tools/_paga.py index b7f1e86e5d..0058522e9f 100644 --- a/src/scanpy/tools/_paga.py +++ b/src/scanpy/tools/_paga.py @@ -29,8 +29,7 @@ def paga( neighbors_key: str | None = None, copy: bool = False, ) -> AnnData | None: - """\ - Mapping out the coarse-grained connectivity structures of complex manifolds :cite:p:`Wolf2019`. + """Map out the coarse-grained connectivity structures of complex manifolds :cite:p:`Wolf2019`. By quantifying the connectivity of partitions (groups, clusters) of the single-cell graph, partition-based graph abstraction (PAGA) generates a much @@ -104,6 +103,7 @@ def paga( pl.paga pl.paga_path pl.paga_compare + """ check_neighbors = "neighbors" if neighbors_key is None else neighbors_key if check_neighbors not in adata.uns: @@ -398,6 +398,7 @@ def paga_degrees(adata: AnnData) -> list[int]: Returns ------- List of degrees for each node. + """ import networkx as nx @@ -417,6 +418,7 @@ def paga_expression_entropies(adata: AnnData) -> list[float]: Returns ------- Entropies of median expressions for each node. + """ from scipy.stats import entropy @@ -479,6 +481,7 @@ def paga_compare_paths( Fraction of consistent paths n_paths Number of paths + """ import networkx as nx diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 206920fe87..5af62a91aa 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -500,8 +500,7 @@ def rank_genes_groups( layer: str | None = None, **kwds, ) -> AnnData | None: - """\ - Rank genes for characterizing groups. + """Rank genes for characterizing groups. Expects logarithmized data. @@ -594,9 +593,10 @@ def rank_genes_groups( Examples -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.tl.rank_genes_groups(adata, 'bulk_labels', method='wilcoxon') + >>> sc.tl.rank_genes_groups(adata, "bulk_labels", method="wilcoxon") >>> # to visualize the results >>> sc.pl.rank_genes_groups(adata) + """ mask_var = _check_mask(adata, mask_var, "var") @@ -760,9 +760,11 @@ def filter_rank_genes_groups( max_out_group_fraction: float = 0.5, compare_abs: bool = False, ) -> None: - """\ - Filters out genes based on log fold change and fraction of genes expressing the - gene within and outside the `groupby` categories. + """Filter out genes based on two criteria. + + 1. log fold change and + 2. fraction of cells expressing the + gene within and outside the `groupby` categories. See :func:`~scanpy.tl.rank_genes_groups`.
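As a companion to the ranking/filtering hunks above, a short sketch of pulling the stored results into a data frame with `sc.get.rank_genes_groups_df` (the group is chosen programmatically so the snippet does not hard-code a category name):

>>> import scanpy as sc
>>> adata = sc.datasets.pbmc68k_reduced()
>>> sc.tl.rank_genes_groups(adata, "bulk_labels", method="wilcoxon")
>>> group = adata.obs["bulk_labels"].cat.categories[0]
>>> df = sc.get.rank_genes_groups_df(adata, group=group)
>>> top = df["names"].head()  # top-ranked genes for that group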
@@ -794,12 +796,13 @@ def filter_rank_genes_groups( -------- >>> import scanpy as sc >>> adata = sc.datasets.pbmc68k_reduced() - >>> sc.tl.rank_genes_groups(adata, 'bulk_labels', method='wilcoxon') + >>> sc.tl.rank_genes_groups(adata, "bulk_labels", method="wilcoxon") >>> sc.tl.filter_rank_genes_groups(adata, min_fold_change=3) >>> # visualize results - >>> sc.pl.rank_genes_groups(adata, key='rank_genes_groups_filtered') + >>> sc.pl.rank_genes_groups(adata, key="rank_genes_groups_filtered") >>> # visualize results using dotplot - >>> sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered') + >>> sc.pl.rank_genes_groups_dotplot(adata, key="rank_genes_groups_filtered") + """ if key is None: key = "rank_genes_groups" diff --git a/src/scanpy/tools/_score_genes.py b/src/scanpy/tools/_score_genes.py index a67331e678..7580d464ca 100644 --- a/src/scanpy/tools/_score_genes.py +++ b/src/scanpy/tools/_score_genes.py @@ -31,9 +31,7 @@ def _sparse_nanmean(X: _CSMatrix, axis: Literal[0, 1]) -> NDArray[np.float64]: - """ - np.nanmean equivalent for sparse matrices - """ + """np.nanmean equivalent for sparse matrices.""" if not issparse(X): msg = "X must be a sparse matrix" raise TypeError(msg) @@ -73,8 +71,7 @@ def score_genes( use_raw: bool | None = None, layer: str | None = None, ) -> AnnData | None: - """\ - Score a set of genes :cite:p:`Satija2015`. + """Score a set of genes :cite:p:`Satija2015`. The score is the average expression of a set of genes after subtraction by the average expression of a reference set of genes. The reference set is @@ -123,6 +120,7 @@ def score_genes( Examples -------- See this `notebook `__. + """ start = logg.info(f"computing score {score_name!r}") adata = adata.copy() if copy else adata @@ -273,8 +271,7 @@ def score_genes_cell_cycle( copy: bool = False, **kwargs, ) -> AnnData | None: - """\ - Score cell cycle genes :cite:p:`Satija2015`. + """Score cell cycle genes :cite:p:`Satija2015`. Given two lists of genes associated to S phase and G2M phase, calculates scores and assigns a cell cycle phase (G1, S or G2M). See @@ -305,13 +302,14 @@ def score_genes_cell_cycle( `adata.obs['phase']` : :class:`pandas.Series` (dtype `object`) The cell cycle phase (`S`, `G2M` or `G1`) for each cell. - See also + See Also -------- score_genes Examples -------- See this `notebook `__. + """ logg.info("calculating cell cycle phase") diff --git a/src/scanpy/tools/_sim.py b/src/scanpy/tools/_sim.py index f6ea2fede8..ce36f74bf9 100644 --- a/src/scanpy/tools/_sim.py +++ b/src/scanpy/tools/_sim.py @@ -1,12 +1,13 @@ # Author: Alex Wolf (https://falexwolf.de) -"""Simulate Data +"""Simulate Data. Simulate stochastic dynamic systems to model gene expression dynamics and cause-effect data. -TODO +Todo: ---- Beta Version. The code will be reorganized soon. + """ from __future__ import annotations @@ -57,8 +58,7 @@ def sim( seed: int | None = None, writedir: Path | str | None = None, ) -> AnnData: - """\ - Simulate dynamic gene expression data :cite:p:`Wittmann2009` :cite:p:`Wolf2018`. + """Simulate dynamic gene expression data :cite:p:`Wittmann2009` :cite:p:`Wolf2018`. Sample from a stochastic differential equation model built from literature-curated boolean gene regulatory networks, as suggested by @@ -94,6 +94,7 @@ def sim( Examples -------- See this `use case `__ + """ params = locals() if params_file: @@ -109,8 +110,7 @@ def sim( def add_args(p): - """ - Update parser with tool specific arguments. + """Update parser with tool specific arguments. 
This overwrites was is done in utils.uns_args. """ @@ -128,9 +128,6 @@ def add_args(p): def sample_dynamic_data(**params): - """ - Helper function. - """ model_key = Path(params["model"]).with_suffix("").name writedir = params.get("writedir") if writedir is None: @@ -377,8 +374,7 @@ def write_data( class GRNsim: - """ - Simlulation of stochastic dynamic systems. + """Simulation of stochastic dynamic systems. Main application: simulation of gene expression dynamics. @@ -409,7 +405,8 @@ def __init__( Coupl=None, params=MappingProxyType({}), ): - """ + """Initialize. + Params ------ model @@ -469,8 +466,9 @@ def sim_model(self, tmax, X0, noiseDyn=0, restart=0): return X def Xdiff_hill(self, Xt): - """Build Xdiff from coefficients of boolean network, - that is, using self.boolCoeff. The employed functions + """Build Xdiff from coefficients of boolean network. + + That is, using self.boolCoeff. The employed functions are Hill type activation and deactivation functions. See Wittmann et al., BMC Syst. Biol. 3, 98 (2009), @@ -519,7 +517,6 @@ def Xdiff_hill(self, Xt): return Xdiff def Xdiff_var(self, Xt, verbosity=0): - """""" # subtract the current state Xdiff = -Xt # add the information from the past @@ -542,7 +539,7 @@ def hill_i(self, x, threshold=0.1, power=2): return threshold_pow / (x_pow + threshold_pow) def nhill_a(self, x, threshold=0.1, power=2, ichild=2): - """Normalized activating hill function.""" + """Normalized activating hill function.""" # noqa: D401 x_pow = np.power(x, power) threshold_pow = np.power(threshold, power) return x_pow / (x_pow + threshold_pow) * (1 + threshold_pow) @@ -551,7 +548,7 @@ def nhill_i(self, x, threshold=0.1, power=2): """Normalized inhibiting hill function. Is equivalent to 1-nhill_a(self,x,power,threshold). - """ + """ # noqa: D401 x_pow = np.power(x, power) threshold_pow = np.power(threshold, power) return threshold_pow / (x_pow + threshold_pow) * (1 - x_pow) @@ -602,9 +599,7 @@ def read_model(self): self.build_boolCoeff() def set_coupl(self, Coupl=None): - """Construct the coupling matrix (and adjacancy matrix) from predefined models - or via sampling. - """ + """Construct the coupling matrix (and adjacency matrix) from predefined models or via sampling.""" self.varNames = {str(i): i for i in range(self.dim)} if self.model not in self.availModels and Coupl is None: self.read_model() @@ -684,7 +679,7 @@ def set_coupl(self, Coupl=None): # settings.m(0,self.Adj) def set_coupl_old(self): - """Using the adjacency matrix, sample a coupling matrix.""" + """Sample a coupling matrix using the adjacency matrix.""" if self.model == "krumsiek11" or self.model == "var": # we already built the coupling matrix in set_coupl20() return @@ -716,9 +711,9 @@ def set_coupl_old(self): settings.m(0, self.Coupl) def coupl_model1(self): - """In model 1, we want enforce the following signs - on the couplings. Model 2 has the same couplings - but arbitrary signs. + """Enforce the following signs on the couplings. + + (Model 2 has the same couplings but arbitrary signs.) """ self.Coupl[0, 0] = np.abs(self.Coupl[0, 0]) self.Coupl[0, 1] = -np.abs(self.Coupl[0, 1]) @@ -750,9 +745,7 @@ def coupl_model_krumsiek11(self): self.Coupl = self.Adj_signed def sim_model_back_help(self, Xt, Xt1): - """Yields zero when solved for X_t - given X_{t+1}.
- """ + """Yield zero when solved for X_t given X_{t+1}.""" return -Xt1 + Xt + self.Xdiff(Xt) def sim_model_backwards(self, tmax, X0): @@ -920,8 +913,7 @@ def write_data( def _check_branching( X: np.ndarray, Xsamples: np.ndarray, restart: int, threshold: float = 0.25 ) -> tuple[bool, list[np.ndarray]]: - """\ - Check whether time series branches. + """Check whether time series branches. Parameters ---------- @@ -940,6 +932,7 @@ def _check_branching( true if branching realization Xsamples updated list + """ check = True Xsamples = list(Xsamples) @@ -963,8 +956,7 @@ def _check_branching( def check_nocycles(Adj: np.ndarray, verbosity: int = 2) -> bool: - """\ - Checks that there are no cycles in graph described by adjacancy matrix. + """Check that there are no cycles in graph described by adjacancy matrix. Parameters ---------- @@ -974,6 +966,7 @@ def check_nocycles(Adj: np.ndarray, verbosity: int = 2) -> bool: Returns ------- True if there is no cycle, False otherwise. + """ dim = Adj.shape[0] for g in range(dim): @@ -999,8 +992,7 @@ def check_nocycles(Adj: np.ndarray, verbosity: int = 2) -> bool: def sample_coupling_matrix( dim: int = 3, connectivity: float = 0.5 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]: - """\ - Sample coupling matrix. + """Sample coupling matrix. Checks that returned graphs contain no self-cycles. @@ -1023,6 +1015,7 @@ def sample_coupling_matrix( signed adjacancy matrix n_edges Number of edges + """ max_trial = 10 check = False @@ -1054,9 +1047,7 @@ def sample_coupling_matrix( class StaticCauseEffect: - """ - Simulates static data to investigate structure learning. - """ + """Simulates static data to investigate structure learning.""" availModels = dict( line="y = αx \n", @@ -1080,8 +1071,7 @@ def __init__(self): ) def sim_givenAdj(self, Adj: np.ndarray, model="line"): - """\ - Simulate data given only an adjacancy matrix and a model. + """Simulate data given only an adjacancy matrix and a model. The model is a bivariate funtional dependence. The adjacancy matrix needs to be acyclic. @@ -1094,6 +1084,7 @@ def sim_givenAdj(self, Adj: np.ndarray, model="line"): Returns ------- Data array of shape (n_samples,dim). + """ # nice examples examples = [ # noqa: F841 TODO We are really unsure whether this is needed. diff --git a/src/scanpy/tools/_top_genes.py b/src/scanpy/tools/_top_genes.py index 3b4e709b5f..2ac8f5b491 100644 --- a/src/scanpy/tools/_top_genes.py +++ b/src/scanpy/tools/_top_genes.py @@ -1,8 +1,6 @@ # Author: T. Callies # -"""\ -This modules provides all non-visualization tools for advanced gene ranking and exploration of genes -""" +"""All non-visualization tools for advanced gene ranking and exploration of genes.""" from __future__ import annotations @@ -35,8 +33,7 @@ def correlation_matrix( method: Literal["pearson", "kendall", "spearman"] = "pearson", annotation_key: str | None = None, ) -> None: - """\ - Calculate correlation matrix. + """Calculate correlation matrix. Calculate a correlation matrix for genes stored in sample annotation using :func:`~scanpy.tl.rank_genes_groups`. @@ -74,8 +71,8 @@ def correlation_matrix( Spearman rank correlation annotation_key Allows defining the name of the anndata entry where results are stored. - """ + """ # TODO: At the moment, only works for int identifiers # If no genes are passed, selects ranked genes from sample annotation. @@ -134,8 +131,7 @@ def ROC_AUC_analysis( group: str | None = None, n_genes: int = 100, ): - """\ - Calculate correlation matrix. + """Calculate correlation matrix. 
+ """Calculate ROC curve and AUC for top-ranked genes. Calculate a correlation matrix for genes strored in sample annotation @@ -152,6 +148,7 @@ def ROC_AUC_analysis( n_genes For how many genes to calculate ROC and AUC. If no parameter is passed, calculation is done for all stored top ranked genes. + """ if group is None: pass diff --git a/src/scanpy/tools/_tsne.py b/src/scanpy/tools/_tsne.py index 62fa8b9d57..d578967113 100644 --- a/src/scanpy/tools/_tsne.py +++ b/src/scanpy/tools/_tsne.py @@ -44,8 +44,7 @@ def tsne( key_added: str | None = None, copy: bool = False, ) -> AnnData | None: - """\ - t-SNE :cite:p:`vanDerMaaten2008,Amir2013,Pedregosa2011`. + r"""t-SNE :cite:p:`vanDerMaaten2008,Amir2013,Pedregosa2011`. t-distributed stochastic neighborhood embedding (tSNE, :cite:t:`vanDerMaaten2008`) was proposed for visualizating single-cell data by :cite:t:`Amir2013`. Here, by default, @@ -91,11 +90,11 @@ def tsne( `None` means using :attr:`scanpy._settings.ScanpyConfig.n_jobs`. key_added If not specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ `['X_tsne']` and the the parameters in - :attr:`~anndata.AnnData.uns`\\ `['tsne']`. + :attr:`~anndata.AnnData.obsm`\ `['X_tsne']` and the parameters in + :attr:`~anndata.AnnData.uns`\ `['tsne']`. If specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ ``[key_added]`` and the the parameters in - :attr:`~anndata.AnnData.uns`\\ ``[key_added]``. + :attr:`~anndata.AnnData.obsm`\ ``[key_added]`` and the parameters in + :attr:`~anndata.AnnData.uns`\ ``[key_added]``. copy Return a copy instead of writing to `adata`. diff --git a/src/scanpy/tools/_umap.py b/src/scanpy/tools/_umap.py index 926e6d3d4f..4a7c47df24 100644 --- a/src/scanpy/tools/_umap.py +++ b/src/scanpy/tools/_umap.py @@ -57,8 +57,7 @@ def umap( neighbors_key: str = "neighbors", copy: bool = False, ) -> AnnData | None: - """\ - Embed the neighborhood graph using UMAP :cite:p:`McInnes2018`. + r"""Embed the neighborhood graph using UMAP :cite:p:`McInnes2018`. UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to @@ -135,15 +134,15 @@ def umap( Use :func:`rapids_singlecell.tl.umap` instead. key_added If not specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ `['X_umap']` and the the parameters in - :attr:`~anndata.AnnData.uns`\\ `['umap']`. + :attr:`~anndata.AnnData.obsm`\ `['X_umap']` and the parameters in + :attr:`~anndata.AnnData.uns`\ `['umap']`. If specified, the embedding is stored as - :attr:`~anndata.AnnData.obsm`\\ ``[key_added]`` and the the parameters in - :attr:`~anndata.AnnData.uns`\\ ``[key_added]``. + :attr:`~anndata.AnnData.obsm`\ ``[key_added]`` and the parameters in + :attr:`~anndata.AnnData.uns`\ ``[key_added]``. neighbors_key Umap looks in - :attr:`~anndata.AnnData.uns`\\ ``[neighbors_key]`` for neighbors settings and - :attr:`~anndata.AnnData.obsp`\\ ``[.uns[neighbors_key]['connectivities_key']]`` for connectivities. + :attr:`~anndata.AnnData.uns`\ ``[neighbors_key]`` for neighbors settings and - :attr:`~anndata.AnnData.obsp`\ ``[.uns[neighbors_key]['connectivities_key']]`` for connectivities. copy Return a copy instead of writing to adata.
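The `key_added` convention documented in the t-SNE and UMAP hunks above is symmetric for both embeddings; a minimal sketch of where results land (not part of this diff, assuming precomputed neighbors as in the bundled `pbmc68k_reduced`):

>>> import scanpy as sc
>>> adata = sc.datasets.pbmc68k_reduced()
>>> sc.tl.umap(adata)  # embedding in adata.obsm["X_umap"], parameters in adata.uns["umap"]
>>> sc.tl.umap(adata, key_added="umap_alt")  # embedding in adata.obsm["umap_alt"], parameters in adata.uns["umap_alt"]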
diff --git a/src/scanpy/tools/_utils.py b/src/scanpy/tools/_utils.py index e1a21cfac3..cee9eec591 100644 --- a/src/scanpy/tools/_utils.py +++ b/src/scanpy/tools/_utils.py @@ -72,39 +72,6 @@ def _choose_representation( return X -def preprocess_with_pca(adata, n_pcs: int | None = None, random_state=0): - """ - Parameters - ---------- - n_pcs - If `n_pcs=0`, do not preprocess with PCA. - If `None` and there is a PCA version of the data, use this. - If an integer, compute the PCA. - """ - from ..preprocessing import pca - - if n_pcs == 0: - logg.info(" using data matrix X directly (no PCA)") - return adata.X - elif n_pcs is None and "X_pca" in adata.obsm_keys(): - logg.info(f" using 'X_pca' with n_pcs = {adata.obsm['X_pca'].shape[1]}") - return adata.obsm["X_pca"] - elif "X_pca" in adata.obsm_keys() and adata.obsm["X_pca"].shape[1] >= n_pcs: - logg.info(f" using 'X_pca' with n_pcs = {n_pcs}") - return adata.obsm["X_pca"][:, :n_pcs] - else: - n_pcs = settings.N_PCS if n_pcs is None else n_pcs - if adata.X.shape[1] > n_pcs: - logg.info(f" computing 'X_pca' with n_pcs = {n_pcs}") - logg.hint("avoid this by setting n_pcs = 0") - X = pca(adata.X, n_comps=n_pcs, random_state=random_state) - adata.obsm["X_pca"] = X - return X - else: - logg.info(" using data matrix X directly (no PCA)") - return adata.X - - def get_init_pos_from_paga( adata: AnnData, adjacency: spmatrix | None = None, diff --git a/src/testing/scanpy/__init__.py b/src/testing/scanpy/__init__.py index e69de29bb2..08571d88a9 100644 --- a/src/testing/scanpy/__init__.py +++ b/src/testing/scanpy/__init__.py @@ -0,0 +1,3 @@ +"""Scanpy testing utilities.""" + +# This file is empty until we design its public API. diff --git a/src/testing/scanpy/_helpers/__init__.py b/src/testing/scanpy/_helpers/__init__.py index 3cff738132..52c347e1af 100644 --- a/src/testing/scanpy/_helpers/__init__.py +++ b/src/testing/scanpy/_helpers/__init__.py @@ -1,6 +1,4 @@ -""" -This file contains helper functions for the scanpy test suite. -""" +"""Helper functions for the scanpy test suite.""" from __future__ import annotations @@ -31,7 +29,7 @@ def anndata_v0_8_constructor_compat(X, *args, **kwargs): - """Constructor for anndata that uses dtype of X for test compatibility with older versions of AnnData. + """Construct AnnData that uses dtype of X for test compatibility with older AnnData versions. Once the minimum version of AnnData is 0.9, this function can be replaced with the default constructor. """ @@ -78,7 +76,7 @@ def check_rep_mutation(func, X, *, fields=("layer", "obsm"), **kwargs): def check_rep_results(func, X, *, fields=["layer", "obsm"], **kwargs): - """Checks that the results of a computation add values/ mutate the anndata object in a consistent way.""" + """Check that the results of a computation add values/ mutate the anndata object in a consistent way.""" # Gen data empty_X = np.zeros(shape=X.shape, dtype=X.dtype) adata = sc.AnnData( @@ -113,12 +111,11 @@ def check_rep_results(func, X, *, fields=["layer", "obsm"], **kwargs): def _check_check_values_warnings(function, adata, expected_warning, kwargs={}): - """ - Runs `function` on `adata` with provided arguments `kwargs` twice: - once with `check_values=True` and once with `check_values=False`. + """Run `function` on `adata` with provided arguments `kwargs` twice. + + Once with `check_values=True` and once with `check_values=False`. Checks that the `expected_warning` is only raised whtn `check_values=True`. 
""" - # expecting 0 no-int warnings with warnings.catch_warnings(record=True) as record: function(adata.copy(), **kwargs, check_values=False) @@ -163,10 +160,10 @@ def __exit__(self, exc_type, exc_value, traceback): @contextmanager def maybe_dask_process_context(): - """ + """Switch to a single-threaded scheduler for tests that use numba. + Running numba with dask's threaded scheduler causes crashes, - so we need to switch to single-threaded (or processes, which is slower) - scheduler for tests that use numba. + so we need to switch to single-threaded (or processes, which is slower). """ if not find_spec("dask"): yield diff --git a/src/testing/scanpy/_helpers/data.py b/src/testing/scanpy/_helpers/data.py index d98f4e36c0..edda63035b 100644 --- a/src/testing/scanpy/_helpers/data.py +++ b/src/testing/scanpy/_helpers/data.py @@ -1,5 +1,5 @@ -""" -Functions returning copies of datasets as cheaply as possible, +"""Functions returning copies of datasets as cheaply as possible. + i.e. without having to hit the disk or (in case of ``_pbmc3k_normalized``) recomputing normalization. """ diff --git a/src/testing/scanpy/_pytest/__init__.py b/src/testing/scanpy/_pytest/__init__.py index e365a90495..782be091e9 100644 --- a/src/testing/scanpy/_pytest/__init__.py +++ b/src/testing/scanpy/_pytest/__init__.py @@ -1,4 +1,4 @@ -"""A private pytest plugin""" +"""A private pytest plugin.""" from __future__ import annotations diff --git a/src/testing/scanpy/_pytest/fixtures/__init__.py b/src/testing/scanpy/_pytest/fixtures/__init__.py index 27c4da4e0a..a80d338767 100644 --- a/src/testing/scanpy/_pytest/fixtures/__init__.py +++ b/src/testing/scanpy/_pytest/fixtures/__init__.py @@ -1,4 +1,4 @@ -"""This file contains some common fixtures for use in tests. +"""Some common fixtures for use in tests. This is kept seperate from the helpers file because it relies on pytest. """ diff --git a/src/testing/scanpy/_pytest/fixtures/data.py b/src/testing/scanpy/_pytest/fixtures/data.py index bd316bad09..8bdcc0705e 100644 --- a/src/testing/scanpy/_pytest/fixtures/data.py +++ b/src/testing/scanpy/_pytest/fixtures/data.py @@ -104,7 +104,7 @@ def _prepare_pbmc_testdata( *, small: bool, ) -> AnnData: - """Prepares 3k PBMC dataset with batch key `batch` and defined datatype/sparsity. + """Prepare 3k PBMC dataset with batch key `batch` and defined datatype/sparsity. Params ------ diff --git a/src/testing/scanpy/_pytest/marks.py b/src/testing/scanpy/_pytest/marks.py index 22b32269d2..5e880b487c 100644 --- a/src/testing/scanpy/_pytest/marks.py +++ b/src/testing/scanpy/_pytest/marks.py @@ -34,8 +34,7 @@ def __init__(self, mark: pytest.Mark) -> None: class needs(QuietMarkDecorator, Enum): - """ - Pytest skip marker evaluated at module import. + """Pytest skip marker evaluated at module import. This allows us to see the amount of skipped tests at the start of a test run. :func:`pytest.importorskip` skips tests after they started running. 
@@ -46,7 +45,7 @@ class needs(QuietMarkDecorator, Enum): def _generate_next_value_( name: str, start: int, count: int, last_values: list[str] ) -> str: - """Distribution name for matching modules""" + """Distribution name for matching modules.""" return name.replace("_", "-") mod: str diff --git a/src/testing/scanpy/_pytest/params.py b/src/testing/scanpy/_pytest/params.py index f405e33d5e..5c10772e47 100644 --- a/src/testing/scanpy/_pytest/params.py +++ b/src/testing/scanpy/_pytest/params.py @@ -1,4 +1,4 @@ -"""Like fixtures, but more flexible""" +"""Like fixtures, but more flexible.""" from __future__ import annotations diff --git a/tests/_scripts/scanpy-testbin b/tests/_scripts/scanpy-testbin index f7ed95336e..5809eee0a9 100755 --- a/tests/_scripts/scanpy-testbin +++ b/tests/_scripts/scanpy-testbin @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +"""A test script for scanpy’s CLI.""" + from __future__ import annotations import sys diff --git a/tests/conftest.py b/tests/conftest.py index 2d7f8e7aad..196a8f5c47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,7 +48,7 @@ def _manage_log_handlers() -> Generator[None, None, None]: @pytest.fixture(autouse=True) def _caplog_adapter(caplog: pytest.LogCaptureFixture) -> Generator[None, None, None]: - """Allow use of scanpy’s logger with caplog""" + """Allow use of scanpy’s logger with caplog.""" import scanpy as sc sc.settings._root_logger.addHandler(caplog.handler) @@ -61,7 +61,7 @@ def imported_modules(): return IMPORTED -class CompareResult(TypedDict): +class CompareResult(TypedDict): # noqa: D101 rms: float expected: str actual: str diff --git a/tests/external/test_harmony_integrate.py b/tests/external/test_harmony_integrate.py index 6b0bd68f24..2844354a2f 100644 --- a/tests/external/test_harmony_integrate.py +++ b/tests/external/test_harmony_integrate.py @@ -9,8 +9,7 @@ def test_harmony_integrate(): - """ - Test that Harmony integrate works. + """Test that Harmony integrate works. This is a very simple test that just checks to see if the Harmony integrate wrapper succesfully added a new field to ``adata.obsm`` diff --git a/tests/external/test_scanorama_integrate.py b/tests/external/test_scanorama_integrate.py index baa2007fc0..df90368861 100644 --- a/tests/external/test_scanorama_integrate.py +++ b/tests/external/test_scanorama_integrate.py @@ -9,8 +9,7 @@ def test_scanorama_integrate(): - """ - Test that Scanorama integration works. + """Test that Scanorama integration works. This is a very simple test that just checks to see if the Scanorama integrate wrapper succesfully added a new field to ``adata.obsm`` diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 5e0fc1e125..b2e5c835bc 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,6 +1,4 @@ -""" -Tests to make sure the example datasets load. 
-""" +"""Tests to make sure the example datasets load.""" from __future__ import annotations @@ -138,7 +136,6 @@ def test_visium_datasets_dir_change(tmp_path: Path): @pytest.mark.internet def test_visium_datasets_images(): """Test that image download works and is does not have global effects.""" - # Test that downloading tissue image works with pytest.warns(UserWarning, match=r"Variable names are not unique"): mbrain = sc.datasets.visium_sge("V1_Adult_Mouse_Brain", include_hires_tiff=True) @@ -191,7 +188,8 @@ def test_download_failure(): def test_doc_shape(ds_name): dataset_fn: Callable[[], AnnData] = getattr(sc.datasets, ds_name) assert dataset_fn.__doc__, "No docstring" - docstring = dedent(dataset_fn.__doc__) + start_line_2 = dataset_fn.__doc__.find("\n") + 1 + docstring = dedent(dataset_fn.__doc__[start_line_2:]) with warnings.catch_warnings(): warnings.filterwarnings( "ignore", diff --git a/tests/test_get.py b/tests/test_get.py index 05cb1b6a9d..8c15dad8ed 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -34,10 +34,11 @@ def transpose_adata(adata: AnnData, *, expect_duplicates: bool = False) -> AnnDa @pytest.fixture -def adata(): - """ - adata.X is np.ones((2, 2)) - adata.layers['double'] is sparse np.ones((2,2)) * 2 to also test sparse matrices +def adata() -> AnnData: + """Create a tiny AnnData. + + `adata.X` is `np.ones((2, 2))`. + `adata.layers['double']` is sparse `np.ones((2,2)) * 2` to also test sparse matrices. """ return anndata_v0_8_constructor_compat( X=np.ones((2, 2), dtype=int), @@ -56,7 +57,7 @@ def adata(): ######################## -def test_obs_df(adata): +def test_obs_df(adata: AnnData): adata.obsm["eye"] = np.eye(2, dtype=int) adata.obsm["sparse"] = sparse.csr_matrix(np.eye(2), dtype="float64") @@ -153,9 +154,7 @@ def test_obs_df(adata): def test_repeated_gene_symbols(): - """ - Gene symbols column allows repeats, but we can't unambiguously get data for these values. - """ + """Gene symbols column allows repeats, but we can't unambiguously get data for these values.""" gene_symbols = [f"symbol_{i}" for i in ["a", "b", "b", "c"]] var_names = pd.Index([f"id_{i}" for i in ["a", "b.1", "b.2", "c"]]) adata = sc.AnnData( @@ -178,7 +177,7 @@ def test_repeated_gene_symbols(): @filter_oldformatwarning def test_backed_vs_memory(): - """compares backed vs. memory""" + """Compares backed vs. memory.""" from pathlib import Path # get location test h5ad file in datasets @@ -204,7 +203,7 @@ def test_backed_vs_memory(): def test_column_content(): - """uses a larger dataset to test column order and content""" + """Uses a larger dataset to test column order and content.""" adata = pbmc68k_reduced() # test that columns content is correct for obs_df @@ -224,7 +223,7 @@ def test_column_content(): np.testing.assert_array_equal(df[col].values, adata.var_vector(col)) -def test_var_df(adata): +def test_var_df(adata: AnnData): adata.varm["eye"] = np.eye(2, dtype=int) adata.varm["sparse"] = sparse.csr_matrix(np.eye(2), dtype="float64") diff --git a/tests/test_highly_variable_genes.py b/tests/test_highly_variable_genes.py index 528a86ea99..79535ab7de 100644 --- a/tests/test_highly_variable_genes.py +++ b/tests/test_highly_variable_genes.py @@ -610,8 +610,9 @@ def test_cutoff_info(): @pytest.mark.parametrize("array_type", ARRAY_TYPES) @pytest.mark.parametrize("batch_key", [None, "batch"]) def test_subset_inplace_consistency(flavor, array_type, batch_key): - """Tests that, with `n_top_genes=n` - - `inplace` and `subset` interact correctly + """Tests `n_top_genes=n`. 
+ + - `inplace` and `subset` interact correctly + - for both the `seurat` and `cell_ranger` flavors + - for dask arrays and non-dask arrays + - for both with and without batch_key diff --git a/tests/test_logging.py b/tests/test_logging.py index 81b4acbf38..02fe2fdbef 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -135,8 +135,7 @@ def now(tz): ], ) def test_call_outputs(func): - """ - Tests that these functions print to stdout and don't error. + """Tests that these functions print to stdout and don't error. Checks that https://github.com/scverse/scanpy/issues/1437 is fixed. """ diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index 806594ff8d..a6da0850b5 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -128,7 +128,7 @@ def neigh() -> Neighbors: def test_distances_euclidean( mocker: MockerFixture, neigh: Neighbors, method: Literal["umap", "gauss"] ): - """umap and gauss behave the same for distances. + """`umap` and `gauss` behave the same for distances. They call pynndescent for large data. """ diff --git a/tests/test_package_structure.py b/tests/test_package_structure.py index 3402c3872e..73e731dc8d 100644 --- a/tests/test_package_structure.py +++ b/tests/test_package_structure.py @@ -82,7 +82,7 @@ def is_deprecated(f: FunctionType) -> bool: } -class ExpectedSig(TypedDict): +class ExpectedSig(TypedDict): # noqa: D101 first_name: str copy_default: Any return_ann: str | None @@ -149,7 +149,7 @@ def test_sig_conventions(f, qualname): def getsourcefile(obj): - """inspect.getsourcefile, but supports singledispatch""" + """inspect.getsourcefile, but supports singledispatch.""" from inspect import getsourcefile if wrapped := getattr(obj, "__wrapped__", None): @@ -159,7 +159,7 @@ def getsourcefile(obj): def getsourcelines(obj): - """inspect.getsourcelines, but supports singledispatch""" + """inspect.getsourcelines, but supports singledispatch.""" from inspect import getsourcelines if wrapped := getattr(obj, "__wrapped__", None): diff --git a/tests/test_paga.py b/tests/test_paga.py index d8de573fee..2730f5a6e2 100644 --- a/tests/test_paga.py +++ b/tests/test_paga.py @@ -100,7 +100,7 @@ def test_paga_compare(image_comparer): def test_paga_positions_reproducible(): - """Check exact reproducibility and effect of random_state on paga positions""" + """Check exact reproducibility and effect of random_state on paga positions.""" # https://github.com/scverse/scanpy/issues/1859 pbmc = pbmc68k_reduced() sc.tl.paga(pbmc, "bulk_labels") diff --git a/tests/test_pca.py b/tests/test_pca.py index 0130b6ac35..0621ed9474 100644 --- a/tests/test_pca.py +++ b/tests/test_pca.py @@ -71,7 +71,7 @@ if pkg_version("anndata") < Version("0.9"): def to_memory(self: AnnData, *, copy: bool = False) -> AnnData: - """Compatibility version of AnnData.to_memory() that works with old AnnData versions""" + """Compatibility version of AnnData.to_memory() that works with old AnnData versions.""" adata = self if adata.isbacked: adata = adata.to_memory() @@ -296,9 +296,9 @@ def test_pca_transform_no_zero_center(request: pytest.FixtureRequest, array_type def test_pca_shapes(): - """ - Tests that n_comps behaves correctly - See https://github.com/scverse/scanpy/issues/1051 + """Tests that n_comps behaves correctly.
+ + See <https://github.com/scverse/scanpy/issues/1051> + """ adata = AnnData(np.random.randn(30, 20)) sc.pp.pca(adata) @@ -323,9 +323,9 @@ def test_pca_shapes(): ], ) def test_pca_sparse(key_added: str | None, keys_expected: tuple[str, str, str]): - """ - Tests that implicitly centered pca on sparse arrays returns equivalent results to - explicit centering on dense arrays. + """Tests implicitly centered pca on sparse arrays. + + Checks if it returns equivalent results to explicit centering on dense arrays. """ pbmc = pbmc3k_normalized()[:200].copy() @@ -371,11 +371,10 @@ def test_pca_reproducible(array_type): def test_pca_chunked(): - """ - See https://github.com/scverse/scanpy/issues/1590 - But this is also a more general test - """ + """Tests that chunked PCA is equivalent to default PCA. + + See also <https://github.com/scverse/scanpy/issues/1590>. + """ # Subsetting for speed of test pbmc_full = pbmc3k_normalized() pbmc = pbmc_full[::6].copy() @@ -398,10 +397,7 @@ def test_pca_chunked(): def test_pca_n_pcs(): - """ - Tests that the n_pcs parameter also works for - representations not called "X_pca" - """ + """Tests that the n_pcs parameter also works for representations not called "X_pca".""" pbmc = pbmc3k_normalized() sc.pp.pca(pbmc, dtype=np.float64) pbmc.obsm["X_pca_test"] = pbmc.obsm["X_pca"] @@ -444,8 +440,7 @@ def test_mask_length_error(): def test_mask_var_argument_equivalence(float_dtype, array_type): - """Test if pca result is equal when given mask as boolarray vs string""" - + """Test if pca result is equal when given mask as bool array vs string.""" adata_base = AnnData(array_type(np.random.random((100, 10))).astype(float_dtype)) mask_var = np.random.choice([True, False], adata_base.shape[1]) @@ -508,9 +503,10 @@ def test_mask_order_warning(request: pytest.FixtureRequest): def test_mask_defaults(array_type, float_dtype): - """ - Test if pca result is equal without highly variable and with-but mask is None - and if pca takes highly variable as mask as default + """Test PCA behavior in relation to highly variable genes. + + 1. That it’s equal without highly variable and with – but mask is None + 2. If pca takes highly variable as mask as default """ A = array_type(A_list).astype("float64") adata = AnnData(A) @@ -532,9 +528,7 @@ def test_mask_defaults(array_type, float_dtype): def test_pca_layer(): - """ - Tests that layers works the same way as .X - """ + """Tests that layers work the same way as `X`.""" X_adata = pbmc3k_normalized() layer_adata = X_adata.copy() diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 161b493823..b5dee02f60 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -873,7 +873,7 @@ def test_rank_genes_group_axes(image_comparer): @pytest.fixture(scope="session") def gene_symbols_adatas_session() -> tuple[AnnData, AnnData]: - """Create two anndata objects which are equivalent except for var_names + """Create two anndata objects which are equivalent except for var_names. Both have ensembl ids and hgnc symbols as columns in var. The first has ensembl ids as var_names, the second has symbols. @@ -965,10 +965,7 @@ def test_plot_rank_genes_groups_gene_symbols( ], ) def test_rank_genes_groups_plots_n_genes_vs_var_names(tmp_path, func, check_same_image): - """\ - Checks that passing a negative value for n_genes works, and that passing - var_names as a dict works.
- """ + """Checks that once can pass a negative value for n_genes and var_names as a dict.""" N = 3 pbmc = pbmc68k_reduced().raw.to_adata() groups = pbmc.obs["louvain"].cat.categories[:3] @@ -1382,8 +1379,7 @@ def test_scatter_specify_layer_and_raw(): "color", ["n_genes", "bulk_labels", ["n_genes", "bulk_labels"]] ) def test_scatter_no_basis_per_obs(image_comparer, color): - """Test scatterplot of per-obs points with no basis""" - + """Test scatterplot of per-obs points with no basis.""" save_and_compare_images = partial(image_comparer, ROOT, tol=15) pbmc = pbmc68k_reduced() @@ -1401,8 +1397,7 @@ def test_scatter_no_basis_per_obs(image_comparer, color): def test_scatter_no_basis_per_var(image_comparer): - """Test scatterplot of per-var points with no basis""" - + """Test scatterplot of per-var points with no basis.""" save_and_compare_images = partial(image_comparer, ROOT, tol=15) pbmc = pbmc68k_reduced() @@ -1445,7 +1440,7 @@ def test_scatter_no_basis_raw(check_same_image, pbmc_filtered, tmp_path, use_raw ], ) def test_scatter_no_basis_value_error(pbmc_filtered, x, y, color, use_raw): - """Test that `scatter()` raises `ValueError` where appropriate + """Test that `scatter()` raises `ValueError` where appropriate. If `sc.pl.scatter()` receives variable labels that either cannot be found or are incompatible with one another, the function should @@ -1481,9 +1476,7 @@ def test_rankings(image_comparer): # TODO: Make more generic def test_scatter_rep(tmp_path): - """ - Test to make sure I can predict when scatter reps should be the same - """ + """Test to make sure I can predict when scatter reps should be the same.""" rep_args = { "raw": {"use_raw": True}, "layer": {"layer": "layer", "use_raw": False}, @@ -1728,7 +1721,7 @@ def test_scrublet_plots(monkeypatch, image_comparer, id, params): def test_umap_mask_equal(tmp_path, check_same_image): - """Check that all desired cells are coloured and masked cells gray""" + """Check that all desired cells are coloured and masked cells gray.""" pbmc = pbmc3k_processed() mask_obs = pbmc.obs["louvain"].isin(["B cells", "NK cells"]) @@ -1755,7 +1748,7 @@ def test_umap_mask_mult_plots(): def test_string_mask(tmp_path, check_same_image): - """Check that the same mask given as string or bool array provides the same result""" + """Check that the same mask given as string or bool array provides the same result.""" pbmc = pbmc3k_processed() pbmc.obs["mask"] = mask_obs = pbmc.obs["louvain"].isin(["B cells", "NK cells"]) diff --git a/tests/test_plotting_embedded/conftest.py b/tests/test_plotting_embedded/conftest.py index d9e8ff8581..42be824899 100644 --- a/tests/test_plotting_embedded/conftest.py +++ b/tests/test_plotting_embedded/conftest.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="module") def adata(): - """A bit cute.""" + # A bit cute. from matplotlib.image import imread from sklearn.cluster import DBSCAN from sklearn.datasets import make_blobs diff --git a/tests/test_plotting_embedded/test_spatial.py b/tests/test_plotting_embedded/test_spatial.py index 873db68794..b1e8e5b883 100644 --- a/tests/test_plotting_embedded/test_spatial.py +++ b/tests/test_plotting_embedded/test_spatial.py @@ -174,9 +174,7 @@ def spatial_kwargs(request): def test_manual_equivalency(equivalent_spatial_plotters, tmp_path, spatial_kwargs): - """ - Tests that manually passing values to sc.pl.spatial is similar to automatic extraction. 
- """ + """Tests that manually passing values to sc.pl.spatial is similar to automatic extraction.""" orig, removed = equivalent_spatial_plotters orig_pth = tmp_path / "orig.png" @@ -240,9 +238,7 @@ def test_white_background_vs_no_img(adata, tmp_path, spatial_kwargs): def test_spatial_na_color(adata, tmp_path): - """ - Check that na_color defaults to transparent when an image is present, light gray when not. - """ + """Check that na_color defaults to transparent when an image is present, light gray when not.""" white_background = np.ones_like( adata.uns["spatial"]["scanpy_img"]["images"]["hires"] ) diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 6282c5ccf4..8b6fb90d71 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -147,9 +147,7 @@ def test_normalize_per_cell(): def _random_probs(n: int, frac_zero: float) -> NDArray[np.float64]: - """ - Generate a random probability distribution of `n` values between 0 and 1. - """ + """Generate a random probability distribution of `n` values between 0 and 1.""" probs = np.random.randint(0, 10000, n).astype(np.float64) probs[probs < np.quantile(probs, frac_zero)] = 0 probs /= probs.sum() @@ -358,18 +356,14 @@ def zero_center(request): def test_scale_rep(count_matrix_format, zero_center): - """ - Test that it doesn't matter where the array being scaled is in the anndata object. - """ + """Test that it doesn't matter where the array being scaled is in the anndata object.""" X = count_matrix_format(sp.random(100, 200, density=0.3).toarray()) check_rep_mutation(sc.pp.scale, X, zero_center=zero_center) check_rep_results(sc.pp.scale, X, zero_center=zero_center) def test_scale_array(count_matrix_format, zero_center): - """ - Test that running sc.pp.scale on an anndata object and an array returns the same results. - """ + """Test that running sc.pp.scale on an anndata object and an array returns the same results.""" X = count_matrix_format(sp.random(100, 200, density=0.3).toarray()) adata = anndata_v0_8_constructor_compat(X=X.copy()) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index b938fd2ca3..ba08eeb778 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -213,7 +213,7 @@ def test_emptycat(): def test_log1p_save_restore(tmp_path): - """tests the sequence log1p→save→load→rank_genes_groups""" + """Tests the sequence log1p→save→load→rank_genes_groups.""" from anndata import read_h5ad pbmc = pbmc68k_reduced() @@ -317,12 +317,11 @@ def test_wilcoxon_huge_data(monkeypatch): [pytest.param(0, 0, id="equal"), pytest.param(2, 1, id="more")], ) def test_mask_n_genes(n_genes_add, n_genes_out_add): - """\ - Check that no. genes in output is + """Check if no. genes in output is correct. + 1. =n_genes when n_genessum(mask) """ - pbmc = pbmc68k_reduced() mask_var = np.zeros(pbmc.shape[1]).astype(bool) mask_var[:6].fill(True) # noqa: FBT003 @@ -342,11 +341,7 @@ def test_mask_n_genes(n_genes_add, n_genes_out_add): def test_mask_not_equal(): - """\ - Check that mask is applied successfully to data set \ - where test statistics are already available (test stats overwritten). 
- """ - + """Check that mask is applied successfully to data set where test statistics are already available (test stats overwritten).""" pbmc = pbmc68k_reduced() mask_var = np.random.choice([True, False], pbmc.shape[1]) n_genes = sum(mask_var) diff --git a/tests/test_read_10x.py b/tests/test_read_10x.py index 301a156bec..ed836a95a1 100644 --- a/tests/test_read_10x.py +++ b/tests/test_read_10x.py @@ -145,7 +145,7 @@ def visium_pth(request, tmp_path) -> Path: @pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") def test_read_visium_counts(visium_pth): - """Test checking that read_visium reads the right genome""" + """Test checking that read_visium reads the right genome.""" spec_genome_v3 = sc.read_visium(visium_pth, genome="GRCh38") nospec_genome_v3 = sc.read_visium(visium_pth) assert_anndata_equal(spec_genome_v3, nospec_genome_v3) diff --git a/tests/test_score_genes.py b/tests/test_score_genes.py index 4ac1b62224..4fc6a278b8 100644 --- a/tests/test_score_genes.py +++ b/tests/test_score_genes.py @@ -24,9 +24,7 @@ def _create_random_gene_names(n_genes, name_length) -> NDArray[np.str_]: - """ - creates a bunch of random gene names (just CAPS letters) - """ + """Create a bunch of random gene names (just CAPS letters).""" return np.array( [ "".join(map(chr, np.random.randint(65, 90, name_length))) @@ -36,9 +34,7 @@ def _create_random_gene_names(n_genes, name_length) -> NDArray[np.str_]: def _create_sparse_nan_matrix(rows, cols, percent_zero, percent_nan): - """ - creates a sparse matrix, with certain amounts of NaN and Zeros - """ + """Create a sparse matrix with certain amounts of NaN and Zeros.""" A = np.random.randint(0, 1000, rows * cols).reshape((rows, cols)).astype("float32") maskzero = np.random.rand(rows, cols) < percent_zero masknan = np.random.rand(rows, cols) < percent_nan @@ -51,9 +47,7 @@ def _create_sparse_nan_matrix(rows, cols, percent_zero, percent_nan): def _create_adata(n_obs, n_var, p_zero, p_nan): - """ - creates an AnnData with random data, sparseness and some NaN values - """ + """Create an AnnData with random data, sparseness and some NaN values.""" X = _create_sparse_nan_matrix(n_obs, n_var, p_zero, p_nan) adata = AnnData(X) gene_names = _create_random_gene_names(n_var, name_length=6) @@ -62,12 +56,11 @@ def _create_adata(n_obs, n_var, p_zero, p_nan): def test_score_with_reference(): - """ - Checks if score_genes output agrees with pre-computed reference values. + """Checks if score_genes output agrees with pre-computed reference values. + The reference values had been generated using the same code - and stored as a pickle object in ./data + and stored as a pickle object in `./data`. 
""" - adata = paul15() sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000) sc.pp.scale(adata) @@ -80,10 +73,7 @@ def test_score_with_reference(): def test_add_score(): - """ - check the dtype of the scores - check that non-existing genes get ignored - """ + """Check the dtype of the scores and that non-existing genes get ignored.""" # TODO: write a test that costs less resources and is more meaningful adata = _create_adata(100, 1000, p_zero=0, p_nan=0) @@ -101,9 +91,7 @@ def test_add_score(): def test_sparse_nanmean(): - """ - check that _sparse_nanmean() is equivalent to np.nanmean() - """ + """Check that _sparse_nanmean() is equivalent to np.nanmean().""" from scanpy.tools._score_genes import _sparse_nanmean R, C = 60, 50 @@ -135,9 +123,7 @@ def test_sparse_nanmean(): def test_sparse_nanmean_on_dense_matrix(): - """ - TypeError must be thrown when calling _sparse_nanmean with a dense matrix - """ + """TypeError must be thrown when calling _sparse_nanmean with a dense matrix.""" from scanpy.tools._score_genes import _sparse_nanmean with pytest.raises(TypeError): @@ -145,9 +131,7 @@ def test_sparse_nanmean_on_dense_matrix(): def test_score_genes_sparse_vs_dense(): - """ - score_genes() should give the same result for dense and sparse matrices - """ + """score_genes() should give the same result for dense and sparse matrices.""" adata_sparse = _create_adata(100, 1000, p_zero=0.3, p_nan=0.3) adata_dense = adata_sparse.copy() @@ -164,12 +148,12 @@ def test_score_genes_sparse_vs_dense(): def test_score_genes_deplete(): - """ - deplete some cells from a set of genes. - their score should be <0 since the sum of markers is 0 and - the sum of random genes is >=0 + """Deplete some cells from a set of genes. + + Their score should be <0 since the sum of markers is 0 and + the sum of random genes is >=0. - check that for both sparse and dense matrices + Check that for both sparse and dense matrices. """ adata_sparse = _create_adata(100, 1000, p_zero=0.3, p_nan=0.3) @@ -191,14 +175,12 @@ def test_score_genes_deplete(): def test_npnanmean_vs_sparsemean(monkeypatch): - """ - another check that _sparsemean behaves like np.nanmean! + """Another check that _sparsemean behaves like np.nanmean. monkeypatch the _score_genes._sparse_nanmean function to np.nanmean and check that the result is the same as the non-patched (i.e. sparse_nanmean) function """ - adata = _create_adata(100, 1000, p_zero=0.3, p_nan=0.3) gene_set = adata.var_names[:10] diff --git a/tests/test_scrublet.py b/tests/test_scrublet.py index 246ffa4027..dca608e747 100644 --- a/tests/test_scrublet.py +++ b/tests/test_scrublet.py @@ -118,8 +118,7 @@ def _create_sim_from_parents(adata: AnnData, parents: np.ndarray) -> AnnData: def test_scrublet_data(cache: pytest.Cache): - """ - Test that Scrublet processing is arranged correctly. + """Test that Scrublet processing is arranged correctly. Check that simulations run on raw data. """ @@ -214,8 +213,7 @@ def scrub_small(scrub_small_sess: AnnData): @pytest.mark.parametrize(("param", "value"), test_params.items()) def test_scrublet_params(scrub_small: AnnData, param: str, value: Any): - """ - Test that Scrublet args are passed. + """Test that Scrublet args are passed. Check that changes to parameters change scrublet results. """