Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Free tree command from subdatasets dependency #628

Merged
merged 3 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions datalad_next/commands/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
NoDatasetFound
)

from datalad.local.subdatasets import Subdatasets
from datalad_next.iter_collections import iter_submodules
from datalad_next.constraints import (
EnsureBool,
EnsureDataset,
Expand Down Expand Up @@ -608,22 +608,10 @@ def get_subds_paths(ds_path: Path):
# submodules. Since we need to run it to (A) calculate dataset depth and
# (B) detect non-installed datasets, we cache results, so that the list of
# subdatasets is computed only once for each parent dataset.

def res_filter(res):
return res.get('status') == 'ok' and res.get('type') == 'dataset'

# call subdatasets command instead of dataset method `ds.subdatasets()`
# to avoid potentially expensive import of full datalad API
return Subdatasets.__call__(
dataset=ds_path,
recursive=False,
state='any', # include not-installed subdatasets
result_filter=res_filter,
on_failure='ignore',
result_xfm='paths',
result_renderer='disabled',
return_type='list'
)
return [
str(ds_path / sm.path)
for sm in iter_submodules(ds_path)
]


@lru_cache()
Expand Down
2 changes: 2 additions & 0 deletions datalad_next/iter_collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
iter_gitstatus
iter_gittree
iter_gitworktree
iter_submodules
iter_tar
iter_zip
TarfileItem
Expand Down Expand Up @@ -67,6 +68,7 @@
# TODO move to datalad_next.types?
GitWorktreeFileSystemItem,
iter_gitworktree,
iter_submodules,
)
from .annexworktree import (
iter_annexworktree,
Expand Down
2 changes: 1 addition & 1 deletion datalad_next/iter_collections/gitstatus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
)
from datalad_next.repo_utils import (
get_worktree_head,
iter_submodules,
)

from .gitdiff import (
Expand All @@ -35,6 +34,7 @@
GitTreeItem,
GitTreeItemType,
iter_gitworktree,
iter_submodules,
lsfiles_untracked_args,
_git_ls_files,
)
Expand Down
22 changes: 22 additions & 0 deletions datalad_next/iter_collections/gitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,28 @@ def iter_gitworktree(
# report in the next loop iteration


def iter_submodules(
    path: Path,
) -> Generator[GitTreeItem, None, None]:
    """Yield the submodule items of the repository worktree at ``path``.

    This is a thin convenience wrapper around ``iter_gitworktree()``: it
    runs that iterator with a fixed set of arguments and passes through
    only the items whose ``gittype`` is ``GitTreeItemType.submodule``.

    Parameters
    ----------
    path: Path
      Location handed to ``iter_gitworktree()`` as the worktree to inspect.

    Yields
    ------
    GitTreeItem
      One item per reported submodule, exactly as produced by
      ``iter_gitworktree()``.
    """
    # an item carrying this name was reported for the root path itself
    root_name = PurePath('.')
    worktree_items = iter_gitworktree(
        path,
        untracked=None,
        link_target=False,
        fp=False,
        recursive='repository',
    )
    for entry in worktree_items:
        # anything that is not a submodule is of no interest here
        if entry.gittype != GitTreeItemType.submodule:
            continue
        # a submodule found at the root path indicates that the submodule
        # itself is not around, only its record in the parent -- skip it
        if entry.name == root_name:
            continue
        yield entry


def _get_item(
basepath: Path,
link_target: bool,
Expand Down
78 changes: 8 additions & 70 deletions datalad_next/repo_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,12 @@
"""Common repository operations
"""
from __future__ import annotations

from pathlib import (
Path,
PurePath,
)
from typing import Generator

from datalad_next.exceptions import CapturedException
from datalad_next.iter_collections.gitworktree import (
GitTreeItem,
GitTreeItemType,
iter_gitworktree,
)
from datalad_next.runners import (
CommandError,
call_git_lines,
)


def iter_submodules(
    path: Path,
) -> Generator[GitTreeItem, None, None]:
    """Report every submodule of the repository located at ``path``.

    Items come from ``iter_gitworktree()`` and are yielded unchanged; all
    items that are not of type ``GitTreeItemType.submodule`` are dropped.
    """
    for record in iter_gitworktree(
        path,
        untracked=None,
        link_target=False,
        fp=False,
        recursive='repository',
    ):
        is_submodule = record.gittype == GitTreeItemType.submodule
        # besides non-submodules, also leave out a submodule reported at
        # the root path -- that would indicate that the submodule itself
        # is not around, only its record in the parent
        if is_submodule and record.name != PurePath('.'):
            yield record

.. currentmodule:: datalad_next.repo_utils
.. autosummary::
:toctree: generated

def get_worktree_head(
path: Path,
) -> tuple[str | None, str | None]:
try:
HEAD = call_git_lines(
# we add the pathspec disambiguator to get cleaner error messages
# (and we only report the first item below, to take it off again)
['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--'],
cwd=path,
)[0]
except (NotADirectoryError, FileNotFoundError) as e:
raise ValueError('path not found') from e
except CommandError as e:
CapturedException(e)
if 'fatal: not a git repository' in e.stderr:
raise ValueError(f'no Git repository at {path!r}') from e
elif 'fatal: bad revision' in e.stderr:
return (None, None)
else:
# no idea reraise
raise
get_worktree_head
"""

if HEAD.startswith('refs/heads/adjusted/'):
# this is a git-annex adjusted branch. do the comparison against
# its basis. it is not meaningful to track the managed branch in
# a superdataset
return (
HEAD,
# replace 'refs/heads' with 'refs/basis'
f'refs/basis/{HEAD[11:]}',
)
else:
return (HEAD, None)
from .worktree import (
get_worktree_head,
)
45 changes: 45 additions & 0 deletions datalad_next/repo_utils/worktree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import annotations

from pathlib import Path

from datalad_next.exceptions import CapturedException
from datalad_next.runners import (
CommandError,
call_git_lines,
)


def get_worktree_head(
    path: Path,
) -> tuple[str | None, str | None]:
    """Determine the symbolic full name of ``HEAD`` for the worktree at ``path``.

    Parameters
    ----------
    path: Path
      Directory in which ``git rev-parse`` is executed.

    Returns
    -------
    tuple
      ``(HEAD, basis)``. ``HEAD`` is the first line reported by
      ``git rev-parse -q --symbolic-full-name HEAD``. ``basis`` is the
      matching ``refs/basis/...`` ref when ``HEAD`` is located under
      ``refs/heads/adjusted/``, and ``None`` otherwise. ``(None, None)`` is
      returned when Git fails with a ``bad revision`` error.

    Raises
    ------
    ValueError
      If ``path`` does not exist (or is not a directory), or if Git reports
      that there is no repository at ``path``.
    CommandError
      Re-raised unchanged for any other Git failure.
    """
    # we add the pathspec disambiguator to get cleaner error messages
    # (and we only use the first reported line, to take it off again)
    rev_parse_cmd = ['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--']
    try:
        head_ref = call_git_lines(rev_parse_cmd, cwd=path)[0]
    except (NotADirectoryError, FileNotFoundError) as exc:
        raise ValueError('path not found') from exc
    except CommandError as exc:
        # NOTE(review): the CapturedException instance is discarded;
        # presumably it is created for its logging side effect -- confirm
        CapturedException(exc)
        git_stderr = exc.stderr
        if 'fatal: not a git repository' in git_stderr:
            raise ValueError(f'no Git repository at {path!r}') from exc
        if 'fatal: bad revision' in git_stderr:
            return (None, None)
        # no idea what went wrong, reraise
        raise

    if not head_ref.startswith('refs/heads/adjusted/'):
        return (head_ref, None)

    # this is a git-annex adjusted branch. do the comparison against
    # its basis. it is not meaningful to track the managed branch in
    # a superdataset.
    # replace 'refs/heads' with 'refs/basis': the slice drops the
    # 11-character prefix 'refs/heads/'
    basis_ref = f'refs/basis/{head_ref[11:]}'
    return (head_ref, basis_ref)

1 change: 1 addition & 0 deletions docs/source/pyutils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ packages.
iterable_subprocess
itertools
iter_collections
repo_utils
runners
tests
tests.fixtures
Expand Down
Loading