Skip to content

Commit

Permalink
status: use PyGit2 for much faster, simpler
Browse files Browse the repository at this point in the history
  • Loading branch information
scivision committed Sep 30, 2024
1 parent ae22b46 commit 9f9dded
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 156 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
os: [windows-latest, macos-latest, ubuntu-latest]
python-version: ['3.9', '3.x']
python-version: ['3.9', '3.12']

steps:
- uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ line-length = 100

[tool.mypy]
files = ["src"]
ignore_missing_imports = true
6 changes: 4 additions & 2 deletions src/gitutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""
These Git utilities use nothing beyond pure Python and command-line Git.
These Git utilities use pygit2 where possible for much more efficient operations
than using subprocesses even with asyncio.
Speed is emphasized throughout, with pipelining and concurrent `asyncio` routines throughout
for fastest operation on large numbers of repos.
"""

import logging

__version__ = "1.12.1"
__version__ = "2.0.0"


def _log(verbose: bool):
Expand Down
170 changes: 25 additions & 145 deletions src/gitutils/status.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
detect Git local repo modifications. Crazy fast by not invoking remote.
detect Git local repo modifications.
replaced by git status --porcelain:
git ls-files -o -d --exclude-standard: # check for uncommitted files
Expand All @@ -10,171 +10,51 @@

from __future__ import annotations
import argparse
import subprocess
import logging
import typing
from pathlib import Path
import asyncio
from pprint import pprint

from . import _log
from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT

C0 = ["rev-parse", "--abbrev-ref", "HEAD"] # get branch name
C1 = ["status", "--porcelain"] # uncommitted or changed files

__all__ = ["git_porcelain"]


def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
"""
detects if single Git repo is porcelain i.e. clean.
May not have been pushed or fetched.
Parameters
----------
path: pathlib.Path
path to Git repo
Returns
-------
is_porcelain: bool
true if local Git is clean
"""

if not path.is_dir():
raise NotADirectoryError(path)

ret = subprocess.run(
[git_exe(), "-C", str(path)] + C1,
stdout=subprocess.PIPE,
text=True,
timeout=timeout,
)
if ret.returncode != 0:
logging.error(f"{path.name} return code {ret.returncode} {C1}")
return False
return not ret.stdout


async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None:
"""
Notes which Git repos have local changes that haven't been pushed to remote
Parameters
----------
path : pathlib.Path
Git repo directory
Returns
-------
changes : tuple of pathlib.Path, str
Git repo local changes
"""

code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
if code != 0:
logging.error(f"{path.name} return code {code} {C1} {err}")
return None

logging.info(path.name)

# %% uncommitted changes
if out:
return path.name, out

# %% detect committed, but not pushed
code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
if code != 0:
logging.error(f"{path.name} return code {code} {C0} {err}")
return None

C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
code, out, err = await subprocess_asyncio(C2, timeout=timeout)
if code != 0:
logging.error(f"{path.name} return code {code} {branch} {out} {err}")
return None

if out:
return path.name, out

return None


def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None:
"""
Notes which Git repos have local changes that haven't been pushed to remote
Parameters
----------
path : pathlib.Path
Git repo directory
Returns
-------
changes : tuple of pathlib.Path, str
Git repo local changes
"""

out = subprocess.check_output(
[git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout
).strip()

logging.info(path.name)

# %% uncommitted changes
if out:
return path.name, out

# %% detect committed, but not pushed
branch = subprocess.check_output(
[git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout
).strip()

C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
out = subprocess.check_output(C2, text=True, timeout=timeout).strip()

if out:
return path.name, out

return None
import pygit2

from . import _log
from .git import gitdirs
from .status_cmd import git_status_serial, git_status_async

async def git_status(path: Path, verbose: bool, timeout: float) -> list[str]:
c = MAGENTA if verbose else ""

changed = []
futures = [_git_status(d, timeout) for d in gitdirs(path)]
for r in asyncio.as_completed(futures, timeout=timeout):
if changes := await r:
changed.append(changes[0])
print(c + changes[0])
if verbose:
print(BLACK + changes[1])
def git_status(path: Path, verbose: bool) -> typing.Iterator[dict]:

return changed
for d in gitdirs(path):
repo = pygit2.Repository(d)
if status := repo.status():
yield status


def cli():
p = argparse.ArgumentParser(description="get status of many Git repos")
p.add_argument("path", help="path to look under", nargs="?", default="~/code")
p.add_argument("-v", "--verbose", action="store_true")
p.add_argument("-t", "--timeout", type=float)
p.add_argument("--serial", help="don't use asyncio parallel execution", action="store_true")
p.add_argument(
"-method",
help="use Git command line serial execution",
choices=["pygit2", "serial", "async"], default="pygit2",
)
P = p.parse_args()

_log(P.verbose)

if P.serial:
c = MAGENTA if P.verbose else ""
if P.method == "pygit2":
for s in git_status(P.path, P.verbose):
pprint(s)
elif P.method == "serial":
for d in gitdirs(P.path):
if changes := git_status_serial(d, P.timeout):
print(c + changes[0])
print(changes[0])
if P.verbose:
print(BLACK + changes[1])
else:
asyncio.run(git_status(P.path, P.verbose, P.timeout))
print(changes[1])
elif P.method == "async":
asyncio.run(git_status_async(P.path, P.verbose, P.timeout))


if __name__ == "__main__":
Expand Down
149 changes: 149 additions & 0 deletions src/gitutils/status_cmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""
This was the original implementation of mass-checking of Git status
using asyncio and subprocesses. It is much more efficient to use
libgit2 via pygit2, which is the current implementation.
"""

import subprocess
import logging
from pathlib import Path
import asyncio

from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT

# Git argument lists; the functions in this module append them to
# [git_exe(), "-C", <repo path>] to build the full command.
C0 = ["rev-parse", "--abbrev-ref", "HEAD"]  # get branch name
C1 = ["status", "--porcelain"]  # uncommitted or changed files

# Names exported by a star-import of this module.
__all__ = ["git_porcelain"]


def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
    """
    Report whether a single Git repo is clean according to `git status --porcelain`.

    Only the local repo is examined; it may not have been pushed or fetched.

    Parameters
    ----------

    path: pathlib.Path
        path to Git repo
    timeout: float
        timeout handed to subprocess.run for the Git command

    Returns
    -------

    is_porcelain: bool
        true if local Git is clean; false if there are changes or Git
        exited with a non-zero return code

    Raises
    ------

    NotADirectoryError
        if path is not a directory
    """

    if not path.is_dir():
        raise NotADirectoryError(path)

    cmd = [git_exe(), "-C", str(path), *C1]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, text=True, timeout=timeout)

    if proc.returncode == 0:
        # empty porcelain output means nothing is modified or untracked
        return not proc.stdout

    logging.error(f"{path.name} return code {proc.returncode} {C1}")
    return False


async def _git_status(path: Path, timeout: float) -> "tuple[str, str] | None":
    """
    Notes which Git repos have local changes that haven't been pushed to remote

    The return annotation is quoted on purpose: this module does not use
    `from __future__ import annotations`, and an unquoted
    `tuple[str, str] | None` raises TypeError at import time on Python 3.9.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        timeout passed to each subprocess_asyncio call

    Returns
    -------
    changes : tuple of str, str
        repo directory name and the Git output describing its changes.
        None if there are no changes or a Git command returned non-zero
        (the failure is logged).
    """

    git = [git_exe(), "-C", str(path)]

    code, out, err = await subprocess_asyncio(git + C1, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C1} {err}")
        return None

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    code, branch, err = await subprocess_asyncio(git + C0, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C0} {err}")
        return None

    # Strip as git_status_serial does, so that a trailing newline (should
    # subprocess_asyncio return raw output) cannot corrupt the revision range.
    branch = branch.strip()

    C2 = git + ["diff", "--stat", f"origin/{branch}.."]
    code, out, err = await subprocess_asyncio(C2, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {branch} {out} {err}")
        return None

    if out:
        return path.name, out

    return None


def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> "tuple[str, str] | None":
    """
    Notes which Git repos have local changes that haven't been pushed to remote

    The return annotation is quoted on purpose: this module does not use
    `from __future__ import annotations`, and an unquoted
    `tuple[str, str] | None` raises TypeError at import time on Python 3.9.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        timeout passed to each subprocess.check_output call

    Returns
    -------
    changes : tuple of str, str
        repo directory name and the Git output describing its changes.
        None if there are no changes.

    Raises
    ------
    subprocess.CalledProcessError
        if any Git command exits with a non-zero return code
    subprocess.TimeoutExpired
        if a Git command exceeds timeout
    """

    git = [git_exe(), "-C", str(path)]

    out = subprocess.check_output(git + C1, text=True, timeout=timeout).strip()

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    branch = subprocess.check_output(git + C0, text=True, timeout=timeout).strip()

    C2 = git + ["diff", "--stat", f"origin/{branch}.."]
    out = subprocess.check_output(C2, text=True, timeout=timeout).strip()

    if out:
        return path.name, out

    return None


async def git_status_async(path: Path, verbose: bool, timeout: float) -> list[str]:
    """
    Run _git_status concurrently on every directory yielded by gitdirs(path),
    printing each repo that reports changes as its check completes.

    Parameters
    ----------
    path : pathlib.Path
        top-level directory handed to gitdirs
    verbose : bool
        if true, print the repo name prefixed with MAGENTA and follow it
        with the change details prefixed with BLACK
    timeout : float
        used both for each repo's Git calls and for asyncio.as_completed

    Returns
    -------
    changed : list of str
        names of the repos with changes, in completion order
    """
    name_prefix = MAGENTA if verbose else ""
    pending = [_git_status(repo, timeout) for repo in gitdirs(path)]

    changed = []
    for finished in asyncio.as_completed(pending, timeout=timeout):
        result = await finished
        if not result:
            # None means no changes, or a Git failure that _git_status already logged
            continue

        name, details = result
        changed.append(name)
        print(name_prefix + name)
        if verbose:
            print(BLACK + details)

    return changed
Loading

0 comments on commit 9f9dded

Please sign in to comment.