Skip to content

Commit

Permalink
Add gto doctor (#316)
Browse files Browse the repository at this point in the history
* add gto doctor

* fix describe - for both problems

* add tests

* fix tests

* enhance 'gto doctor'

* fix tests

* print corrupted commit

* add doctor to querying section
  • Loading branch information
aguschin authored Dec 16, 2022
1 parent 0600c0e commit ff500db
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 69 deletions.
55 changes: 34 additions & 21 deletions gto/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
ARTIFACT,
ASSIGNMENTS_PER_VERSION,
COMMIT,
NAME,
STAGE,
VERSION,
VERSIONS_PER_STAGE,
Expand All @@ -18,13 +17,10 @@
shortcut_regexp,
)
from gto.exceptions import NoRepo, NotImplementedInGTO, WrongArgs
from gto.ext import EnrichmentInfo
from gto.git_utils import is_url_of_remote_repo
from gto.index import EnrichmentManager, RepoIndexManager
from gto.index import Artifact, FileIndexManager, RepoIndexManager
from gto.registry import GitRegistry
from gto.tag import NAME_REFERENCE
from gto.tag import parse_name as parse_tag_name
from gto.tag import parse_name_reference


def _is_gto_repo(repo: Union[str, Repo]):
Expand Down Expand Up @@ -96,6 +92,39 @@ def remove(
)


def describe(
    repo: Union[str, Repo], name: str, rev: Optional[str] = None
) -> Optional[Artifact]:
    """Return the annotation stored for an artifact, if there is one.

    Args:
        repo: Path/URL of the repository, or an already opened ``Repo``.
        name: Artifact name, or a naming shortcut matching ``shortcut_regexp``
            (in which case the revision is derived from the shortcut).
        rev: Git revision to read the index from. Must not be combined with a
            naming shortcut. When omitted for a local repo, the index is read
            from the working directory without going through Git.

    Returns:
        The ``Artifact`` entry found under ``name``, or ``None`` when the
        shortcut resolves to nothing, the revision has no index file, or the
        index has no entry with that name.

    Raises:
        WrongArgs: both ``rev`` and a naming shortcut were given.
        NotImplementedInGTO: the naming shortcut resolves to several versions.
    """
    match = re.search(shortcut_regexp, name)
    if match:
        if rev:
            raise WrongArgs("Either specify revision or use naming shortcut.")
        # clones a remote repo second time, can be optimized
        versions = show(repo, name)
        if len(versions) == 0:  # nothing found
            return None
        if len(versions) > 1:
            raise NotImplementedInGTO(
                "Ambigious naming shortcut: multiple variants found."
            )
        rev = versions[0]["commit_hexsha"]
        name = match["artifact"]

    if not is_url_of_remote_repo(repo) and rev is None:
        # read artifacts.yaml without using Git
        workdir = repo.working_dir if isinstance(repo, Repo) else repo
        return FileIndexManager.from_path(workdir).get_index().state.get(name)

    # read Git repo
    with RepoIndexManager.from_repo(repo) as index:
        commit_index = index.get_commit_index(rev)
        # get_commit_index yields None when the revision carries no index
        # file; treat that as "artifact not described" instead of failing
        # with AttributeError on `.state`.
        if commit_index is None:
            return None
        return commit_index.state.get(name)


def register(
repo: Union[str, Repo],
name: str,
Expand Down Expand Up @@ -491,22 +520,6 @@ def format_hexsha(hexsha):
return versions_, "keys"


def describe(
    repo: Union[str, Repo], name: str, rev: str = None
) -> List[EnrichmentInfo]:
    """Find enrichments for the artifact"""
    kind, parsed_ref = parse_name_reference(name)

    # Reject unsupported input before the repository is opened.
    if kind == NAME_REFERENCE.NAME:
        by_tag = False
    elif kind == NAME_REFERENCE.TAG:
        if rev:
            raise WrongArgs("Should not specify revision if you pass git tag")
        by_tag = True
    else:
        raise NotImplementedError

    with EnrichmentManager.from_repo(repo) as manager:
        if by_tag:
            # The tag itself is the revision; the artifact name comes from
            # the parsed tag.
            return manager.describe(name=parsed_ref[NAME], rev=name)
        return manager.describe(name=name, rev=rev)


def history(
repo: Union[str, Repo],
artifact: str = None,
Expand Down
98 changes: 68 additions & 30 deletions gto/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
VERSIONS_PER_STAGE,
VersionSort,
)
from gto.exceptions import GTOException, NotImplementedInGTO, WrongArgs
from gto.exceptions import (
GTOException,
NotImplementedInGTO,
WrongArgs,
WrongConfig,
)
from gto.index import RepoIndexManager
from gto.ui import (
EMOJI_FAIL,
Expand Down Expand Up @@ -346,6 +351,9 @@ def callback_sort( # pylint: disable=inconsistent-return-statements
# Boolean CLI flag `--description` (off by default); `describe` uses it to
# print only the artifact's description.
option_show_description = Option(
False, "--description", is_flag=True, help="Show description", show_default=True
)
# Boolean CLI flag `--custom` (off by default) for showing custom metadata.
# NOTE(review): in the visible part of this file it is referenced only from
# commented-out code in `describe` — confirm whether it is wired in elsewhere.
option_show_custom = Option(
False, "--custom", is_flag=True, help="Show custom metadata", show_default=True
)
option_json = Option(
False,
"--json",
Expand Down Expand Up @@ -533,6 +541,42 @@ def remove(
)


@gto_command(section=CommandGroups.enriching)
def describe(
    repo: str = option_repo,
    name: str = arg_name,
    rev: str = option_rev,
    type: Optional[bool] = option_show_type,
    path: Optional[bool] = option_show_path,
    description: Optional[bool] = option_show_description,
    # custom: Optional[bool] = option_show_custom,
):
    """Display enrichments for an artifact."""
    # NOTE(review): the docstring above presumably doubles as the command's
    # help text, so it is kept short; details live in these comments.
    #
    # With no flag the whole annotation is printed as JSON. With exactly one
    # of --type / --path / --description only that value is printed, and
    # nothing is printed if the annotation lacks it. "custom" is left out
    # until the commented-out parameter above is enabled.
    requested = [
        key
        for key, enabled in (
            ("type", type),
            ("path", path),
            ("description", description),
        )
        if enabled
    ]
    # Raise instead of `assert`: assertions are stripped under `python -O`,
    # which would silently accept conflicting flags.
    if len(requested) > 1:
        raise WrongArgs("Can output one key only")
    field = requested[0] if requested else None

    artifact = gto.api.describe(repo=repo, name=name, rev=rev)
    if not artifact:
        return
    annotation = artifact.dict(exclude_defaults=True)
    if field is None:
        format_echo(annotation, "json")
    elif field in annotation:
        with cli_echo():
            echo(annotation[field])


@gto_command(section=CommandGroups.modifying)
def register(
repo: str = option_repo,
Expand Down Expand Up @@ -875,37 +919,31 @@ def print_index(repo: str = option_repo):
format_echo(index.artifact_centric_representation(), "json")


@gto_command(section=CommandGroups.enriching)
def describe(
@gto_command(section=CommandGroups.querying)
def doctor(
repo: str = option_repo,
name: str = arg_name,
rev: str = option_rev,
type: Optional[bool] = option_show_type,
path: Optional[bool] = option_show_path,
description: Optional[bool] = option_show_description,
all_commits: bool = option_all_commits,
):
"""Display enrichments for an artifact."""
assert (
sum(bool(i) for i in (type, path, description)) <= 1
), "Can output one key only"
infos = gto.api.describe(repo=repo, name=name, rev=rev)
if not infos:
return
d = infos[0].get_object().dict(exclude_defaults=True)
if type:
if "type" not in d:
raise WrongArgs("No type in enrichment")
echo(d["type"])
elif path:
if "path" not in d:
raise WrongArgs("No path in enrichment")
echo(d["path"])
elif description:
if "description" not in d:
raise WrongArgs("No description in enrichment")
echo(d["description"])
else:
format_echo(d, "json")
"""Display GTO version and check the registry for problems."""
with cli_echo():
echo(f"{EMOJI_GTO} GTO Version: {gto.__version__}")
echo("---------------------------------")
try:
from gto.config import ( # pylint: disable=import-outside-toplevel
CONFIG,
)

echo(CONFIG.__repr_str__("\n"))
except WrongConfig:
echo(f"{EMOJI_FAIL} Fail to parse config")
echo("---------------------------------")

gto.api._get_state(repo).dict() # pylint: disable=protected-access
if all_commits:
with RepoIndexManager.from_repo(repo) as index:
index.artifact_centric_representation()
with cli_echo():
echo(f"{EMOJI_OK} No issues found")


if __name__ == "__main__":
Expand Down
9 changes: 8 additions & 1 deletion gto/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,15 @@ def __init__(self, path) -> None:
super().__init__(self.message)


class WrongArtifactsYaml(GTOException):
    """Signal that artifacts.yaml does not conform to the GTO format."""

    # Fixed text handed to the base exception; there is nothing to format.
    message = "artifacts.yaml file doesn't conform to GTO format"

    def __init__(self) -> None:
        # Takes no arguments: the message is always the class-level text.
        super().__init__(self.message)


class NoFile(GTOException):
_message = "No file/folder found in '{path}' for checked out commit"
_message = "Nothing found in '{path}' for checked out commit"

def __init__(self, path) -> None:
self.message = self._message.format(path=path)
Expand Down
51 changes: 40 additions & 11 deletions gto/index.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import logging
import os
from abc import ABC, abstractmethod
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime
from functools import wraps
from pathlib import Path
from typing import IO, Dict, FrozenSet, Generator, List, Optional, Union

import git
from git import Repo
from pydantic import BaseModel, parse_obj_as, validator
from pydantic import BaseModel, ValidationError, parse_obj_as, validator
from ruamel.yaml import YAMLError

from gto.base import BaseManager, BaseRegistryState
from gto.base import Commit as EnrichmentEvent
Expand All @@ -31,12 +32,15 @@
NoFile,
PathIsUsed,
WrongArgs,
WrongArtifactsYaml,
)
from gto.ext import EnrichmentInfo, EnrichmentReader
from gto.git_utils import RemoteRepoMixin, read_repo
from gto.ui import echo
from gto.utils import resolve_ref

logger = logging.getLogger("gto")


class Artifact(BaseModel):
type: Optional[str] = None
Expand Down Expand Up @@ -118,10 +122,27 @@ def read(cls, path_or_file: Union[str, IO], frozen: bool = False):

@staticmethod
def read_state(path_or_file: Union[str, IO]):
def read_yaml(stream: IO):
try:
return yaml.load(stream)
except YAMLError as e:
raise WrongArtifactsYaml() from e

# read contents of the yaml
if isinstance(path_or_file, str):
with open(path_or_file, "r", encoding="utf8") as file:
return parse_obj_as(State, yaml.load(file))
state = parse_obj_as(State, yaml.load(path_or_file))
try:
with open(path_or_file, "r", encoding="utf8") as file:
contents = read_yaml(file)
except FileNotFoundError as e:
raise NoFile("artifacts.yaml") from e
else:
contents = read_yaml(path_or_file)
# check yaml contents is a valid State
try:
state = parse_obj_as(State, contents)
except ValidationError as e:
raise WrongArtifactsYaml() from e
# validate that specific names conform to the naming convention
for key in state:
assert_name_is_valid(key)
return state
Expand Down Expand Up @@ -229,7 +250,6 @@ class FileIndexManager(BaseIndexManager):
path: str = ""

@classmethod
@contextmanager
def from_path(cls, path: str, config: RegistryConfig = None):
if config is None:
config = read_registry_config(os.path.join(path, CONFIG_FILE_NAME))
Expand Down Expand Up @@ -333,15 +353,24 @@ class Config:
arbitrary_types_allowed = True

def get_commit_index( # type: ignore # pylint: disable=arguments-differ
self, ref: Union[str, git.Reference, None], allow_to_not_exist: bool = True
self,
ref: Union[str, git.Reference, None],
allow_to_not_exist: bool = True,
ignore_corrupted: bool = False,
) -> Optional[Index]:
if not ref or isinstance(ref, str):
ref = resolve_ref(self.repo, ref)
if self.config.INDEX in ref.tree:
return Index.read(
(ref.tree / self.config.INDEX).data_stream,
frozen=True,
)
try:
return Index.read(
(ref.tree / self.config.INDEX).data_stream,
frozen=True,
)
except WrongArtifactsYaml as e:
logger.warning("Corrupted artifacts.yaml file in commit %s", ref)
if ignore_corrupted:
return None
raise e
if allow_to_not_exist:
return None
raise ValueError(f"No Index exists at {ref}")
Expand Down
21 changes: 20 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,25 @@ def test_api_info_commands_repo_with_artifact(
gto.api.history(repo.working_dir)


def test_describe(repo_with_artifact: Tuple[git.Repo, Callable]):
    """`describe` reflects a fresh annotation without `rev`, but not at HEAD."""
    repo, _write_file = repo_with_artifact
    artifact_name = "new-artifact"
    gto.api.annotate(repo, artifact_name, path="other-path")

    # No revision given: the re-annotated path is expected.
    current = gto.api.describe(repo, artifact_name)
    check_obj(
        current.dict(exclude_defaults=True),  # type: ignore
        {"type": "new-type", "path": "other-path"},
    )

    # Pinned to HEAD: the path from before the re-annotation is expected.
    at_head = gto.api.describe(repo, artifact_name, rev="HEAD")
    check_obj(
        at_head.dict(exclude_defaults=True),  # type: ignore
        {"type": "new-type", "path": "path"},
    )


def test_register_deregister(repo_with_artifact):
repo, name = repo_with_artifact
vname1, vname2 = "v1.0.0", "v1.0.1"
Expand Down Expand Up @@ -361,7 +380,7 @@ def test_if_stages_on_remote_git_repo_then_return_expected_stages():
@skip_for_windows
def test_if_describe_on_remote_git_repo_then_return_expected_info():
result = gto.api.describe(repo=tests.resources.SAMPLE_REMOTE_REPO_URL, name="churn")
assert result[0].get_object().dict(exclude_defaults=True) == {
assert result.dict(exclude_defaults=True) == {
"type": "model",
"path": "models/churn.pkl",
"virtual": False,
Expand Down
Loading

0 comments on commit ff500db

Please sign in to comment.