diff --git a/docs/changes.rst b/docs/changes.rst index 4e21b8ea..6e47c2ed 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -11,6 +11,9 @@ skops Changelog v0.5 ---- +- Added CLI entrypoint support (:func:`.cli.entrypoint.main_cli`) + and a command line function to convert Pickle files + to Skops files (:func:`.cli._convert.main`). :pr:`249` by `Erin Aho`_ - Support more array-like data types for tabular data and list-like data types for text data. :pr:`179` by `Francesco Cariaggi`_. @@ -88,4 +91,5 @@ Contributors :user:`Adrin Jalali `, :user:`Merve Noyan `, :user:`Benjamin Bossan `, :user:`Ayyuce Demirbas -`, :user:`Prajjwal Mishra `, :user:`Francesco Cariaggi ` +`, :user:`Prajjwal Mishra `, :user:`Francesco Cariaggi `, +:user:`Erin Aho ` diff --git a/docs/persistence.rst b/docs/persistence.rst index 07aa3e2c..473b9b6f 100644 --- a/docs/persistence.rst +++ b/docs/persistence.rst @@ -110,6 +110,31 @@ you have custom functions (say, a custom function to be used with most ``numpy`` and ``scipy`` functions should work. Therefore, you can save objects having references to functions such as ``numpy.sqrt``. +Command Line Interface +###################### + +Skops has a command line interface to convert scikit-learn models persisted with +``Pickle`` to ``Skops`` files. + +To convert a file from the command line, use the ``skops convert`` entrypoint. + +Below is an example call to convert a file ``my_model.pkl`` to ``my_model.skops``: + +.. code:: console + + skops convert my_model.pkl + +To convert multiple files, you can use bash commands to iterate the above call. +For example, to convert all ``.pkl`` flies in the current directory: + +.. code:: console + + for FILE in *.pkl; do skops convert FILE; done + +Further help for the different supported options can be found by calling +``skops convert --help`` in a terminal. + + Supported libraries ------------------- diff --git a/setup.py b/setup.py index 2262dcea..ce368f12 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,14 @@ def setup_package(): + package_data = dict( + entry_points={ + "console_scripts": [ + "skops = skops.cli.entrypoint:main_cli", + ], + } + ) + metadata = dict( name=DISTNAME, maintainer=MAINTAINER, @@ -72,7 +80,7 @@ def setup_package(): include_package_data=True, ) - setup(**metadata) + setup(**package_data, **metadata) if __name__ == "__main__": diff --git a/skops/cli/__init__.py b/skops/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/skops/cli/_convert.py b/skops/cli/_convert.py new file mode 100644 index 00000000..7d630c15 --- /dev/null +++ b/skops/cli/_convert.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +import argparse +import logging +import os +import pathlib +import pickle +from typing import Optional + +from skops.cli._utils import get_log_level +from skops.io import dumps, get_untrusted_types + + +def _convert_file( + input_file: os.PathLike, + output_file: os.PathLike, + logger: logging.Logger = logging.getLogger(), +) -> None: + """Function that is called by ``skops convert`` entrypoint. + + Loads a pickle model from the input path, converts to skops format, and saves to + output file. + + Parameters + ---------- + input_file : os.PathLike + Path of input .pkl model to load. + + output_file : os.PathLike + Path to save .skops model to. + + """ + model_name = pathlib.Path(input_file).stem + + logger.debug(f"Converting {model_name}") + + with open(input_file, "rb") as f: + obj = pickle.load(f) + skops_dump = dumps(obj) + + untrusted_types = get_untrusted_types(data=skops_dump) + + if not untrusted_types: + logger.info(f"No unknown types found in {model_name}.") + else: + untrusted_str = ", ".join(untrusted_types) + + logger.warning( + f"While converting {input_file}, " + "the following unknown types were found: " + f"{untrusted_str}. " + f"When loading {output_file} with skops.load, these types must be " + "specified as 'trusted'" + ) + + with open(output_file, "wb") as out_file: + logger.debug(f"Writing to {output_file}") + out_file.write(skops_dump) + + +def format_parser( + parser: Optional[argparse.ArgumentParser] = None, +) -> argparse.ArgumentParser: + """Adds arguments and help to parent CLI parser for the convert method.""" + + if not parser: # used in tests + parser = argparse.ArgumentParser() + + parser_subgroup = parser.add_argument_group("convert") + parser_subgroup.add_argument("input", help="Path to an input file to convert. ") + + parser_subgroup.add_argument( + "-o", + "--output-file", + help=( + "Specify the output file name for the converted skops file. " + "If not provided, will default to using the same name as the input file, " + "and saving to the current working directory with the suffix '.skops'." + ), + default=None, + ) + parser_subgroup.add_argument( + "-v", + "--verbose", + help=( + "Increases verbosity of logging. Can be used multiple times to increase " + "verbosity further." + ), + action="count", + dest="loglevel", + default=0, + ) + return parser + + +def main( + parsed_args: argparse.Namespace, +) -> None: + output_file = parsed_args.output_file + input_file = parsed_args.input + + logging.basicConfig( + format="%(levelname)-8s: %(message)s", level=get_log_level(parsed_args.loglevel) + ) + + if not output_file: + # No filename provided, defaulting to base file path + file_name = pathlib.Path(input_file).stem + output_file = pathlib.Path.cwd() / f"{file_name}.skops" + + _convert_file( + input_file=input_file, + output_file=output_file, + ) diff --git a/skops/cli/_utils.py b/skops/cli/_utils.py new file mode 100644 index 00000000..55173532 --- /dev/null +++ b/skops/cli/_utils.py @@ -0,0 +1,15 @@ +import logging + + +def get_log_level(level: int = 0) -> int: + """Takes in verbosity from a CLI entrypoint (number of times -v specified), + and sets the logger to the required log level""" + + all_levels = [logging.WARNING, logging.INFO, logging.DEBUG] + + if level > len(all_levels): + level = len(all_levels) - 1 + elif level < 0: + level = 0 + + return all_levels[level] diff --git a/skops/cli/entrypoint.py b/skops/cli/entrypoint.py new file mode 100644 index 00000000..c98ef8d3 --- /dev/null +++ b/skops/cli/entrypoint.py @@ -0,0 +1,47 @@ +import argparse + +import skops.cli._convert + + +def main_cli(command_line_args=None): + """Main command line interface entrypoint for all command line Skops methods. + + To add a new entrypoint: + 1. Create a new method to call that accepts a namespace + 2. Create a new subparser formatter to define the expected CL arguments + 3. Add those to the function map. + """ + entry_parser = argparse.ArgumentParser( + prog="Skops", + description="Main entrypoint for all command line Skops methods.", + add_help=True, + ) + + subparsers = entry_parser.add_subparsers( + title="Commands", + description="Skops command to call", + dest="cmd", + help="Sub-commands help", + ) + + # function_map should map a command to + # method: the command to call (gets set to default 'func') + # format_parser: the function used to create a subparser for that command + function_map = { + "convert": { + "method": skops.cli._convert.main, + "format_parser": skops.cli._convert.format_parser, + }, + } + + for func_name, values in function_map.items(): + # Add subparser for each function in func map, + # and assigns default func to be "method" from function_map + subparser = subparsers.add_parser(func_name) + subparser.set_defaults(func=values["method"]) + values["format_parser"](subparser) + + # Parse arguments with arg parser for given function in function map, + # Then call the matching method in the function_map with the argument namespace + args = entry_parser.parse_args(command_line_args) + args.func(args) diff --git a/skops/cli/tests/test_convert.py b/skops/cli/tests/test_convert.py new file mode 100644 index 00000000..6395dd3d --- /dev/null +++ b/skops/cli/tests/test_convert.py @@ -0,0 +1,142 @@ +import logging +import pathlib +import pickle +from unittest import mock + +import numpy as np +import pytest + +from skops.cli import _convert +from skops.io import load + + +class MockUnsafeType: + def __init__(self): + pass + + +class TestConvert: + model_name = "some_model_name" + + @pytest.fixture + def safe_obj(self): + return np.ndarray([1, 2, 3, 4]) + + @pytest.fixture + def unsafe_obj(self): + return MockUnsafeType() + + @pytest.fixture + def pkl_path(self, tmp_path): + return tmp_path / f"{self.model_name}.pkl" + + @pytest.fixture + def skops_path(self, tmp_path): + return tmp_path / f"{self.model_name}.skops" + + @pytest.fixture + def write_safe_file(self, pkl_path, safe_obj): + with open(pkl_path, "wb") as f: + pickle.dump(safe_obj, f) + + @pytest.fixture + def write_unsafe_file(self, pkl_path, unsafe_obj): + with open(pkl_path, "wb") as f: + pickle.dump(unsafe_obj, f) + + def test_base_case_works_as_expected( + self, pkl_path, tmp_path, skops_path, write_safe_file, safe_obj, caplog + ): + mock_logger = mock.MagicMock() + _convert._convert_file(pkl_path, skops_path, logger=mock_logger) + persisted_obj = load(skops_path) + assert np.array_equal(persisted_obj, safe_obj) + + # Check no warnings or errors raised + mock_logger.warning.assert_not_called() + mock_logger.error.assert_not_called() + + def test_unsafe_case_works_as_expected( + self, pkl_path, tmp_path, skops_path, write_unsafe_file, caplog + ): + caplog.set_level(logging.WARNING) + _convert._convert_file(pkl_path, skops_path) + persisted_obj = load(skops_path, trusted=True) + + assert isinstance(persisted_obj, MockUnsafeType) + + # check logging has warned that an unsafe type was found + assert MockUnsafeType.__name__ in caplog.text + + +class TestMain: + @staticmethod + def assert_called_correctly( + mock_convert: mock.MagicMock, + path, + output_file=None, + ): + if not output_file: + output_file = pathlib.Path.cwd() / f"{pathlib.Path(path).stem}.skops" + mock_convert.assert_called_once_with(input_file=path, output_file=output_file) + + @mock.patch("skops.cli._convert._convert_file") + def test_base_works_as_expected(self, mock_convert: mock.MagicMock): + path = "123.pkl" + namespace, _ = _convert.format_parser().parse_known_args([path]) + + _convert.main(namespace) + self.assert_called_correctly(mock_convert, path) + + @mock.patch("skops.cli._convert._convert_file") + @pytest.mark.parametrize( + "input_path, output_file, expected_path", + [ + ("abc.123", "some/file/path.out", "some/file/path.out"), + ("abc.123", None, pathlib.Path.cwd() / "abc.skops"), + ], + ids=["Given an output path", "No output path"], + ) + def test_with_output_dir_works_as_expected( + self, mock_convert: mock.MagicMock, input_path, output_file, expected_path + ): + if output_file is not None: + args = [input_path, "--output", output_file] + else: + args = [input_path] + + namespace, _ = _convert.format_parser().parse_known_args(args) + + _convert.main(namespace) + self.assert_called_correctly( + mock_convert, path=input_path, output_file=expected_path + ) + + @mock.patch("skops.cli._convert._convert_file") + @pytest.mark.parametrize( + "verbosity, expected_level", + [ + ("", logging.WARNING), + ("-v", logging.INFO), + ("--verbose", logging.INFO), + ("-vv", logging.DEBUG), + ("-v -v", logging.DEBUG), + ("-vvvvv", logging.DEBUG), + ("--verbose --verbose", logging.DEBUG), + ], + ) + def test_given_log_levels_works_as_expected( + self, mock_convert: mock.MagicMock, verbosity, expected_level, caplog + ): + input_path = "abc.def" + output_path = "bde.skops" + args = [input_path, "--output", output_path, verbosity.split()] + + namespace, _ = _convert.format_parser().parse_known_args(args) + + _convert.main(namespace) + self.assert_called_correctly( + mock_convert, path=input_path, output_file=output_path + ) + + assert caplog.at_level(expected_level) diff --git a/skops/cli/tests/test_entrypoint.py b/skops/cli/tests/test_entrypoint.py new file mode 100644 index 00000000..2fd3cb60 --- /dev/null +++ b/skops/cli/tests/test_entrypoint.py @@ -0,0 +1,41 @@ +import logging +import pathlib +import sys +from unittest import mock + +import pytest + +from skops.cli.entrypoint import main_cli + + +class TestEntrypoint: + """Integration tests that check that entrypoint calls pass through correctly. + Full coverage of individual entrypoint calls should be done in their own classes. + """ + + @pytest.fixture(autouse=True) + def clear_argv(self): + # Required to clear argv in case Pytest is called on this specific function. + # Otherwise, clogs parser.parse_known_args() in argparse + sys.argv = [""] + + @mock.patch("skops.cli._convert._convert_file") + def test_convert_works_as_expected( + self, + convert_file_mock: mock.MagicMock, + caplog, + ): + """ + Intended as a unit test to make sure, + given 'convert' as the first argument, + the parser is configured correctly + """ + + args = ["convert", "abc.def"] + + main_cli(args) + convert_file_mock.assert_called_once_with( + input_file="abc.def", output_file=pathlib.Path.cwd() / "abc.skops" + ) + + assert caplog.at_level(logging.WARNING) diff --git a/skops/utils/__init__.py b/skops/utils/__init__.py new file mode 100644 index 00000000..e69de29b