def github_pages_publish(
    github_pages_dir,
    github_pages_dir_thisversion,
    name,
    overwrite,
    git_repo,
    html_output_path,
):
    """
    Publishes a version of the site to GitHub Pages.

    Copies the HTML output into the directory for this version, appends a link
    to the new version to the top-level `index.html` (unless that exact line is
    already present), and asks `git_repo` to publish the result.

    Args:
        github_pages_dir (str): Root directory for GitHub Pages.
        github_pages_dir_thisversion (str): Directory for the specific version.
        name (str): Version name.
        overwrite (bool): Whether to overwrite existing files. Passed to
            `shutil.copytree` as `dirs_exist_ok`, so an existing version
            directory raises `FileExistsError` when this is False.
        git_repo (GitHelper): Git repository helper instance; only its
            `publish()` method is used here.
        html_output_path (str): Path to the generated HTML files.
    """
    # Make sure the parent of the version directory (e.g. versions/) exists.
    # This has to be the *directory* part of the path: taking the last path
    # component instead would create a stray folder named after the version in
    # the current working directory.
    parent_dir = os.path.dirname(github_pages_dir_thisversion)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    shutil.copytree(
        html_output_path,
        github_pages_dir_thisversion,
        dirs_exist_ok=overwrite,
    )

    # Handle special characters, converting e.g. ^ to %5E
    name_url = quote(name)

    # Write to index.html, if needed
    # NOTE(review): the anchor markup below was reconstructed (link target is
    # the URL-quoted version name, link text is the raw name) -- confirm it
    # matches the lines already present in published index.html files.
    index_html_file = os.path.join(github_pages_dir, "index.html")
    new_line = f'<a href="versions/{name_url}/index.html">{name}</a><p>\n'
    already_listed = False
    if os.path.exists(index_html_file):
        with open(index_html_file) as f:
            already_listed = any(line.strip() == new_line.strip() for line in f)
    if not already_listed:
        with open(index_html_file, "a") as f:
            f.write(new_line)

    # Publish to GitHub.io
    git_repo.publish()


def github_pages_args(github_pages_dir, name, overwrite):
    """
    Prepares the GitHub Pages directory for publishing.

    Ensures a name is provided, initializes a `GitHelper` object (imported at
    module level from `git_helper`), and checks if the version directory
    exists, handling overwrite conditions.

    Args:
        github_pages_dir (str): Root directory for GitHub Pages.
        name (str): Version name.
        overwrite (bool): Whether to overwrite an existing version directory.

    Returns:
        tuple: (str, GitHelper) - The version directory path and `GitHelper` instance.

    Raises:
        RuntimeError: If no name is provided.
        FileExistsError: If the directory exists and overwrite is not allowed.
    """
    # Check that you gave a name
    if not name:
        raise RuntimeError(
            "When specifying -g/--github-pages-dir, you must also provide -n/--name",
        )

    # Set up GitHelper object
    git_repo = GitHelper(github_pages_dir, name)

    # Each published version lives in <github_pages_dir>/versions/<name>
    this_version_dir = os.path.join(github_pages_dir, "versions", name)
    if os.path.exists(this_version_dir) and not overwrite:
        raise FileExistsError(
            f"Add -o to overwrite existing directory '{this_version_dir}'",
        )
    print(f"Publishing to '{this_version_dir}'")
    return this_version_dir, git_repo
+) +def build(config_path, github_pages_dir, name, overwrite): """ Build a Jupyter book based on the TOC in CONFIG_PATH. Called by `cupid-webpage`. @@ -37,8 +146,16 @@ def build(config_path): None """ - with open(config_path) as fid: - control = yaml.safe_load(fid) + control = get_control_dict(config_path) + + # Check and process arguments + github_pages_dir = os.path.realpath(github_pages_dir) + if github_pages_dir: + github_pages_dir_thisversion, git_repo = github_pages_args( + github_pages_dir, + name, + overwrite, + ) run_dir = control["data_sources"]["run_dir"] @@ -46,31 +163,34 @@ def build(config_path): subprocess.run( ["jupyter-book", "build", f"{run_dir}/computed_notebooks", "--all"], ) + html_output_path = os.path.join(run_dir, "computed_notebooks", "_build", "html") for component in control["compute_notebooks"]: for notebook in control["compute_notebooks"][component]: + # Skip this notebook if it wasn't run due to bad environment + info = control["compute_notebooks"][component][notebook] + if is_bad_env(control, info): + print(f"Skipping {notebook}: Not run due to bad environment") + continue + if "external_tool" in control["compute_notebooks"][component][notebook]: - if ( - control["compute_notebooks"][component][notebook][ - "external_tool" - ].get("tool_name") - == "ADF" - ): - if os.path.exists(f"{run_dir}/ADF_output"): - shutil.copytree( - f"{run_dir}/ADF_output", - f"{run_dir}/computed_notebooks/_build/html/ADF", - ) - elif ( - control["compute_notebooks"][component][notebook][ - "external_tool" - ].get("tool_name") - == "ILAMB" - ): - if os.path.exists(f"{run_dir}/ILAMB_output"): - shutil.copytree( - f"{run_dir}/ILAMB_output", - f"{run_dir}/computed_notebooks/_build/html/ILAMB", - ) + tool_name = control["compute_notebooks"][component][notebook][ + "external_tool" + ].get("tool_name") + if tool_name in ["ADF", "ILAMB"]: + shutil.copytree( + f"{run_dir}/{tool_name}_output", + os.path.join(html_output_path, tool_name), + ) + + if github_pages_dir: + 
class GitHelper:
    """
    Thin wrapper around the `git` command line, used to publish CUPiD output
    to a repository that is served through GitHub Pages.
    """

    # The two GitHub remote forms understood by get_publish_url():
    #   git@github.com:<user>/<repo>[.git]
    #   https://github.com/<user>/<repo>[.git]
    # User and repository names may contain hyphens and dots, so they are
    # matched as "anything but a slash" rather than with \w.
    _GITHUB_REMOTE_RE = re.compile(
        r"(?:git@github\.com:|https://github\.com/)"
        r"(?P<user>[^/\s]+)/(?P<repo>[^/\s]+?)(?:\.git)?/?$",
    )

    def __init__(self, publish_dir, version_name, publish_url=None):
        """
        Initializes an instance for managing the publishing of a Git repository.

        This constructor:
        1. Sets the version name and publish directory.
        2. Checks if the publish directory is clean (no uncommitted changes).
        3. Determines the publish URL if not provided.
        4. Constructs the final published URL, ensuring proper encoding.

        Args:
            publish_dir (str): The directory where the repository is located.
            version_name (str): The version identifier for the publication.
            publish_url (str, optional): The base publish URL. If not provided,
                it is derived from the repository.

        Attributes:
            version_name (str): The version being published.
            publish_dir (str): The directory containing the repository
                (absolute, with symlinks resolved).
            publish_url (str): The base URL where the version will be published.
            published_to_url (str): The full URL of the published version,
                with special characters encoded.

        Raises:
            RuntimeError: If the publish directory is not clean.
        """
        self.version_name = version_name
        # realpath() already returns an absolute, normalized path
        self.publish_dir = os.path.realpath(publish_dir)
        self.check_pub_dir_clean()

        if publish_url is None:
            publish_url = self.get_publish_url()
        self.publish_url = publish_url

        # Get URL to print, handling spaces and special characters
        self.published_to_url = "/".join(
            [self.publish_url, "versions", self.version_name],
        )
        self.published_to_url = quote(self.published_to_url)
        # quote() also encodes the ":" after the scheme; put that one back
        self.published_to_url = re.sub("http(s?)%3A", r"http\1:", self.published_to_url)

    def _has_changes(self):
        """
        Returns True if `git status --porcelain` reports any modified, staged
        or untracked path. The porcelain format is used because, unlike the
        human-readable status text, it does not depend on locale or git version.
        """
        status = self.run_git_cmd(["status", "--porcelain"])
        return any(line.strip() for line in status)

    @staticmethod
    def _pages_base_url(publish_repo_url):
        """
        Converts a GitHub remote URL into the matching GitHub Pages URL.

        Args:
            publish_repo_url (str): Remote URL, either
                `git@github.com:<user>/<repo>[.git]` or
                `https://github.com/<user>/<repo>[.git]`.

        Returns:
            str: `https://<user>.github.io/<repo>`

        Raises:
            NotImplementedError: If the remote URL format is not recognized.
        """
        match = GitHelper._GITHUB_REMOTE_RE.search(publish_repo_url.strip())
        if match is None:
            raise NotImplementedError(
                f"Not sure how to handle publish_repo_url {publish_repo_url}.",
            )
        return f"https://{match.group('user')}.github.io/{match.group('repo')}"

    def check_pub_dir_clean(self):
        """
        Checks if the Git working directory in the publish directory is clean.
        If the working tree is not clean, it raises a `RuntimeError`.
        """
        if self._has_changes():
            raise RuntimeError(f"self.publish_dir not clean: {self.publish_dir}")

    def commit(self, modified_files, new_files):
        """
        Stages, commits, and pushes changes in the Git repository within the publish directory.

        This function:
        1. Checks the repository status.
        2. If there are changes to commit, it:
           - Stages everything under the publish directory.
           - Commits the changes with a message including the version name.
           - Pushes the commit to the remote repository.
        3. If no changes are detected, it prints a message indicating there is nothing to commit.

        Args:
            modified_files (list[str]): Modified files, as reported by `publish()`.
                Accepted for interface compatibility; staging covers the whole
                publish directory regardless of this list.
            new_files (list[str]): New files, as reported by `publish()`.
                Accepted for interface compatibility, see above.

        Prints:
            - Staging, committing, and pushing progress updates.
            - The publish URL if changes are successfully pushed.
        """
        if not self._has_changes():
            print("Nothing to commit")
            return

        # Stage. run_git_cmd() runs git with -C <publish_dir>, so "." is the
        # publish directory; this avoids building a glob from a path that may
        # itself contain glob characters.
        print("Staging...")
        self.run_git_cmd(["add", "--all", "."])

        # Commit
        print("Committing...")
        self.run_git_cmd(["commit", "-m", f"Add version '{self.version_name}'"])

        # Push
        print("Pushing...")
        self.run_git_cmd("push")

        print("Done! Published to " + self.published_to_url)
        print("It might take a bit for GitHub.io to generate that URL")

    def get_publish_url(self):
        """
        Retrieves the publish URL for the Git repository based on its remote origin URL.

        The function determines the repository's remote URL and root directory, then constructs
        the appropriate GitHub Pages URL if the repository is hosted on GitHub. If the publish
        directory is a sub-directory of the repository, that sub-path is appended to the URL.

        Returns:
            str: The constructed publish URL for the repository.

        Raises:
            NotImplementedError: If the remote URL format is not recognized.
        """
        cmd = "config --get remote.origin.url"
        publish_repo_url = self.run_git_cmd(cmd, cwd=self.publish_dir)[0]

        cmd = "rev-parse --show-toplevel"
        publish_dir_repo_top = self.run_git_cmd(cmd, cwd=self.publish_dir)[0]

        # Path of the publish directory relative to the repository root, as a
        # URL fragment ("" when the publish directory *is* the root)
        rel_path = os.path.relpath(
            self.publish_dir,
            os.path.realpath(publish_dir_repo_top),
        )
        subdirs = "" if rel_path == "." else "/" + rel_path.replace(os.sep, "/")

        return self._pages_base_url(publish_repo_url) + subdirs

    def publish(self):
        """
        Identifies modified and untracked files in the Git repository within the publish directory
        and commits the changes.

        The function runs `git status --porcelain` to determine modified and new (untracked)
        paths. It then prints the files being updated or added and commits them using
        `self.commit()`. Paths are printed as git reports them.

        Raises:
            subprocess.CalledProcessError: If a Git command fails.
        """
        modified_files = []
        new_files = []
        for line in self.run_git_cmd(["status", "--porcelain"]):
            if not line.strip():
                continue
            # Porcelain v1 lines are "XY <path>": two status letters, a space,
            # then the path
            state, path = line[:2], line[3:]
            if state == "??":
                new_files.append(path)
            elif "M" in state:
                modified_files.append(path)

        if modified_files:
            print("Updating files:\n " + "\n ".join(modified_files))
        if new_files:
            print("Adding files:\n " + "\n ".join(new_files))

        self.commit(modified_files, new_files)

    def run_git_cmd(self, git_cmd, cwd=None):
        """
        Executes a Git command against the publish directory and returns the output as a list of lines.

        Args:
            git_cmd (str or list): The Git command to execute. A string is split on whitespace,
                so arguments that contain spaces must be passed as a list.
            cwd (str, optional): The directory where command should be executed. Defaults to the
                current working directory at call time. Git itself is always pointed at
                `self.publish_dir` via `-C`.

        Returns:
            list: A list of strings representing the output lines of the command
                (stdout and stderr combined).

        Raises:
            subprocess.CalledProcessError: If the Git command fails, prints the command, working
                directory, and error message before re-raising the exception.
        """
        if not isinstance(git_cmd, list):
            git_cmd = git_cmd.split()
        git_cmd = ["git", "-C", self.publish_dir] + git_cmd
        try:
            git_result = subprocess.check_output(
                git_cmd,
                stderr=subprocess.STDOUT,
                text=True,
                cwd=cwd,
            ).splitlines()
        except subprocess.CalledProcessError as e:
            print("Command: " + " ".join(e.cmd))
            print("Working directory: " + (cwd or os.getcwd()))
            print("Message: ", e.stdout)
            raise
        return git_result
def is_bad_env(control, nb_info):
    """
    Tell whether the environment requested by a notebook failed its check.

    Looks up the notebook's `kernel_name` in `control["env_check"]` and
    returns the negation of the stored value.

    Args:
        control (dict): Control dictionary; must contain an "env_check"
            mapping keyed by kernel name.
        nb_info (dict): Notebook entry; must contain "kernel_name".

    Returns:
        bool: True when the stored check result for the kernel is falsy.

    Raises:
        KeyError: If "env_check", "kernel_name", or the kernel's entry is missing.
    """
    env_check = control["env_check"]
    kernel_name = nb_info["kernel_name"]
    env_is_ok = env_check[kernel_name]
    return not env_is_ok
You should see something like the following: +```yml + steps: + - name: Checkout + uses: actions/checkout@v4 +``` +Add two lines so it looks like this (be careful to include the spaces at the beginning of each line): +```yml + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + lfs: true +``` +4. Press Commit Changes and handle the dialog box that pops up. + +### Cloning and preparing your GitHub repo + +Next, you'll need to clone your repo to the machine where you're running CUPiD. +1. On that machine, navigate to where you want the repo to be cloned. This should be outside your local copy of the [CUPiD repo](https://github.com/NCAR/CUPiD)! +2. Clone your repo. +3. `cd` into your repo. + +You should now be in the directory `/path/to/my-repo/`. + +Next, we're going to enable Git Large File Storage (`git lfs`). This makes it so that large binary files (things like images) will be given special handling, including a feature to make it more lightweight to clone your repo in the future. (Note that these steps will only need to be performed once, not every time you clone it.) +1. Do `git lfs install`. +2. Create a text file, `.gitattributes`, with the following lines: +``` +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.svg filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.woff2 filter=lfs diff=lfs merge=lfs -text +*.mo filter=lfs diff=lfs merge=lfs -text +*.map filter=lfs diff=lfs merge=lfs -text +``` +3. `git add .gitattributes` +4. `git commit -m "Enable git lfs."` + +### Publishing your notebook + +Once all that's done, publishing is as simple as calling `cupid-webpage` with two extra options: +- `-g/--github-pages-dir`: The path to the directory where you cloned your repo. 
+- `-n/--name`: The name you want to give to this published version of your notebook. + +So, e.g.: +```shell +cupid-webpage -g /path/to/my-repo/ -n my-first-published-notebook +``` + +This will produce a bit of extra printout at the end, something like: +``` +Done! Published to https://yourusername.github.io/my-repo/versions/my-first-published-notebook +It might take a bit for GitHub.io to generate that URL +``` + +You can share that URL with someone for them to see the published version of that notebook. You can also share `https://yourusername.github.io/my-repo` if you want them to see a list of all the versions you've published.