Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workflow fail parsing #28819

Merged
merged 43 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
7d2c5b4
Update recent_fail_summary.yaml
kliao-csa Aug 21, 2023
403f335
Update summarize_fail.py
kliao-csa Aug 21, 2023
b55d589
Update recent_fail_summary.yaml
kliao-csa Aug 21, 2023
5b36101
Update summarize_fail.py
kliao-csa Aug 22, 2023
ea50c42
Update summarize_fail.py
kliao-csa Aug 22, 2023
7699ea6
Update recent_fail_summary.yaml
kliao-csa Aug 22, 2023
f49c3a0
Update recent_fail_summary.yaml
kliao-csa Aug 22, 2023
30e6530
Update summarize_fail.py
kliao-csa Aug 22, 2023
c5f5ebc
Update summarize_fail.py
kliao-csa Aug 22, 2023
1bfd92f
Update summarize_fail.py
kliao-csa Aug 22, 2023
8ea0ab0
Update summarize_fail.py
kliao-csa Aug 22, 2023
91f9760
Update summarize_fail.py
kliao-csa Aug 22, 2023
7df7488
Update summarize_fail.py
kliao-csa Aug 22, 2023
7e26c02
Update summarize_fail.py
kliao-csa Aug 22, 2023
b8d706f
Update recent_fail_summary.yaml
kliao-csa Aug 22, 2023
b4f754c
Update summarize_fail.py
kliao-csa Aug 22, 2023
02c4bfe
code style
kliao-csa Aug 23, 2023
fd4892e
Update summarize_fail.py
kliao-csa Aug 23, 2023
b415e30
Update recent_fail_summary.yaml
kliao-csa Aug 23, 2023
d986a02
Update summarize_fail.py
kliao-csa Aug 23, 2023
f7fe153
Update recent_fail_summary.yaml
kliao-csa Aug 23, 2023
273fdad
Update summarize_fail.py
kliao-csa Aug 23, 2023
529f19e
Update summarize_fail.py
kliao-csa Aug 23, 2023
94b1f23
styling
kliao-csa Aug 23, 2023
1d3a867
Update summarize_fail.py
kliao-csa Aug 25, 2023
f95a5f8
Update summarize_fail.py
kliao-csa Aug 25, 2023
a1690af
Update recent_fail_summary.yaml
kliao-csa Aug 25, 2023
6099168
Update summarize_fail.py
kliao-csa Aug 25, 2023
317ad6e
Update summarize_fail.py
kliao-csa Aug 25, 2023
805a3df
Update summarize_fail.py
kliao-csa Aug 25, 2023
2462e1e
Update summarize_fail.py
kliao-csa Aug 25, 2023
75a44b1
Update summarize_fail.py
kliao-csa Aug 25, 2023
4d0db11
Update summarize_fail.py
kliao-csa Aug 25, 2023
1e29952
Update summarize_fail.py
kliao-csa Aug 25, 2023
b9dc967
Update summarize_fail.py
kliao-csa Aug 25, 2023
522fd31
Update summarize_fail.py
kliao-csa Aug 25, 2023
52a4798
Update summarize_fail.py
kliao-csa Aug 25, 2023
a69a551
Update summarize_fail.py
kliao-csa Aug 25, 2023
10b3ec5
Update summarize_fail.py
kliao-csa Aug 29, 2023
c1b846f
Update summarize_fail.py
kliao-csa Aug 29, 2023
e5ffd31
Update summarize_fail.py
kliao-csa Aug 29, 2023
a6b42b6
Restyled by autopep8
restyled-commits Aug 29, 2023
9d2f9e7
Restyled by isort
restyled-commits Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions .github/workflows/recent_fail_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,20 @@ jobs:
name: Summarize Recent Workflow Failures
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Setup
run: |
gh run list -R project-chip/connectedhomeip -b master -s failure --json displayTitle,workflowName > run_list.json
pip install pandas
env:
GH_TOKEN: ${{ github.token }}
- uses: actions/checkout@v3
- run: pip install pandas python-slugify
- name: Run Summarization Script
run: python scripts/tools/summarize_fail.py
env:
GH_TOKEN: ${{ github.token }}
- name: Upload Logs
uses: actions/upload-artifact@v3
with:
name: workflow-fail-summary
path: |
run_list.json
recent_fails.csv
recent_fails_frequency.csv
recent_fails_logs
retention-days: 5

101 changes: 91 additions & 10 deletions scripts/tools/summarize_fail.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,93 @@
import logging
import os
import subprocess

import pandas as pd
from slugify import slugify

# Known failure signatures, grouped by workflow category (the part of the
# workflow name before the first " - "). Each signature is a substring searched
# for in the failed-step log; "short" is the label reported as the root cause
# and "detail" is a longer human-readable explanation.
# Entry order matters: the first signature found in a log wins.
error_catalog = {
    'CodeQL': {
        'No space left on device': {
            'short': 'Ran out of space',
            'detail': 'Exception with signature "No space left on device"',
        },
        'Check that the disk containing the database directory has ample free space.': {
            'short': 'Ran out of space',
            'detail': 'Fatal internal error with message indicating that disk space most likely ran out',
        },
    },
    'Build example': {
        'Could not find a version that satisfies the requirement': {
            'short': 'Requirements issue',
            'detail': 'Unable to install a requirements in Python requirements.txt',
        },
        'No module named': {
            'short': 'Missing module',
            'detail': 'Expected module was missing',
        },
    },
    'Full builds': {
        'No space left on device': {
            'short': 'Ran out of space',
            'detail': 'Exception with signature "No space left on device"',
        },
    },
}


def process_fail(id, pr, start_time, workflow):
    """Fetch the failed-step log of one workflow run and guess its root cause.

    The log is saved to
    ``recent_fails_logs/<pr>/<workflow>/<start_time>/fail_log.txt`` (each path
    component slugified) and then searched for the signatures that
    ``error_catalog`` lists for the workflow's category.

    Args:
        id: Run identifier handed to ``gh run view``. (The name shadows the
            builtin; it is kept so existing callers keep working.)
        pr: Display title of the run; used in the log path and echoed back.
        start_time: Start time of the run; used in the log path.
        workflow: Workflow name. The text before the first " - " selects the
            ``error_catalog`` category.

    Returns:
        ``[pr, workflow, root_cause]`` where ``root_cause`` is the "short"
        label of the first matching catalog entry, or "Unknown cause".
    """
    # Lazy %-style arguments: the original message had "{...}" placeholders
    # but was not an f-string, so the values were never interpolated.
    logging.info("Processing failure in %s, workflow %s that started at %s.",
                 pr, workflow, start_time)

    logging.info("Building output file structure.")
    output_path = f"recent_fails_logs/{slugify(pr)}/{slugify(workflow)}/{slugify(start_time)}"
    # exist_ok: a re-run, or two runs with identical title/workflow/start time,
    # must not abort the whole summary.
    os.makedirs(output_path, exist_ok=True)
    log_path = f"{output_path}/fail_log.txt"

    logging.info("Gathering raw fail logs.")
    gh_command = ["gh", "run", "view", "-R", "project-chip/connectedhomeip",
                  str(id), "--log-failed"]
    with open(log_path, "w") as log_sink:
        try:
            # Argument list + stdout redirection instead of a shell string, so
            # nothing derived from run metadata is ever parsed by a shell.
            completed = subprocess.run(gh_command, stdout=log_sink)
        except OSError as err:
            # Best effort, as before: a missing log yields "Unknown cause".
            logging.warning("Could not run gh for run %s: %s", id, err)
        else:
            if completed.returncode != 0:
                logging.warning("gh run view exited with status %s for run %s.",
                                completed.returncode, id)

    logging.info("Collecting info on likely cause of failure.")
    # errors="replace": one undecodable byte in a build log should not stop
    # the analysis of every remaining run.
    with open(log_path, encoding="utf-8", errors="replace") as fail_log_file:
        fail_log = fail_log_file.read()

    root_cause = "Unknown cause"
    workflow_category = workflow.split(" - ")[0]
    # Catalog order is significant: the first signature present in the log wins.
    for error_message, description in error_catalog.get(workflow_category, {}).items():
        if error_message in fail_log:
            root_cause = description["short"]
            break
    return [pr, workflow, root_cause]


def main():
    """Summarize recent failed workflow runs on the master branch.

    Steps, in order:
      1. Ask the GitHub CLI for failed runs and save the JSON to run_list.json.
      2. Print the failing runs and write them to recent_fails.csv.
      3. Print each workflow's share of the failures and write it to
         recent_fails_frequency.csv.
      4. Call process_fail() for every run and print the likely root causes.

    Raises:
        subprocess.CalledProcessError: if ``gh run list`` exits non-zero.
    """
    logging.info("Gathering recent fails information into run_list.json.")
    with open("run_list.json", "w") as run_list_file:
        # Argument list + stdout redirection instead of a shell string;
        # check=True surfaces a gh failure directly rather than as a JSON
        # parse error on an empty file further down.
        subprocess.run(
            ["gh", "run", "list", "-R", "project-chip/connectedhomeip",
             "-b", "master", "-s", "failure",
             "--json", "databaseId,displayTitle,startedAt,workflowName"],
            stdout=run_list_file, check=True)

    logging.info("Reading run_list.json into a DataFrame.")
    df = pd.read_json("run_list.json")
    if df.empty:
        # An empty run list has no columns, so the renaming below would fail.
        print("No recent fails found.")
        return

    logging.info("Listing recent fails.")
    # Rename by key rather than by position so the labels cannot be attached
    # to the wrong data if the JSON key order ever differs.
    column_names = {
        "databaseId": "ID",
        "displayTitle": "Pull Request",
        "startedAt": "Start Time",
        "workflowName": "Workflow",
    }
    df = df.rename(columns=column_names)[list(column_names.values())]
    print("Recent Fails:")
    print(df.to_string(columns=["Pull Request", "Workflow"], index=False))
    print()
    df.to_csv("recent_fails.csv", index=False)

    logging.info("Listing frequency of recent fails by workflow.")
    # TODO: reformat the percentage from "50.0" to "50%".
    frequency = (
        df["Workflow"]
        .value_counts(normalize=True)
        .mul(100)
        .astype(str)
        .reset_index(name="Percentage")
    )
    print("Percentage Frequency of Fails by Workflow:")
    print(frequency.to_string(index=False))
    print()
    # This is the file name the workflow's upload step lists as an artifact;
    # the previous name (recent_workflow_fails_frequency.csv) was never uploaded.
    frequency.to_csv("recent_fails_frequency.csv")

    logging.info("Conducting fail information parsing.")
    root_causes = df.apply(
        lambda row: process_fail(row["ID"], row["Pull Request"],
                                 row["Start Time"], row["Workflow"]),
        axis=1, result_type="expand")
    root_causes.columns = ["Pull Request", "Workflow", "Cause of Failure"]
    print("Likely Root Cause of Recent Fails:")
    print(root_causes.to_string(index=False))


# NOTE(review): these top-level statements look like the pre-change script body
# shown by the diff view (the lines the change replaced with main()) — confirm
# before treating them as live code.
#
# What they do, in order: load run_list.json, label its two columns, print the
# table and save it to recent_fails.csv, then compute each workflow's share of
# the failures as a percentage string, print it and save it to
# recent_fails_frequency.csv.
df = pd.read_json("run_list.json")
# Assumes run_list.json holds exactly two fields per run — TODO confirm.
df.columns = ["Pull Request", "Workflow"]
print("Recent Failures:")
print(df.to_string(index=False))
df.to_csv("recent_fails.csv", index=False)
print()
print("Percentage Frequency:")
# Normalized counts scaled to 0-100, rendered as text with a trailing "%".
frequency = df["Workflow"].value_counts(normalize=True).mul(100).astype(str) + "%"
print(frequency.to_string())
frequency.to_csv("recent_fails_frequency.csv")
if __name__ == "__main__":
    # The script reports its progress through logging.info(); without this the
    # root logger stays at WARNING and every one of those messages is dropped.
    logging.basicConfig(level=logging.INFO)
    main()