Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new script for analyzing readmes #15

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion github_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _get(self, url, params=None):
except Exception as e:
print(e)

def get_most_recent_commit(self, repo: str, timestamp: str, branch: str) -> requests.models.Response:
def get_most_recent_commit(self, repo: str, timestamp: str, branch: str):
api_url = f"{self.base_url}/repos/{repo}/commits"

params = {
Expand Down
119 changes: 119 additions & 0 deletions instrumentation_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from datetime import datetime
from typing import List, Optional, Set, Tuple

import pandas as pd

from github_client import GithubClient


class Instrumentation:
    """Record of one instrumentation and the variants found for it.

    Args:
        name: Name of the instrumentation.
        has_javaagent: Whether a ``javaagent`` variant exists.
        has_library: Whether a ``library`` variant exists.
        parent: Slash-separated path of the directories above ``name``,
            or ``None`` when the instrumentation has no parent directory.
    """

    def __init__(self, name: str, has_javaagent: bool = False,
                 has_library: bool = False, parent: Optional[str] = None):
        self.name = name
        self.has_javaagent = has_javaagent
        self.has_library = has_library
        self.parent = parent

    def __repr__(self) -> str:
        # A readable repr makes failed assertions and debug output useful.
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"has_javaagent={self.has_javaagent!r}, "
            f"has_library={self.has_library!r}, parent={self.parent!r})"
        )


def analyze_instrumentation(file_list: List[str]) -> List[Instrumentation]:
    """Group variant directory paths into one record per instrumentation.

    Each path is split on ``/``. For paths with more than two components
    the second-to-last component is the instrumentation name and everything
    before it is the parent; otherwise the first component is the name and
    there is no parent. A path ending in ``/javaagent`` or ``/library``
    sets the matching flag on the record.

    Args:
        file_list: Slash-separated directory paths, e.g.
            ``"akka/akka-actor-2.3/javaagent"``.

    Returns:
        One ``Instrumentation`` per distinct name, in first-seen order.
        Records are keyed by name only, so two paths sharing a name update
        the same record and the last path seen decides ``parent``.
    """
    instrumentations = {}
    for path in file_list:
        parts = path.split("/")
        if len(parts) > 2:
            inst_name = parts[-2]
            # Build the parent from the leading components instead of
            # splitting the path on the name: the name may also occur
            # earlier in the path (e.g. "akka/akka/javaagent"), which would
            # cut the parent short, and an empty name would make
            # str.split raise ValueError.
            parent = "/".join(parts[:-2])
        else:
            inst_name = parts[0]
            parent = None

        inst = instrumentations.get(inst_name)
        if inst is None:
            inst = Instrumentation(inst_name)
            instrumentations[inst_name] = inst

        if path.endswith("/javaagent"):
            inst.has_javaagent = True
        elif path.endswith("/library"):
            inst.has_library = True

        inst.parent = parent

    return list(instrumentations.values())


def parse_readme(file_list: List[str]) -> Tuple[Set[str], Set[str]]:
    """Collect the instrumentations that have a README per variant.

    A path is counted only when it has the shape
    ``[...]/<name>/<variant>/README.md``, where ``<variant>`` is exactly
    ``javaagent`` or ``library``. The variant and file name are compared
    case-insensitively; ``<name>`` is returned as written.

    Args:
        file_list: Slash-separated README file paths.

    Returns:
        A ``(javaagent_has_readme, library_has_readme)`` pair of sets
        holding the ``<name>`` component of each matching path.
    """
    javaagent_has_readme = set()
    library_has_readme = set()
    by_variant = {
        "javaagent": javaagent_has_readme,
        "library": library_has_readme,
    }

    for path in file_list:
        parts = path.split("/")
        # Require <name>/<variant>/README.md. With fewer components the
        # index -3 would wrap around and record the file name itself.
        if len(parts) < 3 or parts[-1].lower() != "readme.md":
            continue
        # Compare the whole directory component so that directories such as
        # "my-javaagent" are not mistaken for the javaagent variant.
        target = by_variant.get(parts[-2].lower())
        if target is not None:
            target.add(parts[-3])

    return javaagent_has_readme, library_has_readme


def main():
    """Print javaagent/library coverage and README status for the repo.

    Fetches the file tree of
    ``open-telemetry/opentelemetry-java-instrumentation`` through
    ``GithubClient``, collects the ``javaagent`` and ``library`` directories
    and the README files under ``instrumentation/``, then prints the totals
    followed by a checklist-style line for every library instrumentation.
    """
    repo = "open-telemetry/opentelemetry-java-instrumentation"
    prefix = "instrumentation/"
    client = GithubClient()
    # Tomorrow's date formatted as a midnight timestamp.
    # NOTE(review): presumably an upper bound for the commit lookup, so that
    # everything committed up to now is included - confirm in GithubClient.
    cutoff = (datetime.now().date() + pd.Timedelta(days=1)).strftime(
        "%Y-%m-%dT%H:%M:%SZ")

    commit = client.get_most_recent_commit(repo, cutoff, "main")
    repo_files = client.get_repository_at_commit(
        repository=repo,
        commit_sha=commit
    )

    instrumentations = []
    readmes = []

    for entry in repo_files["tree"]:
        path = entry["path"]
        # Only entries under instrumentation/ are relevant. Skipping the
        # rest keeps READMEs elsewhere in the repository out of the counts.
        if not path.startswith(prefix):
            continue
        # Strip the leading prefix only; str.replace would also remove any
        # later "instrumentation/" occurrence inside the path.
        relative = path[len(prefix):]

        if path.lower().endswith("readme.md"):
            readmes.append(relative)

        if entry["type"] == "tree" \
                and path.endswith(("/javaagent", "/library")) \
                and "/io/opentelemetry/javaagent" not in path \
                and "-common/" not in path:
            instrumentations.append(relative)

    inst_list = analyze_instrumentation(instrumentations)
    javaagent_has_readme, library_has_readme = parse_readme(readmes)
    javaagent: List[Instrumentation] = [
        inst for inst in inst_list if inst.has_javaagent]
    library: List[Instrumentation] = [
        inst for inst in inst_list if inst.has_library]

    total = len(inst_list)
    javaagent_count = len(javaagent)
    library_count = len(library)

    def percent(count):
        # Truncated whole-number percentage; 0 when nothing was found, so an
        # empty tree does not raise ZeroDivisionError.
        return int(count / total * 100) if total else 0

    print(f"{total} instrumentation items")
    print("\n")
    print(f"{javaagent_count} javaagent instrumentations ({percent(javaagent_count)}%)")
    print(f"Readmes: {len(javaagent_has_readme)}\n\n")

    print(f"{library_count} library instrumentations ({percent(library_count)}%)")
    print(f"Readmes: {len(library_has_readme)}")

    print("\nLibraries:\n")
    for inst in library:
        full_inst_name = f"{inst.parent}/{inst.name}" if inst.parent else inst.name
        link = f"https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/{full_inst_name}/library"
        print(f"{'- [x]' if inst.name in library_has_readme else '- [ ]'} [{inst.name}]({link})")


# Run the analysis only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()
55 changes: 55 additions & 0 deletions instrumentation_analysis_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import unittest

from instrumentation_analysis import analyze_instrumentation, parse_readme


class TestInstrumentationAnalysis(unittest.TestCase):
    """Unit tests for ``analyze_instrumentation`` and ``parse_readme``."""

    def test_parse_file_list(self):
        """Paths with a single parent directory yield one record each."""
        file_list = [
            "akka/akka-actor-2.3/javaagent",
            "akka/akka-actor-fork-join-2.5/library"
        ]

        result = analyze_instrumentation(file_list)

        self.assertEqual(len(result), 2)

        self.assertEqual(result[0].name, "akka-actor-2.3")
        self.assertIs(result[0].has_javaagent, True)
        self.assertIs(result[0].has_library, False)
        self.assertEqual(result[0].parent, "akka")

        self.assertEqual(result[1].name, "akka-actor-fork-join-2.5")
        self.assertIs(result[1].has_javaagent, False)
        self.assertIs(result[1].has_library, True)
        self.assertEqual(result[1].parent, "akka")

    def test_parse_file_list_with_two_layers(self):
        """A two-level parent is kept as a slash-joined path."""
        file_list = [
            "spring/spring-webmvc/spring-webmvc-5.3/library",
        ]

        result = analyze_instrumentation(file_list)

        self.assertEqual(len(result), 1)

        self.assertEqual(result[0].name, "spring-webmvc-5.3")
        self.assertIs(result[0].has_javaagent, False)
        self.assertIs(result[0].has_library, True)
        self.assertEqual(result[0].parent, "spring/spring-webmvc")

    def test_parse_readme(self):
        """READMEs are attributed to the directory above the variant."""
        # Named readme_paths rather than input to avoid shadowing the builtin.
        readme_paths = [
            'spring/spring-webmvc/spring-webmvc-5.3/library/README.md',
            'aws-lambda/aws-lambda-core-1.0/javaagent/README.md',
            'ktor/ktor-1.0/library/README.md',
            'java-http-client/library/README.md'
        ]

        javaagents_with_readmes, libraries_with_readmes = parse_readme(readme_paths)

        self.assertIn('aws-lambda-core-1.0', javaagents_with_readmes)
        self.assertIn('spring-webmvc-5.3', libraries_with_readmes)
        self.assertIn('ktor-1.0', libraries_with_readmes)
        self.assertIn('java-http-client', libraries_with_readmes)
Loading