Skip to content

implemented matching model and Added slack models #913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
105 changes: 105 additions & 0 deletions backend/apps/common/management/commands/matching_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""A command to perform fuzzy and exact matching of leaders with GitHub users models."""

from django.core.management.base import BaseCommand
from django.db import models
from django.db.utils import DatabaseError
from thefuzz import fuzz

from apps.github.models.user import User
from apps.owasp.models.chapter import Chapter
from apps.owasp.models.committee import Committee
from apps.owasp.models.project import Project

MIN_NO_OF_WORDS = 2


class Command(BaseCommand):
help = "Process raw leaders for multiple models and suggest leaders."

def add_arguments(self, parser):
    """Register the command-line arguments accepted by this command.

    Args:
        parser: Argument parser to extend with a required positional
            ``model_name`` and an optional integer ``--threshold``
            (default 95).
    """
    model_help = "Model name to process leaders for (e.g., Chapter, Committee, Project)"
    parser.add_argument("model_name", type=str, help=model_help)

    parser.add_argument(
        "--threshold",
        type=int,
        default=95,
        help="Threshold for fuzzy matching",
    )

# Command entry point (first part): resolve the model class named on the command
# line and build the list of GitHub users that are candidates for fuzzy matching.
def handle(self, *args, **kwargs):
model_name = kwargs["model_name"]
threshold = kwargs["threshold"]

# Supported models, keyed by lower-case name.
model_map = {
"chapter": Chapter,
"committee": Committee,
"project": Project,
}

# The lookup is case-insensitive because the argument is lower-cased first.
model_class = model_map.get(model_name.lower())

# Unknown model name: report the error and stop without processing anything.
if not model_class:
self.stdout.write(
self.style.ERROR("Invalid model name! Choose from: chapter, committee, project")
)
return

# Keep only users whose login and name are both at least MIN_NO_OF_WORDS
# characters long; users without a name are dropped.
# NOTE(review): despite the constant's name this compares string lengths, not
# word counts, and every User row is iterated in Python to do it.
all_users = User.objects.all()
filtered_users = [
u
for u in all_users
if len(u.login) >= MIN_NO_OF_WORDS and (u.name and len(u.name) >= MIN_NO_OF_WORDS)
]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve user filtering logic.

The current logic doesn't match the apparent intent:

  1. It loads all users into memory before filtering
  2. It checks string length rather than word count despite the variable name

Consider this more efficient approach:

- all_users = User.objects.all()
- filtered_users = [
-     u
-     for u in all_users
-     if len(u.login) >= MIN_NO_OF_WORDS and (u.name and len(u.name) >= MIN_NO_OF_WORDS)
- ]
+ # Filter directly in the database for better performance
+ filtered_users = User.objects.filter(
+     models.Q(login__regex=r'\w+\s+\w+') & 
+     (models.Q(name__isnull=False) & models.Q(name__regex=r'\w+\s+\w+'))
+ )

Or if you meant to filter by character length:

- MIN_NO_OF_WORDS = 2
+ MIN_CHAR_LENGTH = 2

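And query directly in the database (note: the `__length` lookup only works after the transform has been registered, e.g. `CharField.register_lookup(Length)` with `Length` imported from `django.db.models.functions`):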

+ # Filter directly in the database for better performance
+ filtered_users = User.objects.filter(
+     models.Q(login__length__gte=MIN_CHAR_LENGTH) & 
+     (models.Q(name__isnull=False) & models.Q(name__length__gte=MIN_CHAR_LENGTH))
+ )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
all_users = User.objects.all()
filtered_users = [
u
for u in all_users
if len(u.login) >= MIN_NO_OF_WORDS and (u.name and len(u.name) >= MIN_NO_OF_WORDS)
]
# Filter directly in the database for better performance
filtered_users = User.objects.filter(
models.Q(login__regex=r'\w+\s+\w+') &
models.Q(name__isnull=False) & models.Q(name__regex=r'\w+\s+\w+')
)
Suggested change
all_users = User.objects.all()
filtered_users = [
u
for u in all_users
if len(u.login) >= MIN_NO_OF_WORDS and (u.name and len(u.name) >= MIN_NO_OF_WORDS)
]
# Filter directly in the database for better performance
# Renamed constant to reflect that we're filtering by character length
filtered_users = User.objects.filter(
models.Q(login__length__gte=MIN_CHAR_LENGTH) &
models.Q(name__isnull=False) & models.Q(name__length__gte=MIN_CHAR_LENGTH)
)


# Match each instance's raw leaders and store the result as its suggested leaders.
instances = model_class.objects.all()
for instance in instances:
self.stdout.write(f"Processing leaders for {model_name.capitalize()} {instance.id}...")
exact_matches, fuzzy_matches, unmatched_leaders = self.process_leaders(
instance.leaders_raw, threshold, filtered_users
)
# set() removes users that appear more than once across the exact and fuzzy lists.
instance.suggested_leaders.set(list(set(exact_matches + fuzzy_matches)))
instance.save()

# Report the leaders that process_leaders returned as unmatched.
if unmatched_leaders:
self.stdout.write(f"Unmatched leaders for {instance.name}: {unmatched_leaders}")

def process_leaders(self, leaders_raw, threshold, filtered_users):
    """Match raw leader names against GitHub users.

    Each leader is first looked up with a case-insensitive exact match on
    ``login`` or ``name``. Only when no exact match exists is the leader
    compared fuzzily (``fuzz.partial_ratio``) against the login and name of
    every user in ``filtered_users``.

    Args:
        leaders_raw: Iterable of raw leader names; may be empty or ``None``.
            Blank and non-string entries are ignored.
        threshold: Minimum ``partial_ratio`` score that counts as a fuzzy match.
        filtered_users: Candidate users for fuzzy matching.

    Returns:
        A tuple ``(exact_matches, fuzzy_matches, unmatched_leaders)``: two
        lists of users and one list of leader names that matched nothing or
        whose lookup raised a ``DatabaseError``.
    """
    if not leaders_raw:
        return [], [], []

    exact_matches = []
    fuzzy_matches = []
    unmatched_leaders = []

    for leader in leaders_raw:
        # Skip blank / non-string entries. Django interprets ``iexact=None`` as an
        # IS NULL test and "" matches empty names, so such an entry would be
        # reported as an "exact match" for an arbitrary user that has no name.
        if not isinstance(leader, str) or not leader.strip():
            continue

        try:
            leaders_data = User.objects.filter(
                models.Q(login__iexact=leader) | models.Q(name__iexact=leader)
            ).first()
            if leaders_data:
                exact_matches.append(leaders_data)
                self.stdout.write(f"Exact match found for {leader}: {leaders_data}")
                continue

            matches = [
                u
                for u in filtered_users
                if (fuzz.partial_ratio(leader, u.login) >= threshold)
                or (fuzz.partial_ratio(leader, u.name or "") >= threshold)
            ]

            # Users already matched exactly (for another leader) are not
            # reported again as fuzzy matches.
            new_fuzzy_matches = [m for m in matches if m not in exact_matches]
            fuzzy_matches.extend(new_fuzzy_matches)

            if matches:
                for match in new_fuzzy_matches:
                    self.stdout.write(f"Fuzzy match found for {leader}: {match}")
            else:
                unmatched_leaders.append(leader)

        except DatabaseError as e:
            # A failed lookup must not abort the whole run; record the leader
            # as unmatched and keep going.
            unmatched_leaders.append(leader)
            self.stdout.write(self.style.ERROR(f"Error processing leader {leader}: {e}"))

    return exact_matches, fuzzy_matches, unmatched_leaders
31 changes: 25 additions & 6 deletions backend/apps/owasp/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""OWASP app admin."""

from django.contrib import admin
from django.contrib import admin, messages
from django.utils.safestring import mark_safe

from apps.owasp.models.chapter import Chapter
Expand Down Expand Up @@ -41,12 +41,31 @@ def custom_field_owasp_url(self, obj):
f"<a href='https://owasp.org/{obj.key}' target='_blank'>↗️</a>"
)

def approve_suggested_leaders(self, request, queryset):
    """Add every suggested leader of each selected entity to its leaders.

    A success message with the number of approved suggestions is sent to
    the admin user once per selected entity.
    """
    for selected in queryset:
        suggested = selected.suggested_leaders.all()
        selected.leaders.add(*suggested)
        notice = f"Approved {suggested.count()} leader suggestions for {selected.name}"
        self.message_user(request, notice, messages.SUCCESS)

custom_field_github_urls.short_description = "GitHub 🔗"
custom_field_owasp_url.short_description = "OWASP 🔗"
approve_suggested_leaders.short_description = "Approve all suggested leaders"


class LeaderEntityAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
"""Admin class for entities that have leaders."""

class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
autocomplete_fields = ("owasp_repository",)
actions = ["approve_suggested_leaders"]
filter_horizontal = ("suggested_leaders",)


class ChapterAdmin(LeaderEntityAdmin):
autocomplete_fields = ("owasp_repository", "leaders")
list_display = (
"name",
"region",
Expand All @@ -61,11 +80,10 @@ class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
search_fields = ("name", "key")


class CommitteeAdmin(admin.ModelAdmin):
autocomplete_fields = ("owasp_repository",)
class CommitteeAdmin(LeaderEntityAdmin):
autocomplete_fields = ("owasp_repository", "leaders")
search_fields = ("name",)


class EventAdmin(admin.ModelAdmin):
list_display = (
"name",
Expand Down Expand Up @@ -97,6 +115,7 @@ class ProjectAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
"owasp_repository",
"owners",
"repositories",
"leaders",
)
list_display = (
"custom_field_name",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Generated by Django 5.1.6 on 2025-02-22 21:43

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add ``leaders`` and ``suggested_leaders`` M2M fields to chapter, committee and project."""

    dependencies = [
        ("github", "0015_alter_release_author"),
        ("owasp", "0014_project_custom_tags"),
    ]

    # The same pair of many-to-many fields (both pointing at github.user) is
    # added to each model, in the order: leaders, then suggested_leaders.
    operations = [
        migrations.AddField(
            model_name=entity,
            name=field_name,
            field=models.ManyToManyField(
                blank=True,
                related_name=related_name,
                to="github.user",
                verbose_name=verbose_name,
            ),
        )
        for entity in ("chapter", "committee", "project")
        for field_name, related_name, verbose_name in (
            ("leaders", "normal_%(class)s", "Leaders"),
            ("suggested_leaders", "exact_matched_%(class)s", "Exact Match Users"),
        )
    ]
11 changes: 11 additions & 0 deletions backend/apps/owasp/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,17 @@ def get_metadata(self):
extra={"repository": getattr(self.owasp_repository, "name", None)},
)

# Many-to-many relations to GitHub users.
# Candidate leaders: written by the matching_users management command and
# copied into `leaders` by the "approve suggested leaders" admin action.
suggested_leaders = models.ManyToManyField(
    "github.User",
    blank=True,
    related_name="exact_matched_%(class)s",
    verbose_name="Exact Match Users",
)
# Leaders of the entity.
leaders = models.ManyToManyField(
    "github.User",
    blank=True,
    related_name="normal_%(class)s",
    verbose_name="Leaders",
)

def get_related_url(self, url, exclude_domains=(), include_domains=()):
"""Get OWASP entity related URL."""
if (
Expand Down
Loading
Loading