Skip to content

Implemented matching model and added Slack models #913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
7 changes: 7 additions & 0 deletions backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ collect-static:
django-shell:
@CMD="python manage.py shell" $(MAKE) exec-backend-command-it

# Run the matching_users management command for one model.
# Usage: make match-user model=<chapter|committee|project|member>
match-user:
@CMD="python manage.py matching_users $(model)" $(MAKE) exec-backend-command-it

dump-data:
@echo "Dumping Nest data"
@CMD="python manage.py dumpdata github owasp --indent=2" $(MAKE) exec-backend-command > backend/data/nest.json
Expand Down Expand Up @@ -50,6 +53,10 @@ load-data:
@echo "Loading Nest data"
@CMD="python manage.py load_data" $(MAKE) exec-backend-command

# Run the load_slack_data management command through exec-backend-command.
load-slack-data:
@echo "Loading Slack data"
@CMD="python manage.py load_slack_data" $(MAKE) exec-backend-command

merge-migrations:
@CMD="python manage.py makemigrations --merge" $(MAKE) exec-backend-command

Expand Down
139 changes: 139 additions & 0 deletions backend/apps/common/management/commands/matching_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""A command to perform fuzzy and exact matching of leaders/slack members with User model."""

from django.core.management.base import BaseCommand
from django.db.utils import DatabaseError
from thefuzz import fuzz

from apps.github.models.user import User
from apps.owasp.models.chapter import Chapter
from apps.owasp.models.committee import Committee
from apps.owasp.models.project import Project
from apps.slack.models import Member

# Minimum length a GitHub login/name must have for the user to be considered for matching.
# NOTE(review): despite the name, _is_valid_user compares this against len() of the
# login/name strings, i.e. a character count rather than a word count.
MIN_NO_OF_WORDS = 2


class Command(BaseCommand):
help = "Match leaders or Slack members with GitHub users using exact and fuzzy matching."

def add_arguments(self, parser):
    """Register the command-line arguments of the matching command.

    Adds the required positional ``model_name`` (restricted to the supported
    models) and the optional integer ``--threshold`` used for fuzzy matching.

    Args:
        parser: The argument parser this command's arguments are added to.
    """
    supported_models = ["chapter", "committee", "project", "member"]

    parser.add_argument(
        "model_name",
        choices=supported_models,
        help="Model name to process: chapter, committee, project, or member",
        type=str,
    )
    parser.add_argument(
        "--threshold",
        default=75,
        help="Threshold for fuzzy matching (0-100)",
        type=int,
    )

def handle(self, *args, **kwargs):
model_name = kwargs["model_name"].lower()
threshold = max(0, min(kwargs["threshold"], 100))

model_map = {
"chapter": (Chapter, "suggested_leaders"),
"committee": (Committee, "suggested_leaders"),
"project": (Project, "suggested_leaders"),
"member": (Member, "suggested_users"),
}

if model_name not in model_map:
self.stdout.write(
self.style.ERROR(
"Invalid model name! Choose from: chapter, committee, project, member"
)
)
return

model_class, relation_field = model_map[model_name]

# Pre-fetch GitHub users
all_users = User.objects.values("id", "login", "name")
filtered_users = {
u["id"]: u for u in all_users if self._is_valid_user(u["login"], u["name"])
}

instances = model_class.objects.prefetch_related(relation_field)
for instance in instances:
self.stdout.write(f"Processing {model_name} {instance.id}...")
if model_name == "member":
leaders_raw = [field for field in [instance.username, instance.real_name] if field]
else:
leaders_raw = instance.leaders_raw
Comment on lines +64 to +66
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add validation for leaders_raw attribute existence.

The code assumes all model instances have a leaders_raw attribute, but there's no validation to ensure it exists. This could lead to AttributeError exceptions.

if model_name == "member":
    leaders_raw = [field for field in [instance.username, instance.real_name] if field]
else:
-    leaders_raw = instance.leaders_raw
+    leaders_raw = getattr(instance, 'leaders_raw', [])
+    if not leaders_raw and hasattr(instance, 'leaders_raw'):
+        self.stdout.write(self.style.WARNING(f"Empty leaders_raw for {model_name} {instance.id}"))
+    elif not hasattr(instance, 'leaders_raw'):
+        self.stdout.write(self.style.ERROR(f"leaders_raw attribute not found for {model_name} {instance.id}"))
+        leaders_raw = []
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
leaders_raw = [field for field in [instance.username, instance.real_name] if field]
else:
leaders_raw = instance.leaders_raw
if model_name == "member":
leaders_raw = [field for field in [instance.username, instance.real_name] if field]
else:
leaders_raw = getattr(instance, 'leaders_raw', [])
if not leaders_raw and hasattr(instance, 'leaders_raw'):
self.stdout.write(self.style.WARNING(f"Empty leaders_raw for {model_name} {instance.id}"))
elif not hasattr(instance, 'leaders_raw'):
self.stdout.write(self.style.ERROR(f"leaders_raw attribute not found for {model_name} {instance.id}"))
leaders_raw = []


exact_matches, fuzzy_matches, unmatched = self.process_leaders(
leaders_raw, threshold, filtered_users
)

matched_user_ids = {user["id"] for user in exact_matches + fuzzy_matches}
getattr(instance, relation_field).set(matched_user_ids)

if unmatched:
self.stdout.write(f"Unmatched for {instance}: {unmatched}")

def _is_valid_user(self, login, name):
    """Return whether a GitHub user has a usable login and display name.

    Args:
        login: The user's GitHub login; may be empty or None.
        name: The user's display name; may be empty or None.

    Returns:
        bool: True only when both values are present and each is at least
        MIN_NO_OF_WORDS characters long.
    """
    # Check presence before calling len(): a None login would raise TypeError, and a
    # missing name would otherwise leak ""/None out of a predicate meant to return a bool.
    if not login or not name:
        return False
    return len(login) >= MIN_NO_OF_WORDS and len(name) >= MIN_NO_OF_WORDS

def process_leaders(self, leaders_raw, threshold, filtered_users):
    """Match raw leader strings against GitHub users, exactly first and then fuzzily.

    A leader is compared case-insensitively with every user's login and name. If at
    least one user matches exactly, fuzzy matching is skipped for that leader;
    otherwise every user whose login or name reaches ``threshold`` under
    ``fuzz.token_sort_ratio`` is taken as a fuzzy match.

    Args:
        leaders_raw: Iterable of leader strings (names or logins); may be empty or None.
        threshold: Minimum ``fuzz.token_sort_ratio`` score (0-100) for a fuzzy match.
        filtered_users: Mapping of user id to a dict with "id", "login" and "name" keys.

    Returns:
        tuple: ``(exact_matches, fuzzy_matches, unmatched_leaders)``. The first two are
        lists of user dicts in which each user appears at most once across both lists;
        the third lists the leader strings for which no candidate user was found.
    """
    if not leaders_raw:
        return [], [], []

    # Lower-case every login/name once up front instead of once per leader.
    candidates = [
        (user, (user["login"] or "").lower(), (user["name"] or "").lower())
        for user in filtered_users.values()
    ]

    exact_matches = []
    fuzzy_matches = []
    unmatched_leaders = []
    seen_leaders = set()
    # Ids already placed in either result list; set membership replaces the linear
    # "dict in list" scan and keeps users from being appended more than once.
    matched_ids = set()

    for leader in leaders_raw:
        if not leader:
            continue

        leader_key = leader.lower()
        # Matching is case-insensitive, so differently-cased duplicates are the same leader.
        if leader_key in seen_leaders:
            continue
        seen_leaders.add(leader_key)

        # leader_key is never empty here, so a missing name ("") cannot match by accident.
        exact = [user for user, login, name in candidates if leader_key in (login, name)]
        if exact:
            for user in exact:
                self.stdout.write(f"Exact match found for {leader}: {user['login']}")
                if user["id"] not in matched_ids:
                    matched_ids.add(user["id"])
                    exact_matches.append(user)
            continue

        fuzzy = [
            user
            for user, login, name in candidates
            if fuzz.token_sort_ratio(leader_key, login) >= threshold
            or (name and fuzz.token_sort_ratio(leader_key, name) >= threshold)
        ]

        # A leader is unmatched only when there is no candidate at all; candidates that
        # were already recorded for an earlier leader still count as a match.
        if not fuzzy:
            unmatched_leaders.append(leader)
            continue

        for user in fuzzy:
            if user["id"] in matched_ids:
                continue
            matched_ids.add(user["id"])
            fuzzy_matches.append(user)
            self.stdout.write(f"Fuzzy match found for {leader}: {user['login']}")

    return exact_matches, fuzzy_matches, unmatched_leaders
32 changes: 26 additions & 6 deletions backend/apps/owasp/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""OWASP app admin."""

from django.contrib import admin
from django.contrib import admin, messages
from django.utils.safestring import mark_safe

from apps.owasp.models.chapter import Chapter
Expand Down Expand Up @@ -41,12 +41,31 @@ def custom_field_owasp_url(self, obj):
f"<a href='https://owasp.org/{obj.key}' target='_blank'>↗️</a>"
)

def approve_suggested_leaders(self, request, queryset):
    """Approve all suggested leaders for selected entities.

    For each selected entity, every user in ``suggested_leaders`` is added to
    ``leaders`` and a success message with the number of approved users is shown.

    Args:
        request: The admin request the success messages are attached to.
        queryset: The entities selected for this admin action.
    """
    for entity in queryset:
        approved = list(entity.suggested_leaders.all())
        entity.leaders.add(*approved)

        summary = f"Approved {len(approved)} leader suggestions for {entity.name}"
        self.message_user(request, summary, messages.SUCCESS)

custom_field_github_urls.short_description = "GitHub 🔗"
custom_field_owasp_url.short_description = "OWASP 🔗"
approve_suggested_leaders.short_description = "Approve all suggested leaders"


class LeaderEntityAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
    """Shared admin base for entities that have leaders.

    Exposes the ``approve_suggested_leaders`` bulk action and edits the
    ``suggested_leaders`` relation with the horizontal filter widget.
    """

    filter_horizontal = ("suggested_leaders",)
    actions = ["approve_suggested_leaders"]


class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
autocomplete_fields = ("owasp_repository",)
class ChapterAdmin(LeaderEntityAdmin):
autocomplete_fields = ("owasp_repository", "leaders")
list_display = (
"name",
"region",
Expand All @@ -62,8 +81,8 @@ class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
search_fields = ("name", "key")


class CommitteeAdmin(admin.ModelAdmin):
autocomplete_fields = ("owasp_repository",)
class CommitteeAdmin(LeaderEntityAdmin):
autocomplete_fields = ("owasp_repository", "leaders")
search_fields = ("name",)


Expand Down Expand Up @@ -92,12 +111,13 @@ class PostAdmin(admin.ModelAdmin):
)


class ProjectAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
class ProjectAdmin(LeaderEntityAdmin):
autocomplete_fields = (
"organizations",
"owasp_repository",
"owners",
"repositories",
"leaders",
)
list_display = (
"custom_field_name",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Generated by Django 5.1.7 on 2025-03-23 13:51

from django.db import migrations, models


# Adds two many-to-many relations to github.user on each of the chapter, committee and
# project models: `leaders` ("Assigned leaders") and `suggested_leaders` ("Matched Users").
class Migration(migrations.Migration):
dependencies = [
("github", "0018_alter_issue_managers_alter_pullrequest_managers"),
("owasp", "0030_chapter_is_leaders_policy_compliant_and_more"),
]

operations = [
# chapter
migrations.AddField(
model_name="chapter",
name="leaders",
field=models.ManyToManyField(
blank=True,
related_name="assigned_%(class)s",
to="github.user",
verbose_name="Assigned leaders",
),
),
migrations.AddField(
model_name="chapter",
name="suggested_leaders",
field=models.ManyToManyField(
blank=True,
related_name="matched_%(class)s",
to="github.user",
verbose_name="Matched Users",
),
),
# committee
migrations.AddField(
model_name="committee",
name="leaders",
field=models.ManyToManyField(
blank=True,
related_name="assigned_%(class)s",
to="github.user",
verbose_name="Assigned leaders",
),
),
migrations.AddField(
model_name="committee",
name="suggested_leaders",
field=models.ManyToManyField(
blank=True,
related_name="matched_%(class)s",
to="github.user",
verbose_name="Matched Users",
),
),
# project
migrations.AddField(
model_name="project",
name="leaders",
field=models.ManyToManyField(
blank=True,
related_name="assigned_%(class)s",
to="github.user",
verbose_name="Assigned leaders",
),
),
migrations.AddField(
model_name="project",
name="suggested_leaders",
field=models.ManyToManyField(
blank=True,
related_name="matched_%(class)s",
to="github.user",
verbose_name="Matched Users",
),
),
]
14 changes: 14 additions & 0 deletions backend/apps/owasp/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,20 @@ def get_metadata(self):
extra={"repository": getattr(self.owasp_repository, "name", None)},
)

# M2M
# GitHub users proposed as leaders. The matching_users management command replaces this
# set (via `.set()`) with its exact and fuzzy matches for chapter/committee/project.
suggested_leaders = models.ManyToManyField(
"github.User",
verbose_name="Matched Users",
related_name="matched_%(class)s",
blank=True,
)
# GitHub users assigned as leaders. The approve_suggested_leaders admin action adds the
# entity's suggested_leaders to this relation.
leaders = models.ManyToManyField(
"github.User",
verbose_name="Assigned leaders",
related_name="assigned_%(class)s",
blank=True,
)

def get_related_url(self, url, exclude_domains=(), include_domains=()):
"""Get OWASP entity related URL."""
if (
Expand Down
55 changes: 54 additions & 1 deletion backend/apps/slack/admin.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""Slack app admin."""

from django.contrib import admin
from django.contrib import admin, messages

from apps.slack.models.channel import Channel
from apps.slack.models.event import Event
from apps.slack.models.member import Member
from apps.slack.models.workspace import Workspace


class EventAdmin(admin.ModelAdmin):
Expand All @@ -16,4 +19,54 @@ class EventAdmin(admin.ModelAdmin):
list_filter = ("trigger",)


class ChannelAdmin(admin.ModelAdmin):
    """Admin options for Slack channels: private/public filter and id/name search."""

    list_filter = ("is_private",)
    search_fields = ("slack_channel_id", "name")


class MemberAdmin(admin.ModelAdmin):
    """Admin options for Slack members, including approval of a suggested GitHub user."""

    actions = ["approve_suggested_users"]
    filter_horizontal = ("suggested_users",)
    # `user` is a relation to a GitHub user, so the search has to traverse to a text
    # column on it; naming the bare relation makes the admin search raise FieldError.
    search_fields = ("slack_user_id", "username", "real_name", "email", "user__login")

    def approve_suggested_users(self, request, queryset):
        """Approve all suggested users for selected members, enforcing one-to-one constraints.

        A member is linked to its suggested user only when exactly one suggestion
        exists. Members with several suggestions get an error message and members
        with none get a warning; neither is modified.

        Args:
            request: The admin request the result messages are attached to.
            queryset: The members selected for this admin action.
        """
        for entity in queryset:
            # Two rows are enough to tell "none", "exactly one" and "several" apart, so a
            # single bounded query replaces the repeated count()/first() round trips.
            candidates = list(entity.suggested_users.all()[:2])

            if len(candidates) == 1:
                entity.user = candidates[0]  # only one suggested user
                entity.save()
                self.message_user(
                    request,
                    f" assigned user for {entity}.",
                    messages.SUCCESS,
                )
            elif candidates:
                self.message_user(
                    request,
                    f"Error: Multiple suggested users found for {entity}. "
                    f"Only one user can be assigned due to the one-to-one constraint.",
                    messages.ERROR,
                )
            else:
                self.message_user(
                    request,
                    f"No suggested users found for {entity}.",
                    messages.WARNING,
                )

    approve_suggested_users.short_description = "Approve the suggested user (if only one exists)"


class WorkspaceAdmin(admin.ModelAdmin):
    """Admin options for Slack workspaces: search by workspace id or name."""

    search_fields = (
        "slack_workspace_id",
        "name",
    )


# Register each Slack model with the admin site together with its ModelAdmin options.
admin.site.register(Event, EventAdmin)
admin.site.register(Workspace, WorkspaceAdmin)
admin.site.register(Channel, ChannelAdmin)
admin.site.register(Member, MemberAdmin)
Loading