-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresume_screener.py
103 lines (81 loc) · 4.77 KB
/
resume_screener.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import pandas as pd
import re
from io import StringIO
from resume_loader import load_resumes
from resume_ranker import rank_resumes
# Page configuration and introductory text shown at the top of the app.
st.set_page_config(
    page_title="ResumeInsight - AI Resume Screener",
    layout="wide",
)
st.title("📄 ResumeInsight - AI Resume Screener")
st.write("Upload resumes and enter a job description to rank candidates based on relevance.")

# The job description is collected in the sidebar. Calling widgets through
# `st.sidebar.<widget>` places them in the sidebar, same as a `with` block.
st.sidebar.header("📝 Job Description")
job_description = st.sidebar.text_area("Enter the job description:", height=200)

# Resume upload widget; restricted to PDF/DOCX and allows several files at once.
uploaded_files = st.file_uploader(
    "📂 Upload resumes (PDF or DOCX)",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)
# Stop words and generic section labels that carry no signal for matching.
_COMMON_WORDS = frozenset({
    "and", "or", "the", "of", "a", "to", "for", "with", "in", "on", "by", "an", "at", "as", "this", "is",
    "must", "your", "have", "any", "within", "such", "required", "understanding", "determined", "time",
    "details", "address", "city", "state", "name", "phone", "email", "skills", "education", "experience",
    "resume", "years",
})

# Personal details that are stripped from the text before tokenizing.
_ADDRESS_RE = re.compile(r"\d{1,5}\s\w+(\s\w+){0,2},\s\w{2,},\s?\w{2,}")
_PHONE_RE = re.compile(r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}")
_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

# A keyword has 3+ characters, starts with a letter, may contain `#`, `.`, `+`
# and `-` inside, and ends with a letter, `+` or `#`. Lookarounds are used
# instead of `\b` because `\b` next to punctuation requires an adjacent word
# character, which made tokens such as "c++" impossible to match and produced
# ragged tokens such as "python-" (from "python-3").
_KEYWORD_RE = re.compile(r"(?<!\w)[a-z][a-z#.+-]+[a-z+#](?!\w)")


# Function to extract meaningful keywords from job description
def extract_keywords(text):
    """Extract the set of relevant keywords from a job description.

    Addresses, phone numbers and e-mail addresses are removed first. The
    remaining text is lower-cased and split into tokens of three or more
    characters; tokens listed in the stop-word set are dropped.

    Args:
        text: The job description as a string.

    Returns:
        A set of lower-case keyword strings (empty for empty input).
    """
    cleaned_text = _ADDRESS_RE.sub("", text)
    cleaned_text = _PHONE_RE.sub("", cleaned_text)
    cleaned_text = _EMAIL_RE.sub("", cleaned_text)

    # Tokens always start with a letter, so no separate digit filter is needed.
    candidates = _KEYWORD_RE.findall(cleaned_text.lower())
    return {word for word in candidates if word not in _COMMON_WORDS}
# Function to collect the keywords that occur in the resume text
def get_highlighted_keywords(text, keywords):
    """Return the keywords that occur in ``text`` as whole tokens.

    Matching is case-insensitive. The text itself is never modified: an
    earlier version wrapped each hit in HTML markup and kept searching the
    rewritten text, so keywords such as "bold", "blue" or "style" matched
    the injected markup instead of the resume.

    Args:
        text: The text to search (e.g. the full resume text).
        keywords: An iterable of keyword strings.

    Returns:
        A list of the matching keywords, longest first. Empty keywords are
        ignored.
    """
    matched_keywords = []
    for keyword in sorted(keywords, key=len, reverse=True):
        if not keyword:
            continue
        # `\b` is only meaningful next to a word character; next to
        # punctuation (as in "c++") it would demand an adjacent word
        # character and the keyword could never match.
        prefix = r"\b" if keyword[0].isalnum() or keyword[0] == "_" else ""
        suffix = r"\b" if keyword[-1].isalnum() or keyword[-1] == "_" else ""
        pattern = prefix + re.escape(keyword) + suffix
        if re.search(pattern, text, flags=re.IGNORECASE):
            matched_keywords.append(keyword)
    return matched_keywords
# Main flow: runs only once both resumes and a job description are provided.
if uploaded_files and job_description:
    st.write("🔍 **Processing Resumes...**")

    # Load resumes. The result is indexed by resume name further down.
    processed_resumes = load_resumes(uploaded_files=uploaded_files)

    # Rank resumes. The result is consumed below as a sequence of
    # (name, score) pairs.
    ranked_resumes = rank_resumes(job_description, processed_resumes)

    # Extract filtered keywords from job description
    job_keywords = extract_keywords(job_description)
    st.write("🔹 **Filtered Keywords for Matching:**", ", ".join(sorted(job_keywords)))

    # Convert results to DataFrame
    results_df = pd.DataFrame(ranked_resumes, columns=["Resume Name", "Score"])

    # Display ranking results
    st.subheader("🔹 Resume Ranking Results")
    st.dataframe(results_df.style.format({"Score": "{:.4f}"}))

    # Ensure ranked_resumes is not empty before selecting top resumes
    if ranked_resumes:
        # A slider needs min_value < max_value, so skip it for a single resume.
        if len(ranked_resumes) == 1:
            top_n = 1
        else:
            top_n = st.slider(
                "📊 Select number of top resumes to summarize:",
                1, len(ranked_resumes), min(3, len(ranked_resumes))
            )

        # Summary of selected top resumes
        st.subheader(f"🏆 Summary of Top {top_n} Resume{'s' if top_n > 1 else ''}")
        for i, (name, score) in enumerate(ranked_resumes[:top_n], 1):
            resume_text = processed_resumes[name]  # Full resume text
            highlighted_keywords = get_highlighted_keywords(resume_text, job_keywords)

            # Preview: the first 100 words of the resume. The ellipsis is
            # added only when the text was actually truncated.
            resume_words = resume_text.split()
            experience_section = " ".join(resume_words[:100])
            if len(resume_words) > 100:
                experience_section += "..."

            st.markdown(f"### {i}. {name} - Score: `{score:.4f}`")
            st.markdown(f"📜 **Experience Summary:** {experience_section}")

            # Display the job keywords found in this resume
            st.write("🔍 Highlighted Keywords:", ", ".join(highlighted_keywords))

    # Allow users to download results as CSV. `to_csv` without a target
    # returns the CSV text directly, so no intermediate buffer is needed.
    st.subheader("📥 Download Results")
    csv_data = results_df.to_csv(index=False)
    st.download_button("Download CSV", data=csv_data, file_name="resume_ranking_results.csv", mime="text/csv")