Skip to content

Commit

Permalink
Merge branch 'dev' into dependancy_pie_error
Browse files Browse the repository at this point in the history
  • Loading branch information
cdolfi authored Feb 21, 2024
2 parents cb848d5 + b608a20 commit 54c3b08
Show file tree
Hide file tree
Showing 23 changed files with 192 additions and 129 deletions.
4 changes: 4 additions & 0 deletions .wordlist-md
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,7 @@ yml
ansible
gitlab
SSL
Docker's
data-center
OAuth
postgres
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
df.loc[df["occurrences"] <= num, "domains"] = "Other"

# groups others together for final counts
df = (
Expand Down
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
df["company_name"] = df.index
df = df.reset_index()
df["company_name"] = df["company_name"].astype(str)
df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})

# applies fuzzy matching comparing all rows to each other
df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
Expand All @@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
)

# changes the name of the company if under a certain threshold
df.loc[df.contribution_count <= num, "company_name"] = "Other"
df.loc[df["contribution_count"] <= num, "company_name"] = "Other"

# groups others together for final counts
df = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the organization if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "contributors"})
df = df.rename(columns={"count": "contributors"})

# changes the name of the org if under a certain threshold
df.loc[df.contributors <= contributors, "domains"] = "Other"
Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/affiliation/visualizations/unqiue_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurences"})
df = df.rename(columns={"count": "occurences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurences <= num, "domains"] = "Other"
Expand Down
5 changes: 4 additions & 1 deletion 8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
df_sum = df[action_type].sum()

# calculate the remaining contributions by taking the difference of t_sum and df_sum
df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# DataFrame.append (used above) was deprecated in pandas 1.4.0 and removed in pandas 2.0,
# so create a single-entry dataframe that we can concatenate onto existing df
df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_concat], ignore_index=True)

return df

Expand Down
6 changes: 6 additions & 0 deletions 8Knot/pages/chaoss/visualizations/project_velocity.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,9 @@ def process_data(
# df_consolidated combines the actions and unique contributors and then specific columns for visualization use are added on
df_consolidated = pd.concat([df_actions, df_cntrbs], axis=1).reset_index()

# replace all NaN values with 0
df_consolidated.fillna(value=0, inplace=True)

# log of commits and contribs
df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(math.log)
df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply(math.log)
Expand All @@ -341,6 +344,9 @@ def process_data(
+ df_consolidated["PR Closed"] * pr_c_weight
)

# after weighting replace 0 with nan for log
df_consolidated["prs_issues_actions_weighted"].replace(0, np.nan, inplace=True)

# column for log value of pr and issue actions
df_consolidated["log_prs_issues_actions_weighted"] = df_consolidated["prs_issues_actions_weighted"].apply(math.log)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
df_contrib = df[df["assignment_action"] == "assigned"]

# count the assignments total for each contributor
df_contrib = (
df_contrib["assignee"]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"assignee": "count", "index": "assignee"})
)
df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
df_contrib = df[df["assignment_action"] == "assigned"]

# count the assignments total for each contributor
df_contrib = (
df_contrib["assignee"]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"assignee": "count", "index": "assignee"})
)
df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
Expand Down
5 changes: 3 additions & 2 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,16 @@ def process_data(df: pd.DataFrame, interval, start_date, end_date):
created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and creates date column from period values
df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

# converts date column to a datetime object, converts to string first to handle period information
# the period slice is to handle weekly corner case
df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

# df for closed issues in time interval
closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})

df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# first and last elements of the dataframe are the
Expand Down
6 changes: 3 additions & 3 deletions 8Knot/pages/contributions/visualizations/pr_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,20 @@ def process_data(df: pd.DataFrame, interval):
created_range = df["created_at"].dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and creates date column from period values
df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

# converts date column to a datetime object, converts to string first to handle period information
# the period slice is to handle weekly corner case
df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

# df for merged prs in time interval
merged_range = pd.to_datetime(df["merged_at"]).dt.to_period(interval).value_counts().sort_index()
df_merged = merged_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_merged = merged_range.to_frame().reset_index().rename(columns={"merged_at": "Date", "count": "merged_at"})
df_merged["Date"] = pd.to_datetime(df_merged["Date"].astype(str).str[:period_slice])

# df for closed prs in time interval
closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# A single df created for plotting merged and closed as stacked bar chart
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
df_sum = df[action_type].sum()

# calculate the remaining contributions by taking the difference of t_sum and df_sum
df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# DataFrame.append (used above) was deprecated in pandas 1.4.0 and removed in pandas 2.0,
# so create a single-entry dataframe that we can concatenate onto existing df
df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_concat], ignore_index=True)

return df

Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/contributors/visualizations/new_contributor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def process_data(df, interval):
created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and creates date column from period values
df_contribs = created_range.to_frame().reset_index().rename(columns={"index": "Date", "created_at": "contribs"})
df_contribs = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "contribs"})

# converts date column to a datetime object, converts to string first to handle period information
df_contribs["Date"] = pd.to_datetime(df_contribs["Date"].astype(str))
Expand Down
6 changes: 3 additions & 3 deletions 8Knot/pages/index/index_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,23 +146,23 @@
dbc.NavItem(
dbc.NavLink(
"Visualization request",
href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=enhancement%2Cvisualization&template=visualizations.md",
href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=enhancement%2Cvisualization&template=visualizations.md",
external_link="True",
target="_blank",
)
),
dbc.NavItem(
dbc.NavLink(
"Bug",
href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=bug&template=bug_report.md",
href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=bug&template=bug_report.md",
external_link="True",
target="_blank",
)
),
dbc.NavItem(
dbc.NavLink(
"Repo/Org Request",
href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=augur&template=augur_load.md",
href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=augur&template=augur_load.md",
external_link="True",
target="_blank",
)
Expand Down
Loading

0 comments on commit 54c3b08

Please sign in to comment.