Merge branch 'dev' into dependancy_pie_error

cdolfi · Feb 21, 2024 · 54c3b08 · 54c3b08
2 parents cb848d5 + b608a20
commit 54c3b08
Show file tree

Hide file tree

Showing 23 changed files with 192 additions and 129 deletions.
diff --git a/.wordlist-md b/.wordlist-md
@@ -68,3 +68,7 @@ yml
 ansible
 gitlab
 SSL
+Docker's
+data-center
+OAuth
+postgres
diff --git a/8Knot/pages/affiliation/visualizations/commit_domains.py b/8Knot/pages/affiliation/visualizations/commit_domains.py
@@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
     # creates df of domains and counts
     df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()
 
-    df = df.rename(columns={0: "occurrences"})
+    df = df.rename(columns={"count": "occurrences"})
 
     # changes the name of the company if under a certain threshold
-    df.loc[df.occurrences <= num, "domains"] = "Other"
+    df.loc[df["occurrences"] <= num, "domains"] = "Other"
 
     # groups others together for final counts
     df = (

diff --git a/8Knot/pages/affiliation/visualizations/gh_org_affiliation.py b/8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
@@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
     df["company_name"] = df.index
     df = df.reset_index()
     df["company_name"] = df["company_name"].astype(str)
-    df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
+    df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})
 
     # applies fuzzy matching comparing all rows to each other
     df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
@@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
     )
 
     # changes the name of the company if under a certain threshold
-    df.loc[df.contribution_count <= num, "company_name"] = "Other"
+    df.loc[df["contribution_count"] <= num, "company_name"] = "Other"
 
     # groups others together for final counts
     df = (

diff --git a/8Knot/pages/affiliation/visualizations/org_associated_activity.py b/8Knot/pages/affiliation/visualizations/org_associated_activity.py
@@ -230,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
     # creates df of domains and counts
     df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()
 
-    df = df.rename(columns={0: "occurrences"})
+    df = df.rename(columns={"count": "occurrences"})
 
     # changes the name of the organization if under a certain threshold
     df.loc[df.occurrences <= num, "domains"] = "Other"

diff --git a/8Knot/pages/affiliation/visualizations/org_core_contributors.py b/8Knot/pages/affiliation/visualizations/org_core_contributors.py
@@ -243,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
     # creates df of domains and counts
     df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()
 
-    df = df.rename(columns={0: "contributors"})
+    df = df.rename(columns={"count": "contributors"})
 
     # changes the name of the org if under a certain threshold
     df.loc[df.contributors <= contributors, "domains"] = "Other"

diff --git a/8Knot/pages/affiliation/visualizations/unqiue_domains.py b/8Knot/pages/affiliation/visualizations/unqiue_domains.py
@@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
     # creates df of domains and counts
     df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()
 
-    df = df.rename(columns={0: "occurences"})
+    df = df.rename(columns={"count": "occurences"})
 
     # changes the name of the company if under a certain threshold
     df.loc[df.occurences <= num, "domains"] = "Other"

diff --git a/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py b/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
@@ -318,7 +318,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
     df_sum = df[action_type].sum()
 
     # calculate the remaining contributions by taking the difference of t_sum and df_sum
-    df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+    # DataFrame.append was deprecated in pandas 1.4.0 and removed in pandas 2.0,
+    # so build a single-row dataframe and concatenate it onto the existing df
+    df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+    df = pd.concat([df, df_concat], ignore_index=True)
 
     return df
 

diff --git a/8Knot/pages/chaoss/visualizations/project_velocity.py b/8Knot/pages/chaoss/visualizations/project_velocity.py
@@ -328,6 +328,9 @@ def process_data(
     # df_consolidated combines the actions and unique contributors and then specific columns for visualization use are added on
     df_consolidated = pd.concat([df_actions, df_cntrbs], axis=1).reset_index()
 
+    # replace all NaN values with 0
+    df_consolidated.fillna(value=0, inplace=True)
+
     # log of commits and contribs
     df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(math.log)
     df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply(math.log)
@@ -341,6 +344,9 @@ def process_data(
         + df_consolidated["PR Closed"] * pr_c_weight
     )
 
+    # after weighting, replace 0 with NaN because math.log(0) raises ValueError
+    df_consolidated["prs_issues_actions_weighted"].replace(0, np.nan, inplace=True)
+
     # column for log value of pr and issue actions
     df_consolidated["log_prs_issues_actions_weighted"] = df_consolidated["prs_issues_actions_weighted"].apply(math.log)
 

diff --git a/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py b/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py
@@ -217,13 +217,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
     df_contrib = df[df["assignment_action"] == "assigned"]
 
     # count the assignments total for each contributor
-    df_contrib = (
-        df_contrib["assignee"]
-        .value_counts()
-        .to_frame()
-        .reset_index()
-        .rename(columns={"assignee": "count", "index": "assignee"})
-    )
+    df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()
 
     # create list of all contributors that meet the assignment requirement
     contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()

diff --git a/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py b/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py
@@ -214,13 +214,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
     df_contrib = df[df["assignment_action"] == "assigned"]
 
     # count the assignments total for each contributor
-    df_contrib = (
-        df_contrib["assignee"]
-        .value_counts()
-        .to_frame()
-        .reset_index()
-        .rename(columns={"assignee": "count", "index": "assignee"})
-    )
+    df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()
 
     # create list of all contributors that meet the assignment requirement
     contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()

diff --git a/8Knot/pages/contributions/visualizations/issues_over_time.py b/8Knot/pages/contributions/visualizations/issues_over_time.py
@@ -189,15 +189,16 @@ def process_data(df: pd.DataFrame, interval, start_date, end_date):
     created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()
 
     # converts to data frame object and creates date column from period values
-    df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+    df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})
 
     # converts date column to a datetime object, converts to string first to handle period information
     # the period slice is to handle weekly corner case
     df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])
 
     # df for closed issues in time interval
     closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-    df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+    df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
+
     df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])
 
     # first and last elements of the dataframe are the

diff --git a/8Knot/pages/contributions/visualizations/pr_over_time.py b/8Knot/pages/contributions/visualizations/pr_over_time.py
@@ -165,20 +165,20 @@ def process_data(df: pd.DataFrame, interval):
     created_range = df["created_at"].dt.to_period(interval).value_counts().sort_index()
 
     # converts to data frame object and creates date column from period values
-    df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
+    df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})
 
     # converts date column to a datetime object, converts to string first to handle period information
     # the period slice is to handle weekly corner case
     df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])
 
     # df for merged prs in time interval
     merged_range = pd.to_datetime(df["merged_at"]).dt.to_period(interval).value_counts().sort_index()
-    df_merged = merged_range.to_frame().reset_index().rename(columns={"index": "Date"})
+    df_merged = merged_range.to_frame().reset_index().rename(columns={"merged_at": "Date", "count": "merged_at"})
     df_merged["Date"] = pd.to_datetime(df_merged["Date"].astype(str).str[:period_slice])
 
     # df for closed prs in time interval
     closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
-    df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
+    df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
     df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])
 
     # A single df created for plotting merged and closed as stacked bar chart

diff --git a/8Knot/pages/contributors/visualizations/contrib_importance_pie.py b/8Knot/pages/contributors/visualizations/contrib_importance_pie.py
@@ -319,7 +319,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
     df_sum = df[action_type].sum()
 
     # calculate the remaining contributions by taking the difference of t_sum and df_sum
-    df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
+    # DataFrame.append was deprecated in pandas 1.4.0 and removed in pandas 2.0,
+    # so build a single-row dataframe and concatenate it onto the existing df
+    df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
+    df = pd.concat([df, df_concat], ignore_index=True)
 
     return df
 

diff --git a/8Knot/pages/contributors/visualizations/new_contributor.py b/8Knot/pages/contributors/visualizations/new_contributor.py
@@ -189,7 +189,7 @@ def process_data(df, interval):
     created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()
 
     # converts to data frame object and creates date column from period values
-    df_contribs = created_range.to_frame().reset_index().rename(columns={"index": "Date", "created_at": "contribs"})
+    df_contribs = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "contribs"})
 
     # converts date column to a datetime object, converts to string first to handle period information
     df_contribs["Date"] = pd.to_datetime(df_contribs["Date"].astype(str))

diff --git a/8Knot/pages/index/index_layout.py b/8Knot/pages/index/index_layout.py
@@ -146,23 +146,23 @@
         dbc.NavItem(
             dbc.NavLink(
                 "Visualization request",
-                href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=enhancement%2Cvisualization&template=visualizations.md",
+                href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=enhancement%2Cvisualization&template=visualizations.md",
                 external_link="True",
                 target="_blank",
             )
         ),
         dbc.NavItem(
             dbc.NavLink(
                 "Bug",
-                href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=bug&template=bug_report.md",
+                href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=bug&template=bug_report.md",
                 external_link="True",
                 target="_blank",
             )
         ),
         dbc.NavItem(
             dbc.NavLink(
                 "Repo/Org Request",
-                href="https://github.com/sandiego-rh/explorer/issues/new?assignees=&labels=augur&template=augur_load.md",
+                href="https://github.com/oss-aspen/8Knot/issues/new?assignees=&labels=augur&template=augur_load.md",
                 external_link="True",
                 target="_blank",
             )