♻️ Refactor commute and "short trip" charts to fit the new pattern

shankari · shankari · commit f7f3590de7bc · 2024-05-01T16:40:37.000-07:00
- In both cases, we preprocess in the notebook, so that the data manipulation is clear, and then call the `plot_and_text_stacked_bar_chart` created in e-mission@12b00e3 to display the bar charts. While testing this with empty data, addressed a few more corner cases: - Since we now pre-process data, we can have a data exception instead of just a code exception from the `try/except` block. So we catch `AttributeError`, and print out the debug_df as before. - Note that the preprocessing for the commute trips involves `df.query`, which throws a `pd.errors.UndefinedVariableError`, so we need to catch that as well. - We still print the code error if it is not a data access error, but since the error can be generated before the fig is created, we re-create a figure before we print out the error. Related fix: e-mission#123 (comment). Cutoff function was removed in: e-mission@013ca55. Commute function was removed in: e-mission@89ff25d
diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
@@ -177,6 +177,7 @@
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except Exception as e:\n",
     "    # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1)  # fresh figure with two axes, so that the ax[1] below is valid\n",
     "    plot_and_text_error(e, ax[1], file_name)"
    ]
   },
@@ -195,26 +196,39 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plot_title_no_quality= \"Number of trips for 80th percentile for each mode (selected by users):\"\n",
     "file_name = f'ntrips_under80{file_suffix}'\n",
     "\n",
     "try:\n",
-    "    plot_title = plot_title_no_quality\n",
+    "    # Preprocess to find cutoff and filter below cutoff\n",
+    "    # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips\n",
+    "    cutoff = expanded_ct_sensed.distance.quantile(0.8)\n",
+    "    if pd.isna(cutoff):\n",
+    "        cutoff = 0\n",
+    "    dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n",
+    "    dist_threshold = str(dist_threshold) \n",
+    "\n",
+    "    plot_title_no_quality=\"Number of trips under \" + dist_threshold + \" \" + label_units_lower\n",
+    "    plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n",
+    "\n",
+    "    expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)]\n",
+    "    expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]  # mask must come from the frame being filtered\n",
+    "    u80_quality_text = scaffolding.get_quality_text(expanded_ct_sensed, expanded_ct_sensed_u80, \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n",
+    "    plot_title = plot_title_no_quality+\"\\n\"+u80_quality_text\n",
+    "    \n",
+    "    # Plot entries\n",
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
-    "    df_confirmed_tc_u80_expanded, df_confirmed_tc_u80,cutoff, dist_threshold  = process_data_for_cutoff(expanded_ct,'Mode_confirm', distance_col)\n",
-    "    df_sensed_tc_u80_expanded, df_sensed_tc_u80, cutoff_sensed, dist_threshold_sensed = process_data_for_cutoff(expanded_ct_sensed, 'primary_mode', distance_col)\n",
-    "    u80_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"<= \" + dist_threshold + \" \" + short_label, include_test_users) if not expanded_ct.empty else \"\"\n",
-    "    u80_quality_text_sensed = scaffolding.get_quality_text_sensed(expanded_ct_sensed[expanded_ct_sensed['distance'] <= cutoff_sensed], \"<= \" + dist_threshold_sensed + \" \" + short_label, include_test_users) if not expanded_ct_sensed.empty else \"\"\n",
-    "    plot_title +=  \"\\n\" + \"For Labeled: \" + u80_quality_text + \"\\n\" + \"For Sensed: \" + u80_quality_text_sensed\n",
-    "    create_alt_text_and_html_title(plot_title, file_name)\n",
-    "    plot_stacked_bar_chart(df_confirmed_tc_u80, \"Confirmed Trip\", \"Labeled by user\\n (Confirmed trips)\", ax[0], colors_mode)\n",
-    "    store_alt_text_and_html_stacked_bar_chart(df_confirmed_tc_u80_expanded, file_name, \"Labeled by user\\n (Confirmed trips)\")\n",
-    "    plot_stacked_bar_chart(df_sensed_tc_u80,  \"Sensed Trip\", \"Sensed by OpenPATH\\n (Total trips)\", ax[1], colors_sensed)\n",
-    "    store_alt_text_and_html_stacked_bar_chart(df_sensed_tc_u80_expanded, file_name, \"Sensed by OpenPATH\\n (Total trips)\")\n",
-    "    add_stacked_bar_chart_title(fig, ax[0], plot_title, file_name)\n",
-    "except:\n",
-    "    generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
-    "    alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)"
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_u80, \"Mode_confirm\", \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, \"primary_mode\", \"Sensed by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_sensed)\n",
+    "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
+    "except AttributeError as e:\n",
+    "    # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
+    "    # here, our pre-processing only relies on sensed data, so we use the debug_df_sensed\n",
+    "    generate_missing_plot(\"Number of trips below 80th percentile in each mode\", debug_df_sensed, file_name)\n",
+    "    alt_text = store_alt_text_missing(debug_df_sensed, file_name, \"Number of trips below 80th percentile in each mode\")\n",
+    "except Exception as e:\n",
+    "    fig, ax = plt.subplots()  # fresh figure: the error may occur before the one in `try` is created\n",
+    "    plot_and_text_error(e, ax, file_name)  # plt.subplots() returns a single Axes here, not an array"
    ]
   },
   {
@@ -236,26 +250,33 @@
     "file_name = f\"ntrips_commute_mode_confirm{file_suffix}\"\n",
     "\n",
     "try:\n",
+    "    # Preprocess to find commute trips\n",
     "    if (len(dynamic_labels)):\n",
     "        purpose_map_label =  scaffolding.mapping_labels(dynamic_labels, \"PURPOSE\")\n",
     "        translation_work = purpose_map_label['work']\n",
     "        trip_purpose_query = f\"Trip_purpose == '{translation_work}'\"\n",
     "    else:\n",
     "        trip_purpose_query = \"Trip_purpose == 'Work'\"\n",
     "\n",
-    "    df_total_trip_commute_expanded, df_total_trip_commute = process_commute_data_frame(expanded_ct, trip_purpose_query)\n",
-    "    commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct.query(trip_purpose_query), \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
-    "\n",
+    "    expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n",
+    "    commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
     "    plot_title = plot_title_no_quality\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
     "    plot_title += \"\\n\" + \"For Labeled: \" + commute_quality_text\n",
-    "    create_alt_text_and_html_title(plot_title, file_name)\n",
-    "    store_alt_text_and_html_stacked_bar_chart(df_total_trip_commute_expanded, file_name,\"Labeled by user\\n (Confirmed trips)\")\n",
-    "    plot_stacked_bar_chart(df_total_trip_commute, \"Commute Trip\", \"Labeled by user\\n (Confirmed trips)\", ax, colors_mode)\n",
-    "    add_stacked_bar_chart_title(fig, ax, plot_title, file_name)\n",
-    "except:\n",
+    "    \n",
+    "    # Plot entries\n",
+    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)    \n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    # Single chart: commute trips are selected from the labeled trips by purpose, so we plot expanded_ct_commute on the only axis\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_commute, \"Mode_confirm\", \"Labeled by user\\n (Confirmed trips)\", ax, text_results[0], colors_mode, debug_df)\n",
+    "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
+    "except (AttributeError, pd.errors.UndefinedVariableError) as e:\n",
+    "    # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
+    "    # here, our pre-processing only relies on labeled data, so we use the debug_df\n",
     "    generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
-    "    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+    "    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)\n",
+    "except Exception as e:\n",
+    "    fig, ax = plt.subplots()\n",
+    "    plot_and_text_error(e, ax, file_name)"
    ]
   },
   {
@@ -278,6 +299,7 @@
     "\n",
     "try:\n",
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    \n",
     "    df_confirm_dist_expanded, df_confirm_dist = process_distance_data(expanded_ct, 'Mode_confirm', distance_col, label_units_lower)\n",
     "    df_sensed_dist_expanded, df_sensed_dist = process_distance_data(expanded_ct_sensed, 'primary_mode', distance_col, label_units_lower)  \n",
     "    plot_title = plot_title_no_quality + \"\\n\" + \"For Labeled & Sensed: \" + quality_text\n",