Skip to content

Commit f7f3590

Browse files
committed
♻️ Refactor commute and "short trip" charts to fit the new pattern
- In both cases, we preprocess in the notebook, so that the data manipulation is clear, and then call the `plot_and_text_stacked_bar_chart` created in e-mission@12b00e3 to display the bar charts. While testing this with empty data, addressed a few more corner cases: - Since we now pre-process data, we can have a data exception instead of just a code exception from the `try/except` block. So we catch `AttributeError`, and print out the debug_df as before. - Note that the preprocessing for the commute trips involves `df.query`, which throws a `pd.errors.UndefinedVariableError`, so we need to catch that as well. - We still print the code error if it is not a data access error, but since the error can be generated before the fig is created, we re-create a figure before we print out the error. Related fix: e-mission#123 (comment). Cutoff function was removed in e-mission@013ca55. Commit function was removed in: e-mission@89ff25d
1 parent 013ca55 commit f7f3590

File tree

1 file changed

+48
-26
lines changed

1 file changed

+48
-26
lines changed

viz_scripts/generic_metrics.ipynb

+48-26
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@
177177
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
178178
"except Exception as e:\n",
179179
" # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n",
180+
" fig, ax = plt.subplots()\n",
180181
" plot_and_text_error(e, ax[1], file_name)"
181182
]
182183
},
@@ -195,26 +196,39 @@
195196
"metadata": {},
196197
"outputs": [],
197198
"source": [
198-
"plot_title_no_quality= \"Number of trips for 80th percentile for each mode (selected by users):\"\n",
199199
"file_name = f'ntrips_under80{file_suffix}'\n",
200200
"\n",
201201
"try:\n",
202-
" plot_title = plot_title_no_quality\n",
202+
" # Preprocess to find cutoff and filter below cutoff\n",
203+
" # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips\n",
204+
" cutoff = expanded_ct_sensed.distance.quantile(0.8)\n",
205+
" if pd.isna(cutoff):\n",
206+
" cutoff = 0\n",
207+
" dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n",
208+
" dist_threshold = str(dist_threshold) \n",
209+
"\n",
210+
" plot_title_no_quality=\"Number of trips under \" + dist_threshold + \" \" + label_units_lower\n",
211+
" plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n",
212+
"\n",
213+
" expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)]\n",
214+
" expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct['distance'] <= cutoff)]\n",
215+
" u80_quality_text = scaffolding.get_quality_text(expanded_ct_sensed, expanded_ct_sensed_u80, \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n",
216+
" plot_title = plot_title_no_quality+\"\\n\"+u80_quality_text\n",
217+
" \n",
218+
" # Plot entries\n",
203219
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
204-
" df_confirmed_tc_u80_expanded, df_confirmed_tc_u80,cutoff, dist_threshold = process_data_for_cutoff(expanded_ct,'Mode_confirm', distance_col)\n",
205-
" df_sensed_tc_u80_expanded, df_sensed_tc_u80, cutoff_sensed, dist_threshold_sensed = process_data_for_cutoff(expanded_ct_sensed, 'primary_mode', distance_col)\n",
206-
" u80_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"<= \" + dist_threshold + \" \" + short_label, include_test_users) if not expanded_ct.empty else \"\"\n",
207-
" u80_quality_text_sensed = scaffolding.get_quality_text_sensed(expanded_ct_sensed[expanded_ct_sensed['distance'] <= cutoff_sensed], \"<= \" + dist_threshold_sensed + \" \" + short_label, include_test_users) if not expanded_ct_sensed.empty else \"\"\n",
208-
" plot_title += \"\\n\" + \"For Labeled: \" + u80_quality_text + \"\\n\" + \"For Sensed: \" + u80_quality_text_sensed\n",
209-
" create_alt_text_and_html_title(plot_title, file_name)\n",
210-
" plot_stacked_bar_chart(df_confirmed_tc_u80, \"Confirmed Trip\", \"Labeled by user\\n (Confirmed trips)\", ax[0], colors_mode)\n",
211-
" store_alt_text_and_html_stacked_bar_chart(df_confirmed_tc_u80_expanded, file_name, \"Labeled by user\\n (Confirmed trips)\")\n",
212-
" plot_stacked_bar_chart(df_sensed_tc_u80, \"Sensed Trip\", \"Sensed by OpenPATH\\n (Total trips)\", ax[1], colors_sensed)\n",
213-
" store_alt_text_and_html_stacked_bar_chart(df_sensed_tc_u80_expanded, file_name, \"Sensed by OpenPATH\\n (Total trips)\")\n",
214-
" add_stacked_bar_chart_title(fig, ax[0], plot_title, file_name)\n",
215-
"except:\n",
216-
" generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
217-
" alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)"
220+
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
221+
" plot_and_text_stacked_bar_chart(expanded_ct_u80, \"Mode_confirm\", \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
222+
" plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, \"primary_mode\", \"Sensed by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_sensed)\n",
223+
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
224+
"except AttributeError as e:\n",
225+
" # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
226+
" # here, our pre-processing only relies on sensed data, so we use the debug_df_sensed\n",
227+
" generate_missing_plot(\"Number of trips below 80th percentile in each mode\", debug_df_sensed, file_name)\n",
228+
" alt_text = store_alt_text_missing(debug_df_sensed, file_name, \"Number of trips below 80th percentile in each mode\")\n",
229+
"except Exception as e:\n",
230+
" fig, ax = plt.subplots()\n",
231+
" plot_and_text_error(e, ax[1], file_name)"
218232
]
219233
},
220234
{
@@ -236,26 +250,33 @@
236250
"file_name = f\"ntrips_commute_mode_confirm{file_suffix}\"\n",
237251
"\n",
238252
"try:\n",
253+
" # Preprocess to find commute trips\n",
239254
" if (len(dynamic_labels)):\n",
240255
" purpose_map_label = scaffolding.mapping_labels(dynamic_labels, \"PURPOSE\")\n",
241256
" translation_work = purpose_map_label['work']\n",
242257
" trip_purpose_query = f\"Trip_purpose == '{translation_work}'\"\n",
243258
" else:\n",
244259
" trip_purpose_query = \"Trip_purpose == 'Work'\"\n",
245260
"\n",
246-
" df_total_trip_commute_expanded, df_total_trip_commute = process_commute_data_frame(expanded_ct, trip_purpose_query)\n",
247-
" commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct.query(trip_purpose_query), \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
248-
"\n",
261+
" expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n",
262+
" commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
249263
" plot_title = plot_title_no_quality\n",
250-
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
251264
" plot_title += \"\\n\" + \"For Labeled: \" + commute_quality_text\n",
252-
" create_alt_text_and_html_title(plot_title, file_name)\n",
253-
" store_alt_text_and_html_stacked_bar_chart(df_total_trip_commute_expanded, file_name,\"Labeled by user\\n (Confirmed trips)\")\n",
254-
" plot_stacked_bar_chart(df_total_trip_commute, \"Commute Trip\", \"Labeled by user\\n (Confirmed trips)\", ax, colors_mode)\n",
255-
" add_stacked_bar_chart_title(fig, ax, plot_title, file_name)\n",
256-
"except:\n",
265+
" \n",
266+
" # Plot entries\n",
267+
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n",
268+
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
269+
" plot_and_text_stacked_bar_chart(expanded_ct_u80, \"Mode_confirm\", \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
270+
" plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, \"primary_mode\", \"Sensed by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_sensed)\n",
271+
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
272+
"except (AttributeError, pd.errors.UndefinedVariableError) as e:\n",
273+
" # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
274+
" # here, our pre-processing only relies on labeled data (expanded_ct), so we use the debug_df\n",
257275
" generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
258-
" alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
276+
" alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)\n",
277+
"except Exception as e:\n",
278+
" fig, ax = plt.subplots()\n",
279+
" plot_and_text_error(e, ax, file_name)"
259280
]
260281
},
261282
{
@@ -278,6 +299,7 @@
278299
"\n",
279300
"try:\n",
280301
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
302+
" \n",
281303
" df_confirm_dist_expanded, df_confirm_dist = process_distance_data(expanded_ct, 'Mode_confirm', distance_col, label_units_lower)\n",
282304
" df_sensed_dist_expanded, df_sensed_dist = process_distance_data(expanded_ct_sensed, 'primary_mode', distance_col, label_units_lower) \n",
283305
" plot_title = plot_title_no_quality + \"\\n\" + \"For Labeled & Sensed: \" + quality_text\n",

0 commit comments

Comments
 (0)