Skip to content

Commit 12b00e3

Browse files
committed
♻️ Heavy lift on the refactoring
We now have:
- a single call per bar which, given data, fills in the plot, alt text and alt HTML
- the call also displays the debug_df if there are any errors while plotting
- a separate function to add the titles to the plot and the two text strings, and save the figure
- a separate function to handle code errors. Since the bar call now handles plotting errors, the catch block in the cell will correspond to code errors, and we need to display them directly instead of the data_df
- notebook changes to use the new library interface

Testing done (partial): Error handling works e-mission#123 (comment)
1 parent 4f1c385 commit 12b00e3

File tree

2 files changed

+90
-67
lines changed

2 files changed

+90
-67
lines changed

viz_scripts/generic_metrics.ipynb

+9-11
Original file line numberDiff line numberDiff line change
@@ -179,18 +179,16 @@
179179
"\n",
180180
"try:\n",
181181
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
182-
" df_confirmed_tc_expanded, df_confirmed_tc = process_data_frame(expanded_ct, 'Mode_confirm')\n",
183-
" df_sensed_tc_expanded, df_sensed_tc = process_data_frame(expanded_ct_sensed, 'primary_mode')\n",
182+
" # We will have text results corresponding to the axes for simplicity and consistency\n",
183+
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
184+
" plot_and_text_stacked_bar_chart(expanded_ct, \"Mode_confirm\", \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
185+
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, \"primary_mode\", \"Sensed by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_sensed)\n",
186+
" \n",
184187
" plot_title = plot_title_no_quality + \"\\n\" + \"For Labeled & Sensed: \" + quality_text\n",
185-
" create_alt_text_and_html_title(plot_title, file_name)\n",
186-
" plot_stacked_bar_chart(df_confirmed_tc, \"Confirmed Trip\", \"Labeled by user\\n (Confirmed trips)\", ax[0], colors_mode)\n",
187-
" store_alt_text_and_html_stacked_bar_chart(df_confirmed_tc_expanded, file_name, \"Labeled by user\\n (Confirmed trips)\")\n",
188-
" plot_stacked_bar_chart(df_sensed_tc, \"Sensed Trip\", \"Sensed by OpenPATH\\n (Total trips)\", ax[1], colors_sensed)\n",
189-
" store_alt_text_and_html_stacked_bar_chart(df_sensed_tc_expanded, file_name, \"Sensed by OpenPATH\\n (Total trips)\")\n",
190-
" add_stacked_bar_chart_title(fig, ax[0], plot_title, file_name)\n",
191-
"except:\n",
192-
" generate_missing_plot(plot_title_no_quality,merged_debug_df,file_name)\n",
193-
" alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)"
188+
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
189+
"except Exception as e:\n",
190+
" # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n",
191+
" plot_and_text_error(e, ax[1], file_name)"
194192
]
195193
},
196194
{

viz_scripts/plots.py

+81-56
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import itertools
55
import matplotlib.pyplot as plt
66
import seaborn as sns
7+
import traceback as tb
78
from matplotlib.patches import Patch
89

910
sns.set_style("whitegrid")
@@ -58,23 +59,6 @@ def merge_small_entries(labels, values):
5859

5960
return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list())
6061

61-
def process_data_frame(df, df_col):
62-
""" Inputs:
63-
df = Likely expanded_ct, data_eb or expanded_ct_sensed data frame
64-
df_col = Column from the above df, likely Mode_confirm, primary_mode
65-
trip_type = Bar labels (e.g. Labeled by user (Confirmed trips))
66-
"""
67-
try:
68-
labels = df[df_col].value_counts(dropna=True).keys().tolist()
69-
values = df[df_col].value_counts(dropna=True).tolist()
70-
return process_trip_data(labels, values)
71-
except KeyError:
72-
print(f"Column '{df_col}' not found in the data frame.")
73-
return pd.DataFrame(), pd.DataFrame()
74-
except Exception as e:
75-
print(f"An error occurred: {e}")
76-
return pd.DataFrame(), pd.DataFrame()
77-
7862
def process_distance_data(df, df_col, distance_col, label_units_lower):
7963
""" Inputs:
8064
df = Likely expanded_ct, data_eb or expanded_ct_sensed data frame
@@ -169,29 +153,46 @@ def process_trip_data(labels, values):
169153
df_total_trip = pd.DataFrame(data_trip)
170154
return df_total_trip_expanded, df_total_trip
171155

156+
def plot_and_text_error(e, ax, file_name):
    """Show an unexpected code error in place of a chart and save the outputs.

    Meant for the notebook's outer ``except`` block: plotting/data problems
    are handled per bar, so anything that reaches this function is a code
    error and is displayed directly as a traceback.

    Inputs:
        e = the exception that was caught
        ax = axis on which the traceback text is drawn
        file_name = base name (without extension) used for the saved outputs
    Returns:
        (alt_text, alt_html) as returned by access_alt_text / access_alt_html
    """
    import html  # local import: only needed on this error path

    stringified_exception = "".join(tb.format_exception(type(e), e, e.__traceback__))
    ax.text(0, 0, s=stringified_exception)
    # Save the figure that owns `ax` rather than pyplot's implicit "current"
    # figure, so the right image is written even if another figure is active.
    ax.figure.savefig(SAVE_DIR + file_name + ".png", bbox_inches='tight')

    # Keep the header on its own line; the traceback starts on the next one.
    error_text = "Error while generating chart:\n" + stringified_exception
    alt_text = access_alt_text(error_text, file_name)

    # Tracebacks contain frames such as "<module>" that a browser would treat
    # as (unknown) tags and hide, so escape the text; <pre> keeps line breaks.
    error_html = f"<pre>{html.escape(error_text)}</pre>"
    alt_html = access_alt_html(error_html, file_name, "w")
    return alt_text, alt_html
166+
172167
# Creates/ Appends single bar to the 100% Stacked Bar Chart
173-
def plot_stacked_bar_chart(df, bar_name, bar_lab, ax, colors_combined):
168+
def plot_and_text_stacked_bar_chart(df, df_col, bar_label, ax, text_result, colors, debug_df):
174169
""" Inputs:
175170
df = Data frame corresponding to the bar in a stacked bar chart
176171
bar_name = Text to represent in case data frame is empty (e.g. "Sensed Trip")
177-
bar_lab = Text to represent the Bar (e.g. Labeled by user\n (Confirmed trips))
172+
bar_label = Text to represent the Bar (e.g. Labeled by user\n (Confirmed trips))
178173
ax = axis information
179174
colors_combined = color mapping dictionary
180175
"""
181176
sns.set(font_scale=1.5)
182177
bar_height = 0.2
183178
bar_width = [0]
184-
if df.empty:
185-
ax.text(x = 0.5, y = 0.5, s = f"No data available for {bar_name}", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
186-
ax.yaxis.set_visible(False)
187-
else:
188-
for label in pd.unique(df['Label']):
179+
try:
180+
# TODO: Put this into a dataframe to begin with so that we can use it directly instead of having multiple variables
181+
labels = df[df_col].value_counts(dropna=True).keys().tolist()
182+
values = df[df_col].value_counts(dropna=True).tolist()
183+
184+
# TODO: Do we need this as a separate function?
185+
df_all_entries, df_only_small = process_trip_data(labels, values)
186+
187+
# TODO: Fix this to be more pandas-like and change the "long" variable name
188+
for label in pd.unique(df_only_small['Label']):
189189
long = df[df['Label'] == label]
190+
# TODO: Remove if/else; if we only consider unique values, then long can never be empty
190191
if not long.empty:
191192
mode_prop = long['Proportion']
192193
mode_count = long['Value']
193194
vals_str = [f'{y:.1f} %\n({x:.0f})' if y > 4 else '' for x, y in zip(mode_count, mode_prop)]
194-
bar = ax.barh(y=bar_lab, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors_combined[label])
195+
bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors_combined[label])
195196
ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16)
196197
bar_width = [total + val for total, val in zip(bar_width, mode_prop)]
197198
else:
@@ -201,15 +202,45 @@ def plot_stacked_bar_chart(df, bar_name, bar_lab, ax, colors_combined):
201202
ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize=15)
202203
# Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/
203204
ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True)
205+
text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label)
206+
print("After populating, %s" % text_result)
207+
except:
208+
# ax.set_title("Insufficient data", loc="center")
209+
ax.text(x = 0.5, y = 0.9, s = "Insufficient data", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
210+
# TODO: consider switching to a two column table
211+
ax.text(x = 0.5, y = 0.8, s = debug_df.to_string(), horizontalalignment='center', verticalalignment='top', transform=ax.transAxes, fontsize=10)
212+
text_result[0] = store_alt_text_missing(debug_df, None, bar_label)
213+
text_result[1] = store_alt_html_missing(debug_df, None, bar_label)
214+
# ax.yaxis.set_visible(False)
204215

205216
# Adds chart title, x and y axis label to the 100% Stacked Bar Chart
206-
def add_stacked_bar_chart_title(fig, ax, plot_title, file_name):
217+
def set_title_and_save(fig, text_results, plot_title, file_name):
    """Add the shared labels and title to a stacked bar figure and save it.

    Also concatenates the per-bar text results into a single alt text and a
    single alt HTML document and hands them to access_alt_text /
    access_alt_html under the same file name.

    Inputs:
        fig = figure holding one axis per bar
        text_results = one [alt_text, alt_html] pair per bar, in axis order
        plot_title = overall plot title
        file_name = base name (without extension) used for the saved outputs
    """
    # Labels and title are common to all sub-plots, so they go on the figure.
    # The first axis is only used to derive the vertical offset shared by the
    # x label and the "Last updated" footer.
    ax = fig.get_axes()[0]
    footer_y = ax.xaxis.get_label().get_position()[0] - 0.62
    fig.supxlabel('Proportion (Count)', fontsize=20, x=0.5, y=footer_y, va='top')
    fig.supylabel('Trip Types', fontsize=20, x=-0.12, y=0.5, rotation='vertical')
    fig.suptitle(plot_title, fontsize=25, va='bottom')
    plt.text(x=0, y=footer_y, s=f"Last updated {arrow.get()}", fontsize=12)
    plt.subplots_adjust(hspace=0.1, top=0.95)

    # Set up title and concatenate the text results.
    # TODO: Consider using a dictionary or a data object instead of an array
    # of arrays for greater clarity
    # Index 0 of each entry is the alt text, index 1 is the alt HTML.
    concat_alt_text = plot_title + "".join(entry[0] for entry in text_results)
    access_alt_text(concat_alt_text, file_name, 'w')

    # One paragraph per bar, always taken from the HTML slot (index 1); the
    # second bar previously used its plain-text slot by mistake.
    bar_paragraphs = "\n".join(f"    <p>{entry[1]}</p>" for entry in text_results)
    concat_alt_html = f"""
    <!DOCTYPE html>
    <html>
    <body>
    <p>{plot_title}</p>
{bar_paragraphs}
    </body>
    </html>
    """
    access_alt_html(concat_alt_html, file_name, 'w')

    fig.savefig(SAVE_DIR + file_name + ".png", bbox_inches='tight')
    plt.show()
215246

@@ -452,7 +483,7 @@ def access_alt_html(html_content, chart_name, write_permission):
452483
return html_content
453484

454485
# Appends bar information into the alt_html
455-
def store_alt_text_and_html_stacked_bar_chart(df, chart_name, var_name):
486+
def store_alt_text_and_html_stacked_bar_chart(df, var_name):
456487
""" Inputs:
457488
df = dataframe combining columns as Trip Type, Label, Value, Proportion
458489
chart_name = name of the chart
@@ -461,16 +492,12 @@ def store_alt_text_and_html_stacked_bar_chart(df, chart_name, var_name):
461492
alt_text = f"\nStacked Bar of: {var_name}\n"
462493
for i in range(len(df)):
463494
alt_text += f"{df['Label'].iloc[i]} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n"
464-
alt_text = access_alt_text(alt_text, chart_name, 'a')
465495

466496
# Generate html table
467497
alt_html = "\n"
468498
for i in range(len(df)):
469499
alt_html += f"<tr><td>{df['Label'].iloc[i]}</td><td>{df['Value'].iloc[i]}</td><td>{df['Proportion'].iloc[i]}%</td></tr>"
470500
html_content = f"""
471-
<!DOCTYPE html>
472-
<html>
473-
<body>
474501
<p>Trip Type: {var_name}</p>
475502
<table border="1" style="background-color: white;">
476503
<tr>
@@ -480,31 +507,7 @@ def store_alt_text_and_html_stacked_bar_chart(df, chart_name, var_name):
480507
</tr>
481508
{alt_html}
482509
</table>
483-
</body>
484-
</html>
485510
"""
486-
alt_html = access_alt_html(html_content, chart_name, 'a')
487-
488-
return alt_text, alt_html
489-
490-
# Creates the html file, and appends plot_title
491-
def create_alt_text_and_html_title(plot_title, chart_name):
492-
""" Inputs:
493-
plot_title = Overall plot title
494-
chart_name = name of the chart
495-
"""
496-
alt_text = access_alt_text(plot_title, chart_name, 'w')
497-
498-
alt_html = f"""
499-
<!DOCTYPE html>
500-
<html>
501-
<body>
502-
<p>{plot_title}</p>
503-
</body>
504-
</html>
505-
"""
506-
alt_html = access_alt_html(alt_html, chart_name, 'w')
507-
508511
return alt_text, alt_html
509512

510513
def generate_missing_plot(plot_title,debug_df,file_name):
@@ -546,5 +549,27 @@ def store_alt_text_missing(df, chart_name, var_name):
546549
alt_text = f"Unable to generate\nBar chart of {var_name}.\nReason:"
547550
for i in range(0,len(df)):
548551
alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0], 1)}."
549-
alt_text = access_alt_text(alt_text, chart_name)
552+
553+
# For the bar charts, there is no longer a 1:1 mapping between missing alt
554+
# text and a file. So we want to collect all the alt_text as strings and
555+
# then save it. We cannot just remove the call to `access_alt_text`, since
556+
# it will break other uses. So let's pass in None for the chart_name if we
557+
# don't want to save it.
558+
if chart_name is not None:
559+
alt_text = access_alt_text(alt_text, chart_name)
560+
return alt_text
561+
562+
# TODO Change this to HTML output instead of alt-text
563+
def store_alt_html_missing(df, chart_name, var_name):
    """Build the "unable to generate" message for a bar that could not be drawn.

    Inputs:
        df = dataframe with index of debug information, first column is counts
        chart_name = name to store the result under, or None to only build
                     and return the string without storing it
        var_name = label of the bar that could not be generated
    Returns:
        the message; when chart_name is given, whatever access_alt_html
        returns for it
    """
    # TODO Change this to HTML output instead of alt-text
    # One " <index> is <count>." sentence per debug row.
    reasons = "".join(
        f" {debug_key} is {np.round(df.iloc[row, 0], 1)}."
        for row, debug_key in enumerate(df.index)
    )
    alt_text = f"Unable to generate\nBar chart of {var_name}.\nReason:" + reasons

    if chart_name is not None:
        # access_alt_html requires the write mode as a third argument; 'w'
        # matches the other callers that create a fresh file for a chart.
        # NOTE(review): confirm overwrite (rather than append) is intended.
        alt_text = access_alt_html(alt_text, chart_name, 'w')
    return alt_text

0 commit comments

Comments
 (0)