Skip to content

Commit 85f0e04

Browse files
committed
Change cov filling
1 parent 4547f6f commit 85f0e04

File tree

3 files changed

+136
-40
lines changed

3 files changed

+136
-40
lines changed

flasc/analysis/expected_power_analysis.py

+29-20
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
_fill_cov_with_var,
1919
_null_and_sync_covariance,
2020
_set_cov_to_zero,
21-
_synchronize_cov_nulls_back_to_mean,
2221
_synchronize_nulls,
22+
_synchronize_var_nulls_back_to_mean,
2323
)
2424
from flasc.data_processing.dataframe_manipulations import df_reduce_precision
2525
from flasc.logging_manager import LoggingManager
@@ -134,11 +134,6 @@ def _total_uplift_expected_power_single(
134134
/ df_sum.filter(pl.col("df_name") == uplift_pair[0])["weighted_power"].to_numpy()[0]
135135
)
136136

137-
# with pl.Config(tbl_cols=-1):
138-
# print(df_bin)
139-
# print(df_sum)
140-
# print(uplift_results)
141-
142137
return df_bin, df_sum, uplift_results
143138

144139

@@ -259,6 +254,7 @@ def _total_uplift_expected_power_with_standard_error(
259254
percentiles: List[float] = [2.5, 97.5],
260255
remove_any_null_turbine_bins: bool = False,
261256
set_cov_to_zero_or_var: str = "zero",
257+
use_cov_when_available: bool = False,
262258
# variance_only: bool = False,
263259
# fill_cov_with_var: bool = False,
264260
) -> Dict[str, Dict[str, float]]:
@@ -289,6 +285,8 @@ def _total_uplift_expected_power_with_standard_error(
289285
and of the test turbines is null. Defaults to False.
290286
set_cov_to_zero_or_var (str): Set the covariance to zero or product of variances.
291287
Can be "zero" or "var". Defaults to "zero".
288+
use_cov_when_available (bool): Use the covariance terms when available. If True,
289+
set_cov_to_zero_or_var must be 'var'. Defaults to False.
292290
293291
294292
Returns:
@@ -311,30 +309,38 @@ def _total_uplift_expected_power_with_standard_error(
311309
ws_max=ws_max,
312310
)
313311

314-
with pl.Config(tbl_cols=-1):
315-
print(df_)
312+
# with pl.Config(tbl_cols=-1):
313+
# print(df_)
316314

317315
# Compute the covariance frame
318316
df_cov = _compute_covariance(
319317
df_, test_cols=test_cols, bin_cols_with_df_name=bin_cols_with_df_name
320318
)
321319

322-
with pl.Config(tbl_cols=-1):
323-
print(df_cov)
320+
# with pl.Config(tbl_cols=-1):
321+
# print(df_cov)
324322

325323
# In current version of code, covariances are either set to 0 or set to
326324
# product of variances
327325
if set_cov_to_zero_or_var == "zero":
328-
df_cov = _set_cov_to_zero(df_cov, test_cols=test_cols)
326+
if use_cov_when_available:
327+
raise ValueError(
328+
"use_cov_when_available cannot be True when set_cov_to_zero_or_var is 'zero'"
329+
)
330+
else:
331+
df_cov = _set_cov_to_zero(df_cov, test_cols=test_cols)
329332
elif set_cov_to_zero_or_var == "var":
330-
df_cov = _fill_cov_with_var(df_cov, test_cols=test_cols, fill_all=True)
333+
if use_cov_when_available:
334+
df_cov = _fill_cov_with_var(df_cov, test_cols=test_cols, fill_all=False)
335+
else:
336+
df_cov = _fill_cov_with_var(df_cov, test_cols=test_cols, fill_all=True)
331337
else:
332338
raise ValueError(
333339
f"set_cov_to_zero_or_var must be 'zero' or 'var', not {set_cov_to_zero_or_var}"
334340
)
335341

336-
with pl.Config(tbl_cols=-1):
337-
print(df_cov)
342+
# with pl.Config(tbl_cols=-1):
343+
# print(df_cov)
338344

339345
# # If filling missing covariance terms, do it now
340346
# if fill_cov_with_var:
@@ -352,8 +358,8 @@ def _total_uplift_expected_power_with_standard_error(
352358
bin_cols_without_df_name=bin_cols_without_df_name,
353359
)
354360

355-
with pl.Config(tbl_cols=-1):
356-
print(df_cov)
361+
# with pl.Config(tbl_cols=-1):
362+
# print(df_cov)
357363

358364
# Bin and group the dataframe
359365
df_bin = _bin_and_group_dataframe_expected_power(
@@ -362,14 +368,17 @@ def _total_uplift_expected_power_with_standard_error(
362368
bin_cols_without_df_name=bin_cols_without_df_name,
363369
)
364370

365-
with pl.Config(tbl_cols=-1):
366-
print(df_bin)
371+
# with pl.Config(tbl_cols=-1):
372+
# print(df_bin)
367373

368374
# Join the covariance dataframe to df_bin
369375
df_bin = df_bin.join(df_cov, on=bin_cols_with_df_name, how="left")
370376

371-
# Synchronize any null values in the covariance back to df_bin
372-
df_bin = _synchronize_cov_nulls_back_to_mean(df_bin=df_bin, test_cols=test_cols)
377+
# DROPPING THIS AS REDUNDANT TO COPYING BACK NULL VARIANCE TO MEAN
378+
# # Synchronize any null values in the covariance back to df_bin
379+
# df_bin = _synchronize_cov_nulls_back_to_mean(df_bin=df_bin, test_cols=test_cols)
380+
381+
df_bin = _synchronize_var_nulls_back_to_mean(df_bin=df_bin, test_cols=test_cols)
373382

374383
with pl.Config(tbl_cols=-1):
375384
print(df_bin)

flasc/analysis/expected_power_analysis_utilities.py

+54-15
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,15 @@ def _set_cov_to_zero(
405405
return df_cov
406406

407407

408-
def _synchronize_cov_nulls_back_to_mean(
408+
def _synchronize_var_nulls_back_to_mean(
409409
df_bin: pl.DataFrame,
410410
test_cols: List[str],
411411
) -> pl.DataFrame:
412-
"""For each row, for any turbine with a null var or cov, null mean power.
412+
"""For each row, for any turbine with a null var, null mean power.
413413
414-
For each row, if there are any turbines in df_cov with undefined variances or covariances
415-
(because count < 2), then the mean power for those turbines would get set to Null as well.
414+
For each row, if there are any turbines with undefined variances
415+
(because count < 2), then the mean power for
416+
those turbines would get set to Null as well.
416417
417418
Args:
418419
df_bin (pl.DataFrame): A polars dataframe with the mean and variance of the test
@@ -423,25 +424,63 @@ def _synchronize_cov_nulls_back_to_mean(
423424
pl.DataFrame: Updated df_bin dataframe
424425
"""
425426
n_test_cols = len(test_cols)
426-
all_cov_cols = [f"cov_{t1}_{t2}" for t1, t2 in product(test_cols, test_cols)]
427+
# all_cov_cols = [f"cov_{t1}_{t2}" for t1, t2 in product(test_cols, test_cols)]
427428

428429
# Loop over each test column and its mean column
429430
for t1_idx in range(n_test_cols):
430431
t1 = test_cols[t1_idx]
431432
t1_mean_col = f"{t1}_mean"
432-
433-
# Get a list of cov_cols that include the present turbine
434-
cov_cols = [c for c in all_cov_cols if t1 in c]
435-
436-
# Get a mask for the rows where any of the cov_cols are null
437-
# using the horizontal or operator
438-
mask = df_bin.select(
439-
pl.any_horizontal([pl.col(c).is_null() for c in cov_cols]).alias("mask")
440-
)
433+
t1_var_col = f"cov_{t1}_{t1}"
441434

442435
# Set the mean power to null for the rows where the variance is null
443436
df_bin = df_bin.with_columns(
444-
pl.when(mask["mask"]).then(None).otherwise(pl.col(t1_mean_col)).alias(t1_mean_col)
437+
pl.when(pl.col(t1_var_col).is_null())
438+
.then(None)
439+
.otherwise(pl.col(t1_mean_col))
440+
.alias(t1_mean_col)
445441
)
446442

447443
return df_bin
444+
445+
446+
# REMOVING THIS FUNCTION AS TOO STRICT
447+
# def _synchronize_cov_nulls_back_to_mean(
448+
# df_bin: pl.DataFrame,
449+
# test_cols: List[str],
450+
# ) -> pl.DataFrame:
451+
# """For each row, for any turbine with a null var or cov, null mean power.
452+
453+
# For each row, if there are any turbines in df_cov with undefined variances or covariances
454+
# (because count < 2), then the mean power for those turbines would get set to Null as well.
455+
456+
# Args:
457+
# df_bin (pl.DataFrame): A polars dataframe with the mean and variance of the test
458+
# columns grouped by bin columns.
459+
# test_cols (List[str]): A list of column names to calculate the covariance of
460+
461+
# Returns:
462+
# pl.DataFrame: Update df_bin dataframe
463+
# """
464+
# n_test_cols = len(test_cols)
465+
# all_cov_cols = [f"cov_{t1}_{t2}" for t1, t2 in product(test_cols, test_cols)]
466+
467+
# # Loop over all combinations of test columns for the mean column
468+
# for t1_idx in range(n_test_cols):
469+
# t1 = test_cols[t1_idx]
470+
# t1_mean_col = f"{t1}_mean"
471+
472+
# # Get a list of cov_cols that include the present turbine
473+
# cov_cols = [c for c in all_cov_cols if t1 in c]
474+
475+
# # Get a mask for the rows where any of the cov_cols are null
476+
# # using the horizontal or operator
477+
# mask = df_bin.select(
478+
# pl.any_horizontal([pl.col(c).is_null() for c in cov_cols]).alias("mask")
479+
# )
480+
481+
# # Set the mean power to null for the rows where the mask is true
482+
# df_bin = df_bin.with_columns(
483+
# pl.when(mask["mask"]).then(None).otherwise(pl.col(t1_mean_col)).alias(t1_mean_col)
484+
# )
485+
486+
# return df_bin

tests/expected_power_analysis_test.py

+53-5
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
_get_num_points_pair,
1919
_null_and_sync_covariance,
2020
_set_cov_to_zero,
21-
_synchronize_cov_nulls_back_to_mean,
2221
_synchronize_nulls,
22+
# _synchronize_cov_nulls_back_to_mean,
23+
_synchronize_var_nulls_back_to_mean,
2324
)
2425

2526

@@ -555,7 +556,7 @@ def test_set_cov_to_zero():
555556
assert_frame_equal(zero_cov_df, expected_df, check_row_order=False, check_dtypes=False)
556557

557558

558-
def test_synchronize_cov_nulls_back_to_mean():
559+
def test__synchronize_var_nulls_back_to_mean():
559560
test_df_bin = pl.DataFrame(
560561
{
561562
"wd_bin": [0, 0, 1, 1],
@@ -583,8 +584,8 @@ def test_synchronize_cov_nulls_back_to_mean():
583584
"wd_bin": [0, 0, 1, 1],
584585
"ws_bin": [0, 1, 0, 1],
585586
"df_name": ["baseline", "baseline", "baseline", "baseline"],
586-
"pow_000_mean": [1, 2, None, None],
587-
"pow_001_mean": [5, 6, None, 8],
587+
"pow_000_mean": [1, 2, 3, None],
588+
"pow_001_mean": [5, 6, 7, 8],
588589
}
589590
)
590591

@@ -594,14 +595,61 @@ def test_synchronize_cov_nulls_back_to_mean():
594595
test_df_cov, on=["wd_bin", "ws_bin", "df_name"], how="left"
595596
)
596597

597-
df_res = _synchronize_cov_nulls_back_to_mean(
598+
df_res = _synchronize_var_nulls_back_to_mean(
598599
df_bin=test_df_bin,
599600
test_cols=["pow_000", "pow_001"],
600601
)
601602

602603
assert_frame_equal(df_res, expected_df_bin, check_row_order=False, check_dtypes=False)
603604

604605

606+
# def test_synchronize_cov_nulls_back_to_mean():
607+
# test_df_bin = pl.DataFrame(
608+
# {
609+
# "wd_bin": [0, 0, 1, 1],
610+
# "ws_bin": [0, 1, 0, 1],
611+
# "df_name": ["baseline", "baseline", "baseline", "baseline"],
612+
# "pow_000_mean": [1, 2, 3, 4],
613+
# "pow_001_mean": [5, 6, 7, 8],
614+
# }
615+
# )
616+
617+
# test_df_cov = pl.DataFrame(
618+
# {
619+
# "wd_bin": [0, 0, 1, 1],
620+
# "ws_bin": [0, 1, 0, 1],
621+
# "df_name": ["baseline", "baseline", "baseline", "baseline"],
622+
# "cov_pow_000_pow_000": [1, 2, 3, None],
623+
# "cov_pow_000_pow_001": [5, 6, None, 8],
624+
# "cov_pow_001_pow_000": [9, 10, 11, 12],
625+
# "cov_pow_001_pow_001": [13, 14, 15, 16],
626+
# }
627+
# )
628+
629+
# expected_df_bin = pl.DataFrame(
630+
# {
631+
# "wd_bin": [0, 0, 1, 1],
632+
# "ws_bin": [0, 1, 0, 1],
633+
# "df_name": ["baseline", "baseline", "baseline", "baseline"],
634+
# "pow_000_mean": [1, 2, None, None],
635+
# "pow_001_mean": [5, 6, None, 8],
636+
# }
637+
# )
638+
639+
# # Join the covariance dataframe to df_bin
640+
# test_df_bin = test_df_bin.join(test_df_cov, on=["wd_bin", "ws_bin", "df_name"], how="left")
641+
# expected_df_bin = expected_df_bin.join(
642+
# test_df_cov, on=["wd_bin", "ws_bin", "df_name"], how="left"
643+
# )
644+
645+
# df_res = _synchronize_cov_nulls_back_to_mean(
646+
# df_bin=test_df_bin,
647+
# test_cols=["pow_000", "pow_001"],
648+
# )
649+
650+
# assert_frame_equal(df_res, expected_df_bin, check_row_order=False, check_dtypes=False)
651+
652+
605653
def test_total_uplift_expected_power_with_standard_error():
606654
a_in = load_repeated_data()
607655

0 commit comments

Comments
 (0)