Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

integrate smarteole example into ci #45

Merged
merged 5 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tests/test_data/smarteole/* filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions .github/workflows/lint-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ jobs:
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v4
with:
lfs: 'true'
- name: "set up Python ${{ matrix.python-version }}"
uses: actions/setup-python@v5
with:
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ Use `poe all` to run all required pre-push commands (make sure the virtual envir
## Running tests
Install dev dependencies and use `poe test` to run unit tests (make sure the virtual environment is activated)

For convenience when developing locally, run `poe test-fast` to avoid running the tests marked as slow.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, thanks for adding this!


## License
See [`LICENSE.txt`](LICENSE.txt)

Expand Down
Binary file not shown.
268 changes: 144 additions & 124 deletions examples/smarteole_example.ipynb

Large diffs are not rendered by default.

242 changes: 143 additions & 99 deletions examples/smarteole_example.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from __future__ import annotations

import logging
import sys
import zipfile
from functools import partial
from pathlib import Path
from typing import IO, NamedTuple

import pandas as pd
from pandas.testing import assert_frame_equal
Expand All @@ -13,7 +16,7 @@
from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns
from wind_up.interface import AssessmentInputs
from wind_up.main_analysis import run_wind_up_analysis
from wind_up.models import PlotConfig, WindUpConfig
from wind_up.models import Asset, PlotConfig, Toggle, Turbine, WindUpConfig
from wind_up.reanalysis_data import ReanalysisDataset

sys.path.append(str(PROJECTROOT_DIR))
Expand All @@ -32,9 +35,19 @@
ZIP_FILENAME = "SMARTEOLE-WFC-open-dataset.zip"
MINIMUM_DATA_COUNT_COVERAGE = 0.5 # 50% of the data must be present

DEFAULT_SCADA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv"
DEFAULT_METADATA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv"
DEFAULT_TOGGLE_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv"

REANALYSIS_DATA_FILE_PATH = (
PROJECTROOT_DIR / "tests/test_data/smarteole/ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet"
)


@with_parquet_cache(CACHE_SUBDIR / "smarteole_scada.parquet")
def unpack_smarteole_scada(timebase_s: int) -> pd.DataFrame:
def unpack_smarteole_scada(
timebase_s: int, scada_data_file: Path | str | IO[bytes] = DEFAULT_SCADA_FILE_PATH
) -> pd.DataFrame:
"""
Function that translates 1-minute SCADA data to x minute data in the wind-up expected format
"""
Expand Down Expand Up @@ -81,64 +94,62 @@ def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame:
)

# unzipping the data in memory and only reading the relevant files
scada_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv"
circular_mean = partial(circmean, low=0, high=360)
with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
return (
pd.read_csv(zf.open(scada_fpath), parse_dates=[0], index_col=0)
.pipe(_make_turbine_id_a_column)
.groupby(DataColumns.turbine_name)
.resample(f"{timebase_s}s")
.aggregate(
{
"active_power_avg": "mean",
"active_power_std": "mean",
"active_power_count": "sum",
"wind_speed_avg": "mean",
"wind_speed_std": "mean",
"wind_speed_count": "sum",
"blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap
"blade_1_pitch_angle_count": "sum",
"generator_speed_avg": "mean",
"generator_speed_count": "sum",
"temperature_avg": "mean",
"temperature_count": "sum",
"nacelle_position_avg": circular_mean,
"nacelle_position_max": "max",
"nacelle_position_min": "min",
"nacelle_position_count": "sum",
}
)
.reset_index(DataColumns.turbine_name)
.pipe(_map_and_mask_cols)
.loc[:, DataColumns.all()]
.rename_axis(TIMESTAMP_COL, axis=0)
.rename_axis(None, axis=1)
return (
pd.read_csv(scada_data_file, parse_dates=[0], index_col=0)
.pipe(_make_turbine_id_a_column)
.groupby(DataColumns.turbine_name)
.resample(f"{timebase_s}s")
.aggregate(
{
"active_power_avg": "mean",
"active_power_std": "mean",
"active_power_count": "sum",
"wind_speed_avg": "mean",
"wind_speed_std": "mean",
"wind_speed_count": "sum",
"blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap
"blade_1_pitch_angle_count": "sum",
"generator_speed_avg": "mean",
"generator_speed_count": "sum",
"temperature_avg": "mean",
"temperature_count": "sum",
"nacelle_position_avg": circular_mean,
"nacelle_position_max": "max",
"nacelle_position_min": "min",
"nacelle_position_count": "sum",
}
)
.reset_index(DataColumns.turbine_name)
.pipe(_map_and_mask_cols)
.loc[:, DataColumns.all()]
.rename_axis(TIMESTAMP_COL, axis=0)
.rename_axis(None, axis=1)
)


@with_parquet_cache(CACHE_DIR / "smarteole_metadata.parquet")
def unpack_smarteole_metadata(
    timebase_s: int, metadata_file: Path | str | IO[bytes] = DEFAULT_METADATA_FILE_PATH
) -> pd.DataFrame:
    """Build the turbine metadata table from the SMARTEOLE static coordinates CSV.

    Args:
        timebase_s: Analysis timebase in seconds; stored as minutes in the
            ``TimeSpanMinutes`` column.
        metadata_file: Path or open binary handle of the coordinates CSV,
            passed straight to ``pandas.read_csv``.

    Returns:
        One row per entry whose name starts with ``SMV``, with the columns
        ``Name``, ``Latitude``, ``Longitude``, ``TimeZone``,
        ``TimeSpanMinutes`` and ``TimeFormat``.
    """
    # Read with the first column as index, then move it back to a regular column.
    static_df = pd.read_csv(metadata_file, index_col=0).reset_index()
    named_df = static_df.rename(columns={"Turbine": "Name"})
    turbines_df = named_df.query("Name.str.startswith('SMV')")  # is a turbine
    location_df = turbines_df.loc[:, ["Name", "Latitude", "Longitude"]]
    return location_df.assign(TimeZone="UTC", TimeSpanMinutes=timebase_s / 60, TimeFormat="Start")


@with_parquet_cache(CACHE_SUBDIR / "smarteole_toggle.parquet")
def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame:
def unpack_smarteole_toggle_data(
timebase_s: int, toggle_file: Path | str | IO[bytes] = DEFAULT_TOGGLE_FILE_PATH
) -> pd.DataFrame:
ten_minutes_count_lower_limit = timebase_s * MINIMUM_DATA_COUNT_COVERAGE
toggle_value_threshold: float = 0.95

_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv"
with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
raw_df = pd.read_csv(zf.open(_fpath), parse_dates=[0], index_col=0)
raw_df = pd.read_csv(toggle_file, parse_dates=[0], index_col=0)

required_in_cols = [
"control_log_offset_active_avg",
Expand Down Expand Up @@ -169,21 +180,26 @@ def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame:
return toggle_df[["toggle_on", "toggle_off", "yaw_offset_command"]]


def define_smarteole_example_config() -> WindUpConfig:
def define_smarteole_example_config(
analysis_timebase_s: int,
analysis_output_dir: Path,
) -> WindUpConfig:
wtg_map = {
f"SMV{i}": {
"name": f"SMV{i}",
"turbine_type": {
"turbine_type": "Senvion-MM82-2050",
"rotor_diameter_m": 82.0,
"rated_power_kw": 2050.0,
"cutout_ws_mps": 25,
"normal_operation_pitch_range": (-10.0, 35.0),
"normal_operation_genrpm_range": (250.0, 2000.0),
"rpm_v_pw_margin_factor": 0.05,
"pitch_to_stall": False,
},
}
f"SMV{i}": Turbine.model_validate(
{
"name": f"SMV{i}",
"turbine_type": {
"turbine_type": "Senvion-MM82-2050",
"rotor_diameter_m": 82.0,
"rated_power_kw": 2050.0,
"cutout_ws_mps": 25,
"normal_operation_pitch_range": (-10.0, 35.0),
"normal_operation_genrpm_range": (250.0, 2000.0),
"rpm_v_pw_margin_factor": 0.05,
"pitch_to_stall": False,
},
}
)
for i in range(1, 7 + 1)
}
northing_corrections_utc = [
Expand All @@ -196,16 +212,16 @@ def define_smarteole_example_config() -> WindUpConfig:
("SMV7", pd.Timestamp("2020-02-17 16:30:00+0000"), 4.605999999999972),
]

wd_filter_margin = 3 + 7 * ANALYSIS_TIMEBASE_S / 600
wd_filter_margin = 3 + 7 * analysis_timebase_s / 600
return WindUpConfig(
assessment_name="smarteole_example",
timebase_s=ANALYSIS_TIMEBASE_S,
timebase_s=analysis_timebase_s,
require_ref_wake_free=True,
detrend_min_hours=12,
ref_wd_filter=[207 - wd_filter_margin, 236 + wd_filter_margin], # steer is from 207-236
filter_all_test_wtgs_together=True,
use_lt_distribution=False,
out_dir=ANALYSIS_OUTPUT_DIR,
out_dir=analysis_output_dir,
test_wtgs=[wtg_map["SMV6"], wtg_map["SMV5"]],
ref_wtgs=[wtg_map["SMV7"]],
ref_super_wtgs=[],
Expand All @@ -220,21 +236,25 @@ def define_smarteole_example_config() -> WindUpConfig:
years_for_lt_distribution=0,
years_for_detrend=0,
ws_bin_width=1.0,
asset={
"name": "Sole du Moulin Vieux",
"wtgs": list(wtg_map.values()),
"masts_and_lidars": [],
},
asset=Asset.model_validate(
{
"name": "Sole du Moulin Vieux",
"wtgs": list(wtg_map.values()),
"masts_and_lidars": [],
}
),
northing_corrections_utc=northing_corrections_utc,
toggle={
"name": "wake steering",
"toggle_file_per_turbine": False,
"toggle_filename": "SMV_offset_active_toggle_df.parquet",
"detrend_data_selection": "use_toggle_off_data",
"pairing_filter_method": "any_within_timedelta",
"pairing_filter_timedelta_seconds": 3600,
"toggle_change_settling_filter_seconds": 120,
},
toggle=Toggle.model_validate(
{
"name": "wake steering",
"toggle_file_per_turbine": False,
"toggle_filename": "SMV_offset_active_toggle_df.parquet",
"detrend_data_selection": "use_toggle_off_data",
"pairing_filter_method": "any_within_timedelta",
"pairing_filter_timedelta_seconds": 3600,
"toggle_change_settling_filter_seconds": 120,
}
),
)


Expand Down Expand Up @@ -263,49 +283,73 @@ def print_smarteole_results(
assert_frame_equal(print_df, expected_print_df)


if __name__ == "__main__":
setup_logger(ANALYSIS_OUTPUT_DIR / "analysis.log")
logger = logging.getLogger(__name__)
class SmarteoleData(NamedTuple):
    """Container for the three SMARTEOLE dataframes consumed by the example analysis."""

    # SCADA data, as returned by unpack_smarteole_scada
    scada_df: pd.DataFrame
    # turbine metadata, as returned by unpack_smarteole_metadata
    metadata_df: pd.DataFrame
    # toggle data, as returned by unpack_smarteole_toggle_data
    toggle_df: pd.DataFrame

logger.info("Downloading example data from Zenodo")
download_zenodo_data(record_id="7342466", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})

logger.info("Preprocessing (and caching) turbine SCADA data")
scada_df = unpack_smarteole_scada(ANALYSIS_TIMEBASE_S)
logger.info("Preprocessing (and caching) turbine metadata")
metadata_df = unpack_smarteole_metadata(ANALYSIS_TIMEBASE_S)
logger.info("Preprocessing (and caching) toggle data")
toggle_df = unpack_smarteole_toggle_data(ANALYSIS_TIMEBASE_S)
def _download_data_from_zenodo(analysis_timebase_s: int, cache_dir: Path, zip_filename: str) -> SmarteoleData:
    """Fetch the SMARTEOLE archive and unpack the SCADA, metadata and toggle tables.

    Args:
        analysis_timebase_s: Timebase in seconds forwarded to each ``unpack_*`` helper.
        cache_dir: Directory handed to ``download_zenodo_data`` as ``output_dir`` and
            from which the zip archive is then opened.
        zip_filename: Name of the archive inside ``cache_dir``.

    Returns:
        A ``SmarteoleData`` holding the three unpacked dataframes.
    """
    download_zenodo_data(record_id="7342466", output_dir=cache_dir, filenames={zip_filename})
    # Each archive member is opened in its own ``with`` block so its handle is
    # closed deterministically; pandas does not close file objects it is given,
    # and an unclosed member keeps the archive's file descriptor alive.
    with zipfile.ZipFile(cache_dir / zip_filename) as zf:
        with zf.open(DEFAULT_SCADA_FILE_PATH) as scada_file:
            scada_df = unpack_smarteole_scada(analysis_timebase_s, scada_data_file=scada_file)
        with zf.open(DEFAULT_METADATA_FILE_PATH) as metadata_file:
            metadata_df = unpack_smarteole_metadata(analysis_timebase_s, metadata_file=metadata_file)
        with zf.open(DEFAULT_TOGGLE_FILE_PATH) as toggle_file:
            toggle_df = unpack_smarteole_toggle_data(analysis_timebase_s, toggle_file=toggle_file)
    return SmarteoleData(scada_df=scada_df, metadata_df=metadata_df, toggle_df=toggle_df)


def main_smarteole_analysis(
    *,
    smarteole_data: SmarteoleData,
    analysis_timebase_s: int = ANALYSIS_TIMEBASE_S,
    check_results: bool = CHECK_RESULTS,
    analysis_output_dir: Path = ANALYSIS_OUTPUT_DIR,
    cache_sub_dir: Path = CACHE_SUBDIR,
    reanalysis_file_path: Path | str = REANALYSIS_DATA_FILE_PATH,
) -> None:
    """Run the SMARTEOLE wake-steering example analysis on already-unpacked data.

    Args:
        smarteole_data: SCADA, metadata and toggle dataframes to analyse.
        analysis_timebase_s: Timebase in seconds passed to the assessment configuration.
        check_results: Forwarded to ``print_smarteole_results``.
        analysis_output_dir: Directory used for the assessment output and for
            the ``analysis.log`` file.
        cache_sub_dir: Passed to ``AssessmentInputs.from_cfg`` as ``cache_dir``.
        reanalysis_file_path: Parquet file read as the reanalysis dataset.
    """
    # Log next to the other outputs of this run rather than always in the default directory.
    setup_logger(analysis_output_dir / "analysis.log")
    logger = logging.getLogger(__name__)

    logger.info("Merging SMV6 yaw offset command signal into SCADA data")
    toggle_df_no_tz = smarteole_data.toggle_df.copy()
    # NOTE(review): presumably the SCADA index is tz-naive, hence the tz is dropped before merging — confirm
    toggle_df_no_tz.index = toggle_df_no_tz.index.tz_localize(None)
    scada_df = smarteole_data.scada_df.merge(
        toggle_df_no_tz["yaw_offset_command"], left_index=True, right_index=True, how="left"
    )
    # keep the merged command only on SMV6 rows; every other turbine gets 0
    scada_df["yaw_offset_command"] = scada_df["yaw_offset_command"].where(scada_df["TurbineName"] == "SMV6", 0)
    del toggle_df_no_tz

    logger.info("Loading reference reanalysis data")
    reanalysis_dataset = ReanalysisDataset(
        id="ERA5T_50.00N_2.75E_100m_1hr",
        data=pd.read_parquet(reanalysis_file_path),
    )

    logger.info("Defining Assessment Configuration")
    cfg = define_smarteole_example_config(
        analysis_timebase_s=analysis_timebase_s, analysis_output_dir=analysis_output_dir
    )
    plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")

    assessment_inputs = AssessmentInputs.from_cfg(
        cfg=cfg,
        plot_cfg=plot_cfg,
        toggle_df=smarteole_data.toggle_df,
        scada_df=scada_df,
        metadata_df=smarteole_data.metadata_df,
        reanalysis_datasets=[reanalysis_dataset],
        cache_dir=cache_sub_dir,
    )
    results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)

    net_p50, net_p95, net_p5 = calc_net_uplift(results_per_test_ref_df, confidence=0.9)
    print(f"net P50: {net_p50:.1%}, net P95: {net_p95:.1%}, net P5: {net_p5:.1%}")

    print_smarteole_results(results_per_test_ref_df, check_results=check_results)


if __name__ == "__main__":
    # Fetch and unpack the example data with the module defaults, then run the analysis on it.
    main_smarteole_analysis(
        smarteole_data=_download_data_from_zenodo(
            analysis_timebase_s=ANALYSIS_TIMEBASE_S,
            cache_dir=CACHE_DIR,
            zip_filename=ZIP_FILENAME,
        )
    )
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ dev = [
'types-requests',
'ruff',
'mypy',
'requests',
]
examples = [
'jupyterlab',
Expand Down Expand Up @@ -131,6 +132,7 @@ filterwarnings = [
"ignore:Passing unrecognized arguments to super:DeprecationWarning", # pycharm debugger issue
"ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning",
]
markers = ["slow: mark test as slow."]

[tool.coverage.report]
omit = [
Expand All @@ -155,6 +157,10 @@ sequence = [
{ cmd = "mypy ." }
]

[tool.poe.tasks.test-fast]
help = "Runs tests that are not marked as slow"
sequence = [{ cmd = 'python -m pytest -m "not slow"' }]

[tool.poe.tasks.test]
help = "Runs unit tests and show coverage"
sequence = [
Expand Down
Git LFS file not shown
Loading
Loading