Skip to content

Commit

Permalink
fix: issue with empty optional keywords
Browse files Browse the repository at this point in the history
If specifying only a keyword without the data for an optional keyword in
the yaml we would not get a validation error. I.e. the json-schema did
not capture the error. In addition, the validation context setup did not
account for None in the data, expecting either unset or some data (not
None). This caused a type error since we then tried to use None as a
list.

Adding only `MODELS:` to the yaml would reproduce the error. Removing
`MODELS:` or adding an empty list `MODELS: []` would make it valid.
  • Loading branch information
jsolaas committed Feb 26, 2025
1 parent 696a93e commit a498551
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 73 deletions.
101 changes: 42 additions & 59 deletions src/libecalc/presentation/yaml/yaml_models/pyyaml_yaml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import yaml
from pydantic import TypeAdapter
from pydantic import ValidationError as PydanticValidationError
from typing_extensions import deprecated
from yaml import (
SafeLoader,
)
Expand All @@ -24,7 +23,6 @@
DataValidationError,
DtoValidationError,
DumpFlowStyle,
ValidationError,
)
from libecalc.presentation.yaml.yaml_entities import (
ResourceStream,
Expand Down Expand Up @@ -277,12 +275,40 @@ def read_yaml(
def name(self):
return self._name

def _get_yaml_data_or_default(self, keyword, factory):
"""
Function used to get data when we don't want validation to fail, only get the data if available.
Args:
keyword: keyword to get from the yaml
factory: builtin type that should work as a factory and for typechecking
Returns: data for keyword if available, else default created by factory
"""
default = factory()
if not isinstance(self._internal_datamodel, dict):
return default

data = self._internal_datamodel.get(keyword, default)

if data is None or not isinstance(data, factory):
return default

return data

def _get_yaml_list_or_empty(self, keyword: str) -> list:
return self._get_yaml_data_or_default(keyword, list)

def _get_yaml_dict_or_empty(self, keyword: str) -> dict:
return self._get_yaml_data_or_default(keyword, dict)

@property
def facility_resource_names(self) -> list[str]:
facility_input_data = self._internal_datamodel.get(EcalcYamlKeywords.facility_inputs, [])
facility_input_data = self._get_yaml_list_or_empty(EcalcYamlKeywords.facility_inputs)
model_curves_data = [
model.get(model_curves)
for model in self._internal_datamodel.get(EcalcYamlKeywords.models, [])
for model in self._get_yaml_list_or_empty(EcalcYamlKeywords.models)
for model_curves in [EcalcYamlKeywords.consumer_chart_curves, EcalcYamlKeywords.consumer_chart_curve]
if isinstance(model.get(model_curves), dict)
]
Expand All @@ -295,7 +321,7 @@ def facility_resource_names(self) -> list[str]:
@property
def timeseries_resources(self) -> list[YamlTimeseriesResource]:
timeseries_resources = []
for resource in self._internal_datamodel.get(EcalcYamlKeywords.time_series, []):
for resource in self._get_yaml_list_or_empty(EcalcYamlKeywords.time_series):
try:
timeseries_type = YamlTimeseriesType[resource.get(EcalcYamlKeywords.type)]
except KeyError as ke:
Expand All @@ -313,24 +339,6 @@ def timeseries_resources(self) -> list[YamlTimeseriesResource]:
)
return timeseries_resources

@property
def all_resource_names(self) -> list[str]:
facility_resource_names = self.facility_resource_names
timeseries_resource_names = [resource.name for resource in self.timeseries_resources]
return [*facility_resource_names, *timeseries_resource_names]

@property
@deprecated("Deprecated, variables in combination with validate should be used instead")
def variables_raise_if_invalid(self) -> YamlVariables:
if not isinstance(self._internal_datamodel, dict):
raise ValidationError("Yaml model is invalid.")

variables = self._internal_datamodel.get(EcalcYamlKeywords.variables, {})
try:
return TypeAdapter(YamlVariables).validate_python(variables)
except PydanticValidationError as e:
raise DtoValidationError(data=variables, validation_error=e) from e

@property
def variables(self) -> YamlVariables:
"""
Expand All @@ -339,10 +347,7 @@ def variables(self) -> YamlVariables:
Returns: valid variables
"""
if not isinstance(self._internal_datamodel, dict):
return {}

variables = self._internal_datamodel.get(EcalcYamlKeywords.variables, {})
variables = self._get_yaml_dict_or_empty(EcalcYamlKeywords.variables)

valid_variables = {}
for reference, variable in variables.items():
Expand All @@ -357,20 +362,17 @@ def variables(self) -> YamlVariables:

@property
def yaml_variables(self) -> dict[YamlVariableReferenceId, dict]:
if not isinstance(self._internal_datamodel, dict):
return {}

return self._internal_datamodel.get(EcalcYamlKeywords.variables, {})
"""
Get the internal data for variables directly.
Returns:
@property
@deprecated("Deprecated, facility_inputs in combination with validate should be used instead")
def facility_inputs_raise_if_invalid(self):
return self._internal_datamodel.get(EcalcYamlKeywords.facility_inputs, [])
"""
return self._get_yaml_dict_or_empty(EcalcYamlKeywords.variables)

@property
def facility_inputs(self) -> list[YamlFacilityModel]:
facility_inputs = []
for facility_input in self._internal_datamodel.get(EcalcYamlKeywords.facility_inputs, []):
for facility_input in self._get_yaml_list_or_empty(EcalcYamlKeywords.facility_inputs):
try:
facility_inputs.append(TypeAdapter(YamlFacilityModel).validate_python(facility_input))
except PydanticValidationError:
Expand All @@ -381,51 +383,32 @@ def facility_inputs(self) -> list[YamlFacilityModel]:
@property
def models(self) -> list[YamlConsumerModel]:
models = []
for model in self._internal_datamodel.get(EcalcYamlKeywords.models, []):
for model in self._get_yaml_list_or_empty(EcalcYamlKeywords.models):
try:
models.append(TypeAdapter(YamlConsumerModel).validate_python(model))
except PydanticValidationError:
pass

return models

@property
@deprecated("Deprecated, time_series in combination with validate should be used instead")
def time_series_raise_if_invalid(self) -> list[YamlTimeSeriesCollection]:
time_series = []
for time_series_data in self._internal_datamodel.get(EcalcYamlKeywords.time_series, []):
try:
time_series.append(TypeAdapter(YamlTimeSeriesCollection).validate_python(time_series_data))
except PydanticValidationError as e:
raise DtoValidationError(data=time_series_data, validation_error=e) from e

return time_series

@property
def time_series(self) -> list[YamlTimeSeriesCollection]:
"""
Get only valid time series, i.e. don't fail if one is invalid.
# TODO: Replace time_series_raise_if_invalid with this method when we are using Yaml-class validation everywhere. Then property
access will always try to get the available information, while ignoring invalid. validate should be used to
validate the full model.
"""
time_series = []
for time_series_data in self._internal_datamodel.get(EcalcYamlKeywords.time_series, []):
for time_series_data in self._get_yaml_list_or_empty(EcalcYamlKeywords.time_series):
try:
time_series.append(TypeAdapter(YamlTimeSeriesCollection).validate_python(time_series_data))
except PydanticValidationError:
pass

return time_series

@property
def models_raise_if_invalid(self):
return self._internal_datamodel.get(EcalcYamlKeywords.models, [])

@property
def fuel_types(self):
fuel_types = []
for fuel_type in self._internal_datamodel.get(EcalcYamlKeywords.fuel_types, []):
for fuel_type in self._get_yaml_list_or_empty(EcalcYamlKeywords.fuel_types):
try:
fuel_types.append(TypeAdapter(YamlFuelType).validate_python(fuel_type))
except PydanticValidationError:
Expand All @@ -435,7 +418,7 @@ def fuel_types(self):
@property
def installations(self) -> Iterable[YamlInstallation]:
installations = []
for installation in self._internal_datamodel.get(EcalcYamlKeywords.installations, []):
for installation in self._get_yaml_list_or_empty(EcalcYamlKeywords.installations):
try:
installations.append(TypeAdapter(YamlInstallation).validate_python(installation))
except PydanticValidationError:
Expand Down
5 changes: 0 additions & 5 deletions src/libecalc/presentation/yaml/yaml_models/yaml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,6 @@ def facility_resource_names(self) -> list[str]:
def timeseries_resources(self) -> list[YamlTimeseriesResource]:
pass

@property
@abc.abstractmethod
def all_resource_names(self) -> list[str]:
pass

@property
@abc.abstractmethod
def variables(self) -> dict[str, YamlVariable]:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Optional

from pydantic import ConfigDict, Field, field_validator, model_validator
from pydantic_core.core_schema import ValidationInfo

Expand Down Expand Up @@ -30,20 +28,20 @@ class YamlAsset(YamlBase):
title="Asset",
)

time_series: Optional[list[YamlTimeSeriesCollection]] = Field(
None,
time_series: list[YamlTimeSeriesCollection] = Field(
default_factory=list,
title="TIME_SERIES",
description="Defines the inputs for time dependent variables, or 'reservoir variables'."
"\n\n$ECALC_DOCS_KEYWORDS_URL/TIME_SERIES",
)
facility_inputs: Optional[list[YamlFacilityModel]] = Field(
None,
facility_inputs: list[YamlFacilityModel] = Field(
default_factory=list,
title="FACILITY_INPUTS",
description="Defines input files which characterize various facility elements."
"\n\n$ECALC_DOCS_KEYWORDS_URL/FACILITY_INPUTS",
)
models: Optional[list[YamlConsumerModel]] = Field(
None,
models: list[YamlConsumerModel] = Field(
default_factory=list,
title="MODELS",
description="Defines input files which characterize various facility elements."
"\n\n$ECALC_DOCS_KEYWORDS_URL/MODELS",
Expand All @@ -55,7 +53,7 @@ class YamlAsset(YamlBase):
"\n\n$ECALC_DOCS_KEYWORDS_URL/FUEL_TYPES",
)
variables: YamlVariables = Field(
None,
default_factory=dict, # type: ignore
title="VARIABLES",
description="Defines variables used in an energy usage model by means of expressions or constants."
"\n\n$ECALC_DOCS_KEYWORDS_URL/VARIABLES",
Expand Down
30 changes: 30 additions & 0 deletions tests/libecalc/input/test_file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from libecalc.fixtures.cases import input_file_examples
from libecalc.infrastructure import file_io
from libecalc.presentation.yaml import yaml_entities
from libecalc.presentation.yaml.validation_errors import DtoValidationError
from libecalc.presentation.yaml.yaml_entities import YamlTimeseriesType
from libecalc.presentation.yaml.yaml_models.exceptions import DuplicateKeyError
from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel
Expand Down Expand Up @@ -294,6 +295,35 @@ def test_detect_duplicate_keys_in_yaml(self):
PyYamlYamlModel.read_yaml(main_yaml=main_yaml)
assert "Duplicate key" in str(ve.value)

@pytest.mark.snapshot
@pytest.mark.inlinesnapshot
def test_empty_optional_list_keyword(self):
"""
Make sure we can get the property for an empty optional list keyword (MODELS), while still catching the error
in validation.
The property is used before validation, therefore we need to handle invalid and empty data.
"""
main_text = """MODELS:"""

main_yaml = yaml_entities.ResourceStream(
stream=StringIO(main_text),
name="main.yaml",
)

yaml_reader = PyYamlYamlModel.read(main_yaml=main_yaml)

# Make sure we can get the property even when invalid
assert yaml_reader.models == []

# Make sure we catch the error in validation
with pytest.raises(DtoValidationError) as exc_info:
yaml_reader.validate({})

for error in exc_info.value.errors():
if "MODELS" in error.location.keys:
assert error.message == snapshot("Input should be a valid list")

@pytest.mark.snapshot
@pytest.mark.inlinesnapshot
def test_time_series_missing_headers(self):
Expand Down

0 comments on commit a498551

Please sign in to comment.