diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index f5bb80b34..05b41cf27 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -13,9 +13,11 @@
 import openpyxl
 import pandas as pd
 import seaborn as sns
+import torch
 import xxhash
 from pandas import DataFrame
 from scipy import stats
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.image.containers import Image
 from safeds.data.tabular.typing import ColumnType, Schema
@@ -2392,3 +2394,41 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): #
         data_copy = self._data.reset_index(drop=True)
         data_copy.columns = self.column_names
         return data_copy.__dataframe__(nan_as_null, allow_copy)
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Return a DataLoader for the data stored in this table, used for training neural networks.
+
+        The original table is not modified.
+
+        Parameters
+        ----------
+        batch_size
+            The size of data batches that should be loaded at one time.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        features = self.to_rows()
+        all_rows = []
+        for row in features:
+            new_item = []
+            for column_name in row:
+                new_item.append(row.get_value(column_name))
+            all_rows.append(new_item.copy())
+        return DataLoader(dataset=_CustomDataset(np.array(all_rows)), batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    def __init__(self, features: np.ndarray):
+        self.X = torch.from_numpy(features.astype(np.float32))
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> torch.Tensor:
+        return self.X[item]
+
+    def __len__(self) -> int:
+        return self.len
diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py
index c9c1b1344..c5b72e591 100644
--- a/src/safeds/data/tabular/containers/_tagged_table.py
+++ b/src/safeds/data/tabular/containers/_tagged_table.py
@@ -3,7 +3,10 @@
 import sys
 from typing import TYPE_CHECKING
 
+import numpy as np
+import torch
 import xxhash
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.tabular.containers import Column, Row, Table
 from safeds.exceptions import (
@@ -190,7 +193,9 @@ def __hash__(self) -> int:
         hash : int
             The hash value.
         """
-        return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest()
+        return xxhash.xxh3_64(
+            hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8),
+        ).intdigest()
 
     def __sizeof__(self) -> int:
         """
@@ -871,3 +876,42 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
             target_name=self.target.name,
             feature_names=self.features.column_names,
         )
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Return a DataLoader for the features and target stored in this table, used for training neural networks.
+
+        The original table is not modified.
+
+        Parameters
+        ----------
+        batch_size
+            The size of data batches that should be loaded at one time.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        feature_rows = self.features.to_rows()
+        all_rows = []
+        for row in feature_rows:
+            new_item = []
+            for column_name in row:
+                new_item.append(row.get_value(column_name))
+            all_rows.append(new_item.copy())
+        return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    def __init__(self, features: np.ndarray, target: np.ndarray):
+        self.X = torch.from_numpy(features.astype(np.float32))
+        self.Y = torch.from_numpy(target.astype(np.float32))
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
+        return self.X[item], self.Y[item].unsqueeze(-1)
+
+    def __len__(self) -> int:
+        return self.len
diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py
new file mode 100644
index 000000000..e001f06f2
--- /dev/null
+++ b/src/safeds/ml/nn/__init__.py
@@ -0,0 +1,10 @@
+"""Classes for classification and regression tasks with neural networks."""
+
+from ._fnn_layer import FNNLayer
+from ._model import ClassificationNeuralNetwork, RegressionNeuralNetwork
+
+__all__ = [
+    "FNNLayer",
+    "ClassificationNeuralNetwork",
+    "RegressionNeuralNetwork",
+]
diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py
new file mode 100644
index 000000000..a74df8ff3
--- /dev/null
+++ b/src/safeds/ml/nn/_fnn_layer.py
@@ -0,0 +1,67 @@
+from torch import Tensor, nn
+
+from safeds.exceptions import ClosedBound, OutOfBoundsError
+
+
+class _InternalLayer(nn.Module):
+    def __init__(self, input_size: int, output_size: int, activation_function: str):
+        super().__init__()
+        self._layer = nn.Linear(input_size, output_size)
+        match activation_function:
+            case "sigmoid":
+                self._fn = nn.Sigmoid()
+            case "relu":
+                self._fn = nn.ReLU()
+            case "softmax":
+                self._fn = nn.Softmax(dim=-1)  # explicit axis: the implicit-dim form is deprecated
+            case _:
+                raise ValueError("Unknown Activation Function: " + activation_function)
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self._fn(self._layer(x))
+
+
+class FNNLayer:
+    def __init__(self, output_size: int, input_size: int | None = None):
+        """
+        Create a FNN Layer.
+
+        Parameters
+        ----------
+        input_size
+            The number of neurons in the previous layer
+        output_size
+            The number of neurons in this layer
+
+        Raises
+        ------
+        OutOfBoundsError
+            If input_size < 1
+            If output_size < 1
+
+        """
+        if input_size is not None:
+            self._set_input_size(input_size=input_size)
+        if output_size < 1:
+            raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
+        self._output_size = output_size
+
+    def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
+        return _InternalLayer(self._input_size, self._output_size, activation_function)
+
+    @property
+    def output_size(self) -> int:
+        """
+        Get the output_size of this layer.
+
+        Returns
+        -------
+        result :
+            The Number of Neurons in this layer.
+        """
+        return self._output_size
+
+    def _set_input_size(self, input_size: int) -> None:
+        if input_size < 1:
+            raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1))
+        self._input_size = input_size
diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py
new file mode 100644
index 000000000..20b064f68
--- /dev/null
+++ b/src/safeds/ml/nn/_model.py
@@ -0,0 +1,316 @@
+import copy
+from collections.abc import Callable
+from typing import Self
+
+import torch
+from torch import Tensor, nn
+
+from safeds.data.tabular.containers import Column, Table, TaggedTable
+from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError
+from safeds.ml.nn._fnn_layer import FNNLayer
+
+
+class RegressionNeuralNetwork:
+    def __init__(self, layers: list[FNNLayer]):
+        self._model = _PytorchModel(layers, is_for_classification=False)
+        self._batch_size = 1
+        self._is_fitted = False
+
+    def fit(
+        self,
+        train_data: TaggedTable,
+        epoch_size: int = 25,
+        batch_size: int = 1,
+        callback_on_batch_completion: Callable[[int, float], None] | None = None,
+        callback_on_epoch_completion: Callable[[int, float], None] | None = None,
+    ) -> Self:
+        """
+        Train the neural network with given training data.
+
+        The original model is not modified.
+
+        Parameters
+        ----------
+        train_data
+            The data the network should be trained on.
+        epoch_size
+            The number of times the training cycle should be done.
+        batch_size
+            The size of data batches that should be loaded at one time.
+        callback_on_batch_completion
+            Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average.
+        callback_on_epoch_completion
+            Function used to view metrics while training. Gets called after an epoch is completed with the index of the last epoch and the overall loss average.
+
+        Raises
+        ------
+        OutOfBoundsError
+            If epoch_size < 1
+            If batch_size < 1
+
+        Returns
+        -------
+        trained_model :
+            The trained Model
+        """
+        if epoch_size < 1:
+            raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1))
+        if batch_size < 1:
+            raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1))
+        copied_model = copy.deepcopy(self)
+        copied_model._batch_size = batch_size
+        dataloader = train_data._into_dataloader(copied_model._batch_size)
+
+        loss_fn = nn.MSELoss()
+
+        optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05)
+        loss_sum = 0.0
+        number_of_batches_done = 0
+        for epoch in range(epoch_size):
+            for x, y in dataloader:
+                optimizer.zero_grad()
+
+                pred = copied_model._model(x)
+
+                loss = loss_fn(pred, y)
+                loss_sum += loss.item()
+                loss.backward()
+                optimizer.step()
+                number_of_batches_done += 1
+                if callback_on_batch_completion is not None:
+                    callback_on_batch_completion(
+                        number_of_batches_done,
+                        loss_sum / (number_of_batches_done * batch_size),
+                    )
+            if callback_on_epoch_completion is not None:
+                callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size))
+        copied_model._is_fitted = True
+        copied_model._model.eval()
+        return copied_model
+
+    def predict(self, test_data: Table) -> TaggedTable:
+        """
+        Make a prediction for the given test data.
+
+        The original Model is not modified.
+
+        Parameters
+        ----------
+        test_data
+            The data the network should predict.
+
+        Returns
+        -------
+        prediction :
+            The given test_data with an added "prediction" column at the end
+
+        Raises
+        ------
+        ModelNotFittedError
+            If the model has not been fitted yet
+        """
+        if not self._is_fitted:
+            raise ModelNotFittedError
+        dataloader = test_data._into_dataloader(self._batch_size)
+        predictions = []
+        with torch.no_grad():
+            for x in dataloader:
+                elem = self._model(x)
+                for item in range(len(elem)):
+                    predictions.append(elem[item].item())
+        return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction")
+
+    @property
+    def is_fitted(self) -> bool:
+        """
+        Check if the model is fitted.
+
+        Returns
+        -------
+        is_fitted :
+            Whether the model is fitted.
+        """
+        return self._is_fitted
+
+
+class ClassificationNeuralNetwork:
+    def __init__(self, layers: list[FNNLayer]):
+        self._model = _PytorchModel(layers, is_for_classification=True)
+        self._batch_size = 1
+        self._is_fitted = False
+        self._is_multi_class = layers[-1].output_size > 1
+
+    def fit(
+        self,
+        train_data: TaggedTable,
+        epoch_size: int = 25,
+        batch_size: int = 1,
+        callback_on_batch_completion: Callable[[int, float], None] | None = None,
+        callback_on_epoch_completion: Callable[[int, float], None] | None = None,
+    ) -> Self:
+        """
+        Train the neural network with given training data.
+
+        The original model is not modified.
+
+        Parameters
+        ----------
+        train_data
+            The data the network should be trained on.
+        epoch_size
+            The number of times the training cycle should be done.
+        batch_size
+            The size of data batches that should be loaded at one time.
+        callback_on_batch_completion
+            Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average.
+        callback_on_epoch_completion
+            Function used to view metrics while training. Gets called after an epoch is completed with the index of the last epoch and the overall loss average.
+
+        Raises
+        ------
+        OutOfBoundsError
+            If epoch_size < 1
+            If batch_size < 1
+
+        Returns
+        -------
+        trained_model :
+            The trained Model
+        """
+        if epoch_size < 1:
+            raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1))
+        if batch_size < 1:
+            raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1))
+        copied_model = copy.deepcopy(self)
+        copied_model._batch_size = batch_size
+        dataloader = train_data._into_dataloader(copied_model._batch_size)
+
+        if self._is_multi_class:
+            loss_fn = nn.CrossEntropyLoss()
+        else:
+            loss_fn = nn.BCELoss()
+
+        optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05)
+        loss_sum = 0.0
+        number_of_batches_done = 0
+        for epoch in range(epoch_size):
+            for x, y in dataloader:
+                optimizer.zero_grad()
+                pred = copied_model._model(x)
+                if self._is_multi_class:
+                    pred_size = pred.size(dim=1)
+                    predictions_for_all_items_of_batch = []
+                    for value in range(len(y)):
+                        list_of_probabilities_for_each_category = []
+                        class_index = y[value].item()
+                        for index in range(pred_size):
+                            if index == int(class_index):  # value equality; `is` only works by accident for small ints
+                                list_of_probabilities_for_each_category.append(1.0)
+                            else:
+                                list_of_probabilities_for_each_category.append(0.0)
+                        predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy())
+
+                    y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch)
+
+                    loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred)
+                else:
+                    loss = loss_fn(pred, y)
+                loss_sum += loss.item()
+                loss.backward()
+                optimizer.step()
+                number_of_batches_done += 1
+                if callback_on_batch_completion is not None:
+                    callback_on_batch_completion(
+                        number_of_batches_done,
+                        loss_sum / (number_of_batches_done * batch_size),
+                    )
+            if callback_on_epoch_completion is not None:
+                callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size))
+        copied_model._is_fitted = True
+        copied_model._model.eval()
+        return copied_model
+
+    def predict(self, test_data: Table) -> TaggedTable:
+        """
+        Make a prediction for the given test data.
+
+        The original Model is not modified.
+
+        Parameters
+        ----------
+        test_data
+            The data the network should predict.
+
+        Returns
+        -------
+        prediction :
+            The given test_data with an added "prediction" column at the end
+
+        Raises
+        ------
+        ModelNotFittedError
+            If the Model has not been fitted yet
+        """
+        if not self._is_fitted:
+            raise ModelNotFittedError
+        dataloader = test_data._into_dataloader(self._batch_size)
+        predictions = []
+        with torch.no_grad():
+            for x in dataloader:
+                elem = self._model(x)
+                for item in range(len(elem)):
+                    if not self._is_multi_class:
+                        if elem[item].item() < 0.5:
+                            predicted_class = 0  # pragma: no cover
+                        else:  # pragma: no cover
+                            predicted_class = 1  # pragma: no cover
+                        predictions.append(predicted_class)
+                    else:
+                        values = elem[item].tolist()
+                        highest_value = 0
+                        category_of_highest_value = 0
+                        for index in range(len(values)):
+                            if values[index] > highest_value:
+                                highest_value = values[index]
+                                category_of_highest_value = index
+                        predictions.append(category_of_highest_value)
+        return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction")
+
+    @property
+    def is_fitted(self) -> bool:
+        """
+        Check if the model is fitted.
+
+        Returns
+        -------
+        is_fitted :
+            Whether the model is fitted.
+        """
+        return self._is_fitted
+
+
+class _PytorchModel(nn.Module):
+    def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> None:
+        super().__init__()
+        self._layer_list = fnn_layers
+        internal_layers = []
+        previous_output_size = None
+
+        for layer in fnn_layers:
+            if previous_output_size is not None:
+                layer._set_input_size(previous_output_size)
+            internal_layers.append(layer._get_internal_layer(activation_function="relu"))
+            previous_output_size = layer.output_size
+
+        if is_for_classification:
+            internal_layers.pop()
+            if fnn_layers[-1].output_size > 2:
+                internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="softmax"))
+            else:
+                internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="sigmoid"))
+        self._pytorch_layers = nn.ModuleList(internal_layers)
+
+    def forward(self, x: Tensor) -> Tensor:
+        for layer in self._pytorch_layers:
+            x = layer(x)
+        return x
diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py
new file mode 100644
index 000000000..bcef1bd1d
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py
@@ -0,0 +1,31 @@
+import pytest
+from safeds.data.tabular.containers import Table
+from torch.utils.data import DataLoader
+
+
+@pytest.mark.parametrize(
+    ("data", "target_name", "feature_names"),
+    [
+        (
+            {
+                "A": [1, 4],
+                "B": [2, 5],
+                "C": [3, 6],
+                "T": [0, 0],
+            },
+            "T",
+            ["A", "B", "C"],
+        ),
+    ],
+    ids=[
+        "test",
+    ],
+)
+def test_should_create_dataloader(
+    data: dict[str, list[int]],
+    target_name: str,
+    feature_names: list[str] | None,
+) -> None:
+    tagged_table = Table.from_dict(data).tag_columns(target_name, feature_names)
+    data_loader = tagged_table._into_dataloader(1)
+    assert isinstance(data_loader, DataLoader)
diff --git a/tests/safeds/ml/nn/__init__.py b/tests/safeds/ml/nn/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/safeds/ml/nn/test_fnn_layer.py b/tests/safeds/ml/nn/test_fnn_layer.py
new file mode 100644
index 000000000..e75488bc8
--- /dev/null
+++ b/tests/safeds/ml/nn/test_fnn_layer.py
@@ -0,0 +1,60 @@
+import pytest
+from safeds.exceptions import OutOfBoundsError
+from safeds.ml.nn import FNNLayer
+
+
+@pytest.mark.parametrize(
+    "input_size",
+    [
+        0,
+    ],
+    ids=["input_size_out_of_bounds"],
+)
+def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None:
+    with pytest.raises(
+        OutOfBoundsError,
+        match=rf"input_size \(={input_size}\) is not inside \[1, \u221e\)\.",
+    ):
+        FNNLayer(output_size=1, input_size=input_size)
+
+
+@pytest.mark.parametrize(
+    "activation_function",
+    [
+        "unknown_string",
+    ],
+    ids=["unknown"],
+)
+def test_should_raise_if_unknown_activation_function_is_passed(activation_function: str) -> None:
+    with pytest.raises(
+        ValueError,
+        match=rf"Unknown Activation Function: {activation_function}",
+    ):
+        FNNLayer(output_size=1, input_size=1)._get_internal_layer(activation_function)
+
+
+@pytest.mark.parametrize(
+    "output_size",
+    [
+        0,
+    ],
+    ids=["output_size_out_of_bounds"],
+)
+def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None:
+    with pytest.raises(
+        OutOfBoundsError,
+        match=rf"output_size \(={output_size}\) is not inside \[1, \u221e\)\.",
+    ):
+        FNNLayer(output_size=output_size, input_size=1)
+
+
+@pytest.mark.parametrize(
+    "output_size",
+    [
+        1,
+        20,
+    ],
+    ids=["one", "twenty"],
+)
+def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None:
+    assert FNNLayer(output_size=output_size, input_size=1).output_size == output_size
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
new file mode 100644
index 000000000..3214b171c
--- /dev/null
+++ b/tests/safeds/ml/nn/test_model.py
@@ -0,0 +1,223 @@
+import pytest
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.exceptions import ModelNotFittedError, OutOfBoundsError
+from safeds.ml.nn import ClassificationNeuralNetwork, FNNLayer, RegressionNeuralNetwork
+
+
+class TestClassificationModel:
+    @pytest.mark.parametrize(
+        "epoch_size",
+        [
+            0,
+        ],
+        ids=["epoch_size_out_of_bounds"],
+    )
+    def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None:
+        with pytest.raises(
+            OutOfBoundsError,
+            match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.",
+        ):
+            ClassificationNeuralNetwork([FNNLayer(1, 1)]).fit(
+                Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+                epoch_size=epoch_size,
+            )
+
+    @pytest.mark.parametrize(
+        "batch_size",
+        [
+            0,
+        ],
+        ids=["batch_size_out_of_bounds"],
+    )
+    def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None:
+        with pytest.raises(
+            OutOfBoundsError,
+            match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.",
+        ):
+            ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).fit(
+                Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+                batch_size=batch_size,
+            )
+
+    def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None:
+        fitted_model = ClassificationNeuralNetwork(
+            [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)],
+        ).fit(
+            Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"),
+        )
+        assert isinstance(fitted_model, ClassificationNeuralNetwork)
+
+    def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None:
+        fitted_model = ClassificationNeuralNetwork(
+            [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)],
+        ).fit(
+            Table.from_dict({"a": [1, 0], "b": [0, 1]}).tag_columns("a"),
+        )
+        predictions = fitted_model.predict(Table.from_dict({"b": [1, 0]}))
+        assert isinstance(predictions, TaggedTable)
+
+    def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self) -> None:
+        fitted_model = ClassificationNeuralNetwork(
+            [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=3)],
+        ).fit(
+            Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"),
+        )
+        predictions = fitted_model.predict(Table.from_dict({"b": [1]}))
+        assert isinstance(predictions, TaggedTable)
+
+    def test_should_raise_if_model_has_not_been_fitted(self) -> None:
+        with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."):
+            ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).predict(
+                Table.from_dict({"a": [1]}),
+            )
+
+    def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None:
+        model = ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+        assert not model.is_fitted
+        model = model.fit(
+            Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"),
+        )
+        assert model.is_fitted
+
+    def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None:
+        model = ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)])
+        assert not model.is_fitted
+        model = model.fit(
+            Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"),
+        )
+        assert model.is_fitted
+
+    def test_should_raise_if_fit_doesnt_batch_callback(self) -> None:
+        model = ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+
+        class Test:
+            was_called = False
+
+            def cb(self, ind: int, loss: float) -> None:
+                if ind >= 0 and loss >= 0.0:
+                    self.was_called = True
+
+            def callback_was_called(self) -> bool:
+                return self.was_called
+
+        obj = Test()
+        model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb)
+
+        assert obj.callback_was_called() is True
+
+    def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None:
+        model = ClassificationNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+
+        class Test:
+            was_called = False
+
+            def cb(self, ind: int, loss: float) -> None:
+                if ind >= 0 and loss >= 0.0:
+                    self.was_called = True
+
+            def callback_was_called(self) -> bool:
+                return self.was_called
+
+        obj = Test()
+        model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb)
+
+        assert obj.callback_was_called() is True
+
+
+class TestRegressionModel:
+    @pytest.mark.parametrize(
+        "epoch_size",
+        [
+            0,
+        ],
+        ids=["epoch_size_out_of_bounds"],
+    )
+    def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None:
+        with pytest.raises(
+            OutOfBoundsError,
+            match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.",
+        ):
+            RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).fit(
+                Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+                epoch_size=epoch_size,
+            )
+
+    @pytest.mark.parametrize(
+        "batch_size",
+        [
+            0,
+        ],
+        ids=["batch_size_out_of_bounds"],
+    )
+    def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None:
+        with pytest.raises(
+            OutOfBoundsError,
+            match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.",
+        ):
+            RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).fit(
+                Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+                batch_size=batch_size,
+            )
+
+    def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None:
+        fitted_model = RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).fit(
+            Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+        )
+        assert isinstance(fitted_model, RegressionNeuralNetwork)
+
+    def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None:
+        fitted_model = RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).fit(
+            Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"),
+        )
+        predictions = fitted_model.predict(Table.from_dict({"b": [1]}))
+        assert isinstance(predictions, TaggedTable)
+
+    def test_should_raise_if_model_has_not_been_fitted(self) -> None:
+        with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."):
+            RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)]).predict(
+                Table.from_dict({"a": [1]}),
+            )
+
+    def test_should_raise_if_is_fitted_is_set_correctly(self) -> None:
+        model = RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+        assert not model.is_fitted
+        model = model.fit(
+            Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"),
+        )
+        assert model.is_fitted
+
+    def test_should_raise_if_fit_doesnt_batch_callback(self) -> None:
+        model = RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+
+        class Test:
+            was_called = False
+
+            def cb(self, ind: int, loss: float) -> None:
+                if ind >= 0 and loss >= 0.0:
+                    self.was_called = True
+
+            def callback_was_called(self) -> bool:
+                return self.was_called
+
+        obj = Test()
+        model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb)
+
+        assert obj.callback_was_called() is True
+
+    def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None:
+        model = RegressionNeuralNetwork([FNNLayer(input_size=1, output_size=1)])
+
+        class Test:
+            was_called = False
+
+            def cb(self, ind: int, loss: float) -> None:
+                if ind >= 0 and loss >= 0.0:
+                    self.was_called = True
+
+            def callback_was_called(self) -> bool:
+                return self.was_called
+
+        obj = Test()
+        model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb)
+
+        assert obj.callback_was_called() is True