diff --git a/tests/clip/test_modeling_clip.py b/tests/clip/test_modeling_clip.py index b9fc50c4b6b8..88649b31455c 100644 --- a/tests/clip/test_modeling_clip.py +++ b/tests/clip/test_modeling_clip.py @@ -28,7 +28,6 @@ from transformers.testing_utils import ( is_flax_available, is_pt_flax_cross_test, - is_pt_tf_cross_test, require_torch, require_vision, slow, @@ -602,149 +601,6 @@ def test_load_vision_text_config(self): text_config = CLIPTextConfig.from_pretrained(tmp_dir_name) self.assertDictEqual(config.text_config.to_dict(), text_config.to_dict()) - # overwrite from common since CLIPModel/TFCLIPModel return CLIPOutput/TFCLIPOutput - @is_pt_tf_cross_test - def test_pt_tf_model_equivalence(self): - import numpy as np - import tensorflow as tf - - import transformers - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - tf_model_class_name = "TF" + model_class.__name__ # Add the "TF" at the beginning - - if not hasattr(transformers, tf_model_class_name): - # transformers does not have TF version yet - return - - tf_model_class = getattr(transformers, tf_model_class_name) - - config.output_hidden_states = True - - tf_model = tf_model_class(config) - pt_model = model_class(config) - - # make sure only tf inputs are forward that actually exist in function args - tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys()) - - # remove all head masks - tf_input_keys.discard("head_mask") - tf_input_keys.discard("cross_attn_head_mask") - tf_input_keys.discard("decoder_head_mask") - - pt_inputs = self._prepare_for_class(inputs_dict, model_class) - pt_inputs = {k: v for k, v in pt_inputs.items() if k in tf_input_keys} - - # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences - pt_model.eval() - tf_inputs_dict = {} - for key, tensor in pt_inputs.items(): - # skip key that does not exist in tf - if type(tensor) == bool: - tf_inputs_dict[key] = tensor - elif key == "input_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - elif key == "pixel_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - else: - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32) - - # Check we can load pt model in tf and vice-versa with model => model functions - tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict) - pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device) - - # need to rename encoder-decoder "inputs" for PyTorch - # if "inputs" in pt_inputs_dict and self.is_encoder_decoder: - # pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs") - - with torch.no_grad(): - pto = pt_model(**pt_inputs) - tfo = tf_model(tf_inputs_dict, training=False) - - self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch") - for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()): - - if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)): - continue - - tf_out = tf_output.numpy() - pt_out = pt_output.cpu().numpy() - - self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch") - - if len(tf_out.shape) > 0: - - tf_nans = np.copy(np.isnan(tf_out)) - pt_nans = np.copy(np.isnan(pt_out)) - - pt_out[tf_nans] = 0 - tf_out[tf_nans] = 0 - pt_out[pt_nans] = 0 - tf_out[pt_nans] = 0 - - max_diff 
= np.amax(np.abs(tf_out - pt_out)) - self.assertLessEqual(max_diff, 4e-2) - - # Check we can load pt model in tf and vice-versa with checkpoint => model functions - with tempfile.TemporaryDirectory() as tmpdirname: - pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin") - torch.save(pt_model.state_dict(), pt_checkpoint_path) - tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path) - - tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5") - tf_model.save_weights(tf_checkpoint_path) - pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path) - pt_model = pt_model.to(torch_device) - - # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences - pt_model.eval() - tf_inputs_dict = {} - for key, tensor in pt_inputs.items(): - # skip key that does not exist in tf - if type(tensor) == bool: - tensor = np.array(tensor, dtype=bool) - tf_inputs_dict[key] = tf.convert_to_tensor(tensor, dtype=tf.int32) - elif key == "input_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - elif key == "pixel_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - else: - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32) - - # need to rename encoder-decoder "inputs" for PyTorch - # if "inputs" in pt_inputs_dict and self.is_encoder_decoder: - # pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs") - - with torch.no_grad(): - pto = pt_model(**pt_inputs) - - tfo = tf_model(tf_inputs_dict) - - self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch") - for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()): - - if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)): - continue - - tf_out = tf_output.numpy() - pt_out = pt_output.cpu().numpy() - - self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch") - - if len(tf_out.shape) > 0: - tf_nans = np.copy(np.isnan(tf_out)) - pt_nans = np.copy(np.isnan(pt_out)) - - pt_out[tf_nans] = 0 - tf_out[tf_nans] = 0 - pt_out[pt_nans] = 0 - tf_out[pt_nans] = 0 - - max_diff = np.amax(np.abs(tf_out - pt_out)) - self.assertLessEqual(max_diff, 4e-2) - # overwrite from common since FlaxCLIPModel returns nested output # which is not supported in the common test @is_pt_flax_cross_test diff --git a/tests/lxmert/test_modeling_lxmert.py b/tests/lxmert/test_modeling_lxmert.py index adbfbb2ab1c0..f1209d132dc6 100644 --- a/tests/lxmert/test_modeling_lxmert.py +++ b/tests/lxmert/test_modeling_lxmert.py @@ -15,16 +15,13 @@ import copy -import os -import tempfile import unittest import numpy as np -import transformers from transformers import LxmertConfig, is_tf_available, is_torch_available from transformers.models.auto import get_values -from transformers.testing_utils import is_pt_tf_cross_test, require_torch, slow, torch_device +from transformers.testing_utils import require_torch, slow, torch_device from ..test_configuration_common import ConfigTester from ..test_modeling_common import ModelTesterMixin, ids_tensor @@ -527,6 +524,8 @@ def prepare_config_and_inputs_for_common(self, return_obj_labels=False): if return_obj_labels: inputs_dict["obj_labels"] = obj_labels + else: + config.task_obj_predict = False return config, inputs_dict @@ -740,121 +739,30 @@ def test_retain_grad_hidden_states_attentions(self): 
self.assertIsNotNone(hidden_states_vision.grad) self.assertIsNotNone(attentions_vision.grad) - @is_pt_tf_cross_test - def test_pt_tf_model_equivalence(self): - for model_class in self.all_model_classes: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( - return_obj_labels="PreTraining" in model_class.__name__ - ) - - tf_model_class_name = "TF" + model_class.__name__ # Add the "TF" at the beginning - - if not hasattr(transformers, tf_model_class_name): - # transformers does not have TF version yet - return - - tf_model_class = getattr(transformers, tf_model_class_name) - - config.output_hidden_states = True - config.task_obj_predict = False - - pt_model = model_class(config) - tf_model = tf_model_class(config) - - # Check we can load pt model in tf and vice-versa with model => model functions - pt_inputs = self._prepare_for_class(inputs_dict, model_class) - - def recursive_numpy_convert(iterable): - return_dict = {} - for key, value in iterable.items(): - if type(value) == bool: - return_dict[key] = value - if isinstance(value, dict): - return_dict[key] = recursive_numpy_convert(value) - else: - if isinstance(value, (list, tuple)): - return_dict[key] = ( - tf.convert_to_tensor(iter_value.cpu().numpy(), dtype=tf.int32) for iter_value in value - ) - else: - return_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.int32) - return return_dict - - tf_inputs_dict = recursive_numpy_convert(pt_inputs) - - tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict) - pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device) - - # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences - pt_model.eval() - - # Delete obj labels as we want to compute the hidden states and not the loss - - if "obj_labels" in inputs_dict: - del inputs_dict["obj_labels"] - - pt_inputs = self._prepare_for_class(inputs_dict, model_class) - tf_inputs_dict = recursive_numpy_convert(pt_inputs) - - with torch.no_grad(): - pto = pt_model(**pt_inputs) - tfo = tf_model(tf_inputs_dict, training=False) - tf_hidden_states = tfo[0].numpy() - pt_hidden_states = pto[0].cpu().numpy() - - tf_nans = np.copy(np.isnan(tf_hidden_states)) - pt_nans = np.copy(np.isnan(pt_hidden_states)) - - pt_hidden_states[tf_nans] = 0 - tf_hidden_states[tf_nans] = 0 - pt_hidden_states[pt_nans] = 0 - tf_hidden_states[pt_nans] = 0 - - max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states)) - # Debug info (remove when fixed) - if max_diff >= 2e-2: - print("===") - print(model_class) - print(config) - print(inputs_dict) - print(pt_inputs) - self.assertLessEqual(max_diff, 6e-2) - - # Check we can load pt model in tf and vice-versa with checkpoint => model functions - with tempfile.TemporaryDirectory() as tmpdirname: - pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin") - torch.save(pt_model.state_dict(), pt_checkpoint_path) - tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path) - - tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5") - tf_model.save_weights(tf_checkpoint_path) - pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path) - - # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences - pt_model.eval() - - for key, value in pt_inputs.items(): - if key in ("visual_feats", "visual_pos"): - pt_inputs[key] = 
value.to(torch.float32) - else: - pt_inputs[key] = value.to(torch.long) - - with torch.no_grad(): - pto = pt_model(**pt_inputs) - - tfo = tf_model(tf_inputs_dict) - tfo = tfo[0].numpy() - pto = pto[0].cpu().numpy() - tf_nans = np.copy(np.isnan(tfo)) - pt_nans = np.copy(np.isnan(pto)) - - pto[tf_nans] = 0 - tfo[tf_nans] = 0 - pto[pt_nans] = 0 - tfo[pt_nans] = 0 - - max_diff = np.amax(np.abs(tfo - pto)) - self.assertLessEqual(max_diff, 6e-2) + def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict): + + tf_inputs_dict = {} + for key, value in pt_inputs_dict.items(): + if isinstance(value, dict): + tf_inputs_dict[key] = self.prepare_tf_inputs_from_pt_inputs(value) + elif isinstance(value, (list, tuple)): + tf_inputs_dict[key] = tuple(tf.convert_to_tensor(iter_value.cpu().numpy(), dtype=tf.int32) for iter_value in value) + # skip key that does not exist in tf + elif type(value) == bool: + tf_inputs_dict[key] = value + elif key == "input_values": + tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32) + elif key == "pixel_values": + tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32) + elif key == "input_features": + tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32) + # other general float inputs + elif value.is_floating_point(): + tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32) + else: + tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.int32) + + return tf_inputs_dict @require_torch diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 4a4a0eba044f..ac45a1c10822 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -56,7 +56,14 @@ slow, torch_device, ) -from transformers.utils import WEIGHTS_INDEX_NAME, WEIGHTS_NAME, is_flax_available, is_torch_fx_available +from transformers.utils import ( + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, + is_flax_available, + is_tf_available, + is_torch_fx_available, +) +from transformers.utils.generic import ModelOutput sys.path.append(str(Path(__file__).parent.parent / "utils")) @@ -94,6 +101,9 @@ ) from transformers.modeling_utils import shard_checkpoint +if is_tf_available(): + import tensorflow as tf + if is_flax_available(): import jax.numpy as jnp from transformers.modeling_flax_pytorch_utils import ( @@ -1478,237 +1488,240 @@ def recursive_check(tuple_object, dict_object): model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True} ) - @is_pt_tf_cross_test - def test_pt_tf_model_equivalence(self): - import numpy as np - import tensorflow as tf + # Don't copy this method to model specific test file! + # TODO: remove this method once the issues are all fixed!
+ def _make_attention_mask_non_null(self, inputs_dict): + """Make sure no sequence has all zeros as attention mask""" - import transformers + for k in ["attention_mask", "encoder_attention_mask", "decoder_attention_mask"]: + if k in inputs_dict: + attention_mask = inputs_dict[k] - def prepare_tf_inputs_from_pt_inputs(pt_inputs_dict): - - tf_inputs_dict = {} - for key, tensor in pt_inputs_dict.items(): - # skip key that does not exist in tf - if type(tensor) == bool: - tf_inputs_dict[key] = tensor - elif key == "input_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - elif key == "pixel_values": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - elif key == "input_features": - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - # To deal with the edge cases from `TFTapasForQuestionAnswering`. - # PyTorch can deal with type casting automatically, but TensorFlow is more strict! - # TODO: find a clean/better way to deal with these extra keys that are not common. - elif key in ["float_answer", "numeric_values", "numeric_values_scale"]: - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - else: - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32) + # Make sure no all 0s attention masks - to avoid failure at this moment. + # Put `1` at the beginning of sequences to make it still work when combining causal attention masks. + # TODO: remove this line once a fix regarding large negative values for attention mask is done. + attention_mask = torch.cat( + [torch.ones_like(attention_mask[:, :1], dtype=attention_mask.dtype), attention_mask[:, 1:]], dim=-1 + ) - return tf_inputs_dict + # Here we make the first sequence with all 0s as attention mask. + # Currently, this will fail for `TFWav2Vec2Model`. This is caused by the different large negative + # values, like `1e-4`, `1e-9`, `1e-30` and `-inf` for attention mask across models/frameworks. + # TODO: enable this block once the large negative values thing is cleaned up. + # (see https://github.com/huggingface/transformers/issues/14859) + # attention_mask = torch.cat( + # [torch.zeros_like(attention_mask[:1], dtype=attention_mask.dtype), attention_mask[1:]], + # dim=0 + # ) + + inputs_dict[k] = attention_mask + + # Don't copy this method to model specific test file! + # TODO: remove this method once the issues are all fixed! + def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class): + """For temporarily ignoring some failed test cases (issues to be fixed)""" + + tf_keys = set([k for k, v in tf_outputs.items() if v is not None]) + pt_keys = set([k for k, v in pt_outputs.items() if v is not None]) + + key_differences = tf_keys.symmetric_difference(pt_keys) + + if model_class.__name__ in [ + "FlaubertWithLMHeadModel", + "FunnelForPreTraining", + "ElectraForPreTraining", + "XLMWithLMHeadModel", + "TransfoXLLMHeadModel", + ]: + for k in key_differences: + if k in ["loss", "losses"]: + tf_keys.discard(k) + pt_keys.discard(k) + elif model_class.__name__.startswith("GPT2"): + # `TFGPT2` has `past_key_values` as a tensor while `GPT2` has it as a tuple. 
+ tf_keys.discard("past_key_values") + pt_keys.discard("past_key_values") + + # create new outputs from the remaining fields + new_tf_outputs = type(tf_outputs)(**{k: tf_outputs[k] for k in tf_keys}) + new_pt_outputs = type(pt_outputs)(**{k: pt_outputs[k] for k in pt_keys}) + + return new_tf_outputs, new_pt_outputs + + # Copied from tests.test_modeling_tf_common.TFModelTesterMixin.check_pt_tf_outputs + def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-5, name="outputs", attributes=None): + """Check the outputs from PyTorch and TensorFlow models are close enough. Checks are done in a recursive way. - def check_outputs(tf_outputs, pt_outputs, model_class, names): - """ - Args: - model_class: The class of the model that is currently testing. For example, `TFBertModel`, - TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Currently unused, but it could make - debugging easier and faster. + Args: + model_class: The class of the model that is currently being tested. For example, `TFBertModel`, + `TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Mainly used for providing more informative + error messages. + name (`str`): The name of the output. For example, `output.hidden_states`, `output.attentions`, etc. + attributes (`Tuple[str]`): The names of the output's elements if the output is a tuple/list, with each element + being a named field in the output. + """ - names: A string, or a tuple of strings. These specify what tf_outputs/pt_outputs represent in the model outputs. - Currently unused, but in the future, we could use this information to make the error message clearer - by giving the name(s) of the output tensor(s) with large difference(s) between PT and TF. - """ + self.assertEqual(type(name), str) + if attributes is not None: + self.assertEqual(type(attributes), tuple, f"{name}: The argument `attributes` should be a `tuple`") - # Some issue (`about past_key_values`) to solve (e.g. `TFPegasusForConditionalGeneration`) in a separate PR. - if names == "past_key_values": - return + # Allow `ModelOutput` (e.g. `CLIPOutput` has `text_model_output` and `vision_model_output`). + if isinstance(tf_outputs, ModelOutput): + self.assertTrue( + isinstance(pt_outputs, ModelOutput), + f"{name}: `pt_outputs` should be an instance of `ModelOutput` when `tf_outputs` is", + ) - # Allow `list` because `(TF)TransfoXLModelOutput.mems` is a list of tensors. - if type(tf_outputs) in [tuple, list]: - self.assertEqual(type(tf_outputs), type(pt_outputs)) - self.assertEqual(len(tf_outputs), len(pt_outputs)) - if type(names) == tuple: - for tf_output, pt_output, name in zip(tf_outputs, pt_outputs, names): - check_outputs(tf_output, pt_output, model_class, names=name) - elif type(names) == str: - for idx, (tf_output, pt_output) in enumerate(zip(tf_outputs, pt_outputs)): - check_outputs(tf_output, pt_output, model_class, names=f"{names}_{idx}") - else: - raise ValueError(f"`names` should be a `tuple` or a string. Got {type(names)} instead.") - elif isinstance(tf_outputs, tf.Tensor): - self.assertTrue(isinstance(pt_outputs, torch.Tensor)) + # Don't copy this block to model specific test file!
+ # TODO: remove this method and this line after issues are fixed + tf_outputs, pt_outputs = self._postprocessing_to_ignore_test_cases(tf_outputs, pt_outputs, model_class) - tf_outputs = tf_outputs.numpy() - pt_outputs = pt_outputs.detach().to("cpu").numpy() + tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None]) + pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None]) - tf_nans = np.isnan(tf_outputs) - pt_nans = np.isnan(pt_outputs) + self.assertEqual(tf_keys, pt_keys, f"{name}: Output keys differ between TF and PyTorch") - pt_outputs[tf_nans] = 0 - tf_outputs[tf_nans] = 0 - pt_outputs[pt_nans] = 0 - tf_outputs[pt_nans] = 0 + # convert to the case of `tuple` + # appending each key to the current (string) `name` + attributes = tuple([f"{name}.{k}" for k in tf_keys]) + self.check_pt_tf_outputs( + tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, tol=tol, name=name, attributes=attributes + ) - max_diff = np.amax(np.abs(tf_outputs - pt_outputs)) - self.assertLessEqual(max_diff, 1e-5) - else: - raise ValueError( - f"`tf_outputs` should be a `tuple` or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead." + # Allow `list` (e.g. `TransfoXLModelOutput.mems` is a list of tensors.) + elif type(tf_outputs) in [tuple, list]: + self.assertEqual(type(tf_outputs), type(pt_outputs), f"{name}: Output types differ between TF and PyTorch") + self.assertEqual(len(tf_outputs), len(pt_outputs), f"{name}: Output lengths differ between TF and PyTorch") + + if attributes is not None: + # case 1: each output has assigned name (e.g. a tuple form of a `ModelOutput`) + self.assertEqual( + len(attributes), + len(tf_outputs), + f"{name}: The tuple `attributes` should have the same length as `tf_outputs`", + ) + else: + # case 2: each output has no assigned name (e.g. hidden states of each layer) -> add an index to `attributes` + attributes = tuple([f"{name}_{idx}" for idx in range(len(tf_outputs))]) - def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels): - - # send pytorch model to the correct device - pt_model.to(torch_device) + for tf_output, pt_output, attr in zip(tf_outputs, pt_outputs, attributes): + self.check_pt_tf_outputs(tf_output, pt_output, model_class, tol=tol, name=attr) - # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences - pt_model.eval() + elif isinstance(tf_outputs, tf.Tensor): + self.assertTrue( + isinstance(pt_outputs, torch.Tensor), f"{name}: `pt_outputs` should be a tensor when `tf_outputs` is" + ) - tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) - tf_inputs_dict_maybe_with_labels = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict_maybe_with_labels) + tf_outputs = tf_outputs.numpy() + pt_outputs = pt_outputs.detach().to("cpu").numpy() - # send pytorch inputs to the correct device - pt_inputs_dict = { - k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items() - } - pt_inputs_dict_maybe_with_labels = { - k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v - for k, v in pt_inputs_dict_maybe_with_labels.items() - } + self.assertEqual( + tf_outputs.shape, pt_outputs.shape, f"{name}: Output shapes differ between TF and PyTorch" + ) - # Original test: check without `labels` - with torch.no_grad(): - pt_outputs = pt_model(**pt_inputs_dict) - tf_outputs = tf_model(tf_inputs_dict) + # deal with NumPy's scalars to make replacing nan values by 0 work.
+ if np.isscalar(tf_outputs): + tf_outputs = np.array([tf_outputs]) + pt_outputs = np.array([pt_outputs]) - tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None]) - pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None]) + tf_nans = np.isnan(tf_outputs) + pt_nans = np.isnan(pt_outputs) - self.assertEqual(tf_keys, pt_keys) - check_outputs(tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, names=tf_keys) + pt_outputs[tf_nans] = 0 + tf_outputs[tf_nans] = 0 + pt_outputs[pt_nans] = 0 + tf_outputs[pt_nans] = 0 - # check the case where `labels` is passed - has_labels = any( - x in tf_inputs_dict_maybe_with_labels for x in ["labels", "next_sentence_label", "start_positions"] + max_diff = np.amax(np.abs(tf_outputs - pt_outputs)) + self.assertLessEqual(max_diff, tol, f"{name}: Difference between torch and tf is {max_diff} (>= {tol}).") + else: + raise ValueError( + f"`tf_outputs` should be an instance of `ModelOutput`, a `tuple`, a `list`, or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead." ) - if has_labels: - with torch.no_grad(): - pt_outputs = pt_model(**pt_inputs_dict_maybe_with_labels) - tf_outputs = tf_model(tf_inputs_dict_maybe_with_labels) - - # Some models' output class don't have `loss` attribute despite `labels` is used. - # TODO: identify which models - tf_loss = getattr(tf_outputs, "loss", None) - pt_loss = getattr(pt_outputs, "loss", None) - - # Some PT models return loss while the corresponding TF models don't (i.e. `None` for `loss`). - # - FlaubertWithLMHeadModel - # - FunnelForPreTraining - # - ElectraForPreTraining - # - XLMWithLMHeadModel - # TODO: Fix PT/TF diff -> remove this condition to fail the test if a diff occurs - if not ((tf_loss is None and pt_loss is None) or (tf_loss is not None and pt_loss is not None)): - if model_class.__name__ not in [ - "FlaubertWithLMHeadModel", - "FunnelForPreTraining", - "ElectraForPreTraining", - "XLMWithLMHeadModel", - "TransfoXLLMHeadModel", - ]: - self.assertEqual(tf_loss is None, pt_loss is None) - - tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None]) - pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None]) + def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict): + + tf_inputs_dict = {} + for key, tensor in pt_inputs_dict.items(): + # skip key that does not exist in tf + if type(tensor) == bool: + tf_inputs_dict[key] = tensor + elif key == "input_values": + tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) + elif key == "pixel_values": + tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) + elif key == "input_features": + tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) + # other general float inputs + elif tensor.is_floating_point(): + tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) + else: + tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32) - # TODO: remove these 2 conditions once the above TODOs (above loss) are implemented - # (Also, `TFTransfoXLLMHeadModel` has no `loss` while `TransfoXLLMHeadModel` return `losses`) - if tf_keys != pt_keys: - if model_class.__name__ not in [ - "FlaubertWithLMHeadModel", - "FunnelForPreTraining", - "ElectraForPreTraining", - "XLMWithLMHeadModel", - "TransfoXLLMHeadModel", - ]: - self.assertEqual(tf_keys, pt_keys) - - # Since we deliberately make some tests pass above (regarding the `loss`), let's still try to test
- # TODO: remove this block of `index` computing once the above TODOs (above loss) are implemented - # compute the 1st `index` where `tf_keys` and `pt_keys` is different - index = 0 - for _ in range(min(len(tf_keys), len(pt_keys))): - if tf_keys[index] == pt_keys[index]: - index += 1 - else: - break - if tf_keys[:index] != pt_keys[:index]: - self.assertEqual(tf_keys, pt_keys) + return tf_inputs_dict - # Some models require extra condition to return loss. For example, `(TF)BertForPreTraining` requires - # both`labels` and `next_sentence_label`. - if tf_loss is not None and pt_loss is not None: + def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict): - # check anything else than `loss` - keys = tuple([k for k in tf_keys]) - check_outputs(tf_outputs[1:index], pt_outputs[1:index], model_class, names=keys[1:index]) + tf_inputs_dict = self.prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) - # check `loss` + # send pytorch inputs to the correct device + pt_inputs_dict = { + k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items() + } - # tf models returned loss is usually a tensor rather than a scalar. - # (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`) - # Change it here to a scalar to match PyTorch models' loss - tf_loss = tf.math.reduce_mean(tf_loss).numpy() - pt_loss = pt_loss.detach().to("cpu").numpy() + # send pytorch model to the correct device + pt_model.to(torch_device) - tf_nans = np.isnan(tf_loss) - pt_nans = np.isnan(pt_loss) - # the 2 losses need to be both nan or both not nan - self.assertEqual(tf_nans, pt_nans) + # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences + pt_model.eval() - if not tf_nans: - max_diff = np.amax(np.abs(tf_loss - pt_loss)) - self.assertLessEqual(max_diff, 1e-5) + with torch.no_grad(): + pt_outputs = pt_model(**pt_inputs_dict) + tf_outputs = tf_model(tf_inputs_dict) - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + # tf models returned loss is usually a tensor rather than a scalar. + # (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`) + # Change it here to a scalar to match PyTorch models' loss + tf_loss = getattr(tf_outputs, "loss", None) + if tf_loss is not None: + tf_outputs.loss = tf.math.reduce_mean(tf_loss) + + self.check_pt_tf_outputs(tf_outputs, pt_outputs, type(pt_model)) + + @is_pt_tf_cross_test + def test_pt_tf_model_equivalence(self): + import transformers for model_class in self.all_model_classes: - tf_model_class_name = "TF" + model_class.__name__ # Add the "TF" at the beginning + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + tf_model_class_name = "TF" + model_class.__name__ # Add the "TF" at the beginning if not hasattr(transformers, tf_model_class_name): - # transformers does not have TF version yet + # transformers does not have this model in TF version yet return # Output all for aggressive testing config.output_hidden_states = True config.output_attentions = self.has_attentions - for k in ["attention_mask", "encoder_attention_mask", "decoder_attention_mask"]: - if k in inputs_dict: - attention_mask = inputs_dict[k] - # make sure no all 0s attention masks - to avoid failure at this moment. - # TODO: remove this line once the TODO below is implemented. - attention_mask = torch.ones_like(attention_mask, dtype=torch.int32) - # Here we make the first sequence with all 0s as attention mask. 
- # Currently, this will fail for `TFWav2Vec2Model`. This is caused by the different large negative - # values, like `1e-4`, `1e-9`, `1e-30` and `-inf` for attention mask across models/frameworks. - # TODO: enable this block once the large negative values thing is cleaned up. - # (see https://github.com/huggingface/transformers/issues/14859) - # attention_mask = torch.cat( - # [ - # torch.zeros_like(attention_mask[:1], dtype=torch.int32), - # attention_mask[1:].type(dtype=torch.int32) - # ], - # dim=0 - # ) - inputs_dict[k] = attention_mask + # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency + # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`. + # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it. + self._make_attention_mask_non_null(inputs_dict) tf_model_class = getattr(transformers, tf_model_class_name) - tf_model = tf_model_class(config) pt_model = model_class(config) + tf_model = tf_model_class(config) + + pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + pt_inputs_dict_with_labels = self._prepare_for_class( + inputs_dict, + model_class, + # Not all models accept "labels" in the forward pass (yet :) ) + return_labels=True if "labels" in inspect.signature(model_class.forward).parameters.keys() else False, + ) # make sure only tf inputs are forward that actually exist in function args tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys()) @@ -1718,20 +1731,25 @@ def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_ tf_input_keys.discard("cross_attn_head_mask") tf_input_keys.discard("decoder_head_mask") - pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - pt_inputs_dict_maybe_with_labels = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - pt_inputs_dict = {k: v for k, v in pt_inputs_dict.items() if k in tf_input_keys} - pt_inputs_dict_maybe_with_labels = { - k: v for k, v in pt_inputs_dict_maybe_with_labels.items() if k in tf_input_keys - } + pt_inputs_dict_with_labels = {k: v for k, v in pt_inputs_dict_with_labels.items() if k in tf_input_keys} + + # For some models (e.g. base models), there is no label returned. + # Set the input dict to `None` to avoid check outputs twice for the same input dicts. 
+ if set(pt_inputs_dict_with_labels.keys()).symmetric_difference(pt_inputs_dict.keys()): + pt_inputs_dict_with_labels = None # Check we can load pt model in tf and vice-versa with model => model functions - tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) + # Here requires `tf_inputs_dict` to build `tf_model` + tf_inputs_dict = self.prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict) pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model) - check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels) + # Original test: check without `labels` + self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) + # check with `labels` + if pt_inputs_dict_with_labels: + self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict_with_labels) # Check we can load pt model in tf and vice-versa with checkpoint => model functions with tempfile.TemporaryDirectory() as tmpdirname: @@ -1742,9 +1760,12 @@ def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_ tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5") tf_model.save_weights(tf_checkpoint_path) pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path) - pt_model = pt_model.to(torch_device) - check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels) + # Original test: check without `labels` + self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) + # check with `labels` + if pt_inputs_dict_with_labels: + self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict_with_labels) def assert_almost_equals(self, a: np.ndarray, b: np.ndarray, tol: float): diff = np.abs((a - b)).max() diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 195c7daa84dd..6fb0f845c6dd 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -565,8 +565,7 @@ def test_pt_tf_model_equivalence(self): # Output all for aggressive testing config.output_hidden_states = True - if self.has_attentions: - config.output_attentions = True + config.output_attentions = self.has_attentions # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`. 
diff --git a/tests/vit_mae/test_modeling_vit_mae.py b/tests/vit_mae/test_modeling_vit_mae.py index 8cbde5b2ce92..fae72a8ad7be 100644 --- a/tests/vit_mae/test_modeling_vit_mae.py +++ b/tests/vit_mae/test_modeling_vit_mae.py @@ -17,14 +17,13 @@ import inspect import math -import os import tempfile import unittest import numpy as np from transformers import ViTMAEConfig -from transformers.testing_utils import is_pt_tf_cross_test, require_torch, require_vision, slow, torch_device +from transformers.testing_utils import require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ..test_configuration_common import ConfigTester @@ -321,150 +320,20 @@ def check_hidden_states_output(inputs_dict, config, model_class): # overwrite from common since ViTMAEForPretraining has random masking, we need to fix the noise # to generate masks during test - @is_pt_tf_cross_test - def test_pt_tf_model_equivalence(self): - import numpy as np - import tensorflow as tf - - import transformers + def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict): # make masks reproducible np.random.seed(2) - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - num_patches = int((config.image_size // config.patch_size) ** 2) + num_patches = int((pt_model.config.image_size // pt_model.config.patch_size) ** 2) noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches)) - pt_noise = torch.from_numpy(noise).to(device=torch_device) - tf_noise = tf.constant(noise) - - def prepare_tf_inputs_from_pt_inputs(pt_inputs_dict): - - tf_inputs_dict = {} - for key, tensor in pt_inputs_dict.items(): - tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32) - - return tf_inputs_dict - - def check_outputs(tf_outputs, pt_outputs, model_class, names): - """ - Args: - model_class: The class of the model that is currently testing. For example, `TFBertModel`, - TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Currently unused, but it could make - debugging easier and faster. - - names: A string, or a tuple of strings. These specify what tf_outputs/pt_outputs represent in the model outputs. - Currently unused, but in the future, we could use this information to make the error message clearer - by giving the name(s) of the output tensor(s) with large difference(s) between PT and TF. - """ - - # Allow `list` because `(TF)TransfoXLModelOutput.mems` is a list of tensors. - if type(tf_outputs) in [tuple, list]: - self.assertEqual(type(tf_outputs), type(pt_outputs)) - self.assertEqual(len(tf_outputs), len(pt_outputs)) - if type(names) == tuple: - for tf_output, pt_output, name in zip(tf_outputs, pt_outputs, names): - check_outputs(tf_output, pt_output, model_class, names=name) - elif type(names) == str: - for idx, (tf_output, pt_output) in enumerate(zip(tf_outputs, pt_outputs)): - check_outputs(tf_output, pt_output, model_class, names=f"{names}_{idx}") - else: - raise ValueError(f"`names` should be a `tuple` or a string. 
Got {type(names)} instead.") - elif isinstance(tf_outputs, tf.Tensor): - self.assertTrue(isinstance(pt_outputs, torch.Tensor)) - - tf_outputs = tf_outputs.numpy() - if isinstance(tf_outputs, np.float32): - tf_outputs = np.array(tf_outputs, dtype=np.float32) - pt_outputs = pt_outputs.detach().to("cpu").numpy() - - tf_nans = np.isnan(tf_outputs) - pt_nans = np.isnan(pt_outputs) - - pt_outputs[tf_nans] = 0 - tf_outputs[tf_nans] = 0 - pt_outputs[pt_nans] = 0 - tf_outputs[pt_nans] = 0 - - max_diff = np.amax(np.abs(tf_outputs - pt_outputs)) - self.assertLessEqual(max_diff, 1e-5) - else: - raise ValueError( - f"`tf_outputs` should be a `tuple` or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead." - ) - - def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict): - # we are not preparing a model with labels because of the formation - # of the ViT MAE model - - # send pytorch model to the correct device - pt_model.to(torch_device) - - # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences - pt_model.eval() - - tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) - - # send pytorch inputs to the correct device - pt_inputs_dict = { - k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items() - } - - # Original test: check without `labels` - with torch.no_grad(): - pt_outputs = pt_model(**pt_inputs_dict, noise=pt_noise) - tf_outputs = tf_model(tf_inputs_dict, noise=tf_noise) - - tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None]) - pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None]) - - self.assertEqual(tf_keys, pt_keys) - check_outputs(tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, names=tf_keys) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - tf_model_class_name = "TF" + model_class.__name__ # Add the "TF" at the beginning - - # Output all for aggressive testing - config.output_hidden_states = True - config.output_attentions = self.has_attentions - - tf_model_class = getattr(transformers, tf_model_class_name) - - tf_model = tf_model_class(config) - pt_model = model_class(config) - - # make sure only tf inputs are forward that actually exist in function args - tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys()) - - # remove all head masks - tf_input_keys.discard("head_mask") - tf_input_keys.discard("cross_attn_head_mask") - tf_input_keys.discard("decoder_head_mask") - - pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - - pt_inputs_dict = {k: v for k, v in pt_inputs_dict.items() if k in tf_input_keys} - - # Check we can load pt model in tf and vice-versa with model => model functions - tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict) - tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict) - pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model) - - check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) - - # Check we can load pt model in tf and vice-versa with checkpoint => model functions - with tempfile.TemporaryDirectory() as tmpdirname: - pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin") - torch.save(pt_model.state_dict(), pt_checkpoint_path) - tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path) + pt_noise = torch.from_numpy(noise) - tf_checkpoint_path = 
os.path.join(tmpdirname, "tf_model.h5") - tf_model.save_weights(tf_checkpoint_path) - pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path) - pt_model = pt_model.to(torch_device) + # Add `noise` argument. + # PT inputs will be prepared in `super().check_pt_tf_models()` with this added `noise` argument + pt_inputs_dict["noise"] = pt_noise - check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) + super().check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) def test_save_load(self):
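For reference, the leaf-level numeric check that the new `check_pt_tf_outputs` applies to each TF/PT tensor pair boils down to a NaN-masked maximum absolute difference compared against `tol`. A minimal standalone sketch of that idea follows (illustration only, not part of this patch; the helper name `max_abs_diff` is made up here and simply mirrors the logic added in tests/test_modeling_common.py above):

import numpy as np
import tensorflow as tf
import torch

def max_abs_diff(tf_tensor, pt_tensor):
    # Convert both tensors to NumPy, zero out positions that are NaN in
    # either framework, and return the largest absolute difference.
    tf_np = tf_tensor.numpy()
    pt_np = pt_tensor.detach().cpu().numpy()
    nans = np.isnan(tf_np) | np.isnan(pt_np)
    tf_np = np.where(nans, 0.0, tf_np)
    pt_np = np.where(nans, 0.0, pt_np)
    return float(np.amax(np.abs(tf_np - pt_np)))

x = np.random.rand(2, 3).astype(np.float32)
assert max_abs_diff(tf.constant(x), torch.from_numpy(x)) <= 1e-5  # same data -> diff ~ 0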