From 62e65123ab5037281e4f07eb54bc52dcc8ffcf88 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 19 Apr 2022 12:01:20 +0200 Subject: [PATCH 1/2] Fixing return type tensor with `num_return_sequences>1`. --- .../pipelines/text2text_generation.py | 2 +- src/transformers/pipelines/text_generation.py | 2 +- .../test_pipelines_text2text_generation.py | 33 +++++++++++++++++++ .../test_pipelines_text_generation.py | 31 +++++++++++++++++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py index ee9be76f4494..97cbc1a395d4 100644 --- a/src/transformers/pipelines/text2text_generation.py +++ b/src/transformers/pipelines/text2text_generation.py @@ -168,7 +168,7 @@ def postprocess(self, model_outputs, return_type=ReturnType.TEXT, clean_up_token records = [] for output_ids in model_outputs["output_ids"][0]: if return_type == ReturnType.TENSORS: - record = {f"{self.return_name}_token_ids": model_outputs} + record = {f"{self.return_name}_token_ids": output_ids} elif return_type == ReturnType.TEXT: record = { f"{self.return_name}_text": self.tokenizer.decode( diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py index 3e7a34bfec25..dbaa0a9df75a 100644 --- a/src/transformers/pipelines/text_generation.py +++ b/src/transformers/pipelines/text_generation.py @@ -226,7 +226,7 @@ def postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT, clean_up_ records = [] for sequence in generated_sequence: if return_type == ReturnType.TENSORS: - record = {"generated_token_ids": generated_sequence} + record = {"generated_token_ids": sequence} elif return_type in {ReturnType.NEW_TEXT, ReturnType.FULL_TEXT}: # Decode text text = self.tokenizer.decode( diff --git a/tests/pipelines/test_pipelines_text2text_generation.py b/tests/pipelines/test_pipelines_text2text_generation.py index 563b41954be6..2ce063369d02 100644 --- a/tests/pipelines/test_pipelines_text2text_generation.py +++ b/tests/pipelines/test_pipelines_text2text_generation.py @@ -83,6 +83,39 @@ def test_small_model_pt(self): ] self.assertEqual(outputs, target_outputs) + import torch + + outputs = generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True) + self.assertEqual( + outputs, + [ + {"generated_token_ids": ANY(torch.Tensor)}, + {"generated_token_ids": ANY(torch.Tensor)}, + ], + ) + generator.tokenizer.pad_token_id = generator.model.config.eos_token_id + generator.tokenizer.pad_token = "" + outputs = generator( + ["This is a test", "This is a second test"], + do_sample=True, + num_return_sequences=2, + batch_size=2, + return_tensors=True, + ) + self.assertEqual( + outputs, + [ + [ + {"generated_token_ids": ANY(torch.Tensor)}, + {"generated_token_ids": ANY(torch.Tensor)}, + ], + [ + {"generated_token_ids": ANY(torch.Tensor)}, + {"generated_token_ids": ANY(torch.Tensor)}, + ], + ], + ) + @require_tf def test_small_model_tf(self): generator = pipeline("text2text-generation", model="patrickvonplaten/t5-tiny-random", framework="tf") diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 69ac3aee832f..ca67c3bea13d 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -56,6 +56,37 @@ def test_small_model_pt(self): ], ) + outputs = text_generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True) + self.assertEqual( + outputs, + [ + {"generated_token_ids": ANY(list)}, + {"generated_token_ids": ANY(list)}, + ], + ) + text_generator.tokenizer.pad_token_id = text_generator.model.config.eos_token_id + text_generator.tokenizer.pad_token = "" + outputs = text_generator( + ["This is a test", "This is a second test"], + do_sample=True, + num_return_sequences=2, + batch_size=2, + return_tensors=True, + ) + self.assertEqual( + outputs, + [ + [ + {"generated_token_ids": ANY(list)}, + {"generated_token_ids": ANY(list)}, + ], + [ + {"generated_token_ids": ANY(list)}, + {"generated_token_ids": ANY(list)}, + ], + ], + ) + @require_tf def test_small_model_tf(self): text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="tf") From 5528330e28bb80035fba0507a71c9918632b7a5a Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 20 Apr 2022 14:44:18 +0200 Subject: [PATCH 2/2] Nit. --- tests/pipelines/test_pipelines_text2text_generation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/test_pipelines_text2text_generation.py b/tests/pipelines/test_pipelines_text2text_generation.py index 2ce063369d02..4490c5716220 100644 --- a/tests/pipelines/test_pipelines_text2text_generation.py +++ b/tests/pipelines/test_pipelines_text2text_generation.py @@ -21,10 +21,15 @@ pipeline, ) from transformers.testing_utils import is_pipeline_test, require_tf, require_torch +from transformers.utils import is_torch_available from .test_pipelines_common import ANY, PipelineTestCaseMeta +if is_torch_available(): + import torch + + @is_pipeline_test class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING @@ -83,8 +88,6 @@ def test_small_model_pt(self): ] self.assertEqual(outputs, target_outputs) - import torch - outputs = generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True) self.assertEqual( outputs,