From e1aebb8d7b7ca9743deb818361caa6b0d80ed733 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Sun, 1 Dec 2024 09:31:41 +0100 Subject: [PATCH] fix: `LLMMetadataExtractor` serialisation (#136) * fixing serialization * fixing tests * adding page_range (already expanded) to the serialisation * fixing serialisation tests --- .../components/extractors/llm_metadata_extractor.py | 3 ++- test/components/extractors/test_llm_metadata_extractor.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/haystack_experimental/components/extractors/llm_metadata_extractor.py b/haystack_experimental/components/extractors/llm_metadata_extractor.py index 9b9a1b42..32547045 100644 --- a/haystack_experimental/components/extractors/llm_metadata_extractor.py +++ b/haystack_experimental/components/extractors/llm_metadata_extractor.py @@ -228,11 +228,12 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict( self, prompt=self.prompt, - input_text=self.prompt_variable, + prompt_variable=self.prompt_variable, expected_keys=self.expected_keys, raise_on_failure=self.raise_on_failure, generator_api=self.generator_api.value, generator_api_params=llm_provider["init_parameters"], + page_range=self.expanded_range ) @classmethod diff --git a/test/components/extractors/test_llm_metadata_extractor.py b/test/components/extractors/test_llm_metadata_extractor.py index 0412cb81..094633fb 100644 --- a/test/components/extractors/test_llm_metadata_extractor.py +++ b/test/components/extractors/test_llm_metadata_extractor.py @@ -59,7 +59,7 @@ def test_init_missing_prompt_variable(self, monkeypatch): prompt_variable="test2" ) - def test_to_dict(self, monkeypatch): + def test_to_dict_default_params(self, monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") extractor = LLMMetadataExtractor( prompt="some prompt that was used with the LLM {{test}}", @@ -68,15 +68,18 @@ def test_to_dict(self, monkeypatch): prompt_variable="test", generator_api_params={'model': 'gpt-4o-mini', 'generation_kwargs': {"temperature": 0.5}}, raise_on_failure=True) + extractor_dict = extractor.to_dict() + assert extractor_dict == { 'type': 'haystack_experimental.components.extractors.llm_metadata_extractor.LLMMetadataExtractor', 'init_parameters': { 'prompt': 'some prompt that was used with the LLM {{test}}', 'expected_keys': ['key1', 'key2'], 'raise_on_failure': True, - 'input_text': 'test', + 'prompt_variable': 'test', 'generator_api': 'openai', + 'page_range': None, 'generator_api_params': { 'api_base_url': None, 'api_key': {'env_vars': ['OPENAI_API_KEY'],'strict': True,'type': 'env_var'},