
Commit a64850d

Merge pull request #36 from VinciGit00/multiple-chunking-for-generating-answer
Multiple chunking for generating answer
2 parents 5f20299 + cd176ff commit a64850d

3 files changed (+20 -13)

commit_and_push.sh (+1 -1)

@@ -21,7 +21,7 @@ commit_message="$1"
 
 # Run Pylint on the specified Python files
 pylint scrapegraphai/**/*.py scrapegraphai/*.py examples/**/*.py tests/**/*.py
-#Maket the pull
+#Make the pull
 git pull
 
 # Add the modified files to the Git repository

examples/graph_examples/smart_scraper_example.py (+2 -2)

@@ -16,8 +16,8 @@
 }
 
 # Define URL and PROMPT
-URL = "https://perinim.github.io/projects/"
-PROMPT = "List me all the titles and project descriptions"
+URL = "https://www.google.com/search?client=safari&rls=en&q=ristoranti+trento&ie=UTF-8&oe=UTF-8"
+PROMPT = "List me all the https inside the page"
 
 # Create the SmartScraperGraph instance
 smart_scraper_graph = SmartScraperGraph(PROMPT, URL, llm_config)
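
For context, a hedged sketch of how the updated example might be driven end to end. The import path, the llm_config keys, and the run() call are assumptions based on the library's other examples; only the URL and PROMPT values come from this diff.

# Hypothetical driver for the example above; import path, llm_config keys,
# and run() are assumptions, not shown in this diff.
from scrapegraphai.graphs import SmartScraperGraph  # import path assumed

llm_config = {
    "api_key": "YOUR_OPENAI_API_KEY",  # placeholder credential
}

URL = "https://www.google.com/search?client=safari&rls=en&q=ristoranti+trento&ie=UTF-8&oe=UTF-8"
PROMPT = "List me all the https inside the page"

smart_scraper_graph = SmartScraperGraph(PROMPT, URL, llm_config)
answer = smart_scraper_graph.run()  # assumed entry point
print(answer)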

scrapegraphai/nodes/generate_answer_node.py (+17 -10)

@@ -11,6 +11,7 @@
 
 # Imports from the library
 from .base_node import BaseNode
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 class GenerateAnswerNode(BaseNode):
@@ -114,24 +115,30 @@ def execute(self, state: dict) -> dict:
                 "chunk_id": i + 1, "format_instructions": format_instructions},
             )
             # Dynamically name the chains based on their index
-            chain_name = f"chunk{i+1}"
-            chains_dict[chain_name] = prompt | self.llm | output_parser
+            chains_dict[f"chunk{i+1}"] = prompt | self.llm | output_parser
 
-        # Use dictionary unpacking to pass the dynamically named chains to RunnableParallel
-        map_chain = RunnableParallel(**chains_dict)
-        # Chain
-        answer_map = map_chain.invoke({"question": user_input})
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            chunk_size=4000,
+            chunk_overlap=0,
+        )
+
+        chunks = text_splitter.split_text(str(chains_dict))
 
-        # Merge the answers from the chunks
         merge_prompt = PromptTemplate(
            template=template_merge,
            input_variables=["context", "question"],
            partial_variables={"format_instructions": format_instructions},
         )
         merge_chain = merge_prompt | self.llm | output_parser
-        answer = merge_chain.invoke(
-            {"context": answer_map, "question": user_input})
 
-        # Update the state with the generated answer
+        answer_lines = []
+        for chunk in chunks:
+            answer_temp = merge_chain.invoke(
+                {"context": chunk, "question": user_input})
+            answer_lines.append(answer_temp)
+
+        unique_answer_lines = list(set(answer_lines))
+        answer = '\n'.join(unique_answer_lines)
+
         state.update({"answer": answer})
         return state
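
The new flow splits the serialized chains dictionary into token-bounded chunks, asks the merge chain for an answer per chunk, then deduplicates and joins the partial answers. Below is a minimal, self-contained sketch of that chunk-then-merge pattern; it assumes langchain and tiktoken are installed, replaces the LLM chain with a stub so the control flow runs without an API key, and splits a plain context string rather than the node's str(chains_dict).

# Minimal sketch of the chunk-then-merge pattern this commit introduces.
# The real node builds merge_chain as merge_prompt | self.llm | output_parser;
# a stub stands in for it here so the example runs without an LLM.
from langchain.text_splitter import RecursiveCharacterTextSplitter


def stub_merge_chain(inputs: dict) -> str:
    # Stand-in for merge_chain.invoke(...): report what it was given.
    return f"answer from a chunk of {len(inputs['context'])} characters"


user_input = "List me all the https inside the page"
long_context = "scraped page text with https://example.com links ... " * 300

# Same splitter settings as the node: ~4000 tokens per chunk, no overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=4000,
    chunk_overlap=0,
)
chunks = text_splitter.split_text(long_context)

# One answer per chunk, then deduplicate and join, as in the updated execute().
answer_lines = []
for chunk in chunks:
    answer_lines.append(stub_merge_chain({"context": chunk, "question": user_input}))

unique_answer_lines = list(set(answer_lines))
answer = '\n'.join(unique_answer_lines)
print(answer)

As in the diff, duplicate per-chunk answers are removed with set(), which does not preserve the original chunk order.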
