Skip to content

Commit

Permalink
Merge pull request #102 from merefield/web_crawler_function
Browse files Browse the repository at this point in the history
FEATURE: add a web crawler function
  • Loading branch information
merefield authored Jun 19, 2024
2 parents f514404 + c3a3ff3 commit 1988181
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 1 deletion.
11 changes: 11 additions & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ en:
chatbot_escalate_to_staff_groups: "(EXPERIMENTAL, Chat only) groups added to escalation PM, e.g. support team"
chatbot_escalate_to_staff_max_history: "(EXPERIMENTAL, Chat only) number of chat messages included in transcript added to escalation PM"
chatbot_news_api_token: "News API token for news (if left blank, news will never be searched). <a target='_blank' rel='noopener' href='https://newsapi.org/'>Get one at NewsAPI.org</a>"
chatbot_firecrawl_api_token: "Firecrawl API token for crawling remote websites. If left blank, crawling will not be available. <a target='_blank' rel='noopener' href='https://www.firecrawl.dev/'>Get one at https://www.firecrawl.dev/</a>"
chatbot_serp_api_key: "Serp API token for google search (if left blank, google will never be searched). <a target='_blank' rel='noopener' href='https://serpapi.com/'>Get one at SerpAPI.com</a>"
chatbot_marketstack_key: "Marketstack API key for stock price information (if left blank, Marketstack will never be queried). <a target='_blank' rel='noopener' href='https://marketstack.com/'>Get one at MarketStack.com</a>"
chatbot_enable_verbose_console_logging: "Enable response retrieval progress logging to console to help debug issues"
Expand Down Expand Up @@ -304,6 +305,16 @@ en:
start_date: "start date from which to search for news in format YYYY-MM-DD"
answer: "The latest news about this is: "
error: "ERROR: Had trouble retrieving the news!"
web_crawler:
description: |
A wrapper around the Firecrawl web crawler API.
Useful for when you need to get the content of a remote webpage for e.g. summarisation or fact retrieval.
Input should be the url of the target website.
parameters:
url: "the url of the target website."
error: "ERROR: Had trouble crawling the target website!"
stock_data:
description: |
An API for MarketStack stock data. You need to call it using the stock ticker. You can optionally also provide a specific date.
Expand Down
3 changes: 3 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ plugins:
chatbot_news_api_token:
client: false
default: ''
chatbot_firecrawl_api_token:
client: false
default: ''
chatbot_serp_api_key:
client: false
default: ''
Expand Down
2 changes: 2 additions & 0 deletions lib/discourse_chatbot/bots/open_ai_bot_rag.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def merge_functions(opts)
calculator_function = ::DiscourseChatbot::CalculatorFunction.new
wikipedia_function = ::DiscourseChatbot::WikipediaFunction.new
news_function = ::DiscourseChatbot::NewsFunction.new
crawl_function = ::DiscourseChatbot::WebCrawlerFunction.new
google_search_function = ::DiscourseChatbot::GoogleSearchFunction.new
stock_data_function = ::DiscourseChatbot::StockDataFunction.new
escalate_to_staff_function = ::DiscourseChatbot::EscalateToStaffFunction.new
Expand Down Expand Up @@ -85,6 +86,7 @@ def merge_functions(opts)
functions << get_user_address if get_user_address
functions << escalate_to_staff_function if SiteSetting.chatbot_escalate_to_staff_function && opts[:private] && opts[:type] == ::DiscourseChatbot::MESSAGE
functions << news_function if !SiteSetting.chatbot_news_api_token.blank?
functions << crawl_function if !SiteSetting.chatbot_firecrawl_api_token.blank?
functions << google_search_function if !SiteSetting.chatbot_serp_api_key.blank?
functions << stock_data_function if !SiteSetting.chatbot_marketstack_key.blank?

Expand Down
70 changes: 70 additions & 0 deletions lib/discourse_chatbot/functions/web_crawler_function.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# frozen_string_literal: true

require_relative '../function'

module DiscourseChatbot
  # Chatbot function that asks the Firecrawl API to crawl a URL and returns the
  # markdown of the first crawled page.
  class WebCrawlerFunction < Function
    # Base URL of the Firecrawl API.
    FIRECRAWL_API_URL = 'https://api.firecrawl.dev'
    # Seconds to wait before each status poll of a crawl job.
    POLL_INTERVAL_SECONDS = 5
    # Maximum number of status polls before the crawl is given up on.
    MAX_POLLS = 20

    # @return [String] the function name exposed to the model
    def name
      'web_crawler'
    end

    # @return [String] localized description of the function
    def description
      I18n.t("chatbot.prompt.function.web_crawler.description")
    end

    # @return [Array<Hash>] the single `url` parameter definition
    def parameters
      [
        { name: 'url', type: String, description: I18n.t("chatbot.prompt.function.web_crawler.parameters.url") },
      ]
    end

    # @return [Array<String>] names of the mandatory parameters
    def required
      ['url']
    end

    # Starts a Firecrawl crawl job for the given URL, polls until it completes
    # and returns the markdown of the first result.
    #
    # @param args [Hash] function arguments; the target is read from the key
    #   named by the first entry of {#parameters} ('url')
    # @return [String] the crawled markdown, or the localized error message when
    #   the job cannot be started, does not complete in time, yields no markdown
    #   or any error is raised along the way
    def process(args)
      url = args[parameters[0][:name]]

      ::DiscourseChatbot.progress_debug_message <<~EOS
        -------------------------------------
        arguments for web crawler: #{url}
        --------------------------------------
      EOS
      super(args)

      conn = firecrawl_connection
      job_id = start_crawl(conn, url)
      # Without a job id there is nothing to poll; fail fast instead of
      # spending the whole polling budget on an invalid status URL.
      return crawl_error_message if job_id.to_s.empty?

      result = wait_for_crawl(conn, job_id)
      result&.dig("data", 0, "markdown") || crawl_error_message
    rescue StandardError => e
      # Best effort: the caller always gets a string back, but the cause is
      # surfaced in the debug log rather than silently discarded.
      ::DiscourseChatbot.progress_debug_message("web crawler failed: #{e.class}: #{e.message}")
      crawl_error_message
    end

    private

    # Builds the HTTP connection carrying the JSON content type and the
    # configured Firecrawl bearer token.
    def firecrawl_connection
      Faraday.new(
        url: FIRECRAWL_API_URL,
        headers: {
          "Content-Type" => "application/json",
          "Authorization" => "Bearer #{SiteSetting.chatbot_firecrawl_api_token}"
        }
      )
    end

    # Submits the crawl request and returns the value of "jobId" from the
    # response (nil when the response carries none).
    def start_crawl(conn, url)
      response = conn.post('v0/crawl') do |req|
        req.body = { url: url.to_s }.to_json
      end

      JSON.parse(response.body)["jobId"]
    end

    # Polls the job status up to MAX_POLLS times. Returns the parsed status
    # body once its status is "completed", or nil on failure or timeout.
    def wait_for_crawl(conn, job_id)
      MAX_POLLS.times do
        sleep POLL_INTERVAL_SECONDS

        status_body = JSON.parse(conn.get("/v0/crawl/status/#{job_id}").body)

        return status_body if status_body["status"] == "completed"
        # NOTE(review): assumes Firecrawl reports a terminal "failed" status;
        # confirm against the API reference. Stops polling a dead job early.
        break if status_body["status"] == "failed"
      end

      nil
    end

    # Localized message returned to the model when crawling fails.
    def crawl_error_message
      I18n.t("chatbot.prompt.function.web_crawler.error")
    end
  end
end
3 changes: 2 additions & 1 deletion plugin.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true
# name: discourse-chatbot
# about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages
# version: 0.9.32
# version: 0.9.33
# authors: merefield
# url: https://github.com/merefield/discourse-chatbot

Expand Down Expand Up @@ -95,6 +95,7 @@ def progress_debug_message(message)
../lib/discourse_chatbot/functions/calculator_function.rb
../lib/discourse_chatbot/functions/escalate_to_staff_function.rb
../lib/discourse_chatbot/functions/news_function.rb
../lib/discourse_chatbot/functions/web_crawler_function.rb
../lib/discourse_chatbot/functions/wikipedia_function.rb
../lib/discourse_chatbot/functions/vision_function.rb
../lib/discourse_chatbot/functions/google_search_function.rb
Expand Down

0 comments on commit 1988181

Please sign in to comment.