Skip to content

Commit

Permalink
Update and simplify embeddings rake task
Browse files Browse the repository at this point in the history
  • Loading branch information
merefield committed Apr 13, 2024
1 parent 6592772 commit a67244f
Showing 1 changed file with 25 additions and 35 deletions.
60 changes: 25 additions & 35 deletions lib/tasks/chatbot.rake
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,13 @@ task "chatbot:refresh_embeddings", %i[missing_only delay] => :environment do |_,
ENV["RAILS_DB"] ? refresh_embeddings(args) : refresh_embeddings_all_sites(args)
end

desc "Refresh embeddings for all posts matching string/regex and optionally delay the loop"
# Usage: rake chatbot:refresh_embeddings_match[pattern,type,delay]
#   pattern - required; passed through to Post.raw_match
#   type    - "string" (default) or "regex"; compared case-insensitively
#   delay   - optional; converted with to_i and, when given, must be >= 1.
#             It is handed to sleep after each processed post.
# Prints an error and exits with status 1 when an argument is invalid.
task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment] do |_, args|
  args.with_defaults(type: "string")
  pattern = args[:pattern]
  match_type = args[:type]&.downcase
  pause = args[:delay]&.to_i

  # Pick the first applicable validation failure (same precedence as the
  # checks are listed), then report it and abort in one place.
  failure =
    if !pattern
      "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type,delay]"
    elsif pause && pause < 1
      "ERROR: delay parameter should be an integer and greater than 0"
    elsif !%w[string regex].include?(match_type)
      "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type] where type is string or regex"
    end

  if failure
    puts failure
    exit 1
  end

  matching_posts = Post.raw_match(pattern, match_type)
  done = 0
  total = matching_posts.count
  embedder = ::DiscourseChatbot::PostEmbeddingProcess.new

  # Upsert the embedding of each matching post, reporting progress as we go.
  matching_posts.find_each do |post|
    embedder.upsert(post.id)
    done += 1
    print_status(done, total)
    sleep(pause) if pause
  end

  puts "", "#{done} posts done!", ""
end

# Calls refresh_embeddings(args) once for every connection yielded by
# RailsMultisite::ConnectionManagement.each_connection. The yielded value is
# not used here; refresh_embeddings receives the same args each time.
def refresh_embeddings_all_sites(args)
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    refresh_embeddings(args)
  end
end

def refresh_embeddings(args)
puts "-" * 50
puts "Refreshing embeddings for posts for '#{RailsMultisite::ConnectionManagement.current_db}'"
puts "Refreshing embeddings for posts and topic titles for '#{RailsMultisite::ConnectionManagement.current_db}'"
puts "-" * 50

missing_only = args[:missing_only]&.to_i
Expand Down Expand Up @@ -82,4 +48,28 @@ def refresh_embeddings(args)
end

puts "", "#{refreshed} posts done!", "-" * 50

begin
  # Topic-title pass: walk all topics in batches and upsert the title
  # embedding of each one (or only of those lacking one, see refresh_all).
  total = Topic.count
  refreshed = 0
  batch = 1000

  # missing_only is expected to be nil or an Integer here; nil/0 means
  # "refresh every topic", anything else means "only topics that have no
  # TopicTitleEmbedding row yet". Computed once since it never changes.
  refresh_all = missing_only.to_i.zero?

  process_topic_title_embedding = ::DiscourseChatbot::TopicTitleEmbeddingProcess.new

  # (total - 1).abs keeps the range valid when there are no topics
  # (0..1 yields a single, empty batch).
  (0..(total - 1).abs).step(batch) do |i|
    Topic
      .order(id: :desc)
      .offset(i)
      .limit(batch)
      .each do |topic|
        if refresh_all || ::DiscourseChatbot::TopicTitleEmbedding.find_by(topic_id: topic.id).nil?
          # Use the topic-title processor: this loop iterates topics, so the
          # id passed is a topic id, not a post id.
          process_topic_title_embedding.upsert(topic.id)
          sleep(delay) if delay
        end
        print_status(refreshed += 1, total)
      end
  end
end

puts "", "#{refreshed} topic titles done!", "-" * 50
end

0 comments on commit a67244f

Please sign in to comment.