Restructure omni services and add Chatwoot research snapshot
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
# Background job (queue :low) that ingests a single Captain document.
#
# Exactly one strategy runs per invocation, checked in this order:
#   1. PDF documents are handed to Captain::Llm::PdfProcessingService.
#   2. If an installation config named CAPTAIN_FIRECRAWL_API_KEY has a non-blank
#      value, the document's external link is submitted to
#      Captain::Tools::FirecrawlService together with a webhook URL.
#   3. Otherwise the link is crawled with Captain::Tools::SimplePageCrawlService
#      and one parser job is enqueued per discovered page.
class Captain::Documents::CrawlJob < ApplicationJob
  # generate_firecrawl_token is not defined in this class; presumably it comes
  # from this helper — verify against Captain::FirecrawlHelper.
  include Captain::FirecrawlHelper

  queue_as :low

  FIRECRAWL_API_KEY_CONFIG = 'CAPTAIN_FIRECRAWL_API_KEY'.freeze
  # Crawl limit used when the account reports no available document count.
  DEFAULT_CRAWL_LIMIT = 10
  # Upper bound on the crawl limit passed to Firecrawl, whatever the account allows.
  MAX_CRAWL_LIMIT = 500
  private_constant :FIRECRAWL_API_KEY_CONFIG, :DEFAULT_CRAWL_LIMIT, :MAX_CRAWL_LIMIT

  # Dispatches the document to the matching ingestion strategy.
  #
  # @param document the document record to process; must respond to
  #   pdf_document?, external_link, assistant_id, account_id and account
  def perform(document)
    return perform_pdf_processing(document) if document.pdf_document?
    return perform_firecrawl_crawl(document) if firecrawl_configured?

    perform_simple_crawl(document)
  end

  private

  # True when the Firecrawl API key installation config exists with a non-blank value.
  def firecrawl_configured?
    InstallationConfig.find_by(name: FIRECRAWL_API_KEY_CONFIG)&.value.present?
  end

  # Runs the PDF processing service, then marks the document as available.
  # Any StandardError is logged and re-raised.
  def perform_pdf_processing(document)
    Captain::Llm::PdfProcessingService.new(document).process
    document.update!(status: :available)
  rescue StandardError => e
    failure_message = I18n.t('captain.documents.pdf_processing_failed', document_id: document.id, error: e.message)
    Rails.logger.error(failure_message)
    raise # Re-raise so the job framework can apply its retry handling
  end

  # Enqueues a parser job for every link discovered on the document's page,
  # followed by one for the document's own external link.
  def perform_simple_crawl(document)
    discovered_links = Captain::Tools::SimplePageCrawlService.new(document.external_link).page_links

    discovered_links.each { |link| enqueue_page_parser(document, link) }
    enqueue_page_parser(document, document.external_link)
  end

  # Enqueues SimplePageCrawlParserJob for one link under the document's assistant.
  def enqueue_page_parser(document, link)
    Captain::Tools::SimplePageCrawlParserJob.perform_later(
      assistant_id: document.assistant_id,
      page_link: link
    )
  end

  # Submits the document's external link to Firecrawl with the webhook URL
  # and the account-derived crawl limit.
  def perform_firecrawl_crawl(document)
    firecrawl = Captain::Tools::FirecrawlService.new

    firecrawl.perform(
      document.external_link,
      firecrawl_webhook_url(document),
      firecrawl_crawl_limit(document)
    )
  end

  # Reads usage_limits[:captain][:documents][:current_available] from the
  # document's account, treating missing levels as empty and a missing count
  # as DEFAULT_CRAWL_LIMIT, and caps the result at MAX_CRAWL_LIMIT.
  def firecrawl_crawl_limit(document)
    captain_limits = document.account.usage_limits[:captain] || {}
    document_limits = captain_limits[:documents] || {}
    available = document_limits[:current_available] || DEFAULT_CRAWL_LIMIT

    [available, MAX_CRAWL_LIMIT].min
  end

  # Builds the webhook URL handed to Firecrawl: the enterprise Firecrawl webhook
  # route with the assistant id and a generated token as query parameters.
  def firecrawl_webhook_url(document)
    base_url = Rails.application.routes.url_helpers.enterprise_webhooks_firecrawl_url
    assistant_id = document.assistant_id
    token = generate_firecrawl_token(document.assistant_id, document.account_id)

    "#{base_url}?assistant_id=#{assistant_id}&token=#{token}"
  end
end
|
||||
@@ -0,0 +1,78 @@
|
||||
# Background job (queue :low) that rebuilds the FAQ responses of a document.
#
# Each run destroys the document's existing responses, generates a list of
# FAQs from the document and stores every FAQ as a response on the document.
class Captain::Documents::ResponseBuilderJob < ApplicationJob
  queue_as :low

  # @param document the document record whose responses are rebuilt
  # @param options [Hash] optional :pages_per_chunk and :max_pages; only read
  #   when the paginated generator is used
  def perform(document, options = {})
    # NOTE(review): existing responses are removed before generation starts, so
    # an error raised while generating leaves the document without responses
    # until a later run succeeds — confirm this is intended.
    reset_previous_responses(document)

    generated_faqs = generate_faqs(document, options)
    create_responses_from_faqs(generated_faqs, document)
  end

  private

  # Picks the paginated generator when should_use_pagination? holds, the
  # standard generator otherwise.
  def generate_faqs(document, options)
    return generate_standard_faqs(document) unless should_use_pagination?(document)

    generate_paginated_faqs(document, options)
  end

  # Generates FAQs with the paginated service and, once generation has
  # finished, records run statistics in the document metadata.
  # Returns the generated FAQs.
  def generate_paginated_faqs(document, options)
    paginated_service = build_paginated_service(document, options)

    paginated_service.generate.tap { store_paginated_metadata(document, paginated_service) }
  end

  # Generates FAQs from the document's content in the account's language.
  def generate_standard_faqs(document)
    language = document.account.locale_english_name

    Captain::Llm::FaqGeneratorService.new(document.content, language, account_id: document.account_id).generate
  end

  # Instantiates the paginated generator; chunking options are passed through
  # as given (nil when the caller supplied none).
  def build_paginated_service(document, options)
    Captain::Llm::PaginatedFaqGeneratorService.new(
      document,
      pages_per_chunk: options[:pages_per_chunk],
      max_pages: options[:max_pages],
      language: document.account.locale_english_name
    )
  end

  # Merges a 'faq_generation' entry (method, pages processed, iterations,
  # timestamp) into the document metadata, keeping any other existing keys.
  def store_paginated_metadata(document, service)
    existing_metadata = document.metadata || {}
    generation_details = {
      'method' => 'paginated',
      'pages_processed' => service.total_pages_processed,
      'iterations' => service.iterations_completed,
      'timestamp' => Time.current.iso8601
    }

    document.update!(metadata: existing_metadata.merge('faq_generation' => generation_details))
  end

  # Persists each FAQ as a response on the document.
  def create_responses_from_faqs(faqs, document)
    faqs.each do |faq_entry|
      create_response(faq_entry, document)
    end
  end

  # Pagination is currently used only for PDF documents that have an OpenAI file id.
  def should_use_pagination?(document)
    document.pdf_document? && document.openai_file_id.present?
  end

  # Destroys all responses currently attached to the document.
  def reset_previous_responses(document)
    document.responses.destroy_all
  end

  # Creates one response from a FAQ hash with 'question' and 'answer' keys.
  # A validation failure is logged and swallowed so the remaining FAQs are
  # still processed.
  def create_response(faq, document)
    response_attributes = {
      question: faq['question'],
      answer: faq['answer'],
      assistant: document.assistant,
      documentable: document
    }

    document.responses.create!(response_attributes)
  rescue ActiveRecord::RecordInvalid => e
    Rails.logger.error I18n.t('captain.documents.response_creation_error', error: e.message)
  end
end
|
||||
Reference in New Issue
Block a user