Restructure omni services and add Chatwoot research snapshot

Ruslan Bakiev
2026-02-21 11:11:27 +07:00
parent edea7a0034
commit b73babbbf6
7732 changed files with 978203 additions and 32 deletions


@@ -0,0 +1,47 @@
module Captain::ChatGenerationRecorder
  extend ActiveSupport::Concern
  include Integrations::LlmInstrumentationConstants

  private

  def record_llm_generation(chat, message)
    return unless valid_llm_message?(message)

    # Create a generation span with model and token info for Langfuse cost calculation.
    # Note: span duration will be near-zero since we create and end it immediately,
    # but token counts are what Langfuse uses for cost calculation.
    tracer.in_span("llm.captain.#{feature_name}.generation") do |span|
      set_generation_span_attributes(span, chat, message)
    end
  rescue StandardError => e
    Rails.logger.warn "Failed to record LLM generation: #{e.message}"
  end

  # Skip non-LLM messages (e.g., tool results that RubyLLM processes internally).
  # Check for assistant role rather than token presence - some providers/streaming modes
  # may not return token counts, but we still want to capture the generation for evals.
  def valid_llm_message?(message)
    message.respond_to?(:role) && message.role.to_s == 'assistant'
  end

  def set_generation_span_attributes(span, chat, message)
    generation_attributes(chat, message).each do |key, value|
      span.set_attribute(key, value) if value
    end
  end

  def generation_attributes(chat, message)
    {
      ATTR_GEN_AI_PROVIDER => determine_provider(model),
      ATTR_GEN_AI_REQUEST_MODEL => model,
      ATTR_GEN_AI_REQUEST_TEMPERATURE => temperature,
      ATTR_GEN_AI_USAGE_INPUT_TOKENS => message.input_tokens,
      ATTR_GEN_AI_USAGE_OUTPUT_TOKENS => message.respond_to?(:output_tokens) ? message.output_tokens : nil,
      ATTR_LANGFUSE_OBSERVATION_INPUT => format_input_messages(chat),
      ATTR_LANGFUSE_OBSERVATION_OUTPUT => message.respond_to?(:content) ? message.content.to_s : nil
    }
  end

  # Everything except the last message is the input; the last message is the output.
  def format_input_messages(chat)
    chat.messages[0...-1].map { |m| { role: m.role.to_s, content: m.content.to_s } }.to_json
  end
end
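
For context, this concern leans on several methods it does not define (tracer, feature_name, model, temperature, determine_provider). A hypothetical sketch of the contract an including class would satisfy — the class name, tracer wiring, and provider mapping below are assumptions for illustration, not part of this commit:

  # Hypothetical host class; all names here are illustrative.
  class MyFeatureService
    include Captain::ChatGenerationRecorder

    def tracer
      # Standard OpenTelemetry Ruby SDK tracer lookup.
      OpenTelemetry.tracer_provider.tracer('captain')
    end

    def feature_name = 'assistant'
    def model = 'gpt-4o-mini'
    def temperature = 0.7

    # Assumed mapping; the real implementation lives elsewhere in the codebase.
    def determine_provider(model)
      model.start_with?('claude') ? 'anthropic' : 'openai'
    end
  end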


@@ -0,0 +1,136 @@
module Captain::ChatHelper
  include Integrations::LlmInstrumentation
  include Captain::ChatResponseHelper
  include Captain::ChatGenerationRecorder

  def request_chat_completion
    log_chat_completion_request
    chat = build_chat
    add_messages_to_chat(chat)
    with_agent_session do
      last_content = conversation_messages.last[:content]
      text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(last_content)
      response = attachments.any? ? chat.ask(text, with: attachments) : chat.ask(text)
      build_response(response)
    end
  rescue StandardError => e
    Rails.logger.error "#{self.class.name} Assistant: #{@assistant.id}, Error in chat completion: #{e}"
    raise e
  end

  private

  def build_chat
    llm_chat = chat(model: @model, temperature: temperature)
    llm_chat = llm_chat.with_params(response_format: { type: 'json_object' })
    llm_chat = setup_tools(llm_chat)
    llm_chat = setup_system_instructions(llm_chat)
    setup_event_handlers(llm_chat)
  end

  def setup_tools(llm_chat)
    @tools&.each do |tool|
      llm_chat = llm_chat.with_tool(tool)
    end
    llm_chat
  end

  def setup_system_instructions(chat)
    system_messages = @messages.select { |m| m[:role] == 'system' || m[:role] == :system }
    combined_instructions = system_messages.pluck(:content).join("\n\n")
    chat.with_instructions(combined_instructions)
  end

  def setup_event_handlers(chat)
    # NOTE: We only use on_end_message to record the generation with token counts.
    # RubyLLM callbacks fire after chunks arrive, not around the API call, so
    # span timing won't reflect actual API latency. But Langfuse calculates costs
    # from model + token counts, so this is sufficient for cost tracking.
    chat.on_end_message { |message| record_llm_generation(chat, message) }
    chat.on_tool_call { |tool_call| handle_tool_call(tool_call) }
    chat.on_tool_result { |result| handle_tool_result(result) }
    chat
  end

  def handle_tool_call(tool_call)
    persist_thinking_message(tool_call)
    start_tool_span(tool_call)
    (@pending_tool_calls ||= []).push(tool_call)
  end

  def handle_tool_result(result)
    end_tool_span(result)
    persist_tool_completion
  end

  # Replay all prior turns into the chat; the final message is sent via chat.ask.
  def add_messages_to_chat(chat)
    conversation_messages[0...-1].each do |msg|
      text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(msg[:content])
      content = attachments.any? ? RubyLLM::Content.new(text, attachments) : text
      chat.add_message(role: msg[:role].to_sym, content: content)
    end
  end

  def instrumentation_params(chat = nil)
    {
      span_name: "llm.captain.#{feature_name}",
      account_id: resolved_account_id,
      conversation_id: @conversation_id,
      feature_name: feature_name,
      model: @model,
      messages: chat ? chat.messages.map { |m| { role: m.role.to_s, content: m.content.to_s } } : @messages,
      temperature: temperature,
      metadata: {
        assistant_id: @assistant&.id,
        channel_type: resolved_channel_type
      }.compact
    }
  end

  def conversation_messages
    @messages.reject { |m| m[:role] == 'system' || m[:role] == :system }
  end

  # Default to 1 when the assistant config doesn't specify a temperature.
  # (Calling .to_f before the || would turn a missing value into 0.0, which is
  # truthy, so the fallback would never apply.)
  def temperature
    (@assistant&.config&.[]('temperature') || 1).to_f
  end

  def resolved_account_id
    @account&.id || @assistant&.account_id
  end

  def resolved_channel_type
    Conversation.find_by(account_id: resolved_account_id, display_id: @conversation_id)&.inbox&.channel_type if @conversation_id
  end

  # Ensures all LLM calls and tool executions within an agentic loop
  # are grouped under a single trace/session in Langfuse.
  #
  # Without this guard, each recursive call to request_chat_completion
  # (triggered by tool calls) would create a separate trace instead of
  # nesting within the existing session span.
  def with_agent_session(&)
    already_active = @agent_session_active
    return yield if already_active

    @agent_session_active = true
    instrument_agent_session(instrumentation_params, &)
  ensure
    @agent_session_active = false unless already_active
  end

  # Must be implemented by the including class to identify the feature for instrumentation.
  # Used for Langfuse tagging and span naming.
  def feature_name
    raise NotImplementedError, "#{self.class.name} must implement #feature_name"
  end

  def log_chat_completion_request
    Rails.logger.info("#{self.class.name} Assistant: #{@assistant.id}, Requesting chat completion " \
                      "for messages #{@messages} with #{@tools&.length || 0} tools")
  end
end
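
The re-entrancy guard in with_agent_session is easiest to see in isolation. A standalone sketch of the same pattern (all names here are illustrative, not from this commit):

  # Only the outermost call opens a session; nested calls run inside it.
  class SessionGuardDemo
    def with_session(&)
      already_active = @active
      return yield if already_active # nested call: reuse the outer session

      @active = true
      open_session(&)                # outermost call: open exactly one session
    ensure
      @active = false unless already_active
    end

    def open_session
      puts 'session opened'
      yield
      puts 'session closed'
    end
  end

  demo = SessionGuardDemo.new
  demo.with_session do
    demo.with_session { puts 'inner work' } # no second session is opened
  end
  # => session opened / inner work / session closed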


@@ -0,0 +1,75 @@
module Captain::ChatResponseHelper
  include Integrations::LlmInstrumentationConstants

  private

  def build_response(response)
    Rails.logger.debug { "#{self.class.name} Assistant: #{@assistant.id}, Received response #{response}" }
    parsed = parse_json_response(response.content)
    apply_credit_usage_metadata(parsed)
    persist_message(parsed, 'assistant')
    parsed
  end

  # Models sometimes wrap JSON in markdown fences; strip them before parsing.
  # On a parse failure, fall back to wrapping the raw content as plain text.
  def parse_json_response(content)
    content = content.gsub('```json', '').gsub('```', '')
    content = content.strip
    JSON.parse(content)
  rescue JSON::ParserError => e
    Rails.logger.error "#{self.class.name} Assistant: #{@assistant.id}, Error parsing JSON response: #{e.message}"
    { 'content' => content }
  end

  def apply_credit_usage_metadata(parsed_response)
    return unless captain_v1_assistant?

    OpenTelemetry::Trace.current_span.set_attribute(
      format(ATTR_LANGFUSE_METADATA, 'credit_used'),
      credit_used_for_response?(parsed_response).to_s
    )
  rescue StandardError => e
    Rails.logger.warn "#{self.class.name} Assistant: #{@assistant.id}, Failed to set credit usage metadata: #{e.message}"
  end

  # A handoff to a human agent is not counted as credit usage.
  def credit_used_for_response?(parsed_response)
    response = parsed_response['response']
    response.present? && response != 'conversation_handoff'
  end

  def captain_v1_assistant?
    feature_name == 'assistant' && !@assistant.account.feature_enabled?('captain_integration_v2')
  end

  def persist_thinking_message(tool_call)
    return if @copilot_thread.blank?

    tool_name = tool_call.name.to_s
    persist_message(
      {
        'content' => "Using #{tool_name}",
        'function_name' => tool_name
      },
      'assistant_thinking'
    )
  end

  def persist_tool_completion
    return if @copilot_thread.blank?

    tool_call = @pending_tool_calls&.pop
    return unless tool_call

    tool_name = tool_call.name.to_s
    persist_message(
      {
        'content' => "Completed #{tool_name}",
        'function_name' => tool_name
      },
      'assistant_thinking'
    )
  end
end
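
A quick illustration of the fence-stripping and fallback behaviour in parse_json_response (standalone snippet, not part of the commit):

  require 'json'

  fenced = "```json\n{\"response\": \"Hello!\"}\n```"
  stripped = fenced.gsub('```json', '').gsub('```', '').strip
  JSON.parse(stripped) # => {"response"=>"Hello!"}

  # Non-JSON content falls back to a plain-text wrapper:
  begin
    JSON.parse('not json')
  rescue JSON::ParserError
    { 'content' => 'not json' } # => {"content"=>"not json"}
  end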


@@ -0,0 +1,9 @@
module Captain::FirecrawlHelper
  # Derive a deterministic webhook token from the last four characters of the
  # Firecrawl API key plus the assistant and account ids.
  def generate_firecrawl_token(assistant_id, account_id)
    api_key = InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value
    return nil unless api_key

    token_base = "#{api_key[-4..]}#{assistant_id}#{account_id}"
    Digest::SHA256.hexdigest(token_base)
  end
end
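
Because the token is a deterministic digest of the key suffix and the two ids, a receiving endpoint could verify it by recomputation. A hypothetical sketch only — verify_firecrawl_token is assumed here, not defined anywhere in this commit:

  # Hypothetical verifier: recompute the token and compare in constant time.
  def verify_firecrawl_token(token, assistant_id, account_id)
    expected = generate_firecrawl_token(assistant_id, account_id)
    return false if expected.nil?

    ActiveSupport::SecurityUtils.secure_compare(token.to_s, expected)
  end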