class Messages::AudioTranscriptionService < Llm::LegacyBaseOpenAiService
  include Integrations::LlmInstrumentation

  # OpenAI speech-to-text model used for all transcriptions.
  WHISPER_MODEL = 'whisper-1'.freeze

  attr_reader :attachment, :message, :account

  # @param attachment [Attachment] audio attachment whose file will be transcribed;
  #   the owning message and account are derived from it.
  def initialize(attachment)
    super()
    @attachment = attachment
    @message = attachment.message
    @account = message.account
  end

  # Transcribes the attachment's audio via the Whisper API.
  #
  # @return [Hash] { success: true, transcriptions: String } on success,
  #   or { error: String } when the message is missing or limits are exceeded.
  def perform
    # Check the message first: can_transcribe? reads account state that is
    # derived from the message, so it must not run when the message is missing.
    return { error: 'Message not found' } if message.blank?
    return { error: 'Transcription limit exceeded' } unless can_transcribe?

    transcriptions = transcribe_audio
    Rails.logger.info "Audio transcription successful: #{transcriptions}"
    { success: true, transcriptions: transcriptions }
  end

  private

  # Transcription is allowed only when the captain integration feature is
  # enabled, the account has audio transcriptions turned on, and response
  # credits remain in the usage limits.
  def can_transcribe?
    return false unless account.feature_enabled?('captain_integration')
    return false if account.audio_transcriptions.blank?

    account.usage_limits[:captain][:responses][:current_available].positive?
  end

  # Downloads the attachment blob into a temp file and returns its path.
  # When the blob's filename has no extension, one is derived from the
  # content type and appended — presumably so the Whisper API can infer the
  # audio format from the filename (TODO confirm against API behavior).
  def fetch_audio_file
    blob = attachment.file.blob
    temp_dir = Rails.root.join('tmp/uploads/audio-transcriptions')
    FileUtils.mkdir_p(temp_dir)
    temp_file_name = "#{blob.key}-#{blob.filename}"

    if blob.filename.extension_without_delimiter.blank?
      extension = extension_from_content_type(blob.content_type)
      temp_file_name = "#{temp_file_name}.#{extension}" if extension.present?
    end

    temp_file_path = File.join(temp_dir, temp_file_name)

    # Block forms ensure both the destination file and the blob stream are closed.
    File.open(temp_file_path, 'wb') do |file|
      blob.open do |blob_file|
        IO.copy_stream(blob_file, file)
      end
    end

    temp_file_path
  end

  # Returns the cached transcription when one exists in attachment meta;
  # otherwise calls the Whisper API, persists the result, and always removes
  # the temp file afterwards.
  def transcribe_audio
    transcribed_text = attachment.meta&.[]('transcribed_text') || ''
    return transcribed_text if transcribed_text.present?

    temp_file_path = fetch_audio_file
    transcribed_text = nil

    File.open(temp_file_path, 'rb') do |file|
      response = @client.audio.transcribe(
        parameters: {
          model: WHISPER_MODEL,
          file: file,
          temperature: 0.4
        }
      )
      transcribed_text = response['text']
    end

    update_transcription(transcribed_text)
    transcribed_text
  ensure
    # temp_file_path is nil when we returned the cached text above;
    # present? guards rm_f in that case.
    FileUtils.rm_f(temp_file_path) if temp_file_path.present?
  end

  # Span attributes for LLM instrumentation of a transcription call.
  def instrumentation_params(file_path)
    {
      span_name: 'llm.messages.audio_transcription',
      model: WHISPER_MODEL,
      account_id: account&.id,
      feature_name: 'audio_transcription',
      file_path: file_path
    }
  end

  # Persists the transcription on the attachment, broadcasts the message
  # update, records response usage, and reindexes when advanced search is on.
  def update_transcription(transcribed_text)
    return if transcribed_text.blank?

    # Merge into the existing meta instead of replacing it wholesale, so any
    # other keys already stored on the attachment are preserved.
    attachment.update!(meta: (attachment.meta || {}).merge(transcribed_text: transcribed_text))
    message.reload.send_update_event
    message.account.increment_response_usage

    return unless ChatwootApp.advanced_search_allowed?

    message.reindex
  end

  # Maps a MIME content type to a file extension, stripping any parameters
  # (e.g. "; codecs=...") and normalizing common non-standard audio subtypes
  # (x-m4a, x-wav, x-mp3). Returns nil when no subtype can be extracted.
  def extension_from_content_type(content_type)
    subtype = content_type.to_s.downcase.split(';').first.to_s.split('/').last.to_s
    return if subtype.blank?

    {
      'x-m4a' => 'm4a',
      'x-wav' => 'wav',
      'x-mp3' => 'mp3'
    }.fetch(subtype, subtype)
  end
end
|