Restructure omni services and add Chatwoot research snapshot
This commit is contained in:
31
research/chatwoot/lib/test_data/account_creator.rb
Normal file
31
research/chatwoot/lib/test_data/account_creator.rb
Normal file
@@ -0,0 +1,31 @@
|
||||
# Creates one synthetic account and appends its id to a tracking file so the
# account can be located again later.
class TestData::AccountCreator
  # Comma-separated list of the ids of every account created by this class.
  DATA_FILE = 'tmp/test_data_account_ids.txt'.freeze

  class << self
    # Creates an account with the given primary key, a generated company
    # name, a domain derived from that name, and a backdated creation time.
    #
    # @param id [Integer] primary key to assign to the new account
    # @return [Account] the persisted account
    # @raise whatever Account.create! raises when the record cannot be saved
    def create!(id)
      name = generate_company_name
      record = Account.create!(
        id: id,
        name: name,
        domain: generate_domain(name),
        created_at: Faker::Time.between(from: 2.years.ago, to: 6.months.ago)
      )
      persist_account_id(record.id)
      record
    end

    # @return [String] a Faker company name followed by a random company type
    def generate_company_name
      [Faker::Company.name, TestData::Constants::COMPANY_TYPES.sample].join(' ')
    end

    # @param company_name [String] name to turn into a host label
    # @return [String] parameterized name plus a random domain extension
    def generate_domain(company_name)
      host = company_name.parameterize
      extension = TestData::Constants::DOMAIN_EXTENSIONS.sample
      "#{host}.#{extension}"
    end

    # Appends the id (with a trailing comma) to DATA_FILE, creating the
    # tmp directory first when it does not exist.
    #
    # @param account_id [Integer]
    def persist_account_id(account_id)
      FileUtils.mkdir_p('tmp')
      File.write(DATA_FILE, "#{account_id},", mode: 'a')
    end
  end
end
|
||||
51
research/chatwoot/lib/test_data/cleanup_service.rb
Normal file
51
research/chatwoot/lib/test_data/cleanup_service.rb
Normal file
@@ -0,0 +1,51 @@
|
||||
# Removes accounts whose ids were recorded in DATA_FILE, then deletes the file.
class TestData::CleanupService
  # Comma-separated list of account ids to remove.
  DATA_FILE = 'tmp/test_data_account_ids.txt'.freeze

  class << self
    # Destroys every account listed in DATA_FILE and removes the file.
    # Does nothing (apart from logging) when the file is absent.
    #
    # @return [Object] the result of the last logger call
    def call
      Rails.logger.info 'Cleaning up any existing test data...'

      return log_no_file_found unless file_exists?

      account_ids = parse_account_ids_from_file

      if account_ids.any?
        delete_accounts(account_ids)
      else
        log_no_accounts_found
      end

      delete_data_file
      Rails.logger.info '==> Cleanup complete!'
    end

    private

    def file_exists?
      File.exist?(DATA_FILE)
    end

    def log_no_file_found
      Rails.logger.info 'No test data file found, skipping cleanup'
    end

    # Reads DATA_FILE and returns the distinct positive integer ids it holds.
    # Tokens that are not plain integers are skipped rather than being
    # coerced to 0, and repeated ids are collapsed.
    #
    # @return [Array<Integer>]
    def parse_account_ids_from_file
      File.read(DATA_FILE)
          .split(',')
          .map { |token| Integer(token.strip, 10, exception: false) }
          .compact
          .select(&:positive?)
          .uniq
    end

    # Destroys the matching accounts and logs how many were actually removed.
    #
    # @param account_ids [Array<Integer>]
    def delete_accounts(account_ids)
      Rails.logger.info "Found #{account_ids.size} test accounts to clean up: #{account_ids.join(', ')}"
      start_time = Time.zone.now
      # destroy_all returns the destroyed records, so the log reflects the
      # rows that really existed rather than the number of ids in the file.
      destroyed = Account.where(id: account_ids).destroy_all
      Rails.logger.info "Deleted #{destroyed.size} accounts in #{Time.zone.now - start_time}s"
    end

    def log_no_accounts_found
      Rails.logger.info 'No test account IDs found in the data file'
    end

    def delete_data_file
      File.delete(DATA_FILE)
    end
  end
end
|
||||
18
research/chatwoot/lib/test_data/constants.rb
Normal file
18
research/chatwoot/lib/test_data/constants.rb
Normal file
@@ -0,0 +1,18 @@
|
||||
# Tunable sizes and sample value pools used by the test data generators.
module TestData::Constants
  # --- Volume -------------------------------------------------------------
  NUM_ACCOUNTS = 20
  MIN_MESSAGES = 1_000_000 # 1M
  MAX_MESSAGES = 10_000_000 # 10M
  BATCH_SIZE = 5_000

  # --- Per-account / per-contact shape --------------------------------------
  MAX_CONVERSATIONS_PER_CONTACT = 20
  INBOXES_PER_ACCOUNT = 5
  STATUSES = ['open', 'resolved', 'pending'].freeze
  MESSAGE_TYPES = ['incoming', 'outgoing'].freeze

  # --- Per-conversation message count bounds --------------------------------
  MIN_MESSAGES_PER_CONVO = 5
  MAX_MESSAGES_PER_CONVO = 50

  # --- Sample pools for generated names, domains and phone numbers ----------
  COMPANY_TYPES = ['Retail', 'Healthcare', 'Finance', 'Education', 'Manufacturing'].freeze
  DOMAIN_EXTENSIONS = ['com', 'io', 'tech', 'ai'].freeze
  # US, UK, India, Australia, Japan, China, Germany, France, Spain, Italy
  COUNTRY_CODES = ['1', '44', '91', '61', '81', '86', '49', '33', '34', '39'].freeze
end
|
||||
196
research/chatwoot/lib/test_data/contact_batch_service.rb
Normal file
196
research/chatwoot/lib/test_data/contact_batch_service.rb
Normal file
@@ -0,0 +1,196 @@
|
||||
# Generates one batch of synthetic contacts for an account together with
# their contact inboxes, conversations and messages, using bulk inserts that
# bypass model validations and callbacks.
class TestData::ContactBatchService
  # @param account [Account] account that owns all generated rows
  # @param inboxes [Enumerable<Inbox>] every contact is linked to each of these
  # @param batch_size [Integer] number of contacts to create
  # @param display_id_tracker [#next_id] source of conversation display ids
  def initialize(account:, inboxes:, batch_size:, display_id_tracker:)
    @account = account
    @inboxes = inboxes
    @batch_size = batch_size
    @display_id_tracker = display_id_tracker
    @total_messages = 0
  end

  # Generates contacts, contact_inboxes, conversations, and messages
  # Returns the total number of messages created in this batch
  def generate!
    Rails.logger.info { "Starting batch generation for account ##{@account.id} with #{@batch_size} contacts" }

    create_contacts
    create_contact_inboxes
    create_conversations
    create_messages

    Rails.logger.info { "Completed batch with #{@total_messages} messages for account ##{@account.id}" }
    @total_messages
  end

  private

  # rubocop:disable Rails/SkipsModelValidations
  # Bulk-inserts the batch's contacts and keeps a relation over exactly those
  # rows in @contacts.
  def create_contacts
    Rails.logger.info { "Creating #{@batch_size} contacts for account ##{@account.id}" }
    start_time = Time.current

    @contacts_data = Array.new(@batch_size) { build_contact_data }
    Contact.insert_all!(@contacts_data) if @contacts_data.any?

    # Re-read the inserted rows by their generated (UUID-based, hence unique)
    # emails. Selecting "the newest N by created_at" is not reliable here
    # because created_at is randomised across the past year, so rows from
    # earlier batches of the same account could be picked up instead.
    @contacts = Contact.where(
      account_id: @account.id,
      email: @contacts_data.pluck(:email)
    )

    Rails.logger.info { "Contacts created in #{Time.current - start_time}s" }
  end
  # rubocop:enable Rails/SkipsModelValidations

  # @return [Hash] attributes for one contact row; created_at and updated_at
  #   share a random timestamp from the past year
  def build_contact_data
    created_at = Faker::Time.between(from: 1.year.ago, to: Time.current)
    {
      account_id: @account.id,
      name: Faker::Name.name,
      email: "#{SecureRandom.uuid}@example.com",
      phone_number: generate_e164_phone_number,
      additional_attributes: maybe_add_additional_attributes,
      created_at: created_at,
      updated_at: created_at
    }
  end

  # @return [Hash, nil] company/city/country attributes for roughly 30% of
  #   calls, nil otherwise
  def maybe_add_additional_attributes
    return unless rand < 0.3

    {
      company: Faker::Company.name,
      city: Faker::Address.city,
      country: Faker::Address.country_code
    }
  end

  # @return [String, nil] "+<country code><subscriber digits>" for roughly 70%
  #   of calls, nil otherwise; country code plus subscriber digits are capped
  #   at 15 digits in total
  def generate_e164_phone_number
    return nil unless rand < 0.7

    country_code = TestData::Constants::COUNTRY_CODES.sample
    subscriber_number = rand(1_000_000..9_999_999_999).to_s
    subscriber_number = subscriber_number[0...(15 - country_code.length)]
    "+#{country_code}#{subscriber_number}"
  end

  # rubocop:disable Rails/SkipsModelValidations
  # Links every contact of the batch to every inbox and keeps a relation over
  # the resulting rows in @contact_inboxes.
  def create_contact_inboxes
    Rails.logger.info { "Creating contact inboxes for #{@contacts.size} contacts" }
    start_time = Time.current

    contact_inboxes_data = @contacts.flat_map do |contact|
      @inboxes.map do |inbox|
        {
          inbox_id: inbox.id,
          contact_id: contact.id,
          source_id: SecureRandom.uuid,
          created_at: contact.created_at,
          updated_at: contact.created_at
        }
      end
    end

    count = contact_inboxes_data.size
    ContactInbox.insert_all!(contact_inboxes_data) if contact_inboxes_data.any?
    @contact_inboxes = ContactInbox.where(contact_id: @contacts.pluck(:id))

    Rails.logger.info { "Created #{count} contact inboxes in #{Time.current - start_time}s" }
  end
  # rubocop:enable Rails/SkipsModelValidations

  # rubocop:disable Rails/SkipsModelValidations
  # Creates between 1 and MAX_CONVERSATIONS_PER_CONTACT conversations for each
  # contact inbox and keeps a relation over them in @conversations.
  def create_conversations
    Rails.logger.info { "Creating conversations for account ##{@account.id}" }
    start_time = Time.current

    conversations_data = []
    @contact_inboxes.each do |ci|
      num_convos = rand(1..TestData::Constants::MAX_CONVERSATIONS_PER_CONTACT)
      num_convos.times { conversations_data << build_conversation(ci) }
    end

    count = conversations_data.size
    Rails.logger.info { "Preparing to insert #{count} conversations" }

    Conversation.insert_all!(conversations_data) if conversations_data.any?
    # No explicit ordering: the relation is only consumed through
    # find_in_batches, which imposes its own primary-key order and ignores
    # (with a warning, or an error when so configured) any custom order.
    @conversations = Conversation.where(
      account_id: @account.id,
      display_id: conversations_data.pluck(:display_id)
    )

    Rails.logger.info { "Created #{count} conversations in #{Time.current - start_time}s" }
  end
  # rubocop:enable Rails/SkipsModelValidations

  # @param contact_inbox [ContactInbox]
  # @return [Hash] attributes for one conversation row, timestamped at a
  #   random point between the contact inbox's creation and now
  def build_conversation(contact_inbox)
    created_at = Faker::Time.between(from: contact_inbox.created_at, to: Time.current)
    {
      account_id: @account.id,
      inbox_id: contact_inbox.inbox_id,
      contact_id: contact_inbox.contact_id,
      contact_inbox_id: contact_inbox.id,
      status: TestData::Constants::STATUSES.sample,
      created_at: created_at,
      updated_at: created_at,
      display_id: @display_id_tracker.next_id
    }
  end

  # rubocop:disable Rails/SkipsModelValidations
  # Inserts messages for the batch's conversations, 1000 conversations at a
  # time, and accumulates the number of inserted rows in @total_messages.
  def create_messages
    Rails.logger.info { "Creating messages for #{@conversations.size} conversations" }
    start_time = Time.current

    batch_count = 0
    @conversations.find_in_batches(batch_size: 1000) do |batch|
      batch_count += 1
      batch_start = Time.current

      messages_data = batch.flat_map do |convo|
        build_messages_for_conversation(convo)
      end

      batch_message_count = messages_data.size
      Rails.logger.info { "Preparing to insert #{batch_message_count} messages (batch #{batch_count})" }

      Message.insert_all!(messages_data) if messages_data.any?
      @total_messages += batch_message_count

      Rails.logger.info { "Created batch #{batch_count} with #{batch_message_count} messages in #{Time.current - batch_start}s" }
    end

    Rails.logger.info { "Created total of #{@total_messages} messages in #{Time.current - start_time}s" }
  end
  # rubocop:enable Rails/SkipsModelValidations

  # @param conversation [Conversation]
  # @return [Array<Hash>] between MIN_MESSAGES_PER_CONVO and
  #   MAX_MESSAGES_PER_CONVO message attribute hashes
  def build_messages_for_conversation(conversation)
    num_messages = rand(TestData::Constants::MIN_MESSAGES_PER_CONVO..TestData::Constants::MAX_MESSAGES_PER_CONVO)
    message_type = TestData::Constants::MESSAGE_TYPES.sample
    time_range = [conversation.created_at, Time.current]
    generate_messages(conversation, num_messages, message_type, time_range)
  end

  # Builds num_messages rows whose type alternates between incoming and
  # outgoing. The type is flipped before each row is built, so the first row
  # gets the opposite of initial_message_type.
  def generate_messages(conversation, num_messages, initial_message_type, time_range)
    message_type = initial_message_type

    Array.new(num_messages) do
      message_type = (message_type == 'incoming' ? 'outgoing' : 'incoming')
      created_at = Faker::Time.between(from: time_range.first, to: time_range.last)
      build_message_data(conversation, message_type, created_at)
    end
  end

  # @return [Hash] attributes for one text message row in the conversation
  def build_message_data(conversation, message_type, created_at)
    {
      account_id: @account.id,
      inbox_id: conversation.inbox_id,
      conversation_id: conversation.id,
      message_type: message_type,
      content: Faker::Lorem.paragraph(sentence_count: 2),
      created_at: created_at,
      updated_at: created_at,
      private: false,
      status: 'sent',
      content_type: 'text',
      source_id: SecureRandom.uuid
    }
  end
end
|
||||
80
research/chatwoot/lib/test_data/database_optimizer.rb
Normal file
80
research/chatwoot/lib/test_data/database_optimizer.rb
Normal file
@@ -0,0 +1,80 @@
|
||||
# Applies and reverts session/table level database settings around a bulk
# data load. The SQL used (SET/RESET, ALTER TABLE ... SET UNLOGGED,
# DISABLE TRIGGER ALL) is PostgreSQL syntax.
class TestData::DatabaseOptimizer
  # Tables that need trigger management
  TABLES_WITH_TRIGGERS = %w[conversations messages].freeze

  # Memory settings in MB
  # Increased work_mem for better query performance with complex operations
  WORK_MEM = 256

  class << self
    # Removes the statement timeout, raises work_mem, switches the managed
    # tables to UNLOGGED and disables their triggers.
    def setup
      Rails.logger.info '==> Setting up database optimizations for improved performance'

      # Remove statement timeout to allow long-running operations to complete.
      # The current value is remembered so that restore can put it back.
      Rails.logger.info ' Removing statement timeout'
      @previous_statement_timeout = connection.select_value('SHOW statement_timeout')
      connection.execute('SET statement_timeout = 0')

      # Increase working memory for better query performance
      Rails.logger.info " Increasing work_mem to #{WORK_MEM}MB"
      connection.execute("SET work_mem = '#{WORK_MEM}MB'")

      # Set tables to UNLOGGED mode for better write performance
      # This disables WAL completely for these tables
      Rails.logger.info ' Setting tables to UNLOGGED mode'
      set_tables_unlogged

      # Disable triggers on specified tables to avoid overhead
      Rails.logger.info ' Disabling triggers on specified tables'
      disable_triggers

      Rails.logger.info '==> Database optimizations complete, data generation will run faster'
    end

    # Reverts everything setup changed: triggers, LOGGED mode, memory
    # settings and the statement timeout.
    def restore
      Rails.logger.info '==> Restoring database settings to normal'

      Rails.logger.info ' Re-enabling triggers on specified tables'
      enable_triggers

      Rails.logger.info ' Setting tables back to LOGGED mode'
      set_tables_logged

      # Reset memory settings to defaults
      Rails.logger.info ' Resetting memory settings to defaults'
      connection.execute('RESET work_mem')
      connection.execute('RESET maintenance_work_mem')

      Rails.logger.info ' Restoring statement timeout'
      restore_statement_timeout

      Rails.logger.info '==> Database settings restored to normal operation'
    end

    private

    def connection
      ActiveRecord::Base.connection
    end

    # Puts back the timeout captured by setup. When setup was not run in this
    # process there is nothing to put back, so fall back to RESET.
    def restore_statement_timeout
      if @previous_statement_timeout
        connection.execute("SET statement_timeout = #{connection.quote(@previous_statement_timeout)}")
        @previous_statement_timeout = nil
      else
        connection.execute('RESET statement_timeout')
      end
    end

    def disable_triggers
      TABLES_WITH_TRIGGERS.each do |table|
        Rails.logger.info " Disabling triggers on #{table} table"
        connection.execute("ALTER TABLE #{table} DISABLE TRIGGER ALL")
      end
    end

    def enable_triggers
      TABLES_WITH_TRIGGERS.each do |table|
        Rails.logger.info " Enabling triggers on #{table} table"
        connection.execute("ALTER TABLE #{table} ENABLE TRIGGER ALL")
      end
    end

    def set_tables_unlogged
      TABLES_WITH_TRIGGERS.each do |table|
        Rails.logger.info " Setting #{table} table as UNLOGGED"
        connection.execute("ALTER TABLE #{table} SET UNLOGGED")
      end
    end

    def set_tables_logged
      TABLES_WITH_TRIGGERS.each do |table|
        Rails.logger.info " Setting #{table} table as LOGGED"
        connection.execute("ALTER TABLE #{table} SET LOGGED")
      end
    end
  end
end
|
||||
12
research/chatwoot/lib/test_data/display_id_tracker.rb
Normal file
12
research/chatwoot/lib/test_data/display_id_tracker.rb
Normal file
@@ -0,0 +1,12 @@
|
||||
# Hands out consecutive conversation display ids for one account, continuing
# from the highest display_id already stored for that account.
class TestData::DisplayIdTracker
  # @return [Integer] the most recently issued (or initially found) display id
  attr_reader :current

  # @param account [Account] account whose conversations seed the counter
  def initialize(account:)
    existing_max = Conversation.where(account_id: account.id).maximum(:display_id)
    # Start from zero when the account has no conversations yet.
    @current = existing_max || 0
  end

  # Advances the counter by one.
  #
  # @return [Integer] the newly issued display id
  def next_id
    @current = @current + 1
  end
end
|
||||
12
research/chatwoot/lib/test_data/inbox_creator.rb
Normal file
12
research/chatwoot/lib/test_data/inbox_creator.rb
Normal file
@@ -0,0 +1,12 @@
|
||||
# Creates the API-channel inboxes for a generated account.
class TestData::InboxCreator
  class << self
    # Creates INBOXES_PER_ACCOUNT inboxes, each backed by its own
    # Channel::Api record and given a random hex suffix in its name.
    #
    # @param account [Account] owner of the channels and inboxes
    # @return [Array<Inbox>] the created inboxes, in creation order
    def create_for(account)
      TestData::Constants::INBOXES_PER_ACCOUNT.times.map do
        api_channel = Channel::Api.create!(account: account)
        inbox_attributes = {
          account_id: account.id,
          name: "API Inbox #{SecureRandom.hex(4)}",
          channel: api_channel
        }
        Inbox.create!(inbox_attributes)
      end
    end
  end
end
|
||||
109
research/chatwoot/lib/test_data/orchestrator.rb
Normal file
109
research/chatwoot/lib/test_data/orchestrator.rb
Normal file
@@ -0,0 +1,109 @@
|
||||
# Drives a full test data run: cleans up earlier runs, then creates
# NUM_ACCOUNTS accounts and fills each one with inboxes, contacts,
# conversations and messages.
class TestData::Orchestrator
  # Simple value object to group generation parameters
  class DataGenerationParams
    attr_reader :account, :inboxes, :total_contacts_needed, :target_message_count, :display_id_tracker

    def initialize(account:, inboxes:, total_contacts_needed:, target_message_count:, display_id_tracker:)
      @account = account
      @inboxes = inboxes
      @total_contacts_needed = total_contacts_needed
      @target_message_count = target_message_count
      @display_id_tracker = display_id_tracker
    end
  end

  class << self
    # Runs the whole generation pipeline.
    def call
      Rails.logger.info { '========== STARTING TEST DATA GENERATION ==========' }

      cleanup_existing_data
      set_start_id

      Rails.logger.info { "Starting to generate distributed test data across #{TestData::Constants::NUM_ACCOUNTS} accounts..." }
      Rails.logger.info do
        "Each account have between #{TestData::Constants::MIN_MESSAGES / 1_000_000}M and #{TestData::Constants::MAX_MESSAGES / 1_000_000}M messages"
      end

      TestData::Constants::NUM_ACCOUNTS.times do |account_index|
        Rails.logger.info { "Processing account #{account_index + 1} of #{TestData::Constants::NUM_ACCOUNTS}" }
        process_account(account_index)
      end

      Rails.logger.info { "========== ALL DONE! Created #{TestData::Constants::NUM_ACCOUNTS} accounts with distributed test data ==========" }
    end

    private

    # 1. Remove existing data for old test accounts
    def cleanup_existing_data
      Rails.logger.info { 'Cleaning up existing test data...' }
      TestData::CleanupService.call
      Rails.logger.info { 'Cleanup complete' }
    end

    # 2. Find the max Account ID to avoid conflicts
    def set_start_id
      max_id = Account.maximum(:id) || 0
      @start_id = max_id + 1
      Rails.logger.info { "Setting start ID to #{@start_id}" }
    end

    # 3. Create an account, its inboxes, and some data
    #
    # @param account_index [Integer] zero-based offset added to @start_id
    def process_account(account_index)
      account_id = @start_id + account_index
      Rails.logger.info { "Creating account with ID #{account_id}" }
      account = TestData::AccountCreator.create!(account_id)

      inboxes = TestData::InboxCreator.create_for(account)
      target_messages = rand(TestData::Constants::MIN_MESSAGES..TestData::Constants::MAX_MESSAGES)
      total_convos = (target_messages / expected_messages_per_conversation).ceil
      total_contacts = (total_convos / expected_conversations_per_contact(inboxes.size)).ceil

      log_account_details(account, target_messages, total_contacts, total_convos)
      display_id_tracker = TestData::DisplayIdTracker.new(account: account)

      params = DataGenerationParams.new(
        account: account,
        inboxes: inboxes,
        total_contacts_needed: total_contacts,
        target_message_count: target_messages,
        display_id_tracker: display_id_tracker
      )

      Rails.logger.info { "Starting data generation for account ##{account.id}" }
      generate_data_for_account(params)
    end

    # Mean of the per-conversation message count range used by the batch
    # service (MIN_MESSAGES_PER_CONVO..MAX_MESSAGES_PER_CONVO).
    #
    # @return [Float]
    def expected_messages_per_conversation
      (TestData::Constants::MIN_MESSAGES_PER_CONVO + TestData::Constants::MAX_MESSAGES_PER_CONVO) / 2.0
    end

    # Expected conversations generated per contact: the batch service links a
    # contact to every inbox and creates 1..MAX_CONVERSATIONS_PER_CONTACT
    # conversations for each of those links.
    #
    # @param inbox_count [Integer]
    # @return [Float]
    def expected_conversations_per_contact(inbox_count)
      per_inbox = (1 + TestData::Constants::MAX_CONVERSATIONS_PER_CONTACT) / 2.0
      # Guard against a zero divisor when no inboxes were created.
      [inbox_count, 1].max * per_inbox
    end

    # Generates contacts in batches until either the planned number of
    # contacts has been created or the account's message target is reached,
    # whichever comes first.
    #
    # @param params [DataGenerationParams]
    def generate_data_for_account(params)
      contact_count = 0
      message_count = 0
      batch_number = 0

      while contact_count < params.total_contacts_needed && message_count < params.target_message_count
        batch_number += 1
        batch_size = [TestData::Constants::BATCH_SIZE, params.total_contacts_needed - contact_count].min
        Rails.logger.info { "Processing batch ##{batch_number} (#{batch_size} contacts) for account ##{params.account.id}" }

        batch_service = TestData::ContactBatchService.new(
          account: params.account,
          inboxes: params.inboxes,
          batch_size: batch_size,
          display_id_tracker: params.display_id_tracker
        )
        batch_created_messages = batch_service.generate!

        contact_count += batch_size
        message_count += batch_created_messages
      end

      Rails.logger.info { "==> Completed Account ##{params.account.id} with #{message_count} messages" }
    end

    def log_account_details(account, target_messages, total_contacts, total_convos)
      Rails.logger.info { "==> Account ##{account.id} plan: target of #{target_messages / 1_000_000.0}M messages" }
      Rails.logger.info { " Planning for #{total_contacts} contacts and #{total_convos} conversations" }
    end
  end
end
|
||||
Reference in New Issue
Block a user