Restructure omni services and add Chatwoot research snapshot

This commit is contained in:
Ruslan Bakiev
2026-02-21 11:11:27 +07:00
parent edea7a0034
commit b73babbbf6
7732 changed files with 978203 additions and 32 deletions

View File

@@ -0,0 +1,38 @@
class PageCrawlerService
attr_reader :external_link
def initialize(external_link)
@external_link = external_link
@doc = Nokogiri::HTML(HTTParty.get(external_link).body)
end
def page_links
sitemap? ? extract_links_from_sitemap : extract_links_from_html
end
def page_title
title_element = @doc.at_xpath('//title')
title_element&.text&.strip
end
def body_text_content
ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
end
private
def sitemap?
@external_link.end_with?('.xml')
end
def extract_links_from_sitemap
@doc.xpath('//loc').to_set(&:text)
end
def extract_links_from_html
@doc.xpath('//a/@href').to_set do |link|
absolute_url = URI.join(@external_link, link.value).to_s
absolute_url
end
end
end