Restructure omni services and add Chatwoot research snapshot
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
class PageCrawlerService
|
||||
attr_reader :external_link
|
||||
|
||||
def initialize(external_link)
|
||||
@external_link = external_link
|
||||
@doc = Nokogiri::HTML(HTTParty.get(external_link).body)
|
||||
end
|
||||
|
||||
def page_links
|
||||
sitemap? ? extract_links_from_sitemap : extract_links_from_html
|
||||
end
|
||||
|
||||
def page_title
|
||||
title_element = @doc.at_xpath('//title')
|
||||
title_element&.text&.strip
|
||||
end
|
||||
|
||||
def body_text_content
|
||||
ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def sitemap?
|
||||
@external_link.end_with?('.xml')
|
||||
end
|
||||
|
||||
def extract_links_from_sitemap
|
||||
@doc.xpath('//loc').to_set(&:text)
|
||||
end
|
||||
|
||||
def extract_links_from_html
|
||||
@doc.xpath('//a/@href').to_set do |link|
|
||||
absolute_url = URI.join(@external_link, link.value).to_s
|
||||
absolute_url
|
||||
end
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user