catstodon/app/lib/formatter.rb
ThibG ea436b355b Add support for linking XMPP URIs in toots (#12709)
* Fix wrong grouping in Twitter valid_url regex

* Add support for xmpp URIs

Fixes #9776

The difficult part is autolinking, because Twitter-text's extractor does
some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP
URIs do not really match the assumptions of that lib, so it doesn't sound
wise to try to shoehorn it into the existing regex.

This is why I used a specific regex (very close, although slightly more
permissive than the RFC), and a specific scan function (a simplified version
of the generalized one from Twitter).

* Remove leading “xmpp:” from auto-linked text
2020-01-11 02:15:25 +01:00

303 lines
9.3 KiB
Ruby

# frozen_string_literal: true
require 'singleton'
require_relative './sanitize_config'
class Formatter
include Singleton
include RoutingHelper
include ActionView::Helpers::TextHelper
def format(status, **options)
if status.reblog?
prepend_reblog = status.reblog.account.acct
status = status.proper
else
prepend_reblog = false
end
raw_content = status.text
if options[:inline_poll_options] && status.preloadable_poll
raw_content = raw_content + "\n\n" + status.preloadable_poll.options.map { |title| "[ ] #{title}" }.join("\n")
end
return '' if raw_content.blank?
unless status.local?
html = reformat(raw_content)
html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
return html.html_safe # rubocop:disable Rails/OutputSafety
end
linkable_accounts = status.active_mentions.map(&:account)
linkable_accounts << status.account
html = raw_content
html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
html = encode_and_link_urls(html, linkable_accounts)
html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
html.html_safe # rubocop:disable Rails/OutputSafety
end
def reformat(html)
sanitize(html, Sanitize::Config::MASTODON_STRICT)
end
def plaintext(status)
return status.text if status.local?
text = status.text.gsub(/(<br \/>|<br>|<\/p>)+/) { |match| "#{match}\n" }
strip_tags(text)
end
def simplified_format(account, **options)
html = account.local? ? linkify(account.note) : reformat(account.note)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def sanitize(html, config)
Sanitize.fragment(html, config)
end
def format_spoiler(status, **options)
html = encode(status.spoiler_text)
html = encode_custom_emojis(html, status.emojis, options[:autoplay])
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_poll_option(status, option, **options)
html = encode(option.title)
html = encode_custom_emojis(html, status.emojis, options[:autoplay])
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_display_name(account, **options)
html = encode(account.display_name.presence || account.username)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_field(account, str, **options)
html = account.local? ? encode_and_link_urls(str, me: true) : reformat(str)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def linkify(text)
html = encode_and_link_urls(text)
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
html.html_safe # rubocop:disable Rails/OutputSafety
end
private
def html_entities
@html_entities ||= HTMLEntities.new
end
def encode(html)
html_entities.encode(html)
end
def encode_and_link_urls(html, accounts = nil, options = {})
entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
if accounts.is_a?(Hash)
options = accounts
accounts = nil
end
rewrite(html.dup, entities) do |entity|
if entity[:url]
link_to_url(entity, options)
elsif entity[:hashtag]
link_to_hashtag(entity)
elsif entity[:screen_name]
link_to_mention(entity, accounts)
end
end
end
def count_tag_nesting(tag)
if tag[1] == '/' then -1
elsif tag[-2] == '/' then 0
else 1
end
end
def encode_custom_emojis(html, emojis, animate = false)
return html if emojis.empty?
emoji_map = emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] }
i = -1
tag_open_index = nil
inside_shortname = false
shortname_start_index = -1
invisible_depth = 0
while i + 1 < html.size
i += 1
if invisible_depth.zero? && inside_shortname && html[i] == ':'
shortcode = html[shortname_start_index + 1..i - 1]
emoji = emoji_map[shortcode]
if emoji
original_url, static_url = emoji
replacement = begin
if animate
"<img draggable=\"false\" class=\"emojione\" alt=\":#{encode(shortcode)}:\" title=\":#{encode(shortcode)}:\" src=\"#{encode(original_url)}\" />"
else
"<img draggable=\"false\" class=\"emojione custom-emoji\" alt=\":#{encode(shortcode)}:\" title=\":#{encode(shortcode)}:\" src=\"#{encode(static_url)}\" data-original=\"#{original_url}\" data-static=\"#{static_url}\" />"
end
end
before_html = shortname_start_index.positive? ? html[0..shortname_start_index - 1] : ''
html = before_html + replacement + html[i + 1..-1]
i += replacement.size - (shortcode.size + 2) - 1
else
i -= 1
end
inside_shortname = false
elsif tag_open_index && html[i] == '>'
tag = html[tag_open_index..i]
tag_open_index = nil
if invisible_depth.positive?
invisible_depth += count_tag_nesting(tag)
elsif tag == '<span class="invisible">'
invisible_depth = 1
end
elsif html[i] == '<'
tag_open_index = i
inside_shortname = false
elsif !tag_open_index && html[i] == ':'
inside_shortname = true
shortname_start_index = i
end
end
html
end
def rewrite(text, entities)
text = text.to_s
# Sort by start index
entities = entities.sort_by do |entity|
indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
indices.first
end
result = []
last_index = entities.reduce(0) do |index, entity|
indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
result << encode(text[index...indices.first])
result << yield(entity)
indices.last
end
result << encode(text[last_index..-1])
result.flatten.join
end
UNICODE_ESCAPE_BLACKLIST_RE = /\p{Z}|\p{P}/
def utf8_friendly_extractor(text, options = {})
old_to_new_index = [0]
escaped = text.chars.map do |c|
output = begin
if c.ord.to_s(16).length > 2 && UNICODE_ESCAPE_BLACKLIST_RE.match(c).nil?
CGI.escape(c)
else
c
end
end
old_to_new_index << old_to_new_index.last + output.length
output
end.join
# Note: I couldn't obtain list_slug with @user/list-name format
# for mention so this requires additional check
special = Extractor.extract_urls_with_indices(escaped, options).map do |extract|
new_indices = [
old_to_new_index.find_index(extract[:indices].first),
old_to_new_index.find_index(extract[:indices].last),
]
next extract.merge(
indices: new_indices,
url: text[new_indices.first..new_indices.last - 1]
)
end
standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard + xmpp)
end
def link_to_url(entity, options = {})
url = Addressable::URI.parse(entity[:url])
html_attrs = { target: '_blank', rel: 'nofollow noopener noreferrer' }
html_attrs[:rel] = "me #{html_attrs[:rel]}" if options[:me]
Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), url, html_attrs)
rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
encode(entity[:url])
end
def link_to_mention(entity, linkable_accounts)
acct = entity[:screen_name]
return link_to_account(acct) unless linkable_accounts
account = linkable_accounts.find { |item| TagManager.instance.same_acct?(item.acct, acct) }
account ? mention_html(account) : "@#{encode(acct)}"
end
def link_to_account(acct)
username, domain = acct.split('@')
domain = nil if TagManager.instance.local_domain?(domain)
account = EntityCache.instance.mention(username, domain)
account ? mention_html(account) : "@#{encode(acct)}"
end
def link_to_hashtag(entity)
hashtag_html(entity[:hashtag])
end
def link_html(url)
url = Addressable::URI.parse(url).to_s
prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
text = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30
"<span class=\"invisible\">#{encode(prefix)}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{encode(text)}</span><span class=\"invisible\">#{encode(suffix)}</span>"
end
def hashtag_html(tag)
"<a href=\"#{encode(tag_url(tag))}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{encode(tag)}</span></a>"
end
def mention_html(account)
"<span class=\"h-card\"><a href=\"#{encode(ActivityPub::TagManager.instance.url_for(account))}\" class=\"u-url mention\">@<span>#{encode(account.username)}</span></a></span>"
end
end