mirror of
https://git.kescher.at/CatCatNya/catstodon.git
synced 2025-01-22 00:34:04 +01:00
2cabc5d188
* Use a tree‐based approach for adv. text formatting
  Sanitizing HTML/Markdown means parsing the content into an HTML tree under‐the‐hood anyway, and it is more accurate to do mention/hashtag replacement on the text nodes in that tree than it is to try to hack it in with regexes et cetera. This undoes the overrides of `#entities` and `#rewrite` on `AdvancedTextFormatter` but also stops using them, instead keeping track of the parsed Nokogiri tree itself and using that in the `#to_s` method. Internally, this tree uses `<mastodon-entity>` nodes to keep track of hashtags, links, and mentions. Sanitization is moved to the beginning, so it should be known that these do not appear in the input.
* Also disallow entities inside of `<code>`
  I think this is generally expected behaviour, and people are annoyed when their code gets turned into links/hashtags/mentions.
* Minor cleanup to AdvancedTextFormatter
* Change AdvancedTextFormatter to rewrite entities in one pass and sanitize at the end
  Also, minor refactoring to better match how other formatters are organized.
* Add some tests

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
133 lines
3.7 KiB
Ruby
133 lines
3.7 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Formatter that treats its text as HTML: the text is parsed into a Nokogiri
# tree, URLs/hashtags/mentions found in its text nodes are replaced by links,
# and the result is sanitized. When the content type is `text/markdown`, the
# text is first rendered to HTML with Redcarpet.
class AdvancedTextFormatter < TextFormatter
  # Redcarpet HTML renderer that delegates autolink rendering to a block and
  # emits code blocks whose line breaks are encoded as `<br/>`.
  class HTMLRenderer < Redcarpet::Render::HTML
    # @param [Hash] options render options forwarded to Redcarpet::Render::HTML
    # @yieldparam [String] link URL to turn into HTML
    # @yieldreturn [String] HTML for the link
    def initialize(options, &block)
      super(options)
      @format_link = block
    end

    # Renders a code block with its content HTML-escaped. Newlines are turned
    # into `<br/>`; note that `format_markdown` strips every raw newline from
    # the rendered output, so this is presumably what keeps line breaks inside
    # code blocks alive.
    #
    # @param [String] code raw content of the code block
    # @param [String, nil] _language language hint, ignored
    # @return [String]
    def block_code(code, _language)
      <<~HTML
        <pre><code>#{ERB::Util.h(code).gsub("\n", '<br/>')}</code></pre>
      HTML
    end

    # Renders an autolinked span. E-mail links are returned untouched; every
    # other link type is handed to the block given at construction.
    #
    # @param [String] link
    # @param [Symbol] link_type
    # @return [String]
    def autolink(link, link_type)
      return link if link_type == :email

      @format_link.call(link)
    end
  end

  attr_reader :content_type

  # @param [String] text
  # @param [Hash] options
  # @option options [Boolean] :multiline
  # @option options [Boolean] :with_domains
  # @option options [Boolean] :with_rel_me
  # @option options [Array<Account>] :preloaded_accounts
  # @option options [String] :content_type when 'text/markdown', the text is
  #   rendered from Markdown to HTML before any further processing
  def initialize(text, options = {})
    # Work on a copy: :content_type is stripped before calling super, and the
    # caller's hash (which may be reused or frozen) must not be altered.
    options = options.dup
    @content_type = options.delete(:content_type)
    super(text, options)

    @text = format_markdown(text) if content_type == 'text/markdown'
  end

  # Differs from TextFormatter by not messing with newline after parsing
  #
  # @return [ActiveSupport::SafeBuffer] sanitized HTML, empty when the text is blank
  def to_s
    return ''.html_safe if text.blank?

    html = rewrite do |entity|
      if entity[:url]
        link_to_url(entity)
      elsif entity[:hashtag]
        link_to_hashtag(entity)
      elsif entity[:screen_name]
        link_to_mention(entity)
      end
    end

    html.html_safe # rubocop:disable Rails/OutputSafety
  end

  # Differs from TextFormatter by operating on the parsed HTML tree
  #
  # The rewritten tree is memoized in @tree, so the block is only consulted
  # the first time; sanitization runs on every call.
  #
  # @yieldparam [Hash] entity extracted entity, carrying :indices
  # @yieldreturn [String] HTML that replaces the entity
  # @return [String] sanitized HTML
  def rewrite(&block)
    # Assigned only once the whole tree has been rewritten, so a failure
    # part-way through does not leave a half-processed tree cached.
    @tree ||= build_rewritten_tree(&block)

    Sanitize.node!(@tree, Sanitize::Config::MASTODON_OUTGOING).to_html
  end

  private

  # Parses the text into an HTML fragment and replaces every text node that is
  # not inside an <a> or <code> element with its entity-rewritten counterpart.
  def build_rewritten_tree(&block)
    src = text.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
    tree = Nokogiri::HTML5.fragment(src)
    document = tree.document

    tree.xpath('.//text()[not(ancestor::a | ancestor::code)]').each do |text_node|
      text_node.replace(replacement_nodes_for(text_node.content, document, &block))
    end

    tree
  end

  # Builds the node set standing in for one text node: plain text is kept as
  # text nodes, each extracted entity becomes the HTML yielded by the block.
  def replacement_nodes_for(content, document)
    replacement = Nokogiri::XML::NodeSet.new(document)
    processed_index = 0

    Extractor.extract_entities_with_indices(content, extract_url_without_protocol: false) do |entity|
      advance = entity[:indices].first - processed_index

      # Text node for content which precedes entity.
      if advance.positive?
        replacement << Nokogiri::XML::Text.new(content[processed_index, advance], document)
      end

      replacement << Nokogiri::HTML5.fragment(yield(entity))
      processed_index = entity[:indices].last
    end

    # Text node for remaining content.
    if processed_index < content.size
      replacement << Nokogiri::XML::Text.new(
        content[processed_index, content.size - processed_index],
        document
      )
    end

    replacement
  end

  # Renders Markdown to HTML and drops all carriage returns and newlines from
  # the output.
  def format_markdown(markdown)
    markdown_formatter.render(markdown).delete("\r\n")
  end

  # Builds the Redcarpet parser; autolinks are rendered through link_to_url.
  def markdown_formatter
    extensions = {
      autolink: true,
      no_intra_emphasis: true,
      fenced_code_blocks: true,
      disable_indented_code_blocks: true,
      strikethrough: true,
      lax_spacing: true,
      space_after_headers: true,
      superscript: true,
      underline: true,
      highlight: true,
      footnotes: false,
    }

    renderer = HTMLRenderer.new({
      filter_html: false,
      escape_html: false,
      no_images: true,
      no_styles: true,
      safe_links_only: true,
      hard_wrap: true,
      link_attributes: { target: '_blank', rel: 'nofollow noopener' },
    }) do |url|
      link_to_url({ url: url })
    end

    Redcarpet::Markdown.new(renderer, extensions)
  end
end
|