catstodon/lib/sanitize_ext/sanitize_config.rb

155 lines
4.5 KiB
Ruby

# frozen_string_literal: true
class Sanitize
module Config
HTTP_PROTOCOLS = %w(
http
https
).freeze
LINK_PROTOCOLS = %w(
http
https
dat
dweb
ipfs
ipns
ssb
gopher
xmpp
magnet
gemini
).freeze
ALLOWED_CLASS_TRANSFORMER = lambda do |env|
node = env[:node]
class_list = node['class']&.split(/[\t\n\f\r ]/)
return unless class_list
class_list.keep_if do |e|
next true if /^(h|p|u|dt|e)-/.match?(e) # microformats classes
next true if /^(mention|hashtag)$/.match?(e) # semantic classes
next true if /^(ellipsis|invisible)$/.match?(e) # link formatting classes
end
node['class'] = class_list.join(' ')
end
TRANSLATE_TRANSFORMER = lambda do |env|
node = env[:node]
node.remove_attribute('translate') unless node['translate'] == 'no'
end
UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
return unless env[:node_name] == 'a'
current_node = env[:node]
scheme = if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
Regexp.last_match(1).downcase
else
:relative
end
current_node.replace(current_node.document.create_text_node(current_node.text)) unless LINK_PROTOCOLS.include?(scheme)
end
UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
return unless %w(h1 h2 h3 h4 h5 h6).include?(env[:node_name])
current_node = env[:node]
current_node.name = 'strong'
current_node.wrap('<p></p>')
end
# We assume that incomming <math> nodes are of the form
# <math><semantics>...<annotation>...</annotation></semantics></math>
# according to the [FEP]. We try to grab the most relevant plain-text
# annotation from the semantics node, and use it to display a representation
# of the mathematics.
#
# FEP: https://codeberg.org/fediverse/fep/src/branch/main/fep/dc88/fep-dc88.md
MATH_TRANSFORMER = lambda do |env|
math = env[:node]
return if env[:is_allowlisted]
return unless math.element? && env[:node_name] == 'math'
semantics = math.element_children[0]
return if semantics.nil? || semantics.name != 'semantics'
# next, we find the plain-text description
is_annotation_with_encoding = lambda do |encoding, node|
return false unless node.name == 'annotation'
node.attributes['encoding'].value == encoding
end
annotation = semantics.children.find(&is_annotation_with_encoding.curry['application/x-tex'])
if annotation
text = if math.attributes['display']&.value == 'block'
"$$#{annotation.text}$$"
else
"$#{annotation.text}$"
end
math.replace(math.document.create_text_node(text))
return
end
# Don't bother surrounding 'text/plain' annotations with dollar signs,
# since it isn't LaTeX
annotation = semantics.children.find(&is_annotation_with_encoding.curry['text/plain'])
math.replace(math.document.create_text_node(annotation.text)) unless annotation.nil?
end
MASTODON_STRICT = freeze_config(
elements: %w(p br span a del s pre blockquote code b strong u i em ul ol li ruby rt rp),
attributes: {
'a' => %w(href rel class translate),
'span' => %w(class translate),
'ol' => %w(start reversed),
'li' => %w(value),
},
add_attributes: {
'a' => {
'rel' => 'nofollow noopener',
'target' => '_blank',
},
},
protocols: {},
transformers: [
ALLOWED_CLASS_TRANSFORMER,
TRANSLATE_TRANSFORMER,
MATH_TRANSFORMER,
UNSUPPORTED_ELEMENTS_TRANSFORMER,
UNSUPPORTED_HREF_TRANSFORMER,
]
)
MASTODON_OEMBED = freeze_config(
elements: %w(audio embed iframe source video),
attributes: {
'audio' => %w(controls),
'embed' => %w(height src type width),
'iframe' => %w(allowfullscreen frameborder height scrolling src width),
'source' => %w(src type),
'video' => %w(controls height loop width),
},
protocols: {
'embed' => { 'src' => HTTP_PROTOCOLS },
'iframe' => { 'src' => HTTP_PROTOCOLS },
'source' => { 'src' => HTTP_PROTOCOLS },
},
add_attributes: {
'iframe' => { 'sandbox' => 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms' },
}
)
end
end