mirror of
https://git.kescher.at/CatCatNya/catstodon.git
synced 2024-11-26 14:51:37 +01:00
Do not pass unknown encoding names to nokogiri. (#30987)
This commit is contained in:
parent
36592d10aa
commit
2ea9336b68
3 changed files with 35 additions and 1 deletions
|
@ -274,7 +274,7 @@ class LinkDetailsExtractor
|
||||||
end
|
end
|
||||||
|
|
||||||
def detect_encoding_and_parse_document
|
def detect_encoding_and_parse_document
|
||||||
[detect_encoding, nil, @html_charset].uniq.each do |encoding|
|
[detect_encoding, nil, header_encoding].uniq.each do |encoding|
|
||||||
document = Nokogiri::HTML(@html, nil, encoding)
|
document = Nokogiri::HTML(@html, nil, encoding)
|
||||||
return document if document.to_s.valid_encoding?
|
return document if document.to_s.valid_encoding?
|
||||||
end
|
end
|
||||||
|
@ -286,6 +286,13 @@ class LinkDetailsExtractor
|
||||||
guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil
|
guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Resolves the charset stored in @html_charset to the canonical name of an
# encoding known to Ruby, so that unrecognized names are never handed on to
# the HTML parser.
#
# @return [String, nil] canonical encoding name (e.g. "UTF-8"), or nil when
#   no charset is set or Ruby cannot resolve it to an encoding
def header_encoding
  return unless @html_charset

  # Encoding.find returns nil instead of raising for the special "internal"
  # alias when no default internal encoding is configured, hence `&.`.
  Encoding.find(@html_charset)&.name
rescue ArgumentError
  # Encoding from HTTP header is not recognized by ruby
  nil
end
|
||||||
|
|
||||||
def detector
|
def detector
|
||||||
@detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector|
|
@detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector|
|
||||||
detector.strip_tags = true
|
detector.strip_tags = true
|
||||||
|
|
18
spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
vendored
Normal file
18
spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
vendored
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
HTTP/1.1 200 OK
|
||||||
|
server: nginx
|
||||||
|
date: Thu, 13 Jun 2024 14:33:13 GMT
|
||||||
|
content-type: text/html; charset=utf8
|
||||||
|
content-length: 192
|
||||||
|
accept-ranges: bytes
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Webserver Configs R Us</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h2>Welcome</h2>
|
||||||
|
<p>Sneaky non-UTF character: á</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -32,6 +32,7 @@ RSpec.describe FetchLinkCardService do
|
||||||
stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
|
stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
|
||||||
stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
|
stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
|
||||||
stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt'))
|
stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt'))
|
||||||
|
stub_request(:get, 'http://example.com/alternative_utf8_spelling_in_header').to_return(request_fixture('alternative_utf8_spelling_in_header.txt'))
|
||||||
|
|
||||||
Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache
|
Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache
|
||||||
|
|
||||||
|
@ -292,6 +293,14 @@ RSpec.describe FetchLinkCardService do
|
||||||
expect(status.preview_card).to be_nil
|
expect(status.preview_card).to be_nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The fixture served for this URL declares `charset=utf8` in its
# Content-Type header; the page title must still be extracted.
context 'with a URL where the `Content-Type` header uses `utf8` instead of `utf-8`' do
  let(:status) { Fabricate(:status, text: 'test http://example.com/alternative_utf8_spelling_in_header') }

  it 'creates a preview card with the title parsed from the page' do
    expect(status.preview_card.title).to eq 'Webserver Configs R Us'
  end
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'with a remote status' do
|
context 'with a remote status' do
|
||||||
|
|
Loading…
Reference in a new issue