mirror of
https://git.kescher.at/CatCatNya/catstodon.git
synced 2024-11-30 10:59:04 +01:00
8fdff2748f
* Add more accurate account search

  When ElasticSearch is available, a more accurate search is implemented:

  - Using edge n-gram index for acct and display name
  - Using asciifolding and cjk width normalization on display names
  - Using Gaussian decay on account activity for additional scoring (recency)
  - Using followers/friends ratio for additional scoring (spamminess)
  - Using followers number for additional scoring (size)

  The exact match precedence only takes effect when the input conforms to the username format and the username part of it is complete, i.e. when the user started typing the domain part.

* Support single-letter usernames
* Fix tests
* Fix not picking up account updates
* Add weights and normalization for scores, skip zero terms queries
* Use local counts for accounts index, adjust search parameters
* Fix mistakes
* Using updated_at of accounts is inadequate for remote accounts
180 lines
4 KiB
Ruby
180 lines
4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Searches for accounts matching a free-text query.
#
# The result list is an optional exact match (looked up locally/remotely, or
# resolved when the +:resolve+ option is set) followed by fuzzy results that
# come from ElasticSearch when Chewy is enabled and from the database
# otherwise.
class AccountSearchService < BaseService
  attr_reader :query, :limit, :offset, :options, :account

  # Runs the search.
  #
  # @param query [String, nil] raw user input; surrounding whitespace and a
  #   single leading "@" are ignored. A nil or blank query yields no results.
  # @param account [Account, nil] the searching account; when given, the
  #   accounts it follows are used to filter or boost results
  # @param options [Hash] recognised keys:
  #   - +:limit+     maximum number of results (a missing value becomes 0,
  #                  which yields no results)
  #   - +:offset+    number of fuzzy results to skip; the exact match is
  #                  only attempted when it is 0
  #   - +:resolve+   resolve the exact match through ResolveAccountService
  #   - +:following+ restrict results to accounts followed by +account+
  # @return [Array] matching account records without nils or duplicates,
  #   exact match (if any) first
  def call(query, account = nil, options = {})
    # Normalise once so that the "@" hint and the stored query agree, and so
    # that a nil query reaches the blank-query guard instead of raising.
    normalized_query = query.to_s.strip

    @acct_hint = normalized_query.start_with?('@')
    @query     = normalized_query.sub(/\A@/, '')
    @limit     = options[:limit].to_i
    @offset    = options[:offset].to_i
    @options   = options
    @account   = account

    search_service_results.compact.uniq
  end

  private

  # Exact match (possibly nil) followed by the fuzzy results.
  def search_service_results
    return [] if query.blank? || limit < 1

    [exact_match] + search_results
  end

  # The account whose handle is exactly the query, or nil. Only attempted on
  # the first page and once the username part of the query is complete.
  def exact_match
    return unless offset.zero? && username_complete?
    return @exact_match if defined?(@exact_match)

    @exact_match = if options[:resolve]
                     ResolveAccountService.new.call(query)
                   elsif domain_is_local?
                     Account.find_local(query_username)
                   else
                     Account.find_remote(query_username, query_domain)
                   end
  end

  # Fuzzy results from ElasticSearch when Chewy is enabled, otherwise from
  # the database.
  def search_results
    return [] if limit_for_non_exact_results.zero?

    @search_results ||= Chewy.enabled? ? from_elasticsearch : from_database
  end

  # Database search; the variant that takes the searching account into
  # consideration is used whenever one is present.
  def from_database
    account ? advanced_search_results : simple_search_results
  end

  def advanced_search_results
    Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
  end

  def simple_search_results
    Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
  end

  # ElasticSearch search: a multi_match on acct (and display_name unless the
  # query looks like a handle), optionally filtered or boosted by the
  # searching account's followings, and scored with the reputation,
  # followers and recency functions.
  def from_elasticsearch
    searched_fields = likely_acct? ? %w(acct) : %w(acct^2 display_name)
    must_clauses    = [{ multi_match: { query: terms_for_query, fields: searched_fields, type: 'best_fields' } }]
    should_clauses  = []

    if account
      # Restricting to followings of an account that follows nobody can
      # never match anything, so skip the round-trip entirely.
      return [] if options[:following] && following_ids.empty?

      if options[:following]
        must_clauses << { terms: { id: following_ids } }
      elsif following_ids.any?
        should_clauses << { terms: { id: following_ids, boost: 100 } }
      end
    end

    # Named +es_query+ so it does not shadow the +query+ attribute reader.
    es_query  = { bool: { must: must_clauses, should: should_clauses } }
    functions = [reputation_score_function, followers_score_function, time_distance_function]

    records = AccountsIndex.query(function_score: { query: es_query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
                           .limit(limit_for_non_exact_results)
                           .offset(offset)
                           .objects
                           .compact

    ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)

    records
  end

  # Scores an account by followers / (followers + following + 1); the "+ 0.0"
  # forces floating-point division inside the script.
  def reputation_score_function
    {
      script_score: {
        script: {
          source: "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)",
        },
      },
    }
  end

  # Scores an account by its followers count through the log2p modifier,
  # treating a missing count as 1.
  def followers_score_function
    {
      field_value_factor: {
        field: 'followers_count',
        modifier: 'log2p',
        missing: 1,
      },
    }
  end

  # Scores an account by recency with a Gaussian decay on last_status_at.
  def time_distance_function
    {
      gauss: {
        last_status_at: {
          scale: '30d',
          offset: '30d',
          decay: 0.3,
        },
      },
    }
  end

  # IDs of the accounts targeted by the searching account's active
  # relationships.
  def following_ids
    @following_ids ||= account.active_relationships.pluck(:target_account_id)
  end

  # The exact match, when there is one, takes up one slot of the limit.
  def limit_for_non_exact_results
    exact_match? ? limit - 1 : limit
  end

  # Terms handed to the fuzzy search: only the username when the query
  # targets the local domain, the whole query otherwise.
  def terms_for_query
    domain_is_local? ? query_username : query
  end

  def split_query_string
    @split_query_string ||= query.split('@')
  end

  def query_username
    @query_username ||= split_query_string.first || ''
  end

  # Domain part of the query, or nil when the query has no domain part.
  def query_domain
    # A +defined?+ guard is used because +||=+ cannot memoize a nil result.
    return @query_domain if defined?(@query_domain)

    @query_domain = query_without_split? ? nil : split_query_string.last
  end

  def query_without_split?
    split_query_string.size == 1
  end

  def domain_is_local?
    # A +defined?+ guard is used because +||=+ would re-run the lookup every
    # time the answer is false.
    return @domain_is_local if defined?(@domain_is_local)

    @domain_is_local = TagManager.instance.local_domain?(query_domain)
  end

  def exact_match?
    exact_match.present?
  end

  # True once the query contains an "@" separator and, prefixed with "@",
  # matches the mention format.
  def username_complete?
    query.include?('@') && "@#{query}".match?(Account::MENTION_RE)
  end

  # True when the query should be matched against handles only.
  def likely_acct?
    @acct_hint || username_complete?
  end
end
|