invidious-mod/src/invidious/search.cr

252 行
7.0 KiB
Crystal
Raw 通常表示 履歴

2018-09-14 07:47:31 +09:00
# Searches the uploads of a single channel for `query`.
#
# `channel` is first requested as `/channel/<channel>`. Only when that
# request answers 404 are `/user/<channel>` and then `/c/<channel>` tried,
# and the channel ID is read from the returned page data. Otherwise
# `channel` itself is used as the channel ID.
#
# Returns a tuple `{count, items}` where `count` is `items.size`; an empty
# result (`0` and an empty array) is returned when the browse response has
# no continuation items.
#
# Raises `InfoException` when no channel ID can be found in the page data.
def channel_search(query, page, channel)
  response = YT_POOL.client &.get("/channel/#{channel}")

  if response.status_code == 404
    response = YT_POOL.client &.get("/user/#{channel}")
    response = YT_POOL.client &.get("/c/#{channel}") if response.status_code == 404
    initial_data = extract_initial_data(response.body)

    # Every level is read with the non-raising `[]?` so that a page lacking
    # the expected header ends in the InfoException below instead of an
    # unrelated KeyError.
    ucid = initial_data["header"]?
      .try(&.["c4TabbedHeaderRenderer"]?)
      .try(&.["channelId"]?)
      .try(&.as_s?)
    raise InfoException.new("Impossible to extract channel ID from page") if !ucid
  else
    ucid = channel
  end

  continuation = produce_channel_search_continuation(ucid, query, page)
  response_json = YoutubeAPI.browse(continuation)

  continuation_items = response_json["onResponseReceivedActions"]?
    .try &.[0]["appendContinuationItemsAction"]["continuationItems"]
  return 0, [] of SearchItem if !continuation_items

  items = [] of SearchItem
  continuation_items.as_a.each do |entry|
    next unless entry.as_h.has_key?("itemSectionRenderer")

    # Only the first element of each section is used; `first?` skips a
    # section whose contents array is empty instead of raising IndexError.
    if first_content = entry["itemSectionRenderer"]["contents"].as_a.first?
      extract_item(first_content).try { |found| items << found }
    end
  end

  return items.size, items
end
# Runs a search for `query` and returns a tuple `{count, items}`, where
# `count` is the number of extracted items.
#
# * `search_params` - encoded parameter string; defaults to the value of
#   `produce_search_params(content_type: "all")`.
# * `region` - passed to `YoutubeAPI::ClientConfig`; `nil` by default.
#
# An empty `query` short-circuits to `{0, []}` without calling the API.
def search(query, search_params = produce_search_params(content_type: "all"), region = nil)
  if query.empty?
    return 0, [] of SearchItem
  end

  config = YoutubeAPI::ClientConfig.new(region: region)
  results = extract_items(YoutubeAPI.search(query, search_params, client_config: config))

  {results.size, results}
end
# Builds the encoded search parameter string from the given search options.
#
# The options are collected into a hash keyed by "<field>:<type>" strings,
# which is then passed through `Protodec::Any.cast_json`,
# `Protodec::Any.from_json`, URL-safe Base64 and `URI.encode_www_form`.
#
# * `page` - 1-based page number; stored in field 9 as `(page - 1) * 20`.
# * `sort` - "relevance", "rating", "upload_date"/"date" or
#   "view_count"/"views"; any other value raises "No sort <value>".
# * `date` - "hour", "today", "week", "month" or "year"; other values are
#   ignored.
# * `content_type` - "video", "channel", "playlist", "movie" or "show";
#   "all" sets no filter; any other value (including the default "") is
#   treated like "video".
# * `duration` - "short" or "long"; other values are ignored.
# * `features` - list of feature names; unknown names are ignored.
#
# Returns the encoded string.
def produce_search_params(page = 1, sort : String = "relevance", date : String = "", content_type : String = "",
                          duration : String = "", features : Array(String) = [] of String)
  offset = ((page - 1) * 20).to_i64

  sort_code = case sort
              when "relevance"           then 0_i64
              when "rating"              then 1_i64
              when "upload_date", "date" then 2_i64
              when "view_count", "views" then 3_i64
              else
                raise "No sort #{sort}"
              end

  # Filter fields are inserted in the order date, type, duration, features.
  filters = {} of String => Int64

  date_codes = {
    "hour"  => 1_i64,
    "today" => 2_i64,
    "week"  => 3_i64,
    "month" => 4_i64,
    "year"  => 5_i64,
  }
  date_codes[date]?.try { |code| filters["1:varint"] = code }

  type_code = case content_type
              when "video"    then 1_i64
              when "channel"  then 2_i64
              when "playlist" then 3_i64
              when "movie"    then 4_i64
              when "show"     then 5_i64
              when "all"      then nil
              else                 1_i64
              end
  filters["2:varint"] = type_code if type_code

  duration_codes = {
    "short" => 1_i64,
    "long"  => 2_i64,
  }
  duration_codes[duration]?.try { |code| filters["3:varint"] = code }

  # Maps each accepted feature name to the filter field it switches on.
  feature_fields = {
    "hd"               => "4:varint",
    "subtitles"        => "5:varint",
    "creative_commons" => "6:varint",
    "cc"               => "6:varint",
    "3d"               => "7:varint",
    "live"             => "8:varint",
    "livestream"       => "8:varint",
    "purchased"        => "9:varint",
    "4k"               => "14:varint",
    "360"              => "15:varint",
    "location"         => "23:varint",
    "hdr"              => "25:varint",
  }
  features.each do |feature|
    feature_fields[feature]?.try { |field| filters[field] = 1_i64 }
  end

  object = {
    "1:varint"   => sort_code,
    "2:embedded" => filters,
    "9:varint"   => offset,
  }

  # The embedded filter message is dropped entirely when no filter was set.
  object.delete("2:embedded") if filters.empty?

  json = Protodec::Any.cast_json(object)
  proto = Protodec::Any.from_json(json)
  encoded = Base64.urlsafe_encode(proto)
  URI.encode_www_form(encoded)
end
2018-09-14 07:47:31 +09:00
2021-03-24 14:15:06 +09:00
# Builds the encoded continuation string used to search within a channel.
#
# * `ucid`  - channel ID, stored in field 2 and embedded in field 35 as
#   "browse-feed<ucid>search".
# * `query` - search terms, stored in field 11.
# * `page`  - page number; pages `<= 1` use index 0, later pages use
#   `30 * (page - 1)`.
#
# The hash is passed through `Protodec::Any.cast_json`,
# `Protodec::Any.from_json`, URL-safe Base64 and `URI.encode_www_form`, and
# the resulting string is returned.
def produce_channel_search_continuation(ucid, query, page)
  idx = page <= 1 ? 0_i64 : 30_i64 * (page - 1)

  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:base64" => {
        "2:string"  => "search",
        "6:varint"  => 1_i64,
        "7:varint"  => 1_i64,
        "12:varint" => 1_i64,
        "15:base64" => {
          "3:varint" => idx,
        },
        "23:varint" => 0_i64,
      },
      "11:string" => query,
      "35:string" => "browse-feed#{ucid}search",
    },
  }

  json = Protodec::Any.cast_json(object)
  proto = Protodec::Any.from_json(json)
  encoded = Base64.urlsafe_encode(proto)
  URI.encode_www_form(encoded)
end
2019-08-06 08:49:13 +09:00
# Parses `key:value` operators out of `query`, runs the matching kind of
# search and returns `{search_query, count, items, operators}`.
#
# * `query`  - raw query; space-separated tokens matching `\w+:[\w,]+` are
#   treated as operator candidates.
# * `page`   - page number forwarded to the selected search.
# * `user`   - when truthy, cast to `User`; its email determines the
#   subscriptions view used by the `subscriptions:true` operator.
# * `region` - forwarded to `search`.
#
# Recognised operator keys: `channel`/`user`, `content_type`/`type`, `date`,
# `duration`, `feature`/`features`, `sort` and `subscriptions`. Tokens with
# any other key stay part of the search text.
#
# In the result, `search_query` is the query without the recognised
# operators, `operators` lists the recognised operator tokens, and `count`
# is the number of items after Category entries have been flattened.
def process_search_query(query, page, user, region)
  if user
    user = user.as(User)
    view_name = "subscriptions_#{sha256(user.email)}"
  end

  channel = nil
  content_type = "all"
  date = ""
  duration = ""
  features = [] of String
  sort = "relevance"
  subscriptions = nil

  tokens = query.split(" ")

  # Recognised operators are collected into a fresh array. Deleting
  # unrecognised ones from the list while iterating over it would skip the
  # token that follows each deleted entry.
  operators = [] of String
  tokens.each do |token|
    next unless token.match(/\w+:[\w,]+/)

    # A matching token always contains a ":" with text after it, so the
    # split yields at least two parts.
    parts = token.downcase.split(":")
    key = parts[0]
    value = parts[1]

    case key
    when "channel", "user"
      # Taken from the untouched token so the channel name keeps its case.
      channel = token.split(":")[-1]
    when "content_type", "type"
      content_type = value
    when "date"
      date = value
    when "duration"
      duration = value
    when "feature", "features"
      features = value.split(",")
    when "sort"
      sort = value
    when "subscriptions"
      subscriptions = value == "true"
    else
      # Unknown key: leave the token in the search text.
      next
    end

    operators << token
  end

  search_query = (tokens - operators).join(" ")

  if channel
    _, items = channel_search(search_query, page, channel)
  elsif subscriptions
    if view_name
      items = PG_DB.query_all("SELECT id,title,published,updated,ucid,author,length_seconds FROM (
SELECT *,
to_tsvector(#{view_name}.title) ||
to_tsvector(#{view_name}.author)
as document
FROM #{view_name}
) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", search_query, (page - 1) * 20, as: ChannelVideo)
    else
      items = [] of ChannelVideo
    end
  else
    search_params = produce_search_params(page: page, sort: sort, date: date, content_type: content_type,
      duration: duration, features: features)
    _, items = search(search_query, search_params, region).as(Tuple)
  end

  # Light processing to flatten search results out of Categories.
  # They should ideally be supported in the future.
  items_without_category = [] of SearchItem | ChannelVideo
  items.each do |i|
    if i.is_a? Category
      i.contents.each do |nest_i|
        # NOTE(review): entries of type Video are dropped here, presumably
        # because they do not fit the element type of the result array;
        # confirm against the Category definition.
        if !nest_i.is_a? Video
          items_without_category << nest_i
        end
      end
    else
      items_without_category << i
    end
  end

  {search_query, items_without_category.size, items_without_category, operators}
end