From 839e90aeff93a18d59cb4fc53eb25cc5c152b44a Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Sat, 15 Jul 2023 15:41:04 +0200 Subject: [PATCH] Extractors: Add module for 'hashtagTileRenderer' --- src/invidious/helpers/serialized_yt_data.cr | 21 +++++++- src/invidious/yt_backend/extractors.cr | 53 ++++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr index 7c12ad0e..e0bd7279 100644 --- a/src/invidious/helpers/serialized_yt_data.cr +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -232,6 +232,25 @@ struct SearchChannel end end +struct SearchHashtag + include DB::Serializable + + property title : String + property url : String + property video_count : Int64 + property channel_count : Int64 + + def to_json(locale : String?, json : JSON::Builder) + json.object do + json.field "type", "hashtag" + json.field "title", self.title + json.field "url", self.url + json.field "videoCount", self.video_count + json.field "channelCount", self.channel_count + end + end +end + class Category include DB::Serializable @@ -274,4 +293,4 @@ struct Continuation end end -alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category +alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | SearchHashtag | Category diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index e5029dc5..8456313b 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -11,15 +11,16 @@ private ITEM_CONTAINER_EXTRACTOR = { } private ITEM_PARSERS = { + Parsers::RichItemRendererParser, Parsers::VideoRendererParser, Parsers::ChannelRendererParser, Parsers::GridPlaylistRendererParser, Parsers::PlaylistRendererParser, Parsers::CategoryRendererParser, - Parsers::RichItemRendererParser, Parsers::ReelItemRendererParser, Parsers::ItemSectionRendererParser, Parsers::ContinuationItemRendererParser, + Parsers::HashtagRendererParser, } private alias InitialData = Hash(String, JSON::Any) @@ -210,6 +211,56 @@ private module Parsers end end + # Parses an Innertube `hashtagTileRenderer` into a `SearchHashtag`. + # Returns `nil` when the given object is not a `hashtagTileRenderer`. + # + # A `hashtagTileRenderer` is a kind of search result. + # It can be found when searching for any hashtag (e.g "#hi" or "#shorts") + module HashtagRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["hashtagTileRenderer"]? + return self.parse(item_contents) + end + end + + private def self.parse(item_contents) + title = extract_text(item_contents["hashtag"]).not_nil! # E.g "#hi" + + # E.g "/hashtag/hi" + url = item_contents.dig?("onTapCommand", "commandMetadata", "webCommandMetadata", "url").try &.as_s + url ||= URI.encode_path("/hashtag/#{title.lchop('#')}") + + video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" + channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" + + # Fallback for video/channel counts + if channel_count_txt.nil? || video_count_txt.nil? + # E.g: "203K videos • 81K channels" + info_text = extract_text(item_contents["hashtagInfoText"]?).try &.split(" • ") + + if info_text && info_text.size == 2 + video_count_txt ||= info_text[0] + channel_count_txt ||= info_text[1] + end + end + + return SearchHashtag.new({ + title: title, + url: url, + video_count: short_text_to_number(video_count_txt || ""), + channel_count: short_text_to_number(channel_count_txt || ""), + }) + rescue ex + LOGGER.debug("HashtagRendererParser: Failed to extract renderer.") + LOGGER.debug("HashtagRendererParser: Got exception: #{ex.message}") + return nil + end + + def self.parser_name + return {{@type.name}} + end + end + # Parses a InnerTube gridPlaylistRenderer into a SearchPlaylist. Returns nil when the given object isn't a gridPlaylistRenderer # # A gridPlaylistRenderer renders a playlist, that is located in a grid, to click on within the YouTube and Invidious UI.