invidious-mod/src/invidious/videos/parser.cr

372 行
14 KiB
Crystal

require "json"
# Use to parse both "compactVideoRenderer" and "endScreenVideoRenderer".
# The former is preferred as it has more videos in it. The second has
# the same 11 first entries as the compact rendered.
#
# TODO: "compactRadioRenderer" (Mix) and
# TODO: Use a proper struct/class instead of a hacky JSON object
def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)?
return nil if !related["videoId"]?
# The compact renderer has video length in seconds, where the end
# screen rendered has a full text version ("42:40")
length = related["lengthInSeconds"]?.try &.as_i.to_s
length ||= related.dig?("lengthText", "simpleText").try do |box|
decode_length_seconds(box.as_s).to_s
end
# Both have "short", so the "long" option shouldn't be required
channel_info = (related["shortBylineText"]? || related["longBylineText"]?)
.try &.dig?("runs", 0)
author = channel_info.try &.dig?("text")
author_verified = has_verified_badge?(related["ownerBadges"]?).to_s
ucid = channel_info.try { |ci| HelperExtractors.get_browse_id(ci) }
# "4,088,033 views", only available on compact renderer
# and when video is not a livestream
view_count = related.dig?("viewCountText", "simpleText")
.try &.as_s.gsub(/\D/, "")
short_view_count = related.try do |r|
HelperExtractors.get_short_view_count(r).to_s
end
LOGGER.trace("parse_related_video: Found \"watchNextEndScreenRenderer\" container")
# TODO: when refactoring video types, make a struct for related videos
# or reuse an existing type, if that fits.
return {
"id" => related["videoId"],
"title" => related["title"]["simpleText"],
"author" => author || JSON::Any.new(""),
"ucid" => JSON::Any.new(ucid || ""),
"length_seconds" => JSON::Any.new(length || "0"),
"view_count" => JSON::Any.new(view_count || "0"),
"short_view_count" => JSON::Any.new(short_view_count || "0"),
"author_verified" => JSON::Any.new(author_verified),
}
end
def extract_video_info(video_id : String, proxy_region : String? = nil)
# Init client config for the API
client_config = YoutubeAPI::ClientConfig.new(proxy_region: proxy_region)
# Fetch data from the player endpoint
# 8AEB param is used to fetch YouTube stories
player_response = YoutubeAPI.player(video_id: video_id, params: "8AEB", client_config: client_config)
playability_status = player_response.dig?("playabilityStatus", "status").try &.as_s
if playability_status != "OK"
subreason = player_response.dig?("playabilityStatus", "errorScreen", "playerErrorMessageRenderer", "subreason")
reason = subreason.try &.[]?("simpleText").try &.as_s
reason ||= subreason.try &.[]("runs").as_a.map(&.[]("text")).join("")
reason ||= player_response.dig("playabilityStatus", "reason").as_s
# Stop here if video is not a scheduled livestream
if !{"LIVE_STREAM_OFFLINE", "LOGIN_REQUIRED"}.any?(playability_status)
return {
"version" => JSON::Any.new(Video::SCHEMA_VERSION.to_i64),
"reason" => JSON::Any.new(reason),
}
end
elsif video_id != player_response.dig("videoDetails", "videoId")
# YouTube may return a different video player response than expected.
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
raise VideoNotAvailableException.new("The video returned by YouTube isn't the requested one. (WEB client)")
else
reason = nil
end
# Don't fetch the next endpoint if the video is unavailable.
if {"OK", "LIVE_STREAM_OFFLINE", "LOGIN_REQUIRED"}.any?(playability_status)
next_response = YoutubeAPI.next({"videoId": video_id, "params": ""})
player_response = player_response.merge(next_response)
end
params = parse_video_info(video_id, player_response)
params["reason"] = JSON::Any.new(reason) if reason
new_player_response = nil
if reason.nil?
# Fetch the video streams using an Android client in order to get the
# decrypted URLs and maybe fix throttling issues (#2194). See the
# following issue for an explanation about decrypted URLs:
# https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
client_config.client_type = YoutubeAPI::ClientType::Android
new_player_response = try_fetch_streaming_data(video_id, client_config)
elsif !reason.includes?("your country") # Handled separately
# The Android embedded client could help here
client_config.client_type = YoutubeAPI::ClientType::AndroidScreenEmbed
new_player_response = try_fetch_streaming_data(video_id, client_config)
end
# Last hope
if new_player_response.nil?
client_config.client_type = YoutubeAPI::ClientType::TvHtml5ScreenEmbed
new_player_response = try_fetch_streaming_data(video_id, client_config)
end
# Replace player response and reset reason
if !new_player_response.nil?
player_response = new_player_response
params.delete("reason")
end
{"captions", "playabilityStatus", "playerConfig", "storyboards", "streamingData"}.each do |f|
params[f] = player_response[f] if player_response[f]?
end
# Data structure version, for cache control
params["version"] = JSON::Any.new(Video::SCHEMA_VERSION.to_i64)
return params
end
def try_fetch_streaming_data(id : String, client_config : YoutubeAPI::ClientConfig) : Hash(String, JSON::Any)?
LOGGER.debug("try_fetch_streaming_data: [#{id}] Using #{client_config.client_type} client.")
# 8AEB param is used to fetch YouTube stories
response = YoutubeAPI.player(video_id: id, params: "8AEB", client_config: client_config)
playability_status = response["playabilityStatus"]["status"]
LOGGER.debug("try_fetch_streaming_data: [#{id}] Got playabilityStatus == #{playability_status}.")
if id != response.dig("videoDetails", "videoId")
# YouTube may return a different video player response than expected.
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
raise VideoNotAvailableException.new(
"The video returned by YouTube isn't the requested one. (#{client_config.client_type} client)"
)
elsif playability_status == "OK"
return response
else
return nil
end
end
def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any)) : Hash(String, JSON::Any)
# Top level elements
main_results = player_response.dig?("contents", "twoColumnWatchNextResults")
raise BrokenTubeException.new("twoColumnWatchNextResults") if !main_results
# Primary results are not available on Music videos
# See: https://github.com/iv-org/invidious/pull/3238#issuecomment-1207193725
if primary_results = main_results.dig?("results", "results", "contents")
video_primary_renderer = primary_results
.as_a.find(&.["videoPrimaryInfoRenderer"]?)
.try &.["videoPrimaryInfoRenderer"]
video_secondary_renderer = primary_results
.as_a.find(&.["videoSecondaryInfoRenderer"]?)
.try &.["videoSecondaryInfoRenderer"]
raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer
raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer
end
video_details = player_response.dig?("videoDetails")
microformat = player_response.dig?("microformat", "playerMicroformatRenderer")
raise BrokenTubeException.new("videoDetails") if !video_details
raise BrokenTubeException.new("microformat") if !microformat
# Basic video infos
title = video_details["title"]?.try &.as_s
# We have to try to extract viewCount from videoPrimaryInfoRenderer first,
# then from videoDetails, as the latter is "0" for livestreams (we want
# to get the amount of viewers watching).
views_txt = video_primary_renderer
.try &.dig?("viewCount", "videoViewCountRenderer", "viewCount", "runs", 0, "text")
views_txt ||= video_details["viewCount"]?
views = views_txt.try &.as_s.gsub(/\D/, "").to_i64?
length_txt = (microformat["lengthSeconds"]? || video_details["lengthSeconds"])
.try &.as_s.to_i64
published = microformat["publishDate"]?
.try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location::UTC) } || Time.utc
premiere_timestamp = microformat.dig?("liveBroadcastDetails", "startTimestamp")
.try { |t| Time.parse_rfc3339(t.as_s) }
live_now = microformat.dig?("liveBroadcastDetails", "isLiveNow")
.try &.as_bool || false
# Extra video infos
allowed_regions = microformat["availableCountries"]?
.try &.as_a.map &.as_s || [] of String
allow_ratings = video_details["allowRatings"]?.try &.as_bool
family_friendly = microformat["isFamilySafe"].try &.as_bool
is_listed = video_details["isCrawlable"]?.try &.as_bool
is_upcoming = video_details["isUpcoming"]?.try &.as_bool
keywords = video_details["keywords"]?
.try &.as_a.map &.as_s || [] of String
# Related videos
LOGGER.debug("extract_video_info: parsing related videos...")
related = [] of JSON::Any
# Parse "compactVideoRenderer" items (under secondary results)
secondary_results = main_results
.dig?("secondaryResults", "secondaryResults", "results")
secondary_results.try &.as_a.each do |element|
if item = element["compactVideoRenderer"]?
related_video = parse_related_video(item)
related << JSON::Any.new(related_video) if related_video
end
end
# If nothing was found previously, fall back to end screen renderer
if related.empty?
# Container for "endScreenVideoRenderer" items
player_overlays = player_response.dig?(
"playerOverlays", "playerOverlayRenderer",
"endScreen", "watchNextEndScreenRenderer", "results"
)
player_overlays.try &.as_a.each do |element|
if item = element["endScreenVideoRenderer"]?
related_video = parse_related_video(item)
related << JSON::Any.new(related_video) if related_video
end
end
end
# Likes
toplevel_buttons = video_primary_renderer
.try &.dig?("videoActions", "menuRenderer", "topLevelButtons")
if toplevel_buttons
likes_button = toplevel_buttons.try &.as_a
.find(&.dig?("toggleButtonRenderer", "defaultIcon", "iconType").=== "LIKE")
.try &.["toggleButtonRenderer"]
# New format as of september 2022
likes_button ||= toplevel_buttons.try &.as_a
.find(&.["segmentedLikeDislikeButtonRenderer"]?)
.try &.dig?(
"segmentedLikeDislikeButtonRenderer",
"likeButton", "toggleButtonRenderer"
)
if likes_button
# Note: The like count from `toggledText` is off by one, as it would
# represent the new like count in the event where the user clicks on "like".
likes_txt = (likes_button["defaultText"]? || likes_button["toggledText"]?)
.try &.dig?("accessibility", "accessibilityData", "label")
likes = likes_txt.as_s.gsub(/\D/, "").to_i64? if likes_txt
LOGGER.trace("extract_video_info: Found \"likes\" button. Button text is \"#{likes_txt}\"")
LOGGER.debug("extract_video_info: Likes count is #{likes}") if likes
end
end
# Description
description = microformat.dig?("description", "simpleText").try &.as_s || ""
short_description = player_response.dig?("videoDetails", "shortDescription")
description_html = video_secondary_renderer.try &.dig?("description", "runs")
.try &.as_a.try { |t| content_to_comment_html(t, video_id) }
# Video metadata
metadata = video_secondary_renderer
.try &.dig?("metadataRowContainer", "metadataRowContainerRenderer", "rows")
.try &.as_a
genre = microformat["category"]?
genre_ucid = nil
license = nil
metadata.try &.each do |row|
metadata_title = extract_text(row.dig?("metadataRowRenderer", "title"))
contents = row.dig?("metadataRowRenderer", "contents", 0)
if metadata_title == "Category"
contents = contents.try &.dig?("runs", 0)
genre = contents.try &.["text"]?
genre_ucid = contents.try &.dig?("navigationEndpoint", "browseEndpoint", "browseId")
elsif metadata_title == "License"
license = contents.try &.dig?("runs", 0, "text")
elsif metadata_title == "Licensed to YouTube by"
license = contents.try &.["simpleText"]?
end
end
# Author infos
author = video_details["author"]?.try &.as_s
ucid = video_details["channelId"]?.try &.as_s
if author_info = video_secondary_renderer.try &.dig?("owner", "videoOwnerRenderer")
author_thumbnail = author_info.dig?("thumbnail", "thumbnails", 0, "url")
author_verified = has_verified_badge?(author_info["badges"]?)
subs_text = author_info["subscriberCountText"]?
.try { |t| t["simpleText"]? || t.dig?("runs", 0, "text") }
.try &.as_s.split(" ", 2)[0]
end
# Return data
if live_now
video_type = VideoType::Livestream
elsif !premiere_timestamp.nil?
video_type = VideoType::Scheduled
published = premiere_timestamp || Time.utc
else
video_type = VideoType::Video
end
params = {
"videoType" => JSON::Any.new(video_type.to_s),
# Basic video infos
"title" => JSON::Any.new(title || ""),
"views" => JSON::Any.new(views || 0_i64),
"likes" => JSON::Any.new(likes || 0_i64),
"lengthSeconds" => JSON::Any.new(length_txt || 0_i64),
"published" => JSON::Any.new(published.to_rfc3339),
# Extra video infos
"allowedRegions" => JSON::Any.new(allowed_regions.map { |v| JSON::Any.new(v) }),
"allowRatings" => JSON::Any.new(allow_ratings || false),
"isFamilyFriendly" => JSON::Any.new(family_friendly || false),
"isListed" => JSON::Any.new(is_listed || false),
"isUpcoming" => JSON::Any.new(is_upcoming || false),
"keywords" => JSON::Any.new(keywords.map { |v| JSON::Any.new(v) }),
# Related videos
"relatedVideos" => JSON::Any.new(related),
# Description
"description" => JSON::Any.new(description || ""),
"descriptionHtml" => JSON::Any.new(description_html || "<p></p>"),
"shortDescription" => JSON::Any.new(short_description.try &.as_s || nil),
# Video metadata
"genre" => JSON::Any.new(genre.try &.as_s || ""),
"genreUcid" => JSON::Any.new(genre_ucid.try &.as_s || ""),
"license" => JSON::Any.new(license.try &.as_s || ""),
# Author infos
"author" => JSON::Any.new(author || ""),
"ucid" => JSON::Any.new(ucid || ""),
"authorThumbnail" => JSON::Any.new(author_thumbnail.try &.as_s || ""),
"authorVerified" => JSON::Any.new(author_verified || false),
"subCountText" => JSON::Any.new(subs_text || "-"),
}
return params
end