diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96903fc7..1ca0dc96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,11 +38,10 @@ jobs: matrix: stable: [true] crystal: - - 1.4.1 - - 1.5.1 - 1.6.2 - 1.7.3 - - 1.8.1 + - 1.8.2 + - 1.9.2 include: - crystal: nightly stable: false @@ -53,7 +52,7 @@ jobs: submodules: true - name: Install Crystal - uses: crystal-lang/install-crystal@v1.7.0 + uses: crystal-lang/install-crystal@v1.8.0 with: crystal: ${{ matrix.crystal }} diff --git a/.github/workflows/container-release.yml b/.github/workflows/container-release.yml index 13bbf34c..c2756fcc 100644 --- a/.github/workflows/container-release.yml +++ b/.github/workflows/container-release.yml @@ -25,9 +25,9 @@ jobs: uses: actions/checkout@v3 - name: Install Crystal - uses: crystal-lang/install-crystal@v1.6.0 + uses: crystal-lang/install-crystal@v1.8.0 with: - crystal: 1.5.0 + crystal: 1.9.2 - name: Run lint run: | @@ -77,4 +77,3 @@ jobs: tags: quay.io/invidious/invidious:${{ github.sha }}-arm64,quay.io/invidious/invidious:latest-arm64 build-args: | "release=1" - diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index a945da58..a7e218a2 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -14,7 +14,7 @@ jobs: with: repo-token: ${{ secrets.GITHUB_TOKEN }} days-before-stale: 365 - days-before-pr-stale: 45 # PRs should be active. Anything that hasn't had activity in more than 45 days should be considered abandoned. + days-before-pr-stale: 90 days-before-close: 30 exempt-pr-labels: blocked stale-issue-message: 'This issue has been automatically marked as stale and will be closed in 30 days because it has not had recent activity and is much likely outdated. If you think this issue is still relevant and applicable, you just have to post a comment and it will be unmarked.' diff --git a/config/config.example.yml b/config/config.example.yml index e925a5e3..b44fcc0e 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -161,6 +161,19 @@ https_only: false #force_resolve: +## +## Use Innertube's transcripts API instead of timedtext for closed captions +## +## Useful for larger instances as InnerTube is **not ratelimited**. See https://github.com/iv-org/invidious/issues/2567 +## +## Subtitle experience may differ slightly on Invidious. +## +## Accepted values: true, false +## Default: false +## +# use_innertube_for_captions: false + + # ----------------------------- # Logging # ----------------------------- diff --git a/src/invidious/config.cr b/src/invidious/config.cr index 0e69abe4..bb9280c3 100644 --- a/src/invidious/config.cr +++ b/src/invidious/config.cr @@ -127,6 +127,9 @@ class Config # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`) property pool_size : Int32 = 100 + # Use Innertube's transcripts API instead of timedtext for closed captions + property use_innertube_for_captions : Bool = false + # Saved cookies in "name1=value1; name2=value2..." format @[YAML::Field(converter: Preferences::StringToCookies)] property cookies : HTTP::Cookies = HTTP::Cookies.new diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr index e3214469..5fd81168 100644 --- a/src/invidious/frontend/watch_page.cr +++ b/src/invidious/frontend/watch_page.cr @@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage getter full_videos : Array(Hash(String, JSON::Any)) getter video_streams : Array(Hash(String, JSON::Any)) getter audio_streams : Array(Hash(String, JSON::Any)) - getter captions : Array(Invidious::Videos::Caption) + getter captions : Array(Invidious::Videos::Captions::Metadata) def initialize( @full_videos, diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index 013be268..955e0855 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -89,6 +89,7 @@ struct Playlist property views : Int64 property updated : Time property thumbnail : String? + property subtitle : String? def to_json(offset, json : JSON::Builder, video_id : String? = nil) json.object do @@ -100,6 +101,7 @@ struct Playlist json.field "author", self.author json.field "authorId", self.ucid json.field "authorUrl", "/channel/#{self.ucid}" + json.field "subtitle", self.subtitle json.field "authorThumbnails" do json.array do @@ -356,6 +358,8 @@ def fetch_playlist(plid : String) updated = Time.utc video_count = 0 + subtitle = extract_text(initial_data.dig?("header", "playlistHeaderRenderer", "subtitle")) + playlist_info["stats"]?.try &.as_a.each do |stat| text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s next if !text @@ -397,6 +401,7 @@ def fetch_playlist(plid : String) views: views, updated: updated, thumbnail: thumbnail, + subtitle: subtitle, }) end diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index af4fc806..25e766d2 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -87,70 +87,78 @@ module Invidious::Routes::API::V1::Videos caption = caption[0] end - url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target + if CONFIG.use_innertube_for_captions + params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated) + initial_data = YoutubeAPI.get_transcript(params) - # Auto-generated captions often have cues that aren't aligned properly with the video, - # as well as some other markup that makes it cumbersome, so we try to fix that here - if caption.name.includes? "auto-generated" - caption_xml = YT_POOL.client &.get(url).body + webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code) + else + # Timedtext API handling + url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target - if caption_xml.starts_with?(" i + 1 - end_time = caption_nodes[i + 1]["start"].to_f.seconds - else - end_time = start_time + duration + if caption_nodes.size > i + 1 + end_time = caption_nodes[i + 1]["start"].to_f.seconds + else + end_time = start_time + duration + end + + start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" + end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" + + text = HTML.unescape(node.content) + text = text.gsub(//, "") + text = text.gsub(/<\/font>/, "") + if md = text.match(/(?.*) : (?.*)/) + text = "#{md["text"]}" + end + + str << <<-END_CUE + #{start_time} --> #{end_time} + #{text} + + + END_CUE end - - start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" - end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" - - text = HTML.unescape(node.content) - text = text.gsub(//, "") - text = text.gsub(/<\/font>/, "") - if md = text.match(/(?.*) : (?.*)/) - text = "#{md["text"]}" - end - - str << <<-END_CUE - #{start_time} --> #{end_time} - #{text} - - - END_CUE end end - end - else - # Some captions have "align:[start/end]" and "position:[num]%" - # attributes. Those are causing issues with VideoJS, which is unable - # to properly align the captions on the video, so we remove them. - # - # See: https://github.com/iv-org/invidious/issues/2391 - webvtt = YT_POOL.client &.get("#{url}&format=vtt").body - if webvtt.starts_with?(" [0-9:.]{12}).+/, "\\1") + if webvtt.starts_with?(" [0-9:.]{12}).+/, "\\1") + end end end diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index f38b33e5..9fbd1374 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -24,7 +24,7 @@ struct Video property updated : Time @[DB::Field(ignore: true)] - @captions = [] of Invidious::Videos::Caption + @captions = [] of Invidious::Videos::Captions::Metadata @[DB::Field(ignore: true)] property adaptive_fmts : Array(Hash(String, JSON::Any))? @@ -215,9 +215,9 @@ struct Video keywords.includes? "YouTube Red" end - def captions : Array(Invidious::Videos::Caption) + def captions : Array(Invidious::Videos::Captions::Metadata) if @captions.empty? && @info.has_key?("captions") - @captions = Invidious::Videos::Caption.from_yt_json(info["captions"]) + @captions = Invidious::Videos::Captions::Metadata.from_yt_json(info["captions"]) end return @captions diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 13f81a31..256dfcc0 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -1,100 +1,106 @@ require "json" module Invidious::Videos - struct Caption - property name : String - property language_code : String - property base_url : String + module Captions + struct Metadata + property name : String + property language_code : String + property base_url : String - def initialize(@name, @language_code, @base_url) - end + property auto_generated : Bool - # Parse the JSON structure from Youtube - def self.from_yt_json(container : JSON::Any) : Array(Caption) - caption_tracks = container - .dig?("playerCaptionsTracklistRenderer", "captionTracks") - .try &.as_a - - captions_list = [] of Caption - return captions_list if caption_tracks.nil? - - caption_tracks.each do |caption| - name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] - name = name.to_s.split(" - ")[0] - - language_code = caption["languageCode"].to_s - base_url = caption["baseUrl"].to_s - - captions_list << Caption.new(name, language_code, base_url) + def initialize(@name, @language_code, @base_url, @auto_generated) end - return captions_list - end + # Parse the JSON structure from Youtube + def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata) + caption_tracks = container + .dig?("playerCaptionsTracklistRenderer", "captionTracks") + .try &.as_a - def timedtext_to_vtt(timedtext : String, tlang = nil) : String - # In the future, we could just directly work with the url. This is more of a POC - cues = [] of XML::Node - tree = XML.parse(timedtext) - tree = tree.children.first + captions_list = [] of Captions::Metadata + return captions_list if caption_tracks.nil? - tree.children.each do |item| - if item.name == "body" - item.children.each do |cue| - if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") - cues << cue + caption_tracks.each do |caption| + name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] + name = name.to_s.split(" - ")[0] + + language_code = caption["languageCode"].to_s + base_url = caption["baseUrl"].to_s + + auto_generated = (caption["kind"]? == "asr") + + captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated) + end + + return captions_list + end + + def timedtext_to_vtt(timedtext : String, tlang = nil) : String + # In the future, we could just directly work with the url. This is more of a POC + cues = [] of XML::Node + tree = XML.parse(timedtext) + tree = tree.children.first + + tree.children.each do |item| + if item.name == "body" + item.children.each do |cue| + if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") + cues << cue + end end + break end - break end - end - result = String.build do |result| - result << <<-END_VTT - WEBVTT - Kind: captions - Language: #{tlang || @language_code} + result = String.build do |result| + result << <<-END_VTT + WEBVTT + Kind: captions + Language: #{tlang || @language_code} - END_VTT + END_VTT - result << "\n\n" + result << "\n\n" - cues.each_with_index do |node, i| - start_time = node["t"].to_f.milliseconds + cues.each_with_index do |node, i| + start_time = node["t"].to_f.milliseconds - duration = node["d"]?.try &.to_f.milliseconds + duration = node["d"]?.try &.to_f.milliseconds - duration ||= start_time + duration ||= start_time - if cues.size > i + 1 - end_time = cues[i + 1]["t"].to_f.milliseconds - else - end_time = start_time + duration + if cues.size > i + 1 + end_time = cues[i + 1]["t"].to_f.milliseconds + else + end_time = start_time + duration + end + + # start_time + result << start_time.hours.to_s.rjust(2, '0') + result << ':' << start_time.minutes.to_s.rjust(2, '0') + result << ':' << start_time.seconds.to_s.rjust(2, '0') + result << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + result << " --> " + + # end_time + result << end_time.hours.to_s.rjust(2, '0') + result << ':' << end_time.minutes.to_s.rjust(2, '0') + result << ':' << end_time.seconds.to_s.rjust(2, '0') + result << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + result << "\n" + + node.children.each do |s| + result << s.content + end + result << "\n" + result << "\n" end - - # start_time - result << start_time.hours.to_s.rjust(2, '0') - result << ':' << start_time.minutes.to_s.rjust(2, '0') - result << ':' << start_time.seconds.to_s.rjust(2, '0') - result << '.' << start_time.milliseconds.to_s.rjust(3, '0') - - result << " --> " - - # end_time - result << end_time.hours.to_s.rjust(2, '0') - result << ':' << end_time.minutes.to_s.rjust(2, '0') - result << ':' << end_time.seconds.to_s.rjust(2, '0') - result << '.' << end_time.milliseconds.to_s.rjust(3, '0') - - result << "\n" - - node.children.each do |s| - result << s.content - end - result << "\n" - result << "\n" end + return result end - return result end # List of all caption languages available on Youtube. diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr new file mode 100644 index 00000000..f3360a52 --- /dev/null +++ b/src/invidious/videos/transcript.cr @@ -0,0 +1,103 @@ +module Invidious::Videos + # Namespace for methods primarily relating to Transcripts + module Transcript + record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String + + def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String + kind = auto_generated ? "asr" : "" + + object = { + "1:0:string" => video_id, + + "2:base64" => { + "1:string" => kind, + "2:string" => language_code, + "3:string" => "", + }, + + "3:varint" => 1_i64, + "5:string" => "engagement-panel-searchable-transcript-search-panel", + "6:varint" => 1_i64, + "7:varint" => 1_i64, + "8:varint" => 1_i64, + } + + params = object.try { |i| Protodec::Any.cast_json(i) } + .try { |i| Protodec::Any.from_json(i) } + .try { |i| Base64.urlsafe_encode(i) } + .try { |i| URI.encode_www_form(i) } + + return params + end + + def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String + # Convert into array of TranscriptLine + lines = self.parse(initial_data) + + # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt() + vtt = String.build do |vtt| + vtt << <<-END_VTT + WEBVTT + Kind: captions + Language: #{target_language} + + + END_VTT + + vtt << "\n\n" + + lines.each do |line| + start_time = line.start_ms + end_time = line.end_ms + + # start_time + vtt << start_time.hours.to_s.rjust(2, '0') + vtt << ':' << start_time.minutes.to_s.rjust(2, '0') + vtt << ':' << start_time.seconds.to_s.rjust(2, '0') + vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + vtt << " --> " + + # end_time + vtt << end_time.hours.to_s.rjust(2, '0') + vtt << ':' << end_time.minutes.to_s.rjust(2, '0') + vtt << ':' << end_time.seconds.to_s.rjust(2, '0') + vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + vtt << "\n" + vtt << line.line + + vtt << "\n" + vtt << "\n" + end + end + + return vtt + end + + private def self.parse(initial_data : Hash(String, JSON::Any)) + body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", + "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer", + "initialSegments").as_a + + lines = [] of TranscriptLine + body.each do |line| + # Transcript section headers. They are not apart of the captions and as such we can safely skip them. + if line.as_h.has_key?("transcriptSectionHeaderRenderer") + next + end + + line = line["transcriptSegmentRenderer"] + + start_ms = line["startMs"].as_s.to_i.millisecond + end_ms = line["endMs"].as_s.to_i.millisecond + + text = extract_text(line["snippet"]) || "" + + lines << TranscriptLine.new(start_ms, end_ms, text) + end + + return lines + end + end +end diff --git a/src/invidious/views/playlist.ecr b/src/invidious/views/playlist.ecr index 90387cab..675dfc34 100644 --- a/src/invidious/views/playlist.ecr +++ b/src/invidious/views/playlist.ecr @@ -70,7 +70,12 @@ <% else %> - <%= author %> | + <% if !author.empty? %> + <%= author %> | + <% elsif !playlist.subtitle.nil? %> + <% subtitle = playlist.subtitle || "" %> + <%= HTML.escape(subtitle[0..subtitle.rindex(" • ") || subtitle.size]) %> | + <% end %> <%= translate_count(locale, "generic_videos_count", playlist.video_count) %> | <%= translate(locale, "Updated `x` ago", recode_date(playlist.updated, locale)) %> diff --git a/src/invidious/views/user/preferences.ecr b/src/invidious/views/user/preferences.ecr index 5c6ecb96..b51d2244 100644 --- a/src/invidious/views/user/preferences.ecr +++ b/src/invidious/views/user/preferences.ecr @@ -89,7 +89,7 @@ <% preferences.captions.each_with_index do |caption, index| %> diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index aef9ddd9..a5e621f2 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -557,6 +557,30 @@ module YoutubeAPI return self._post_json("/youtubei/v1/search", data, client_config) end + #################################################################### + # get_transcript(params, client_config?) + # + # Requests the youtubei/v1/get_transcript endpoint with the required headers + # and POST data in order to get a JSON reply. + # + # The requested data is a specially encoded protobuf string that denotes the specific language requested. + # + # An optional ClientConfig parameter can be passed, too (see + # `struct ClientConfig` above for more details). + # + + def get_transcript( + params : String, + client_config : ClientConfig | Nil = nil + ) : Hash(String, JSON::Any) + data = { + "context" => self.make_context(client_config), + "params" => params, + } + + return self._post_json("/youtubei/v1/get_transcript", data, client_config) + end + #################################################################### # _post_json(endpoint, data, client_config?) #