# invidious-mod/src/invidious/videos/caption.cr
#
# 224 lines
# 5.1 KiB
# Crystal

require "json"
module Invidious::Videos
module Captions
struct Metadata
# Caption name; `from_yt_json` stores the track name truncated at the first " - ".
property name : String
# Language code of the track (taken from the "languageCode" field by `from_yt_json`).
property language_code : String
# URL of the track (taken from the "baseUrl" field by `from_yt_json`).
property base_url : String
# Whether the track is auto-generated (`from_yt_json` sets this when "kind" is "asr").
property auto_generated : Bool
# Builds a caption track description, assigning each argument directly to
# the instance variable of the same name.
def initialize(@name, @language_code, @base_url, @auto_generated)
end
# Builds the list of caption tracks from the JSON structure sent by Youtube.
#
# Reads the array found under
# `playerCaptionsTracklistRenderer.captionTracks` in *container* and creates
# one `Captions::Metadata` per entry. Returns an empty array when that path
# is absent.
#
# For each track:
# * the name comes from `name.simpleText`, falling back to the text of the
#   first element of `name.runs`; only the part before the first " - " is kept
# * the language code and URL come from `languageCode` and `baseUrl`
# * the track is flagged as auto-generated when `kind` equals "asr"
def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata)
  tracks = container.dig?("playerCaptionsTracklistRenderer", "captionTracks").try &.as_a

  # No caption tracks listed: nothing to build.
  return [] of Captions::Metadata if tracks.nil?

  tracks.map do |track|
    label = track["name"]["simpleText"]? || track["name"]["runs"][0]["text"]
    short_label = label.to_s.split(" - ")[0]
    is_asr = (track["kind"]? == "asr")

    Captions::Metadata.new(
      short_label,
      track["languageCode"].to_s,
      track["baseUrl"].to_s,
      is_asr
    )
  end
end
# Converts a timedtext XML document into a WebVTT string.
#
# *timedtext* is parsed as XML. The first child of the document is taken as
# the root, and the `p` elements of its first `body` child become the cues.
# A `p` element whose only child has the content "\n" is skipped.
#
# The "Language" header field is *tlang* when given, otherwise this track's
# own language code. The "Kind" header field is always "captions".
#
# Timing: a cue starts at its `t` attribute (milliseconds). It ends where the
# next cue starts; the last cue ends at start + `d`, or at start + start when
# it has no `d` attribute.
def timedtext_to_vtt(timedtext : String, tlang = nil) : String
  # In the future, we could just directly work with the url. This is more of a POC
  root = XML.parse(timedtext).children.first
  body = root.children.find { |child| child.name == "body" }

  cues = [] of XML::Node
  if body
    body.children.each do |para|
      next unless para.name == "p"
      # Ignore paragraphs that consist of a single bare newline.
      next if para.children.size == 1 && para.children[0].content == "\n"
      cues << para
    end
  end

  header = {
    "Kind"     => "captions",
    "Language" => (tlang || @language_code).to_s,
  }

  WebVTT.build(header) do |vtt|
    cues.each_with_index do |cue, index|
      begins_at = cue["t"].to_f.milliseconds
      # Parsed for every cue (even though only the last one uses it) so that
      # a malformed `d` attribute is reported regardless of position.
      length = cue["d"]?.try(&.to_f.milliseconds) || begins_at

      following = cues[index + 1]?
      ends_at = following ? following["t"].to_f.milliseconds : begins_at + length

      caption_text = cue.children.map(&.content).join

      vtt.cue(begins_at, ends_at, caption_text)
    end
  end
end
end
# List of all caption languages available on Youtube.
#
# Written as a tuple literal of display names. Besides plain language names
# it contains regional variants (e.g. "English (United Kingdom)") and
# "(auto-generated)" variants. The English entries come first; the remaining
# entries are in alphabetical order.
#
# NOTE(review): the first entry is an empty string. Its purpose is not
# visible in this file - check the code that consumes LANGUAGES before
# removing or moving it.
LANGUAGES = {
"",
"English",
"English (auto-generated)",
"English (United Kingdom)",
"English (United States)",
"Afrikaans",
"Albanian",
"Amharic",
"Arabic",
"Armenian",
"Azerbaijani",
"Bangla",
"Basque",
"Belarusian",
"Bosnian",
"Bulgarian",
"Burmese",
"Cantonese (Hong Kong)",
"Catalan",
"Cebuano",
"Chinese",
"Chinese (China)",
"Chinese (Hong Kong)",
"Chinese (Simplified)",
"Chinese (Taiwan)",
"Chinese (Traditional)",
"Corsican",
"Croatian",
"Czech",
"Danish",
"Dutch",
"Dutch (auto-generated)",
"Esperanto",
"Estonian",
"Filipino",
"Finnish",
"French",
"French (auto-generated)",
"Galician",
"Georgian",
"German",
"German (auto-generated)",
"Greek",
"Gujarati",
"Haitian Creole",
"Hausa",
"Hawaiian",
"Hebrew",
"Hindi",
"Hmong",
"Hungarian",
"Icelandic",
"Igbo",
"Indonesian",
"Indonesian (auto-generated)",
"Interlingue",
"Irish",
"Italian",
"Italian (auto-generated)",
"Japanese",
"Japanese (auto-generated)",
"Javanese",
"Kannada",
"Kazakh",
"Khmer",
"Korean",
"Korean (auto-generated)",
"Kurdish",
"Kyrgyz",
"Lao",
"Latin",
"Latvian",
"Lithuanian",
"Luxembourgish",
"Macedonian",
"Malagasy",
"Malay",
"Malayalam",
"Maltese",
"Maori",
"Marathi",
"Mongolian",
"Nepali",
"Norwegian Bokmål",
"Nyanja",
"Pashto",
"Persian",
"Polish",
"Portuguese",
"Portuguese (auto-generated)",
"Portuguese (Brazil)",
"Punjabi",
"Romanian",
"Russian",
"Russian (auto-generated)",
"Samoan",
"Scottish Gaelic",
"Serbian",
"Shona",
"Sindhi",
"Sinhala",
"Slovak",
"Slovenian",
"Somali",
"Southern Sotho",
"Spanish",
"Spanish (auto-generated)",
"Spanish (Latin America)",
"Spanish (Mexico)",
"Spanish (Spain)",
"Sundanese",
"Swahili",
"Swedish",
"Tajik",
"Tamil",
"Telugu",
"Thai",
"Turkish",
"Turkish (auto-generated)",
"Ukrainian",
"Urdu",
"Uzbek",
"Vietnamese",
"Vietnamese (auto-generated)",
"Welsh",
"Western Frisian",
"Xhosa",
"Yiddish",
"Yoruba",
"Zulu",
}
end
end