class Config YAML.mapping({ crawl_threads: Int32, channel_threads: Int32, video_threads: Int32, db: NamedTuple( user: String, password: String, host: String, port: Int32, dbname: String, ), dl_api_key: String?, https_only: Bool?, hmac_key: String?, full_refresh: Bool, }) end class FilteredCompressHandler < Kemal::Handler exclude ["/videoplayback", "/api/*"] def call(env) return call_next env if exclude_match? env {% if flag?(:without_zlib) %} call_next env {% else %} request_headers = env.request.headers if request_headers.includes_word?("Accept-Encoding", "gzip") env.response.headers["Content-Encoding"] = "gzip" env.response.output = Gzip::Writer.new(env.response.output, sync_close: true) elsif request_headers.includes_word?("Accept-Encoding", "deflate") env.response.headers["Content-Encoding"] = "deflate" env.response.output = Flate::Writer.new(env.response.output, sync_close: true) end call_next env {% end %} end end def rank_videos(db, n, filter, url) top = [] of {Float64, String} db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs| rs.each do id = rs.read(String) wilson_score = rs.read(Float64) published = rs.read(Time) # Exponential decay, older videos tend to rank lower temperature = wilson_score * Math.exp(-0.000005*((Time.now - published).total_minutes)) top << {temperature, id} end end top.sort! # Make hottest come first top.reverse! top = top.map { |a, b| b } if filter language_list = [] of String top.each do |id| if language_list.size == n break else client = make_client(url) begin video = get_video(id, db) rescue ex next end if video.language language = video.language else description = XML.parse(video.description) content = [video.title, description.content].join(" ") content = content[0, 10000] results = DetectLanguage.detect(content) language = results[0].language db.exec("UPDATE videos SET language = $1 WHERE id = $2", language, id) end if language == "en" language_list << id end end end return language_list else return top[0..n - 1] end end def login_req(login_form, f_req) data = { "pstMsg" => "1", "checkConnection" => "youtube", "checkedDomains" => "youtube", "hl" => "en", "deviceinfo" => %q([null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]), "f.req" => f_req, "flowName" => "GlifWebSignIn", "flowEntry" => "ServiceLogin", } data = login_form.merge(data) return HTTP::Params.encode(data) end def produce_playlist_url(ucid, index) ucid = ucid.lchop("UC") ucid = "VLUU" + ucid continuation = write_var_int(index) continuation.unshift(0x08_u8) slice = continuation.to_unsafe.to_slice(continuation.size) continuation = Base64.urlsafe_encode(slice, false) continuation = "PT:" + continuation continuation = continuation.bytes continuation.unshift(0x7a_u8, continuation.size.to_u8) slice = continuation.to_unsafe.to_slice(continuation.size) continuation = Base64.urlsafe_encode(slice) continuation = URI.escape(continuation) continuation = continuation.bytes continuation.unshift(continuation.size.to_u8) continuation.unshift(ucid.size.to_u8) continuation = ucid.bytes + continuation continuation.unshift(0x12.to_u8, ucid.size.to_u8) continuation.unshift(0xe2_u8, 0xa9_u8, 0x85_u8, 0xb2_u8, 2_u8, continuation.size.to_u8) slice = continuation.to_unsafe.to_slice(continuation.size) continuation = Base64.urlsafe_encode(slice) continuation = URI.escape(continuation) url = "/browse_ajax?action_continuation=1&continuation=#{continuation}" return url end def produce_videos_url(ucid, page = 1) page = "#{page}" meta = "\x12\x06videos \x00\x30\x02\x38\x01\x60\x01\x6a\x00\x7a" meta += page.size.to_u8.unsafe_chr meta += page meta += "\xb8\x01\x00" meta = Base64.urlsafe_encode(meta) meta = URI.escape(meta) continuation = "\x12" continuation += ucid.size.to_u8.unsafe_chr continuation += ucid continuation += "\x1a" continuation += meta.size.to_u8.unsafe_chr continuation += meta continuation = continuation.size.to_u8.unsafe_chr + continuation continuation = "\xe2\xa9\x85\xb2\x02" + continuation continuation = Base64.urlsafe_encode(continuation) continuation = URI.escape(continuation) url = "/browse_ajax?continuation=#{continuation}" return url end def read_var_int(bytes) numRead = 0 result = 0 read = bytes[numRead] if bytes.size == 1 result = bytes[0].to_i32 else while ((read & 0b10000000) != 0) read = bytes[numRead].to_u64 value = (read & 0b01111111) result |= (value << (7 * numRead)) numRead += 1 if numRead > 5 raise "VarInt is too big" end end end return result end def write_var_int(value : Int) bytes = [] of UInt8 value = value.to_u32 if value == 0 bytes = [0_u8] else while value != 0 temp = (value & 0b01111111).to_u8 value = value >> 7 if value != 0 temp |= 0b10000000 end bytes << temp end end return bytes end def generate_captcha(key) minute = Random::Secure.rand(12) minute_angle = minute * 30 minute = minute * 5 hour = Random::Secure.rand(12) hour_angle = hour * 30 + minute_angle.to_f / 12 if hour == 0 hour = 12 end clock_svg = <<-END_SVG 1 2 3 4 5 6 7 8 9 10 11 12 END_SVG challenge = "" convert = Process.run(%(convert -density 1200 -resize 400x400 -background none svg:- png:-), shell: true, input: IO::Memory.new(clock_svg), output: Process::Redirect::Pipe) do |proc| challenge = proc.output.gets_to_end challenge = Base64.strict_encode(challenge) challenge = "data:image/png;base64,#{challenge}" end answer = "#{hour}:#{minute.to_s.rjust(2, '0')}" token = OpenSSL::HMAC.digest(:sha256, key, answer) token = Base64.encode(token) return {challenge: challenge, token: token} end def html_to_description(description_html) if !description_html description = "" description_html = "" else description_html = description_html.to_s description = description_html.gsub("
", "\n") description = description.gsub("
", "\n") description = XML.parse_html(description).content.strip("\n ") end return description, description_html end