From 420ceffbb0cebed6c7b7d976828d6619f2aefe9d Mon Sep 17 00:00:00 2001 From: saltycrys <73420320+saltycrys@users.noreply.github.com> Date: Sun, 27 Dec 2020 05:14:33 +0100 Subject: [PATCH 1/2] Rename threads to fibers The config and command line options haven't been changed. --- src/invidious/jobs/refresh_channels_job.cr | 16 ++++++++-------- src/invidious/jobs/refresh_feeds_job.cr | 10 +++++----- src/invidious/jobs/subscribe_to_feeds_job.cr | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/invidious/jobs/refresh_channels_job.cr b/src/invidious/jobs/refresh_channels_job.cr index bbf55ff3c..f4dee0632 100644 --- a/src/invidious/jobs/refresh_channels_job.cr +++ b/src/invidious/jobs/refresh_channels_job.cr @@ -7,9 +7,9 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob end def begin - max_threads = config.channel_threads - lim_threads = max_threads - active_threads = 0 + max_fibers = config.channel_threads + lim_fibers = max_fibers + active_fibers = 0 active_channel = Channel(Bool).new backoff = 1.seconds @@ -19,26 +19,26 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob rs.each do id = rs.read(String) - if active_threads >= lim_threads + if active_fibers >= lim_fibers if active_channel.receive - active_threads -= 1 + active_fibers -= 1 end end - active_threads += 1 + active_fibers += 1 spawn do begin logger.trace("RefreshChannelsJob: Fetching channel #{id}") channel = fetch_channel(id, db, config.full_refresh) - lim_threads = max_threads + lim_fibers = max_fibers db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id) rescue ex logger.error("RefreshChannelsJob: #{id} : #{ex.message}") if ex.message == "Deleted or invalid channel" db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id) else - lim_threads = 1 + lim_fibers = 1 logger.error("RefreshChannelsJob: #{id} : backing off for #{backoff}s") sleep backoff if backoff < 1.days diff --git a/src/invidious/jobs/refresh_feeds_job.cr b/src/invidious/jobs/refresh_feeds_job.cr index 5dd476394..208569b83 100644 --- a/src/invidious/jobs/refresh_feeds_job.cr +++ b/src/invidious/jobs/refresh_feeds_job.cr @@ -7,8 +7,8 @@ class Invidious::Jobs::RefreshFeedsJob < Invidious::Jobs::BaseJob end def begin - max_threads = config.feed_threads - active_threads = 0 + max_fibers = config.feed_threads + active_fibers = 0 active_channel = Channel(Bool).new loop do @@ -17,13 +17,13 @@ class Invidious::Jobs::RefreshFeedsJob < Invidious::Jobs::BaseJob email = rs.read(String) view_name = "subscriptions_#{sha256(email)}" - if active_threads >= max_threads + if active_fibers >= max_fibers if active_channel.receive - active_threads -= 1 + active_fibers -= 1 end end - active_threads += 1 + active_fibers += 1 spawn do begin # Drop outdated views diff --git a/src/invidious/jobs/subscribe_to_feeds_job.cr b/src/invidious/jobs/subscribe_to_feeds_job.cr index 3bb312996..2255730dc 100644 --- a/src/invidious/jobs/subscribe_to_feeds_job.cr +++ b/src/invidious/jobs/subscribe_to_feeds_job.cr @@ -8,12 +8,12 @@ class Invidious::Jobs::SubscribeToFeedsJob < Invidious::Jobs::BaseJob end def begin - max_threads = 1 + max_fibers = 1 if config.use_pubsub_feeds.is_a?(Int32) - max_threads = config.use_pubsub_feeds.as(Int32) + max_fibers = config.use_pubsub_feeds.as(Int32) end - active_threads = 0 + active_fibers = 0 active_channel = Channel(Bool).new loop do @@ -21,13 +21,13 @@ class Invidious::Jobs::SubscribeToFeedsJob < Invidious::Jobs::BaseJob rs.each do ucid = rs.read(String) - if active_threads >= max_threads.as(Int32) + if active_fibers >= max_fibers.as(Int32) if active_channel.receive - active_threads -= 1 + active_fibers -= 1 end end - active_threads += 1 + active_fibers += 1 spawn do begin From c4ef055248781b36c57d99b97e798673a097ea73 Mon Sep 17 00:00:00 2001 From: saltycrys <73420320+saltycrys@users.noreply.github.com> Date: Sun, 27 Dec 2020 05:20:33 +0100 Subject: [PATCH 2/2] Add RefreshChannelsJob traces Traces can be enabled with `-l trace`. The problem with subscriptions is that sometimes requests to YouTube never finish. As soon as that happens `channel-threads` times subscriptions stop being refreshed. This is most likely a problem with the lsquick bindings. --- src/invidious.cr | 20 ++++++------- src/invidious/channels.cr | 33 ++++++++++++++++------ src/invidious/jobs/refresh_channels_job.cr | 17 +++++++---- src/invidious/routes/login.cr | 2 +- src/invidious/users.cr | 10 +++---- 5 files changed, 53 insertions(+), 29 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 91798129d..7184145a9 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -256,7 +256,7 @@ before_all do |env| headers["Cookie"] = env.request.headers["Cookie"] begin - user, sid = get_user(sid, headers, PG_DB, false) + user, sid = get_user(sid, headers, PG_DB, logger, false) csrf_token = generate_response(sid, { ":authorize_token", ":playlist_ajax", @@ -526,7 +526,7 @@ post "/subscription_ajax" do |env| case action when "action_create_subscription_to_channel" if !user.subscriptions.includes? channel_id - get_channel(channel_id, PG_DB, false, false) + get_channel(channel_id, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = array_append(subscriptions, $1) WHERE email = $2", channel_id, email) end when "action_remove_subscriptions" @@ -561,7 +561,7 @@ get "/subscription_manager" do |env| headers = HTTP::Headers.new headers["Cookie"] = env.request.headers["Cookie"] - user, sid = get_user(sid, headers, PG_DB) + user, sid = get_user(sid, headers, PG_DB, logger) end action_takeout = env.params.query["action_takeout"]?.try &.to_i? @@ -685,7 +685,7 @@ post "/data_control" do |env| user.subscriptions += body["subscriptions"].as_a.map { |a| a.as_s } user.subscriptions.uniq! - user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, false, false) + user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = $1 WHERE email = $2", user.subscriptions, user.email) end @@ -754,7 +754,7 @@ post "/data_control" do |env| end user.subscriptions.uniq! - user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, false, false) + user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = $1 WHERE email = $2", user.subscriptions, user.email) when "import_freetube" @@ -763,7 +763,7 @@ post "/data_control" do |env| end user.subscriptions.uniq! - user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, false, false) + user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = $1 WHERE email = $2", user.subscriptions, user.email) when "import_newpipe_subscriptions" @@ -782,7 +782,7 @@ post "/data_control" do |env| end user.subscriptions.uniq! - user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, false, false) + user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = $1 WHERE email = $2", user.subscriptions, user.email) when "import_newpipe" @@ -801,7 +801,7 @@ post "/data_control" do |env| user.subscriptions += db.query_all("SELECT url FROM subscriptions", as: String).map { |url| url.lchop("https://www.youtube.com/channel/") } user.subscriptions.uniq! - user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, false, false) + user.subscriptions = get_batch_channels(user.subscriptions, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = $1 WHERE email = $2", user.subscriptions, user.email) @@ -1197,7 +1197,7 @@ get "/feed/subscriptions" do |env| headers["Cookie"] = env.request.headers["Cookie"] if !user.password - user, sid = get_user(sid, headers, PG_DB) + user, sid = get_user(sid, headers, PG_DB, logger) end max_results = env.params.query["max_results"]?.try &.to_i?.try &.clamp(0, MAX_ITEMS_PER_PAGE) @@ -2811,7 +2811,7 @@ post "/api/v1/auth/subscriptions/:ucid" do |env| ucid = env.params.url["ucid"] if !user.subscriptions.includes? ucid - get_channel(ucid, PG_DB, false, false) + get_channel(ucid, PG_DB, logger, false, false) PG_DB.exec("UPDATE users SET feed_needs_update = true, subscriptions = array_append(subscriptions,$1) WHERE email = $2", ucid, user.email) end diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 444a6eda6..659dc599d 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -144,7 +144,7 @@ class ChannelRedirect < Exception end end -def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10) +def get_batch_channels(channels, db, logger, refresh = false, pull_all_videos = true, max_threads = 10) finished_channel = Channel(String | Nil).new spawn do @@ -160,7 +160,7 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma active_threads += 1 spawn do begin - get_channel(ucid, db, refresh, pull_all_videos) + get_channel(ucid, db, logger, refresh, pull_all_videos) finished_channel.send(ucid) rescue ex finished_channel.send(nil) @@ -181,10 +181,10 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma return final end -def get_channel(id, db, refresh = true, pull_all_videos = true) +def get_channel(id, db, logger, refresh = true, pull_all_videos = true) if channel = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel) if refresh && Time.utc - channel.updated > 10.minutes - channel = fetch_channel(id, db, pull_all_videos: pull_all_videos) + channel = fetch_channel(id, db, logger, pull_all_videos: pull_all_videos) channel_array = channel.to_a args = arg_array(channel_array) @@ -192,7 +192,7 @@ def get_channel(id, db, refresh = true, pull_all_videos = true) ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: channel_array) end else - channel = fetch_channel(id, db, pull_all_videos: pull_all_videos) + channel = fetch_channel(id, db, logger, pull_all_videos: pull_all_videos) channel_array = channel.to_a args = arg_array(channel_array) @@ -202,8 +202,12 @@ def get_channel(id, db, refresh = true, pull_all_videos = true) return channel end -def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) +def fetch_channel(ucid, db, logger, pull_all_videos = true, locale = nil) + logger.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}, locale = #{locale}") + + logger.trace("fetch_channel: #{ucid} : Downloading RSS feed") rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body + logger.trace("fetch_channel: #{ucid} : Parsing RSS feed") rss = XML.parse_html(rss) author = rss.xpath_node(%q(//feed/title)) @@ -219,14 +223,19 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) auto_generated = true end + logger.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}") + page = 1 + logger.trace("fetch_channel: #{ucid} : Downloading channel videos page") response = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = [] of SearchVideo begin initial_data = JSON.parse(response.body).as_a.find &.["response"]? raise InfoException.new("Could not extract channel JSON") if !initial_data + + logger.trace("fetch_channel: #{ucid} : Extracting videos from channel videos page initial_data") videos = extract_videos(initial_data.as_h, author, ucid) rescue ex if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") || @@ -236,6 +245,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) raise ex end + logger.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") rss.xpath_nodes("//feed/entry").each do |entry| video_id = entry.xpath_node("videoid").not_nil!.content title = entry.xpath_node("title").not_nil!.content @@ -269,6 +279,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) views: views, }) + logger.trace("fetch_channel: #{ucid} : video #{video_id} : Updating or inserting video") + # We don't include the 'premiere_timestamp' here because channel pages don't include them, # meaning the above timestamp is always null was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \ @@ -276,8 +288,13 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) updated = $4, ucid = $5, author = $6, length_seconds = $7, \ live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool) - db.exec("UPDATE users SET notifications = array_append(notifications, $1), \ - feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) if was_insert + if was_insert + logger.trace("fetch_channel: #{ucid} : video #{video_id} : Inserted, updating subscriptions") + db.exec("UPDATE users SET notifications = array_append(notifications, $1), \ + feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) + else + logger.trace("fetch_channel: #{ucid} : video #{video_id} : Updated") + end end if pull_all_videos diff --git a/src/invidious/jobs/refresh_channels_job.cr b/src/invidious/jobs/refresh_channels_job.cr index f4dee0632..6c858afa0 100644 --- a/src/invidious/jobs/refresh_channels_job.cr +++ b/src/invidious/jobs/refresh_channels_job.cr @@ -20,18 +20,23 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob id = rs.read(String) if active_fibers >= lim_fibers + logger.trace("RefreshChannelsJob: Fiber limit reached, waiting...") if active_channel.receive + logger.trace("RefreshChannelsJob: Fiber limit ok, continuing") active_fibers -= 1 end end + logger.trace("RefreshChannelsJob: #{id} : Spawning fiber") active_fibers += 1 spawn do begin - logger.trace("RefreshChannelsJob: Fetching channel #{id}") - channel = fetch_channel(id, db, config.full_refresh) + logger.trace("RefreshChannelsJob: #{id} fiber : Fetching channel") + channel = fetch_channel(id, db, logger, config.full_refresh) lim_fibers = max_fibers + + logger.trace("RefreshChannelsJob: #{id} fiber : Updating DB") db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id) rescue ex logger.error("RefreshChannelsJob: #{id} : #{ex.message}") @@ -39,7 +44,7 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id) else lim_fibers = 1 - logger.error("RefreshChannelsJob: #{id} : backing off for #{backoff}s") + logger.error("RefreshChannelsJob: #{id} fiber : backing off for #{backoff}s") sleep backoff if backoff < 1.days backoff += backoff @@ -47,13 +52,15 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob backoff = 1.days end end + ensure + logger.trace("RefreshChannelsJob: #{id} fiber : Done") + active_channel.send(true) end - - active_channel.send(true) end end end + logger.debug("RefreshChannelsJob: Done, sleeping for one minute") sleep 1.minute Fiber.yield end diff --git a/src/invidious/routes/login.cr b/src/invidious/routes/login.cr index 45a6d4d8c..42fb46762 100644 --- a/src/invidious/routes/login.cr +++ b/src/invidious/routes/login.cr @@ -267,7 +267,7 @@ class Invidious::Routes::Login < Invidious::Routes::BaseRoute raise "Couldn't get SID." end - user, sid = get_user(sid, headers, PG_DB) + user, sid = get_user(sid, headers, PG_DB, logger) # We are now logged in traceback << "done.
" diff --git a/src/invidious/users.cr b/src/invidious/users.cr index 6a3ca5c10..38767a23f 100644 --- a/src/invidious/users.cr +++ b/src/invidious/users.cr @@ -269,12 +269,12 @@ struct Preferences end end -def get_user(sid, headers, db, refresh = true) +def get_user(sid, headers, db, logger, refresh = true) if email = db.query_one?("SELECT email FROM session_ids WHERE id = $1", sid, as: String) user = db.query_one("SELECT * FROM users WHERE email = $1", email, as: User) if refresh && Time.utc - user.updated > 1.minute - user, sid = fetch_user(sid, headers, db) + user, sid = fetch_user(sid, headers, db, logger) user_array = user.to_a user_array[4] = user_array[4].to_json # User preferences args = arg_array(user_array) @@ -292,7 +292,7 @@ def get_user(sid, headers, db, refresh = true) end end else - user, sid = fetch_user(sid, headers, db) + user, sid = fetch_user(sid, headers, db, logger) user_array = user.to_a user_array[4] = user_array[4].to_json # User preferences args = arg_array(user.to_a) @@ -313,7 +313,7 @@ def get_user(sid, headers, db, refresh = true) return user, sid end -def fetch_user(sid, headers, db) +def fetch_user(sid, headers, db, logger) feed = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers) feed = XML.parse_html(feed.body) @@ -326,7 +326,7 @@ def fetch_user(sid, headers, db) end end - channels = get_batch_channels(channels, db, false, false) + channels = get_batch_channels(channels, db, logger, false, false) email = feed.xpath_node(%q(//a[@class="yt-masthead-picker-header yt-masthead-picker-active-account"])) if email