From 35e63fa3f5472f847044499ce6d2c6a44220ce07 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Tue, 9 Oct 2018 08:40:29 -0500 Subject: [PATCH] Use materialized views for subscription feeds --- config/config.yml | 1 + src/invidious.cr | 72 +++++++++++++++++++------------- src/invidious/helpers/helpers.cr | 1 + src/invidious/helpers/utils.cr | 6 +++ src/invidious/jobs.cr | 11 +++++ 5 files changed, 61 insertions(+), 30 deletions(-) diff --git a/config/config.yml b/config/config.yml index 577968f0d..309be236a 100644 --- a/config/config.yml +++ b/config/config.yml @@ -10,3 +10,4 @@ db: full_refresh: false https_only: false geo_bypass: true +update_feeds: true diff --git a/src/invidious.cr b/src/invidious.cr index a73054fc4..4ba4f9604 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -98,6 +98,12 @@ spawn do end end +if CONFIG.update_feeds + spawn do + update_feeds(PG_DB) + end +end + decrypt_function = [] of {name: String, value: Int32} spawn do update_decrypt_function do |function| @@ -475,9 +481,8 @@ get "/search" do |env| user = env.get? "user" if user user = user.as(User) - ucids = user.subscriptions + view_name = "subscriptions_#{sha256(user.email)[0..7]}" end - ucids ||= [] of String channel = nil content_type = "all" @@ -514,14 +519,19 @@ get "/search" do |env| if channel count, videos = channel_search(search_query, page, channel) elsif subscriptions - videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM ( + if view_name + videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM ( SELECT *, - to_tsvector(channel_videos.title) || - to_tsvector(channel_videos.author) + to_tsvector(#{view_name}.title) || + to_tsvector(#{view_name}.author) as document - FROM channel_videos WHERE ucid IN (#{arg_array(ucids, 3)}) - ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", [search_query, (page - 1) * 20] + ucids, as: ChannelVideo) - count = videos.size + FROM #{view_name} + ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", search_query, (page - 1) * 20, as: ChannelVideo) + count = videos.size + else + videos = [] of ChannelVideo + count = 0 + end else begin search_params = produce_search_params(sort: sort, date: date, content_type: content_type, @@ -799,6 +809,12 @@ post "/login" do |env| PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array) + view_name = "subscriptions_#{sha256(user.email)[0..7]}" + PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS \ + SELECT * FROM channel_videos WHERE \ + ucid = ANY ((SELECT subscriptions FROM users WHERE email = '#{user.email}')::text[]) \ + ORDER BY published DESC;") + if Kemal.config.ssl || CONFIG.https_only secure = true else @@ -1364,6 +1380,8 @@ get "/feed/subscriptions" do |env| notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, as: Array(String)) + view_name = "subscriptions_#{sha256(user.email)[0..7]}" + if preferences.notifications_only && !notifications.empty? args = arg_array(notifications) @@ -1386,39 +1404,34 @@ get "/feed/subscriptions" do |env| else if preferences.latest_only if preferences.unseen_only - ucids = arg_array(user.subscriptions) if user.watched.empty? watched = "'{}'" else - watched = arg_array(user.watched, user.subscriptions.size + 1) + watched = arg_array(user.watched) end - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", - user.subscriptions + user.watched, as: ChannelVideo) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE \ + id NOT IN (#{watched}) ORDER BY ucid, published DESC", + user.watched, as: ChannelVideo) else - args = arg_array(user.subscriptions) - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name}", as: ChannelVideo) end videos.sort_by! { |video| video.published }.reverse! else if preferences.unseen_only - ucids = arg_array(user.subscriptions, 3) if user.watched.empty? watched = "'{}'" else - watched = arg_array(user.watched, user.subscriptions.size + 3) + watched = arg_array(user.watched, 3) end - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{ucids}) \ - AND id NOT IN (#{watched}) ORDER BY published DESC LIMIT $1 OFFSET $2", - [limit, offset] + user.subscriptions + user.watched, as: ChannelVideo) + videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE \ + id NOT IN (#{watched}) LIMIT $1 OFFSET $2", + [limit, offset] + user.watched, as: ChannelVideo) else - args = arg_array(user.subscriptions, 3) - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ - ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) + videos = PG_DB.query_all("SELECT * FROM #{view_name} \ + ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) end end @@ -1576,15 +1589,14 @@ get "/feed/private" do |env| latest_only ||= 0 latest_only = latest_only == 1 + view_name = "subscriptions_#{sha256(user.email)[0..7]}" + if latest_only - args = arg_array(user.subscriptions) - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo) videos.sort_by! { |video| video.published }.reverse! else - args = arg_array(user.subscriptions, 3) - videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ - ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo) + videos = PG_DB.query_all("SELECT * FROM #{view_name} \ + ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) end sort = env.params.query["sort"]? diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index ac48534a0..46e1e9313 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -15,6 +15,7 @@ class Config hmac_key: String?, full_refresh: Bool, geo_bypass: Bool, + update_feeds: Bool, }) end diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index f6478d00c..7ff2e6227 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -238,3 +238,9 @@ def write_var_int(value : Int) return bytes end + +def sha256(text) + digest = OpenSSL::Digest.new("SHA256") + digest << text + return digest.hexdigest +end diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr index e1c4e2d31..14ddf6796 100644 --- a/src/invidious/jobs.cr +++ b/src/invidious/jobs.cr @@ -104,6 +104,17 @@ def refresh_videos(db) end end +def update_feeds(db) + loop do + users = db.query_all("SELECT email FROM users", as: String) + + users.each do |email| + view_name = "subscriptions_#{sha256(email)[0..7]}" + db.exec("REFRESH MATERIALIZED VIEW #{view_name}") + end + end +end + def pull_top_videos(config, db) if config.dl_api_key DetectLanguage.configure do |dl_config|