aaaaddsas/1-2469.patch

359 行
15 KiB
Diff

From 6a19f66c5380488896c341d88a52b99601008d8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89milien=20Devos?= <contact@emiliendevos.be>
Date: Tue, 19 Oct 2021 07:12:15 +0000
Subject: [PATCH] limit feeds and delete materialized views
---
config/migrate-scripts/migrate-db-8bc91ce.sh | 6 ++
config/sql/channel_videos.sql | 8 +--
kubernetes/values.yaml | 1 -
src/invidious.cr | 12 ----
src/invidious/config.cr | 2 -
src/invidious/jobs/refresh_feeds_job.cr | 75 --------------------
src/invidious/routes/account.cr | 2 -
src/invidious/routes/login.cr | 3 -
src/invidious/routes/search.cr | 2 +
src/invidious/search/processors.cr | 18 ++---
src/invidious/users.cr | 51 ++++++-------
11 files changed, 43 insertions(+), 137 deletions(-)
create mode 100644 config/migrate-scripts/migrate-db-8bc91ce.sh
delete mode 100644 src/invidious/jobs/refresh_feeds_job.cr
diff --git a/config/migrate-scripts/migrate-db-8bc91ce.sh b/config/migrate-scripts/migrate-db-8bc91ce.sh
new file mode 100644
index 000000000..04388175e
--- /dev/null
+++ b/config/migrate-scripts/migrate-db-8bc91ce.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Replace the ucid-only index on channel_videos with a (ucid, published) index.
+# NOTE(review): psql connection options are taken from "$@" -- align with how the other migrate scripts call psql.
+psql "$@" -v ON_ERROR_STOP=1 \
+  -c 'CREATE INDEX IF NOT EXISTS channel_videos_ucid_published_idx ON public.channel_videos USING btree (ucid COLLATE pg_catalog."default", published);' \
+  -c 'DROP INDEX IF EXISTS channel_videos_ucid_idx;'
diff --git a/config/sql/channel_videos.sql b/config/sql/channel_videos.sql
index cd4e0ffdb..f2ac4876c 100644
--- a/config/sql/channel_videos.sql
+++ b/config/sql/channel_videos.sql
@@ -19,12 +19,12 @@ CREATE TABLE IF NOT EXISTS public.channel_videos
GRANT ALL ON TABLE public.channel_videos TO current_user;
--- Index: public.channel_videos_ucid_idx
+-- Index: public.channel_videos_ucid_published_idx
--- DROP INDEX public.channel_videos_ucid_idx;
+-- DROP INDEX public.channel_videos_ucid_published_idx;
-CREATE INDEX IF NOT EXISTS channel_videos_ucid_idx
+CREATE INDEX IF NOT EXISTS channel_videos_ucid_published_idx
ON public.channel_videos
USING btree
- (ucid COLLATE pg_catalog."default");
+ (ucid COLLATE pg_catalog."default", published);
diff --git a/kubernetes/values.yaml b/kubernetes/values.yaml
index 2dc4db2c4..5506c772d 100644
--- a/kubernetes/values.yaml
+++ b/kubernetes/values.yaml
@@ -49,7 +49,6 @@ postgresql:
# Adapted from ../config/config.yml
config:
channel_threads: 1
- feed_threads: 1
db:
user: kemal
password: kemal
diff --git a/src/invidious.cr b/src/invidious.cr
index 9f3d5d10f..6c6bd32ce 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -86,14 +86,6 @@ Kemal.config.extra_options do |parser|
exit
end
end
- parser.on("-f THREADS", "--feed-threads=THREADS", "Number of threads for refreshing feeds (default: #{CONFIG.feed_threads})") do |number|
- begin
- CONFIG.feed_threads = number.to_i
- rescue ex
- puts "THREADS must be integer"
- exit
- end
- end
parser.on("-o OUTPUT", "--output=OUTPUT", "Redirect output (default: #{CONFIG.output})") do |output|
CONFIG.output = output
end
@@ -141,10 +133,6 @@ if CONFIG.channel_threads > 0
Invidious::Jobs.register Invidious::Jobs::RefreshChannelsJob.new(PG_DB)
end
-if CONFIG.feed_threads > 0
- Invidious::Jobs.register Invidious::Jobs::RefreshFeedsJob.new(PG_DB)
-end
-
DECRYPT_FUNCTION = DecryptFunction.new(CONFIG.decrypt_polling)
if CONFIG.decrypt_polling
Invidious::Jobs.register Invidious::Jobs::UpdateDecryptFunctionJob.new
diff --git a/src/invidious/config.cr b/src/invidious/config.cr
index 93c4c0f7a..6fe72dbda 100644
--- a/src/invidious/config.cr
+++ b/src/invidious/config.cr
@@ -62,8 +62,6 @@ class Config
# Time interval between two executions of the job that crawls channel videos (subscriptions update).
@[YAML::Field(converter: Preferences::TimeSpanConverter)]
property channel_refresh_interval : Time::Span = 30.minutes
- # Number of threads to use for updating feeds
- property feed_threads : Int32 = 1
# Log file path or STDOUT
property output : String = "STDOUT"
# Default log level, valid YAML values are ints and strings, see src/invidious/helpers/logger.cr
diff --git a/src/invidious/jobs/refresh_feeds_job.cr b/src/invidious/jobs/refresh_feeds_job.cr
deleted file mode 100644
index 4b52c9596..000000000
--- a/src/invidious/jobs/refresh_feeds_job.cr
+++ /dev/null
@@ -1,75 +0,0 @@
-class Invidious::Jobs::RefreshFeedsJob < Invidious::Jobs::BaseJob
- private getter db : DB::Database
-
- def initialize(@db)
- end
-
- def begin
- max_fibers = CONFIG.feed_threads
- active_fibers = 0
- active_channel = Channel(Bool).new
-
- loop do
- db.query("SELECT email FROM users WHERE feed_needs_update = true OR feed_needs_update IS NULL") do |rs|
- rs.each do
- email = rs.read(String)
- view_name = "subscriptions_#{sha256(email)}"
-
- if active_fibers >= max_fibers
- if active_channel.receive
- active_fibers -= 1
- end
- end
-
- active_fibers += 1
- spawn do
- begin
- # Drop outdated views
- column_array = Invidious::Database.get_column_array(db, view_name)
- ChannelVideo.type_array.each_with_index do |name, i|
- if name != column_array[i]?
- LOGGER.info("RefreshFeedsJob: DROP MATERIALIZED VIEW #{view_name}")
- db.exec("DROP MATERIALIZED VIEW #{view_name}")
- raise "view does not exist"
- end
- end
-
- if !db.query_one("SELECT pg_get_viewdef('#{view_name}')", as: String).includes? "WHERE ((cv.ucid = ANY (u.subscriptions))"
- LOGGER.info("RefreshFeedsJob: Materialized view #{view_name} is out-of-date, recreating...")
- db.exec("DROP MATERIALIZED VIEW #{view_name}")
- end
-
- db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
- db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
- rescue ex
- # Rename old views
- begin
- legacy_view_name = "subscriptions_#{sha256(email)[0..7]}"
-
- db.exec("SELECT * FROM #{legacy_view_name} LIMIT 0")
- LOGGER.info("RefreshFeedsJob: RENAME MATERIALIZED VIEW #{legacy_view_name}")
- db.exec("ALTER MATERIALIZED VIEW #{legacy_view_name} RENAME TO #{view_name}")
- rescue ex
- begin
- # While iterating through, we may have an email stored from a deleted account
- if db.query_one?("SELECT true FROM users WHERE email = $1", email, as: Bool)
- LOGGER.info("RefreshFeedsJob: CREATE #{view_name}")
- db.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(email)}")
- db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
- end
- rescue ex
- LOGGER.error("RefreshFeedJobs: REFRESH #{email} : #{ex.message}")
- end
- end
- end
-
- active_channel.send(true)
- end
- end
- end
-
- sleep 5.seconds
- Fiber.yield
- end
- end
-end
diff --git a/src/invidious/routes/account.cr b/src/invidious/routes/account.cr
index 9bb731360..e3220cdb8 100644
--- a/src/invidious/routes/account.cr
+++ b/src/invidious/routes/account.cr
@@ -128,10 +128,8 @@ module Invidious::Routes::Account
return error_template(400, ex)
end
- view_name = "subscriptions_#{sha256(user.email)}"
Invidious::Database::Users.delete(user)
Invidious::Database::SessionIDs.delete(email: user.email)
- PG_DB.exec("DROP MATERIALIZED VIEW #{view_name}")
env.request.cookies.each do |cookie|
cookie.expires = Time.utc(1990, 1, 1)
diff --git a/src/invidious/routes/login.cr b/src/invidious/routes/login.cr
index 99fc13a2b..ca223b425 100644
--- a/src/invidious/routes/login.cr
+++ b/src/invidious/routes/login.cr
@@ -430,9 +430,6 @@ module Invidious::Routes::Login
Invidious::Database::Users.insert(user)
Invidious::Database::SessionIDs.insert(sid, email)
- view_name = "subscriptions_#{sha256(user.email)}"
- PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(user.email)}")
-
env.response.cookies["SID"] = Invidious::User::Cookies.sid(CONFIG.domain, sid)
if env.request.cookies["PREFS"]?
diff --git a/src/invidious/routes/search.cr b/src/invidious/routes/search.cr
index e60d00815..1d4911bde 100644
--- a/src/invidious/routes/search.cr
+++ b/src/invidious/routes/search.cr
@@ -51,6 +51,8 @@ module Invidious::Routes::Search
else
user = env.get? "user"
+ user = user ? user.as(User) : nil
+
begin
videos = query.process
rescue ex : ChannelSearchException
diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr
index d1409c06c..1ff6f95a9 100644
--- a/src/invidious/search/processors.cr
+++ b/src/invidious/search/processors.cr
@@ -45,18 +45,18 @@ module Invidious::Search
# Search inside of user subscriptions
def subscriptions(query : Query, user : Invidious::User) : Array(ChannelVideo)
- view_name = "subscriptions_#{sha256(user.email)}"
-
return PG_DB.query_all("
SELECT id,title,published,updated,ucid,author,length_seconds
FROM (
- SELECT *,
- to_tsvector(#{view_name}.title) ||
- to_tsvector(#{view_name}.author)
- as document
- FROM #{view_name}
- ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;",
- query.text, (query.page - 1) * 20,
+ SELECT cv.*,
+ to_tsvector(cv.title) || to_tsvector(cv.author) AS document
+ FROM channel_videos cv
+ JOIN users ON cv.ucid = any(users.subscriptions)
+ WHERE users.email = $1 AND published > now() - interval '1 month'
+ ) v_search WHERE v_search.document @@ plainto_tsquery($2)
+ ORDER BY published DESC LIMIT 20 OFFSET $3;",
+ # $1 = subscriber email, $2 = search text, $3 = offset of the 20-row page; ordered on the outer query so paging is stable
+ user.email, query.text, (query.page - 1) * 20,
as: ChannelVideo
)
end
diff --git a/src/invidious/users.cr b/src/invidious/users.cr
index b763596bc..6f82ead33 100644
--- a/src/invidious/users.cr
+++ b/src/invidious/users.cr
@@ -12,24 +12,12 @@ def get_user(sid, headers, refresh = true)
Invidious::Database::Users.insert(user, update_on_conflict: true)
Invidious::Database::SessionIDs.insert(sid, user.email, handle_conflicts: true)
-
- begin
- view_name = "subscriptions_#{sha256(user.email)}"
- PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(user.email)}")
- rescue ex
- end
end
else
user, sid = fetch_user(sid, headers)
Invidious::Database::Users.insert(user, update_on_conflict: true)
Invidious::Database::SessionIDs.insert(sid, user.email, handle_conflicts: true)
-
- begin
- view_name = "subscriptions_#{sha256(user.email)}"
- PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(user.email)}")
- rescue ex
- end
end
return user, sid
@@ -128,7 +116,6 @@ def get_subscription_feed(user, max_results = 40, page = 1)
offset = (page - 1) * limit
notifications = Invidious::Database::Users.select_notifications(user)
- view_name = "subscriptions_#{sha256(user.email)}"
if user.preferences.notifications_only && !notifications.empty?
# Only show notifications
@@ -154,33 +141,39 @@ def get_subscription_feed(user, max_results = 40, page = 1)
# Show latest video from a channel that a user hasn't watched
# "unseen_only" isn't really correct here, more accurate would be "unwatched_only"
- if user.watched.empty?
- values = "'{}'"
- else
- values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}"
- end
- videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY ucid, published DESC", as: ChannelVideo)
+ # Latest video per subscribed channel that is not in the user's watched list, restricted to the last month.
+ # The watched list is read from the users row inside the query, so only the email is passed as a bind parameter.
+ videos = PG_DB.query_all("SELECT DISTINCT ON (cv.ucid) cv.* " \
+ "FROM channel_videos cv " \
+ "JOIN users ON cv.ucid = any(users.subscriptions) " \
+ "WHERE users.email = $1 AND NOT cv.id = any(users.watched) AND published > now() - interval '1 month' " \
+ "ORDER BY ucid, published DESC", user.email, as: ChannelVideo)
else
# Show latest video from each channel
- videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo)
+ videos = PG_DB.query_all("SELECT DISTINCT ON (cv.ucid) cv.* " \
+ "FROM channel_videos cv " \
+ "JOIN users ON cv.ucid = any(users.subscriptions) " \
+ "WHERE users.email = $1 AND published > now() - interval '1 month' " \
+ "ORDER BY ucid, published DESC", user.email, as: ChannelVideo)
end
videos.sort_by!(&.published).reverse!
else
if user.preferences.unseen_only
# Only show unwatched
-
- if user.watched.empty?
- values = "'{}'"
- else
- values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}"
- end
- videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
+ videos = PG_DB.query_all("SELECT cv.* " \
+ "FROM channel_videos cv " \
+ "JOIN users ON cv.ucid = any(users.subscriptions) " \
+ "WHERE users.email = $1 AND NOT cv.id = any(users.watched) AND published > now() - interval '1 month' " \
+ "ORDER BY published DESC LIMIT $2 OFFSET $3", user.email, limit, offset, as: ChannelVideo)
else
# Sort subscriptions as normal
-
- videos = PG_DB.query_all("SELECT * FROM #{view_name} ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
+ videos = PG_DB.query_all("SELECT cv.* " \
+ "FROM channel_videos cv " \
+ "JOIN users ON cv.ucid = any(users.subscriptions) " \
+ "WHERE users.email = $1 AND published > now() - interval '1 month' " \
+ "ORDER BY published DESC LIMIT $2 OFFSET $3", user.email, limit, offset, as: ChannelVideo)
end
end