Refactor 'description_html'

2019-06-08 15:08:27 -05:00 · 2019-06-08 15:08:27 -05:00 · b43e9ed7e7
--- a/src/invidious.cr
+++ b/src/invidious.cr
@ -473,9 +473,8 @@ get "/watch" do |env|

  aspect_ratio = "16:9"

-  video.description = fill_links(video.description, "https", "www.youtube.com")
-  video.description = replace_links(video.description)
-  description = video.short_description
+  video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
+  video.description_html = replace_links(video.description_html)

  host_url = make_host_url(config, Kemal.config)
  host_params = env.request.query_params
@ -648,9 +647,8 @@ get "/embed/:id" do |env|

  aspect_ratio = nil

-  video.description = fill_links(video.description, "https", "www.youtube.com")
-  video.description = replace_links(video.description)
-  description = video.short_description
+  video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
+  video.description_html = replace_links(video.description_html)

  host_url = make_host_url(config, Kemal.config)
  host_params = env.request.query_params
@ -2466,7 +2464,7 @@ get "/feed/channel/:ucid" do |env|

    author = entry.xpath_node("author/name").not_nil!.content
    ucid = entry.xpath_node("channelid").not_nil!.content
-    description = entry.xpath_node("group/description").not_nil!.content
+    description_html = entry.xpath_node("group/description").not_nil!.to_s
    views = entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64

    videos << SearchVideo.new(
@ -2476,8 +2474,7 @@ get "/feed/channel/:ucid" do |env|
      ucid: ucid,
      published: published,
      views: views,
-      description: description,
-      description_html: "",
+      description_html: description_html,
      length_seconds: 0,
      live_now: false,
      paid: false,
@ -3460,11 +3457,8 @@ get "/api/v1/top" do |env|
          json.field "published", video.published.to_unix
          json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale))

-          description = video.description.gsub("<br>", "\n")
-          description = description.gsub("<br/>", "\n")
-          description = XML.parse_html(description)
-          json.field "description", description.content
-          json.field "descriptionHtml", video.description
+          json.field "description", html_to_content(video.description_html)
+          json.field "descriptionHtml", video.description_html
        end
      end
    end
@ -3511,8 +3505,7 @@ get "/api/v1/channels/:ucid" do |env|
  author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content
  author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"]
  author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]
-  description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")]))
-  description_html, description = html_to_content(description_html)
+  description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || ""

  paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
  is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
@ -3607,7 +3600,7 @@ get "/api/v1/channels/:ucid" do |env|

      json.field "autoGenerated", auto_generated
      json.field "isFamilyFriendly", is_family_friendly
-      json.field "description", description
+      json.field "description", html_to_content(description_html)
      json.field "descriptionHtml", description_html

      json.field "allowedRegions", allowed_regions
@ -3884,7 +3877,7 @@ get "/api/v1/playlists/:plid" do |env|
        end
      end

-      json.field "description", playlist.description
+      json.field "description", html_to_content(playlist.description_html)
      json.field "descriptionHtml", playlist.description_html
      json.field "videoCount", playlist.video_count

--- a/src/invidious/comments.cr
+++ b/src/invidious/comments.cr
@ -138,13 +138,8 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
                node_comment = node["commentRenderer"]
              end

-              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
-              if content_html
-                content_html = HTML.escape(content_html)
-              end
-
-              content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
-              content_html, content = html_to_content(content_html)
+              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s ||
+                             content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || ""

              author = node_comment["authorText"]?.try &.["simpleText"]
              author ||= ""
@ -179,7 +174,7 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
                json.field "isEdited", false
              end

-              json.field "content", content
+              json.field "content", html_to_content(content_html)
              json.field "contentHtml", content_html
              json.field "published", published.to_unix
              json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@ -177,23 +177,17 @@ def login_req(login_form, f_req)
  return HTTP::Params.encode(data)
 end

-def html_to_content(description_html)
-  if !description_html
-    description = ""
-    description_html = ""
-  else
-    description_html = description_html.to_s
-    description = description_html.gsub("<br>", "\n")
-    description = description.gsub("<br/>", "\n")
+def html_to_content(description_html : String)
+  description = description_html.gsub(/(<br>)|(<br\/>)/, {
+    "<br>":  "\n",
+    "<br/>": "\n",
+  })

-    if description.empty?
-      description = ""
-    else
-      description = XML.parse_html(description).content.strip("\n ")
-    end
+  if !description.empty?
+    description = XML.parse_html(description).content.strip("\n ")
  end

-  return description_html, description
+  return description
 end

 def extract_videos(nodeset, ucid = nil, author_name = nil)
@ -230,8 +224,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
    author ||= ""
    author_id ||= ""

-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-    description_html, description = html_to_content(description_html)
+    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""

    tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
    if !tile
@ -330,7 +323,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
        author_thumbnail: author_thumbnail,
        subscriber_count: subscriber_count,
        video_count: video_count,
-        description: description,
        description_html: description_html
      )
    else
@ -396,7 +388,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
        ucid: author_id,
        published: published,
        views: view_count,
-        description: description,
        description_html: description_html,
        length_seconds: length_seconds,
        live_now: live_now,
--- a/src/invidious/playlists.cr
+++ b/src/invidious/playlists.cr
@ -47,7 +47,6 @@ struct Playlist
    author:           String,
    author_thumbnail: String,
    ucid:             String,
-    description:      String,
    description_html: String,
    video_count:      Int32,
    views:            Int64,
@ -214,9 +213,8 @@ def fetch_playlist(plid, locale)
  end
  title = title.content.strip(" \n")

-  description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1]))
-  description_html ||= document.xpath_node(%q(//span[@class="pl-header-description-text"]))
-  description_html, description = html_to_content(description_html)
+  description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s ||
+                     document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || ""

  # YouTube allows anonymous playlists, so most of this can be empty or optional
  anchor = document.xpath_node(%q(//ul[@class="pl-header-details"]))
@ -245,7 +243,6 @@ def fetch_playlist(plid, locale)
    author: author,
    author_thumbnail: author_thumbnail,
    ucid: ucid,
-    description: description,
    description_html: description_html,
    video_count: video_count,
    views: views,
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@ -31,7 +31,7 @@ struct SearchVideo
        xml.element("media:title") { xml.text self.title }
        xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
          width: "320", height: "180")
-        xml.element("media:description") { xml.text self.description }
+        xml.element("media:description") { xml.text html_to_content(self.description_html) }
      end

      xml.element("media:community") do
@ -64,7 +64,7 @@ struct SearchVideo
        generate_thumbnails(json, self.id, config, kemal_config)
      end

-      json.field "description", self.description
+      json.field "description", html_to_content(self.description_html)
      json.field "descriptionHtml", self.description_html

      json.field "viewCount", self.views
@ -94,7 +94,6 @@ struct SearchVideo
    ucid:               String,
    published:          Time,
    views:              Int64,
-    description:        String,
    description_html:   String,
    length_seconds:     Int32,
    live_now:           Bool,
@ -187,7 +186,7 @@ struct SearchChannel

      json.field "subCount", self.subscriber_count
      json.field "videoCount", self.video_count
-      json.field "description", self.description
+      json.field "description", html_to_content(self.description_html)
      json.field "descriptionHtml", self.description_html
    end
  end
@ -208,7 +207,6 @@ struct SearchChannel
    author_thumbnail: String,
    subscriber_count: Int32,
    video_count:      Int32,
-    description:      String,
    description_html: String,
  })
 end
--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@ -286,10 +286,8 @@ struct Video
        generate_storyboards(json, self.id, self.storyboards, config, kemal_config)
      end

-      description_html, description = html_to_content(self.description)
-
-      json.field "description", description
-      json.field "descriptionHtml", description_html
+      json.field "description", html_to_content(self.description_html)
+      json.field "descriptionHtml", self.description_html
      json.field "published", self.published.to_unix
      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
      json.field "keywords", self.keywords
@ -467,6 +465,17 @@ struct Video
    end
  end

+  # `description_html` is stored in DB as `description`, which can be
+  # quite confusing. Since it currently isn't very practical to rename
+  # it, we instead define a getter and setter here.
+  def description_html
+    self.description
+  end
+
+  def description_html=(other : String)
+    self.description = other
+  end
+
  def allow_ratings
    allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool

@ -796,14 +805,19 @@ struct Video
  end

  def short_description
-    description = self.description.gsub("<br>", " ")
-    description = description.gsub("<br/>", " ")
-    description = XML.parse_html(description).content[0..200].gsub('"', "&quot;").gsub("\n", " ").strip(" ")
-    if description.empty?
-      description = " "
+    # Plain-text summary of the description (first 201 chars). Quotes are escaped
+    # after parsing: `content` decodes entities, undoing any earlier escaping.
+    short_description = self.description_html.gsub(/(<br>)|(<br\/>)|\n/, " ")
+    if !short_description.empty? # same empty guard as `html_to_content`
+      short_description = XML.parse_html(short_description).content[0..200]
+        .gsub('"', "&quot;").gsub("\n", " ").strip(" ")
+    end
+
+    if short_description.empty?
+      short_description = " "
    end

-    return description
+    return short_description
  end

  def length_seconds
@ -1151,28 +1165,23 @@ def fetch_video(id, proxies, region)
  end

  title = info["title"]
-  author = info["author"]
-  ucid = info["ucid"]
+  author = info["author"]? || ""
+  ucid = info["ucid"]? || ""

  views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
-  views = views.try &.["content"].to_i64?
-  views ||= 0_i64
+    .try &.["content"].to_i64? || 0_i64

  likes = html.xpath_node(%q(//button[@title="I like this"]/span))
-  likes = likes.try &.content.delete(",").try &.to_i?
-  likes ||= 0
+    .try &.content.delete(",").try &.to_i? || 0

  dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
-  dislikes = dislikes.try &.content.delete(",").try &.to_i?
-  dislikes ||= 0
+    .try &.content.delete(",").try &.to_i? || 0

  avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
  avg_rating = avg_rating.nan? ? 0.0 : avg_rating
  info["avg_rating"] = "#{avg_rating}"

-  description = html.xpath_node(%q(//p[@id="eow-description"]))
-  description = description ? description.to_xml(options: XML::SaveOptions::NO_DECL) : %q(<p id="eow-description"></p>)
-
+  description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || ""
  wilson_score = ci_lower_bound(likes, likes + dislikes)

  published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
@ -1188,7 +1197,8 @@ def fetch_video(id, proxies, region)
  genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
  genre ||= ""

-  genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]
+  genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
+  genre_url ||= ""

  # YouTube provides invalid URLs for some genres, so we fix that here
  case genre
@ -1205,30 +1215,12 @@ def fetch_video(id, proxies, region)
  when "Trailers"
    genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
  end
-  genre_url ||= ""

-  license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li))
-  if license
-    license = license.content
-  else
-    license = ""
-  end
+  license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
+  sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0"
+  author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]? || ""

-  sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")]))
-  if sub_count_text
-    sub_count_text = sub_count_text["title"]
-  else
-    sub_count_text = "0"
-  end
-
-  author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img))
-  if author_thumbnail
-    author_thumbnail = author_thumbnail["data-thumb"]
-  else
-    author_thumbnail = ""
-  end
-
-  video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description,
+  video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
    nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)

  return video
--- a/src/invidious/views/components/player.ecr
+++ b/src/invidious/views/components/player.ecr
@ -43,7 +43,7 @@
 var player_data = {
    aspect_ratio: '<%= aspect_ratio %>',
    title: "<%= video.title.dump_unquoted %>",
-    description: "<%= HTML.escape(description) %>",
+    description: "<%= HTML.escape(video.short_description) %>",
    thumbnail: "<%= thumbnail %>"
 }
 </script>
--- a/src/invidious/views/watch.ecr
+++ b/src/invidious/views/watch.ecr
@ -1,12 +1,12 @@
 <% content_for "header" do %>
 <meta name="thumbnail" content="<%= thumbnail %>">
-<meta name="description" content="<%= description %>">
+<meta name="description" content="<%= video.short_description %>">
 <meta name="keywords" content="<%= video.keywords.join(",") %>">
 <meta property="og:site_name" content="Invidious">
 <meta property="og:url" content="<%= host_url %>/watch?v=<%= video.id %>">
 <meta property="og:title" content="<%= HTML.escape(video.title) %>">
 <meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg">
-<meta property="og:description" content="<%= description %>">
+<meta property="og:description" content="<%= video.short_description %>">
 <meta property="og:type" content="video.other">
 <meta property="og:video:url" content="<%= host_url %>/embed/<%= video.id %>">
 <meta property="og:video:secure_url" content="<%= host_url %>/embed/<%= video.id %>">
@ -17,7 +17,7 @@
 <meta name="twitter:site" content="@omarroth1">
 <meta name="twitter:url" content="<%= host_url %>/watch?v=<%= video.id %>">
 <meta name="twitter:title" content="<%= HTML.escape(video.title) %>">
-<meta name="twitter:description" content="<%= description %>">
+<meta name="twitter:description" content="<%= video.short_description %>">
 <meta name="twitter:image" content="<%= host_url %>/vi/<%= video.id %>/maxres.jpg">
 <meta name="twitter:player" content="<%= host_url %>/embed/<%= video.id %>">
 <meta name="twitter:player:width" content="1280">
@ -185,7 +185,7 @@ var video_data = {
            </p>

            <div>
-                <%= video.description %>
+                <%= video.description_html %>
            </div>

            <hr>