Use XML.parse instead of XML.parse_html

Due to recent changes to libxml2 (between 2.9.14 and 2.10.4, See https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the HTML parser doesn't take into account the namespaces (xmlns). Because HTML shouldn't contain namespaces anyway, there is no reason for use to keep using it. But switching to the XML parser means that we have to pass the namespaces to every single 'xpath_node(s)' method for it to be able to properly navigate the XML structure.
2023-05-08 00:53:08 +02:00 · 2023-05-08 00:53:08 +02:00 · ce1fb8d08c
--- a/src/invidious/channels/channels.cr
+++ b/src/invidious/channels/channels.cr
@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool)
  LOGGER.debug("fetch_channel: #{ucid}")
  LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}")
  namespaces = {
    "yt"      => "http://www.youtube.com/xml/schemas/2015",
    "media"   => "http://search.yahoo.com/mrss/",
    "default" => "http://www.w3.org/2005/Atom",
  }
  LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
  rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
  LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
-  rss = XML.parse_html(rss)
+  rss = XML.parse(rss)
-  author = rss.xpath_node(%q(//feed/title))
+  author = rss.xpath_node("//default:feed/default:title", namespaces)
  if !author
    raise InfoException.new("Deleted or invalid channel")
  end
@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool)
  videos, continuation = IV::Channel::Tabs.get_videos(channel)
  LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
-  rss.xpath_nodes("//feed/entry").each do |entry|
+  rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry|
-    video_id = entry.xpath_node("videoid").not_nil!.content
+    video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content
-    title = entry.xpath_node("title").not_nil!.content
+    title = entry.xpath_node("default:title", namespaces).not_nil!.content
-    published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
+
-    updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
+    published = Time.parse_rfc3339(
-    author = entry.xpath_node("author/name").not_nil!.content
+      entry.xpath_node("default:published", namespaces).not_nil!.content
-    ucid = entry.xpath_node("channelid").not_nil!.content
+    )
-    views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
+    updated = Time.parse_rfc3339(
-    views ||= 0_i64
+      entry.xpath_node("default:updated", namespaces).not_nil!.content
    )
    author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content
    ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content
    views = entry
      .xpath_node("media:group/media:community/media:statistics", namespaces)
      .try &.["views"]?.try &.to_i64? || 0_i64
    channel_video = videos
      .select(SearchVideo)