Use XML.parse instead of XML.parse_html

Due to recent changes to libxml2 (between 2.9.14 and 2.10.4;
see https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the
HTML parser doesn't take into account the namespaces (xmlns).

Because HTML shouldn't contain namespaces anyway, there is no
reason for us to keep using it. But switching to the XML
parser means that we have to pass the namespaces to every
single 'xpath_node(s)' method for it to be able to properly
navigate the XML structure.
このコミットが含まれているのは:
Samantaz Fox 2023-05-08 00:53:08 +02:00
コミット ce1fb8d08c
この署名に対応する既知のキーがデータベースに存在しません
GPGキーID: F42821059186176E
1個のファイルの変更25行の追加11行の削除

ファイルの表示

@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.debug("fetch_channel: #{ucid}") LOGGER.debug("fetch_channel: #{ucid}")
LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}") LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}")
namespaces = {
"yt" => "http://www.youtube.com/xml/schemas/2015",
"media" => "http://search.yahoo.com/mrss/",
"default" => "http://www.w3.org/2005/Atom",
}
LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
rss = XML.parse_html(rss) rss = XML.parse(rss)
author = rss.xpath_node(%q(//feed/title)) author = rss.xpath_node("//default:feed/default:title", namespaces)
if !author if !author
raise InfoException.new("Deleted or invalid channel") raise InfoException.new("Deleted or invalid channel")
end end
@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool)
videos, continuation = IV::Channel::Tabs.get_videos(channel) videos, continuation = IV::Channel::Tabs.get_videos(channel)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry| rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry|
video_id = entry.xpath_node("videoid").not_nil!.content video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content
title = entry.xpath_node("title").not_nil!.content title = entry.xpath_node("default:title", namespaces).not_nil!.content
published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content) published = Time.parse_rfc3339(
author = entry.xpath_node("author/name").not_nil!.content entry.xpath_node("default:published", namespaces).not_nil!.content
ucid = entry.xpath_node("channelid").not_nil!.content )
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64? updated = Time.parse_rfc3339(
views ||= 0_i64 entry.xpath_node("default:updated", namespaces).not_nil!.content
)
author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content
ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content
views = entry
.xpath_node("media:group/media:community/media:statistics", namespaces)
.try &.["views"]?.try &.to_i64? || 0_i64
channel_video = videos channel_video = videos
.select(SearchVideo) .select(SearchVideo)