forked from Fijxu/invidious
Use XML.parse instead of XML.parse_html
Due to recent changes to libxml2 (between 2.9.14 and 2.10.4, See https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the HTML parser doesn't take into account the namespaces (xmlns). Because HTML shouldn't contain namespaces anyway, there is no reason for use to keep using it. But switching to the XML parser means that we have to pass the namespaces to every single 'xpath_node(s)' method for it to be able to properly navigate the XML structure.
This commit is contained in:
parent
1eb1bae370
commit
ce1fb8d08c
1 changed files with 25 additions and 11 deletions
|
@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool)
|
|||
LOGGER.debug("fetch_channel: #{ucid}")
|
||||
LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}")
|
||||
|
||||
namespaces = {
|
||||
"yt" => "http://www.youtube.com/xml/schemas/2015",
|
||||
"media" => "http://search.yahoo.com/mrss/",
|
||||
"default" => "http://www.w3.org/2005/Atom",
|
||||
}
|
||||
|
||||
LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
|
||||
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
|
||||
LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
|
||||
rss = XML.parse_html(rss)
|
||||
rss = XML.parse(rss)
|
||||
|
||||
author = rss.xpath_node(%q(//feed/title))
|
||||
author = rss.xpath_node("//default:feed/default:title", namespaces)
|
||||
if !author
|
||||
raise InfoException.new("Deleted or invalid channel")
|
||||
end
|
||||
|
@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool)
|
|||
videos, continuation = IV::Channel::Tabs.get_videos(channel)
|
||||
|
||||
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
|
||||
rss.xpath_nodes("//feed/entry").each do |entry|
|
||||
video_id = entry.xpath_node("videoid").not_nil!.content
|
||||
title = entry.xpath_node("title").not_nil!.content
|
||||
published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
|
||||
updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
|
||||
author = entry.xpath_node("author/name").not_nil!.content
|
||||
ucid = entry.xpath_node("channelid").not_nil!.content
|
||||
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
|
||||
views ||= 0_i64
|
||||
rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry|
|
||||
video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content
|
||||
title = entry.xpath_node("default:title", namespaces).not_nil!.content
|
||||
|
||||
published = Time.parse_rfc3339(
|
||||
entry.xpath_node("default:published", namespaces).not_nil!.content
|
||||
)
|
||||
updated = Time.parse_rfc3339(
|
||||
entry.xpath_node("default:updated", namespaces).not_nil!.content
|
||||
)
|
||||
|
||||
author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content
|
||||
ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content
|
||||
|
||||
views = entry
|
||||
.xpath_node("media:group/media:community/media:statistics", namespaces)
|
||||
.try &.["views"]?.try &.to_i64? || 0_i64
|
||||
|
||||
channel_video = videos
|
||||
.select(SearchVideo)
|
||||
|
|
Loading…
Reference in a new issue