Use new YouTube API to fetch channel videos (#1355)
* Use new API to fetch videos from channels

  This mirrors the process used by subscriptions.gir.st. The old API is tried first, and if it fails, the new one is used.

* Use the new API whenever getting videos from a channel

  I created the get_channel_videos_response function because getting the API response for channel videos now involves extra steps beyond fetching a single URL, and these steps shouldn't be repeated throughout the code. The only remaining exception is the bypass_captcha function, which still makes a request only to the old API. I don't know whether that code needs to be updated to use the new API for captcha bypassing to work correctly.

* Correctly determine video length with new API

* Remove unnecessary line
This commit is contained in:
parent
13f58d602f
commit
4a6e920d0e
2 changed files with 204 additions and 156 deletions
|
@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
|
|||
|
||||
page = 1
|
||||
|
||||
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
|
||||
response = YT_POOL.client &.get(url)
|
||||
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
|
||||
|
||||
videos = [] of SearchVideo
|
||||
begin
|
||||
|
@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
|
|||
ids = [] of String
|
||||
|
||||
loop do
|
||||
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
|
||||
response = YT_POOL.client &.get(url)
|
||||
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
|
||||
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
|
||||
raise "Could not extract JSON" if !initial_data
|
||||
videos = extract_videos(initial_data.as_h, author, ucid)
|
||||
|
@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
|
|||
return items, continuation
|
||||
end
|
||||
|
||||
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
|
||||
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
|
||||
object = {
|
||||
"80226972:embedded" => {
|
||||
"2:string" => ucid,
|
||||
|
@ -411,6 +409,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
|
|||
},
|
||||
}
|
||||
|
||||
if !v2
|
||||
if auto_generated
|
||||
seed = Time.unix(1525757349)
|
||||
until seed >= Time.utc
|
||||
|
@ -424,6 +423,20 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
|
|||
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
|
||||
end
|
||||
else
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
|
||||
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
|
||||
"1:embedded" => {
|
||||
"1:varint" => 6307666885028338688_i64,
|
||||
"2:embedded" => {
|
||||
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
|
||||
"1:varint" => 30_i64 * (page - 1),
|
||||
}))),
|
||||
},
|
||||
},
|
||||
})))
|
||||
end
|
||||
|
||||
case sort_by
|
||||
when "newest"
|
||||
|
@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
|
|||
})
|
||||
end
|
||||
|
||||
# Fetches one page of a channel's videos and returns the raw HTTP response.
#
# The URL is first built with `v2: false` and requested through YT_POOL. If the
# parsed body contains an alert whose `alertRenderer.type` equals "ERROR", the
# URL is rebuilt with `v2: true` and requested again; that second response is
# what gets returned.
#
# Parameters are forwarded unchanged to `produce_channel_videos_url`.
# Callers are expected to parse `response.body` themselves.
# NOTE(review): `JSON.parse(...).as_a` presumably raises if the body is not a
# JSON array — confirm that callers handle this.
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
|
||||
# First attempt: URL built with v2 disabled.
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
|
||||
response = YT_POOL.client &.get(url)
|
||||
# The body is treated as a JSON array; pick the first element that has a "response" key.
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
|
||||
# No element with a "response" key: nothing to inspect, return the first response as-is.
return response if !initial_data
|
||||
# Truthy when any entry under response.alerts has alertRenderer.type == "ERROR";
# nil/false when there are no alerts or none of them is an error.
needs_v2 = initial_data
|
||||
.try &.["response"]?.try &.["alerts"]?
|
||||
.try &.as_a.any? { |alert|
|
||||
alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
|
||||
}
|
||||
# Error alert seen: retry the same page with the v2 URL and return that response instead.
if needs_v2
|
||||
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
|
||||
response = YT_POOL.client &.get(url)
|
||||
end
|
||||
response
|
||||
end
|
||||
|
||||
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
|
||||
videos = [] of SearchVideo
|
||||
|
||||
2.times do |i|
|
||||
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
|
||||
response = YT_POOL.client &.get(url)
|
||||
response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
|
||||
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
|
||||
break if !initial_data
|
||||
videos.concat extract_videos(initial_data.as_h, author, ucid)
|
||||
|
@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
|
|||
end
|
||||
|
||||
def get_latest_videos(ucid)
|
||||
url = produce_channel_videos_url(ucid, 0)
|
||||
response = YT_POOL.client &.get(url)
|
||||
response = get_channel_videos_response(ucid, 1)
|
||||
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
|
||||
return [] of SearchVideo if !initial_data
|
||||
author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
|
||||
|
|
|
@ -164,20 +164,8 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
|
|||
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
|
||||
end
|
||||
|
||||
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
|
||||
items = [] of SearchItem
|
||||
|
||||
initial_data.try { |t| t["contents"]? || t["response"]? }
|
||||
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
|
||||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
|
||||
t["continuationContents"]? }
|
||||
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
|
||||
.try &.["contents"].as_a
|
||||
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
|
||||
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
|
||||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
|
||||
.each { |item|
|
||||
if i = item["videoRenderer"]?
|
||||
def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
|
||||
if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
|
||||
video_id = i["videoId"].as_s
|
||||
title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
|
||||
|
||||
|
@ -188,7 +176,9 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
|
||||
view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
|
||||
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
|
||||
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
|
||||
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
|
||||
i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
|
||||
.try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
|
||||
|
||||
live_now = false
|
||||
paid = false
|
||||
|
@ -212,7 +202,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
end
|
||||
end
|
||||
|
||||
items << SearchVideo.new({
|
||||
SearchVideo.new({
|
||||
title: title,
|
||||
id: video_id,
|
||||
author: author,
|
||||
|
@ -238,7 +228,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
|
||||
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
|
||||
|
||||
items << SearchChannel.new({
|
||||
SearchChannel.new({
|
||||
author: author,
|
||||
ucid: author_id,
|
||||
author_thumbnail: author_thumbnail,
|
||||
|
@ -254,7 +244,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
|
||||
playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
|
||||
|
||||
items << SearchPlaylist.new({
|
||||
SearchPlaylist.new({
|
||||
title: title,
|
||||
id: plid,
|
||||
author: author_fallback || "",
|
||||
|
@ -288,7 +278,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
|
||||
# TODO: i["publishedTimeText"]?
|
||||
|
||||
items << SearchPlaylist.new({
|
||||
SearchPlaylist.new({
|
||||
title: title,
|
||||
id: plid,
|
||||
author: author,
|
||||
|
@ -305,7 +295,37 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||
elsif i = item["horizontalCardListRenderer"]?
|
||||
elsif i = item["searchPyvRenderer"]? # Ad
|
||||
end
|
||||
end
|
||||
|
||||
# Collects every item that `extract_item` can build from `initial_data` and
# returns them as an array of SearchItem.
#
# Two layouts are handled:
#   * response.continuationContents.gridContinuation.items — iterated directly;
#   * otherwise the section-list layout reached through "contents" or
#     "response" (browse tabs, search results, or continuation contents).
#
# `author_fallback` and `author_id_fallback` are passed straight through to
# `extract_item`. Items for which `extract_item` returns nil are skipped.
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
|
||||
items = [] of SearchItem
|
||||
|
||||
# Probe for the grid-continuation layout; nil if any key along the path is missing.
channel_v2_response = initial_data
|
||||
.try &.["response"]?
|
||||
.try &.["continuationContents"]?
|
||||
.try &.["gridContinuation"]?
|
||||
.try &.["items"]?
|
||||
|
||||
if channel_v2_response
|
||||
# Grid-continuation layout: each entry is handed to extract_item as-is.
channel_v2_response.try &.as_a.each { |item|
|
||||
extract_item(item, author_fallback, author_id_fallback)
|
||||
.try { |t| items << t }
|
||||
}
|
||||
else
|
||||
# Section-list layout: take the selected browse tab's content, the search
# primaryContents, or continuationContents; then walk each itemSectionRenderer.
# Within a section, the first element's shelfRenderer (expanded shelf items)
# or gridRenderer items are used if present, else the section contents themselves.
initial_data.try { |t| t["contents"]? || t["response"]? }
|
||||
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
|
||||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
|
||||
t["continuationContents"]? }
|
||||
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
|
||||
.try &.["contents"].as_a
|
||||
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
|
||||
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
|
||||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
|
||||
.each { |item|
|
||||
extract_item(item, author_fallback, author_id_fallback)
|
||||
.try { |t| items << t }
|
||||
} }
|
||||
end
|
||||
|
||||
items
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue