extractors: separate 'extract' and 'parse' logic

This commit is contained in:
Samantaz Fox 2022-11-10 23:32:51 +01:00
parent 9588fcb5d1
commit bdc51cd20f
No known key found for this signature in database
GPG key ID: F42821059186176E
3 changed files with 33 additions and 25 deletions

View file

@ -8,7 +8,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
items = [] of SearchItem items = [] of SearchItem
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item| continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
extract_item(item, author, ucid).try { |t| items << t } parse_item(item, author, ucid).try { |t| items << t }
} }
continuation = continuation_items.as_a.last["continuationItemRenderer"]? continuation = continuation_items.as_a.last["continuationItemRenderer"]?

View file

@ -37,7 +37,7 @@ module Invidious::Search
items = [] of SearchItem items = [] of SearchItem
continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item| continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
extract_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t } parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
end end
return items return items

View file

@ -20,6 +20,8 @@ private ITEM_PARSERS = {
Parsers::ReelItemRendererParser, Parsers::ReelItemRendererParser,
} }
private alias InitialData = Hash(String, JSON::Any)
record AuthorFallback, name : String, id : String record AuthorFallback, name : String, id : String
# Namespace for logic relating to parsing InnerTube data into various datastructs. # Namespace for logic relating to parsing InnerTube data into various datastructs.
@ -348,7 +350,7 @@ private module Parsers
raw_contents = content_container["items"]?.try &.as_a raw_contents = content_container["items"]?.try &.as_a
if !raw_contents.nil? if !raw_contents.nil?
raw_contents.each do |item| raw_contents.each do |item|
result = extract_item(item) result = parse_item(item)
if !result.nil? if !result.nil?
contents << result contents << result
end end
@ -510,7 +512,7 @@ private module Extractors
# }] # }]
# #
module YouTubeTabs module YouTubeTabs
def self.process(initial_data : Hash(String, JSON::Any)) def self.process(initial_data : InitialData)
if target = initial_data["twoColumnBrowseResultsRenderer"]? if target = initial_data["twoColumnBrowseResultsRenderer"]?
self.extract(target) self.extract(target)
end end
@ -575,7 +577,7 @@ private module Extractors
# } # }
# #
module SearchResults module SearchResults
def self.process(initial_data : Hash(String, JSON::Any)) def self.process(initial_data : InitialData)
if target = initial_data["twoColumnSearchResultsRenderer"]? if target = initial_data["twoColumnSearchResultsRenderer"]?
self.extract(target) self.extract(target)
end end
@ -608,8 +610,8 @@ private module Extractors
# The way they are structured is too varied to be accurately written down here. # The way they are structured is too varied to be accurately written down here.
# However, they all eventually lead to an array of parsable items after traversing # However, they all eventually lead to an array of parsable items after traversing
# through the JSON structure. # through the JSON structure.
module Continuation module ContinuationContent
def self.process(initial_data : Hash(String, JSON::Any)) def self.process(initial_data : InitialData)
if target = initial_data["continuationContents"]? if target = initial_data["continuationContents"]?
self.extract(target) self.extract(target)
elsif target = initial_data["appendContinuationItemsAction"]? elsif target = initial_data["appendContinuationItemsAction"]?
@ -691,8 +693,7 @@ end
# Parses an item from Youtube's JSON response into a more usable structure. # Parses an item from Youtube's JSON response into a more usable structure.
# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel. # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
def extract_item(item : JSON::Any, author_fallback : String? = "", def parse_item(item : JSON::Any, author_fallback : String? = "", author_id_fallback : String? = "")
author_id_fallback : String? = "")
# We "allow" nil values but secretly use empty strings instead. This is to save us the # We "allow" nil values but secretly use empty strings instead. This is to save us the
# hassle of modifying every author_fallback and author_id_fallback arg usage # hassle of modifying every author_fallback and author_id_fallback arg usage
# which is more often than not nil. # which is more often than not nil.
@ -702,24 +703,23 @@ def extract_item(item : JSON::Any, author_fallback : String? = "",
# Each parser automatically validates the data given to see if the data is # Each parser automatically validates the data given to see if the data is
# applicable to itself. If not nil is returned and the next parser is attempted. # applicable to itself. If not nil is returned and the next parser is attempted.
ITEM_PARSERS.each do |parser| ITEM_PARSERS.each do |parser|
LOGGER.trace("extract_item: Attempting to parse item using \"#{parser.parser_name}\" (cycling...)") LOGGER.trace("parse_item: Attempting to parse item using \"#{parser.parser_name}\" (cycling...)")
if result = parser.process(item, author_fallback) if result = parser.process(item, author_fallback)
LOGGER.debug("extract_item: Successfully parsed via #{parser.parser_name}") LOGGER.debug("parse_item: Successfully parsed via #{parser.parser_name}")
return result return result
else else
LOGGER.trace("extract_item: Parser \"#{parser.parser_name}\" does not apply. Cycling to the next one...") LOGGER.trace("parse_item: Parser \"#{parser.parser_name}\" does not apply. Cycling to the next one...")
end end
end end
end end
# Parses multiple items from YouTube's initial JSON response into a more usable structure. # Parses multiple items from YouTube's initial JSON response into a more usable structure.
# The end result is an array of SearchItem. # The end result is an array of SearchItem.
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, #
author_id_fallback : String? = nil) : Array(SearchItem) # This function yields the container so that items can be parsed separately.
items = [] of SearchItem #
def extract_items(initial_data : InitialData, &block)
if unpackaged_data = initial_data["contents"]?.try &.as_h if unpackaged_data = initial_data["contents"]?.try &.as_h
elsif unpackaged_data = initial_data["response"]?.try &.as_h elsif unpackaged_data = initial_data["response"]?.try &.as_h
elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h
@ -727,24 +727,32 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
unpackaged_data = initial_data unpackaged_data = initial_data
end end
# This is identical to the parser cycling of extract_item(). # This is identical to the parser cycling of parse_item().
ITEM_CONTAINER_EXTRACTOR.each do |extractor| ITEM_CONTAINER_EXTRACTOR.each do |extractor|
LOGGER.trace("extract_items: Attempting to extract item container using \"#{extractor.extractor_name}\" (cycling...)") LOGGER.trace("extract_items: Attempting to extract item container using \"#{extractor.extractor_name}\" (cycling...)")
if container = extractor.process(unpackaged_data) if container = extractor.process(unpackaged_data)
LOGGER.debug("extract_items: Successfully unpacked container with \"#{extractor.extractor_name}\"") LOGGER.debug("extract_items: Successfully unpacked container with \"#{extractor.extractor_name}\"")
# Extract items in container # Extract items in container
container.each do |item| container.each { |item| yield item }
if parsed_result = extract_item(item, author_fallback, author_id_fallback)
items << parsed_result
end
end
break
else else
LOGGER.trace("extract_items: Extractor \"#{extractor.extractor_name}\" does not apply. Cycling to the next one...") LOGGER.trace("extract_items: Extractor \"#{extractor.extractor_name}\" does not apply. Cycling to the next one...")
end end
end end
end
# Wrapper using the block function above
def extract_items(
initial_data : InitialData,
author_fallback : String? = nil,
author_id_fallback : String? = nil
) : Array(SearchItem)
items = [] of SearchItem
extract_items(initial_data) do |item|
parsed = parse_item(item, author_fallback, author_id_fallback)
items << parsed if !parsed.nil?
end
return items return items
end end