From e49aaa021646476b1499f7f612c13d5ba780462f Mon Sep 17 00:00:00 2001 From: Svallinn <41585298+Svallinn@users.noreply.github.com> Date: Wed, 24 Mar 2021 05:15:06 +0000 Subject: [PATCH 1/3] Fix channel search API --- src/invidious/search.cr | 46 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/invidious/search.cr b/src/invidious/search.cr index cf8fd7908..26161fab2 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -231,20 +231,27 @@ end alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist def channel_search(query, page, channel) - response = YT_POOL.client &.get("/channel/#{channel}?hl=en&gl=US") - response = YT_POOL.client &.get("/user/#{channel}?hl=en&gl=US") if response.headers["location"]? - response = YT_POOL.client &.get("/c/#{channel}?hl=en&gl=US") if response.headers["location"]? + response = YT_POOL.client &.get("/channel/#{channel}") - ucid = response.body.match(/\\"channelId\\":\\"(?[^\\]+)\\"/).try &.["ucid"]? + if response.status_code == 404 + response = YT_POOL.client &.get("/user/#{channel}") + response = YT_POOL.client &.get("/c/#{channel}") if response.status_code == 404 + ucid = response.body.match(/HeaderRenderer":\{"channelId":"(?[^\\"]+)"/).try &.["ucid"]? + else + ucid = channel + end - return 0, [] of SearchItem if !ucid + continuation = produce_channel_search_continuation(ucid, query, page) + response_json = request_youtube_api_browse(continuation) - url = produce_channel_search_url(ucid, query, page) - response = YT_POOL.client &.get(url) - initial_data = JSON.parse(response.body).as_a.find &.["response"]? - return 0, [] of SearchItem if !initial_data - author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s - items = extract_items(initial_data.as_h, author, ucid) + result = JSON.parse(response_json.match(/"continuationItems": (?\[.*\]),/m).try &.["items"] || "{}") + return 0, [] of SearchItem if result.size == 0 + + items = [] of SearchItem + result.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each { |item| + extract_item(item["itemSectionRenderer"]["contents"].as_a[0]) + .try { |t| items << t } + } return items.size, items end @@ -361,17 +368,28 @@ def produce_search_params(page = 1, sort : String = "relevance", date : String = return params end -def produce_channel_search_url(ucid, query, page) +def produce_channel_search_continuation(ucid, query, page) + if page <= 1 + idx = 0_i64 + else + idx = 30_i64 * (page - 1) + end + object = { "80226972:embedded" => { "2:string" => ucid, "3:base64" => { "2:string" => "search", + "6:varint" => 1_i64, "7:varint" => 1_i64, - "15:string" => "#{page}", + "12:varint" => 1_i64, + "15:base64" => { + "3:varint" => idx, + }, "23:varint" => 0_i64, }, "11:string" => query, + "35:string" => "browse-feed#{ucid}search", }, } @@ -380,7 +398,7 @@ def produce_channel_search_url(ucid, query, page) .try { |i| Base64.urlsafe_encode(i) } .try { |i| URI.encode_www_form(i) } - return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en" + return continuation end def process_search_query(query, page, user, region) From d652ab9920354517a122a17a5ba241fab5651c1a Mon Sep 17 00:00:00 2001 From: Svallinn <41585298+Svallinn@users.noreply.github.com> Date: Wed, 24 Mar 2021 05:15:30 +0000 Subject: [PATCH 2/3] Modify spec file --- spec/helpers_spec.cr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/helpers_spec.cr b/spec/helpers_spec.cr index a4aaff9fe..06a772b5b 100644 --- a/spec/helpers_spec.cr +++ b/spec/helpers_spec.cr @@ -27,11 +27,11 @@ describe "Helper" do end end - describe "#produce_channel_search_url" do + describe "#produce_channel_search_continuation" do it "correctly produces token for searching a specific channel" do - produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("/browse_ajax?continuation=4qmFsgI2EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0RNVEF3dUFFQVoA&gl=US&hl=en") + produce_channel_search_continuation("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("4qmFsgJqEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWnpaV0Z5WTJnd0FUZ0JZQUY2QkVkS2IxaTRBUUE9WgCaAilicm93c2UtZmVlZFVDWHVxU0JsSEFFNlh3LXllSkEwVHVud3NlYXJjaA%3D%3D") - produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("/browse_ajax?continuation=4qmFsgJ0EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0JNTGdCQUE9PVo-0J_QviDQvtC20LjgpLbgpYHgpKrgpKTgpL_gpLDgpKrgpL_lrZDogIzmmYLgrrjgr43grrHgr4Dgrqngrr8%3D&gl=US&hl=en") + produce_channel_search_continuation("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("4qmFsgKoARIYVUNYdXFTQmxIQUU2WHcteWVKQTBUdW53GiBFZ1p6WldGeVkyZ3dBVGdCWUFGNkJFZEJRVDI0QVFBPVo-0J_QviDQvtC20LjgpLbgpYHgpKrgpKTgpL_gpLDgpKrgpL_lrZDogIzmmYLgrrjgr43grrHgr4Dgrqngrr-aAilicm93c2UtZmVlZFVDWHVxU0JsSEFFNlh3LXllSkEwVHVud3NlYXJjaA%3D%3D") end end From acfa9e8a5525bea7f87429aa51e84cacd2ad641d Mon Sep 17 00:00:00 2001 From: Svallinn <41585298+Svallinn@users.noreply.github.com> Date: Fri, 26 Mar 2021 04:16:50 +0000 Subject: [PATCH 3/3] Parse responses to JSON instead of using regex --- src/invidious/search.cr | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 26161fab2..4b216613a 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -236,7 +236,9 @@ def channel_search(query, page, channel) if response.status_code == 404 response = YT_POOL.client &.get("/user/#{channel}") response = YT_POOL.client &.get("/c/#{channel}") if response.status_code == 404 - ucid = response.body.match(/HeaderRenderer":\{"channelId":"(?[^\\"]+)"/).try &.["ucid"]? + initial_data = extract_initial_data(response.body) + ucid = initial_data["header"]["c4TabbedHeaderRenderer"]?.try &.["channelId"].as_s? + raise InfoException.new("Impossible to extract channel ID from page") if !ucid else ucid = channel end @@ -244,11 +246,14 @@ def channel_search(query, page, channel) continuation = produce_channel_search_continuation(ucid, query, page) response_json = request_youtube_api_browse(continuation) - result = JSON.parse(response_json.match(/"continuationItems": (?\[.*\]),/m).try &.["items"] || "{}") - return 0, [] of SearchItem if result.size == 0 + result = JSON.parse(response_json) + continuationItems = result["onResponseReceivedActions"]? + .try &.[0]["appendContinuationItemsAction"]["continuationItems"] + + return 0, [] of SearchItem if !continuationItems items = [] of SearchItem - result.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each { |item| + continuationItems.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each { |item| extract_item(item["itemSectionRenderer"]["contents"].as_a[0]) .try { |t| items << t } }