|
1 | | -import re |
| 1 | +import collections |
2 | 2 | import copy |
3 | | -import urllib.parse |
4 | | -import os |
| 3 | +import itertools |
| 4 | +import json |
| 5 | +from typing import Iterable, Mapping, Tuple, TypeVar, Union, List |
| 6 | +from urllib.parse import urlencode |
5 | 7 |
|
6 | | -from youtubesearchpython.core.constants import ResultMode |
7 | | -from youtubesearchpython.core.componenthandler import getValue |
| 8 | +from youtubesearchpython.core.componenthandler import getVideoId, getValue |
| 9 | +from youtubesearchpython.core.constants import * |
8 | 10 | from youtubesearchpython.core.requests import RequestCore |
9 | 11 |
|
10 | | -isYtDLPinstalled = False |
| 12 | +K = TypeVar("K") |
| 13 | +T = TypeVar("T") |
11 | 14 |
|
12 | | -try: |
13 | | - from yt_dlp.extractor.youtube import YoutubeIE |
14 | | - from yt_dlp import YoutubeDL |
15 | | - from yt_dlp.utils import url_or_none, try_get, update_url_query, ExtractorError |
16 | 15 |
|
17 | | - isYtDLPinstalled = True |
18 | | -except ImportError: |
19 | | - pass |
20 | | - |
21 | | - |
22 | | -class StreamURLFetcherCore(RequestCore): |
23 | | - def __init__(self, proxy: str = None, cookies_file: str = None): |
24 | | - if not isYtDLPinstalled: |
25 | | - raise Exception('ERROR: yt-dlp is not installed. Install with: pip install yt-dlp') |
class CommentsCore(RequestCore):
    """Fetch and parse the comment threads of a YouTube video via the
    InnerTube ``/youtubei/v1/next`` endpoint.

    Workflow:
      1. A "continuation" request (watch-page payload with ``videoId``)
         yields the first comments continuation token.
      2. A "comments" request carrying that token returns one page of
         comment threads; each page also carries the token for the next
         page (picked up in ``__getComponents``).

    Parsed comments accumulate across pages in
    ``self.commentsComponent["result"]``.
    """

    # Class-level defaults; instances shadow these once populated.
    result = None
    continuationKey = None   # token for the next comments page (None when exhausted)
    isNextRequest = False    # True once the first page has been fetched
    response = None          # last raw HTTP response object

    def __init__(self, videoLink: str):
        super().__init__()
        self.commentsComponent = {"result": []}
        self.responseSource = None
        self.videoLink = videoLink

    def _build_base_payload(self) -> dict:
        """Return a fresh request payload with the InnerTube client context
        filled in.  Raises if the module-level API key is missing.

        NOTE(review): ``searchKey``/``requestPayload`` come from
        ``youtubesearchpython.core.constants`` via the star import.
        """
        if not searchKey:
            raise Exception("INNERTUBE API key (searchKey) is not set.")
        payload = copy.deepcopy(requestPayload)
        client = payload.setdefault("context", {}).setdefault("client", {})
        # Plain setdefault suffices: it only fills the key when absent.
        client.setdefault("clientName", "WEB")
        client.setdefault("clientVersion", "2.20210820.01.00")
        return payload

    def prepare_continuation_request(self):
        """Build the request that resolves the video's first comments
        continuation token."""
        self.data = self._build_base_payload()
        self.data["videoId"] = getVideoId(self.videoLink)
        self.url = f"https://www.youtube.com/youtubei/v1/next?{urlencode({'key': searchKey})}"

    def prepare_comments_request(self):
        """Build the request that fetches one page of comments using the
        current ``continuationKey``."""
        self.data = self._build_base_payload()
        self.data["continuation"] = self.continuationKey
        self.url = f"https://www.youtube.com/youtubei/v1/next?{urlencode({'key': searchKey})}"

    def parse_source(self):
        """Extract the list of comment-thread items from the last response.

        The first page arrives as a ``reloadContinuationItemsCommand`` at
        endpoint index 1; subsequent pages arrive as an
        ``appendContinuationItemsAction`` at index 0.
        """
        data = self._safe_load_response(self.response)
        idx = 0 if self.isNextRequest else 1
        path = [
            "onResponseReceivedEndpoints",
            idx,
            "appendContinuationItemsAction" if self.isNextRequest else "reloadContinuationItemsCommand",
            "continuationItems",
        ]
        self.responseSource = getValue(data, path)

    def parse_continuation_source(self):
        """Pull the initial comments continuation token out of the
        watch-page response (last item-section of the results column)."""
        data = self._safe_load_response(self.response)
        self.continuationKey = getValue(
            data,
            [
                "contents",
                "twoColumnWatchNextResults",
                "results",
                "results",
                "contents",
                -1,
                "itemSectionRenderer",
                "contents",
                0,
                "continuationItemRenderer",
                "continuationEndpoint",
                "continuationCommand",
                "token",
            ],
        )

    def sync_make_comment_request(self):
        """Synchronously fetch and parse one page of comments.
        Non-200 responses are silently ignored (best effort)."""
        self.prepare_comments_request()
        self.response = self.syncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_source()

    def sync_make_continuation_request(self):
        """Synchronously resolve the first continuation token.

        Raises:
            Exception: on a non-200 response or a missing token.
        """
        self.prepare_continuation_request()
        self.response = self.syncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_continuation_source()
            if not self.continuationKey:
                raise Exception("Could not retrieve continuation token")
        else:
            raise Exception("Status code is not 200")

    async def async_make_comment_request(self):
        """Async counterpart of ``sync_make_comment_request``."""
        self.prepare_comments_request()
        self.response = await self.asyncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_source()

    async def async_make_continuation_request(self):
        """Async counterpart of ``sync_make_continuation_request``.

        Raises:
            Exception: on a non-200 response or a missing token.
        """
        self.prepare_continuation_request()
        self.response = await self.asyncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_continuation_source()
            if not self.continuationKey:
                raise Exception("Could not retrieve continuation token")
        else:
            raise Exception("Status code is not 200")

    def sync_create(self):
        """Fetch the first page of comments (blocking)."""
        self.sync_make_continuation_request()
        self.sync_make_comment_request()
        self.__getComponents()

    def sync_create_next(self):
        """Fetch the next page of comments (blocking)."""
        self.isNextRequest = True
        self.sync_make_comment_request()
        self.__getComponents()

    async def async_create(self):
        """Fetch the first page of comments (async)."""
        await self.async_make_continuation_request()
        await self.async_make_comment_request()
        self.__getComponents()

    async def async_create_next(self):
        """Fetch the next page of comments (async)."""
        self.isNextRequest = True
        await self.async_make_comment_request()
        self.__getComponents()

    def __getComponents(self) -> None:
        """Convert ``responseSource`` items into plain comment dicts,
        append them to the accumulated result, and capture the token for
        the following page (``None`` when there is no further page)."""
        comments = []
        src = self.responseSource or []
        for comment in src:
            comment = getValue(comment, ["commentThreadRenderer", "comment", "commentRenderer"])
            try:
                j = {
                    "id": self.__getValue(comment, ["commentId"]),
                    "author": {
                        "id": self.__getValue(comment, ["authorEndpoint", "browseEndpoint", "browseId"]),
                        "name": self.__getValue(comment, ["authorText", "simpleText"]),
                        "thumbnails": self.__getValue(comment, ["authorThumbnail", "thumbnails"]),
                    },
                    "content": self.__getValue(comment, ["contentText", "runs", 0, "text"]),
                    "published": self.__getValue(comment, ["publishedTimeText", "runs", 0, "text"]),
                    "isLiked": self.__getValue(comment, ["isLiked"]),
                    "authorIsChannelOwner": self.__getValue(comment, ["authorIsChannelOwner"]),
                    "voteStatus": self.__getValue(comment, ["voteStatus"]),
                    "votes": {
                        "simpleText": self.__getValue(comment, ["voteCount", "simpleText"]),
                        "label": self.__getValue(comment, ["voteCount", "accessibility", "accessibilityData", "label"]),
                    },
                    "replyCount": self.__getValue(comment, ["replyCount"]),
                }
                comments.append(j)
            except Exception:
                # Best effort: skip malformed renderers rather than abort the page.
                pass

        self.commentsComponent["result"].extend(comments)
        self.continuationKey = self.__getValue(
            self.responseSource or [],
            [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"],
        )

    def __result(self, mode: int) -> Union[dict, str]:
        """Return the accumulated comments as a dict or a JSON string,
        depending on ``mode`` (``ResultMode.dict`` / ``ResultMode.json``)."""
        if mode == ResultMode.dict:
            return self.commentsComponent
        elif mode == ResultMode.json:
            return json.dumps(self.commentsComponent, indent=4)

    def __getValue(self, source: Union[dict, list, None], path: Iterable[Union[str, int]]) -> Union[str, int, dict, None]:
        """Walk ``path`` (str keys into dicts, int indices into lists —
        negatives allowed) through ``source``; return ``None`` on any miss
        instead of raising."""
        value = source
        for key in path:
            if value is None:
                return None
            if isinstance(key, str):
                if isinstance(value, dict) and key in value:
                    value = value[key]
                else:
                    return None
            elif isinstance(key, int):
                # BUGFIX: the previous check `len(value) > abs(key)` rejected
                # valid negative indices (e.g. -1 on a one-element list), which
                # silently dropped the next-page continuation token whenever a
                # page contained only the continuation renderer.  Accept any
                # in-range index, negative or positive.
                if isinstance(value, list) and -len(value) <= key < len(value):
                    value = value[key]
                else:
                    return None
            else:
                return None
        return value

    def __getAllWithKey(self, source: Iterable[Mapping[K, T]], key: K) -> Iterable[T]:
        """Yield ``item[key]`` for every mapping in ``source`` that has ``key``."""
        for item in source or []:
            if key in item:
                yield item[key]

    def __getValueEx(self, source: dict, path: List[Union[str, None]]) -> Iterable[Union[str, int, dict, None]]:
        """Recursively yield every value reachable via ``path``; a ``None``
        path element fans out over all items carrying the following key."""
        if len(path) <= 0:
            yield source
            return
        key = path[0]
        upcoming = path[1:]
        if key is None:
            if not upcoming:
                raise Exception("Invalid path")
            following_key = upcoming[0]
            upcoming = upcoming[1:]
            for val in self.__getAllWithKey(source or [], following_key):
                yield from self.__getValueEx(val, path=upcoming)
        else:
            val = self.__getValue(source, [key])
            if val is None:
                return
            yield from self.__getValueEx(val, path=upcoming)

    def __getFirstValue(self, source: dict, path: Iterable[Union[str, None]]) -> Union[str, int, dict, None]:
        """Return the first non-None value produced by ``__getValueEx``,
        or ``None`` when the path matches nothing."""
        values = self.__getValueEx(source or {}, list(path))
        for val in values:
            if val is not None:
                return val
        return None

    def _safe_load_response(self, response):
        """Best-effort JSON decode of an HTTP response object, str, or
        bytes.  Always returns a decoded object or ``{}`` — never raises.
        """
        try:
            if hasattr(response, "json"):
                return response.json()
            if hasattr(response, "text"):
                return json.loads(response.text)
            if isinstance(response, (str, bytes)):
                return json.loads(response)
        except Exception:
            return {}
        # BUGFIX: unrecognized response types previously fell through and
        # returned None implicitly; return {} for a consistent contract.
        return {}
0 commit comments