Skip to content

Commit 3d2a889

Browse files
authored
Update comments.py
1 parent f0513a8 commit 3d2a889

1 file changed

Lines changed: 220 additions & 155 deletions

File tree

Lines changed: 220 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -1,168 +1,233 @@
1-
import re
1+
import collections
22
import copy
3-
import urllib.parse
4-
import os
3+
import itertools
4+
import json
5+
from typing import Iterable, Mapping, Tuple, TypeVar, Union, List
6+
from urllib.parse import urlencode
57

6-
from youtubesearchpython.core.constants import ResultMode
7-
from youtubesearchpython.core.componenthandler import getValue
8+
from youtubesearchpython.core.componenthandler import getVideoId, getValue
9+
from youtubesearchpython.core.constants import *
810
from youtubesearchpython.core.requests import RequestCore
911

10-
isYtDLPinstalled = False
12+
K = TypeVar("K")
13+
T = TypeVar("T")
1114

12-
try:
13-
from yt_dlp.extractor.youtube import YoutubeIE
14-
from yt_dlp import YoutubeDL
15-
from yt_dlp.utils import url_or_none, try_get, update_url_query, ExtractorError
1615

17-
isYtDLPinstalled = True
18-
except ImportError:
19-
pass
20-
21-
22-
class StreamURLFetcherCore(RequestCore):
23-
def __init__(self, proxy: str = None, cookies_file: str = None):
24-
if not isYtDLPinstalled:
25-
raise Exception('ERROR: yt-dlp is not installed. Install with: pip install yt-dlp')
16+
class CommentsCore(RequestCore):
17+
result = None
18+
continuationKey = None
19+
isNextRequest = False
20+
response = None
2621

22+
def __init__(self, videoLink: str):
    """Create an empty comments container for one video.

    Args:
        videoLink: Value later handed to ``getVideoId`` when the
            continuation request is prepared.
    """
    super().__init__()
    self.videoLink = videoLink
    # Raw continuation items of the most recent comments response.
    self.responseSource = None
    # Accumulates parsed comments across all pages.
    self.commentsComponent = {"result": []}
27+
28+
def prepare_continuation_request(self):
    """Build the payload and URL for the initial ``next`` request.

    Copies the ``requestPayload`` template, ensures a client name and
    version are present, and sets ``videoId`` from ``self.videoLink``.
    The results are stored in ``self.data`` and ``self.url``.

    Raises:
        Exception: If ``searchKey`` is empty or unset.
    """
    if not searchKey:
        raise Exception("INNERTUBE API key (searchKey) is not set.")

    self.data = payload = copy.deepcopy(requestPayload)
    client_info = payload.setdefault("context", {}).setdefault("client", {})
    # setdefault only writes when the key is missing, so existing values win.
    client_info.setdefault("clientName", "WEB")
    client_info.setdefault("clientVersion", "2.20210820.01.00")
    payload["videoId"] = getVideoId(self.videoLink)

    query = urlencode({'key': searchKey})
    self.url = f"https://www.youtube.com/youtubei/v1/next?{query}"
38+
39+
def prepare_comments_request(self):
    """Build the payload and URL for a comments-page request.

    Copies the ``requestPayload`` template, ensures a client name and
    version are present, and sets ``continuation`` to the current
    ``self.continuationKey``. The results are stored in ``self.data`` and
    ``self.url``.

    Raises:
        Exception: If ``searchKey`` is empty or unset.
    """
    if not searchKey:
        raise Exception("INNERTUBE API key (searchKey) is not set.")

    self.data = payload = copy.deepcopy(requestPayload)
    client_info = payload.setdefault("context", {}).setdefault("client", {})
    # setdefault only writes when the key is missing, so existing values win.
    client_info.setdefault("clientName", "WEB")
    client_info.setdefault("clientVersion", "2.20210820.01.00")
    payload["continuation"] = self.continuationKey

    query = urlencode({'key': searchKey})
    self.url = f"https://www.youtube.com/youtubei/v1/next?{query}"
49+
50+
def parse_source(self):
    """Store the continuation items of the latest response.

    Follow-up requests (``isNextRequest``) read endpoint 0 under
    ``appendContinuationItemsAction``; the first request reads endpoint 1
    under ``reloadContinuationItemsCommand``. The extracted value is saved
    in ``self.responseSource``.
    """
    payload = self._safe_load_response(self.response)
    if self.isNextRequest:
        endpoint_index = 0
        action = "appendContinuationItemsAction"
    else:
        endpoint_index = 1
        action = "reloadContinuationItemsCommand"
    self.responseSource = getValue(
        payload,
        ["onResponseReceivedEndpoints", endpoint_index, action, "continuationItems"],
    )
60+
61+
def parse_continuation_source(self):
    """Read the comments continuation token from the latest response.

    The token is looked up in the last entry of the watch-next results and
    stored in ``self.continuationKey``.
    """
    token_path = [
        "contents",
        "twoColumnWatchNextResults",
        "results",
        "results",
        "contents",
        -1,  # last section of the results list
        "itemSectionRenderer",
        "contents",
        0,
        "continuationItemRenderer",
        "continuationEndpoint",
        "continuationCommand",
        "token",
    ]
    payload = self._safe_load_response(self.response)
    self.continuationKey = getValue(payload, token_path)
81+
82+
def sync_make_comment_request(self):
    """Request one page of comments synchronously and parse it on success."""
    self.prepare_comments_request()
    self.response = self.syncPostRequest()
    # NOTE(review): any non-200 response is ignored here, which leaves
    # self.responseSource at whatever the previous request stored.
    if getattr(self.response, "status_code", None) == 200:
        self.parse_source()
87+
88+
def sync_make_continuation_request(self):
    """Fetch the initial continuation token synchronously.

    Raises:
        Exception: If the response status is not 200, or if no
            continuation token could be extracted from the response.
    """
    self.prepare_continuation_request()
    self.response = self.syncPostRequest()
    if getattr(self.response, "status_code", None) != 200:
        raise Exception("Status code is not 200")

    self.parse_continuation_source()
    if not self.continuationKey:
        raise Exception("Could not retrieve continuation token")
97+
98+
async def async_make_comment_request(self):
    """Request one page of comments asynchronously and parse it on success."""
    self.prepare_comments_request()
    self.response = await self.asyncPostRequest()
    # NOTE(review): any non-200 response is ignored here, which leaves
    # self.responseSource at whatever the previous request stored.
    if getattr(self.response, "status_code", None) == 200:
        self.parse_source()
103+
104+
async def async_make_continuation_request(self):
    """Fetch the initial continuation token asynchronously.

    Raises:
        Exception: If the response status is not 200, or if no
            continuation token could be extracted from the response.
    """
    self.prepare_continuation_request()
    self.response = await self.asyncPostRequest()
    if getattr(self.response, "status_code", None) != 200:
        raise Exception("Status code is not 200")

    self.parse_continuation_source()
    if not self.continuationKey:
        raise Exception("Could not retrieve continuation token")
113+
114+
def sync_create(self):
    """Load the first page of comments synchronously.

    Obtains the continuation token, requests the comments with it, and
    appends the parsed comments to ``self.commentsComponent``.
    """
    self.sync_make_continuation_request()
    self.sync_make_comment_request()
    self.__getComponents()
118+
119+
def sync_create_next(self):
    """Load the next page of comments synchronously.

    Marks the instance as being in follow-up mode, requests the page for
    the current ``continuationKey``, and appends the parsed comments.
    """
    self.isNextRequest = True
    self.sync_make_comment_request()
    self.__getComponents()
123+
124+
async def async_create(self):
    """Load the first page of comments asynchronously.

    Obtains the continuation token, requests the comments with it, and
    appends the parsed comments to ``self.commentsComponent``.
    """
    await self.async_make_continuation_request()
    await self.async_make_comment_request()
    self.__getComponents()
128+
129+
async def async_create_next(self):
    """Load the next page of comments asynchronously.

    Marks the instance as being in follow-up mode, requests the page for
    the current ``continuationKey``, and appends the parsed comments.
    """
    self.isNextRequest = True
    await self.async_make_comment_request()
    self.__getComponents()
133+
134+
def __getComponents(self) -> None:
    """Parse ``self.responseSource`` into comment dicts.

    Each item holding a ``commentThreadRenderer`` is converted into a flat
    dict and appended to ``self.commentsComponent["result"]``. Afterwards
    ``self.continuationKey`` is refreshed from the last item of the source
    (``None`` when there is no further page).

    Items without a comment renderer are skipped. The continuation token
    is read from a trailing ``continuationItemRenderer`` entry of the same
    list, and such an entry would otherwise be emitted as a comment whose
    fields are all ``None``.
    """
    source = self.responseSource or []
    comments = []
    for item in source:
        renderer = getValue(item, ["commentThreadRenderer", "comment", "commentRenderer"])
        # presumably getValue yields None for a missing path; an empty or
        # non-dict result is treated the same way — verify against helper.
        if not isinstance(renderer, dict) or not renderer:
            continue
        # self.__getValue never raises on missing keys, so absent fields
        # simply come back as None.
        comments.append(
            {
                "id": self.__getValue(renderer, ["commentId"]),
                "author": {
                    "id": self.__getValue(renderer, ["authorEndpoint", "browseEndpoint", "browseId"]),
                    "name": self.__getValue(renderer, ["authorText", "simpleText"]),
                    "thumbnails": self.__getValue(renderer, ["authorThumbnail", "thumbnails"]),
                },
                "content": self.__getValue(renderer, ["contentText", "runs", 0, "text"]),
                "published": self.__getValue(renderer, ["publishedTimeText", "runs", 0, "text"]),
                "isLiked": self.__getValue(renderer, ["isLiked"]),
                "authorIsChannelOwner": self.__getValue(renderer, ["authorIsChannelOwner"]),
                "voteStatus": self.__getValue(renderer, ["voteStatus"]),
                "votes": {
                    "simpleText": self.__getValue(renderer, ["voteCount", "simpleText"]),
                    "label": self.__getValue(
                        renderer, ["voteCount", "accessibility", "accessibilityData", "label"]
                    ),
                },
                "replyCount": self.__getValue(renderer, ["replyCount"]),
            }
        )

    self.commentsComponent["result"].extend(comments)
    self.continuationKey = self.__getValue(
        source,
        [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"],
    )
167+
168+
def __result(self, mode: int) -> Union[dict, str]:
    """Return the collected comments in the requested representation.

    Args:
        mode: ``ResultMode.dict`` for the raw dict, ``ResultMode.json`` for
            a JSON string indented by four spaces.

    Returns:
        The comments component as a dict or JSON string; ``None`` for any
        other mode.
    """
    if mode == ResultMode.dict:
        return self.commentsComponent
    if mode == ResultMode.json:
        return json.dumps(self.commentsComponent, indent=4)
    return None
173+
174+
def __getValue(self, source: Union[dict, list, None], path: Iterable[Union[str, int]]) -> Union[str, int, dict, None]:
    """Walk ``path`` through nested dicts/lists without ever raising.

    Args:
        source: Root container (dict, list, or ``None``).
        path: Sequence of steps; a ``str`` step is a dict key, an ``int``
            step is a list index (negative indices count from the end).

    Returns:
        The value found at the end of ``path``, or ``None`` as soon as a
        step cannot be applied (missing key, index out of range, wrong
        container type, or unsupported step type). An empty ``path``
        returns ``source`` unchanged.
    """
    value = source
    for key in path:
        if isinstance(key, str):
            if not isinstance(value, dict) or key not in value:
                return None
        elif isinstance(key, int):
            # Valid list indices are -len .. len-1; comparing against
            # abs(key) would wrongly reject e.g. [-1] on a 1-item list.
            if not isinstance(value, list) or not -len(value) <= key < len(value):
                return None
        else:
            return None
        value = value[key]
    return value
192+
193+
def __getAllWithKey(self, source: "Iterable[Mapping[K, T]]", key: "K") -> "Iterable[T]":
    """Yield ``item[key]`` for every mapping in ``source`` that has ``key``.

    Args:
        source: Iterable of mappings; ``None`` or an empty iterable yields
            nothing.
        key: Key to look up in each item.

    Yields:
        The value stored under ``key`` in each item that contains it, in
        iteration order. Items that are not mappings (``None``, strings,
        numbers, ...) are skipped instead of raising ``TypeError``.
    """
    for item in source or []:
        if isinstance(item, Mapping) and key in item:
            yield item[key]
197+
198+
def __getValueEx(self, source: dict, path: List[Union[str, None]]) -> Iterable[Union[str, int, dict, None]]:
    """Yield every value reachable from ``source`` along ``path``.

    A ``None`` step acts as a wildcard over a list: every element that
    contains the step following the wildcard is descended into. Any other
    step is resolved with ``self.__getValue``; a step that cannot be
    resolved ends that branch silently.

    Raises:
        Exception: If a ``None`` step is the last element of ``path``.
    """
    if not path:
        yield source
        return

    head, rest = path[0], path[1:]
    if head is not None:
        found = self.__getValue(source, [head])
        if found is not None:
            yield from self.__getValueEx(found, path=rest)
        return

    # Wildcard: it must be followed by the key to select within each item.
    if not rest:
        raise Exception("Invalid path")
    wanted, rest = rest[0], rest[1:]
    for match in self.__getAllWithKey(source or [], wanted):
        yield from self.__getValueEx(match, path=rest)
101216

102-
if not self._js_url:
103-
return
217+
def __getFirstValue(self, source: dict, path: Iterable[Union[str, None]]) -> Union[str, int, dict, None]:
    """Return the first non-``None`` value produced by ``__getValueEx``.

    Args:
        source: Root container; a falsy value is treated as an empty dict.
        path: Steps as accepted by ``__getValueEx``.

    Returns:
        The first non-``None`` match, or ``None`` when there is none.
    """
    candidates = self.__getValueEx(source or {}, list(path))
    return next((found for found in candidates if found is not None), None)
104223

224+
def _safe_load_response(self, response):
    """Best-effort conversion of a response into parsed JSON data.

    Args:
        response: An already-parsed dict, an object exposing ``json()`` or
            ``text``, or a raw ``str``/``bytes`` JSON document.

    Returns:
        The parsed JSON value. ``{}`` is returned when parsing fails or
        when ``response`` is of an unsupported type (including ``None``),
        so callers always receive a value they can run path lookups on.
    """
    try:
        if isinstance(response, dict):
            # Already parsed; nothing to do.
            return response
        if hasattr(response, "json"):
            return response.json()
        if hasattr(response, "text"):
            return json.loads(response.text)
        if isinstance(response, (str, bytes)):
            return json.loads(response)
    except Exception:
        # Deliberately broad: this helper must never raise, whatever the
        # underlying HTTP client or decoder throws.
        return {}
    # Unsupported input type: fall back to an empty document rather than
    # implicitly returning None.
    return {}

0 commit comments

Comments
 (0)