|
1 | | -import re |
| 1 | +import collections |
2 | 2 | import copy |
3 | | -import urllib.parse |
4 | | -import os |
| 3 | +import itertools |
| 4 | +import json |
| 5 | +from typing import Iterable, Mapping, Tuple, TypeVar, Union, List |
| 6 | +from urllib.parse import urlencode |
5 | 7 |
|
6 | | -from youtubesearchpython.core.constants import ResultMode |
7 | | -from youtubesearchpython.core.componenthandler import getValue |
| 8 | +from youtubesearchpython.core.componenthandler import getVideoId, getValue |
| 9 | +from youtubesearchpython.core.constants import * |
8 | 10 | from youtubesearchpython.core.requests import RequestCore |
9 | 11 |
|
10 | | -isYtDLPinstalled = False |
| 12 | +K = TypeVar("K") |
| 13 | +T = TypeVar("T") |
11 | 14 |
|
12 | | -try: |
13 | | - from yt_dlp.extractor.youtube import YoutubeIE |
14 | | - from yt_dlp import YoutubeDL |
15 | | - from yt_dlp.utils import url_or_none, try_get, update_url_query, ExtractorError |
16 | 15 |
|
17 | | - isYtDLPinstalled = True |
18 | | -except ImportError: |
19 | | - pass |
20 | | - |
21 | | - |
22 | | -class StreamURLFetcherCore(RequestCore): |
23 | | - def __init__(self, proxy: str = None, cookies_file: str = None): |
24 | | - if not isYtDLPinstalled: |
25 | | - raise Exception('ERROR: yt-dlp is not installed. Install with: pip install yt-dlp') |
class CommentsCore(RequestCore):
    """Fetch and parse the comment threads of a YouTube video via the
    InnerTube ``/youtubei/v1/next`` endpoint.

    Workflow:
      1. A "continuation" request (watch-page payload with ``videoId``)
         yields the first comments continuation token.
      2. A "comments" request carrying that token returns one page of
         comment threads; each page also carries the token for the next
         page (picked up in ``__getComponents``).

    Parsed comments accumulate across pages in
    ``self.commentsComponent["result"]``.
    """

    # Class-level defaults; instances shadow these once populated.
    result = None
    continuationKey = None   # token for the next comments page (None when exhausted)
    isNextRequest = False    # True once the first page has been fetched
    response = None          # last raw HTTP response object

    def __init__(self, videoLink: str):
        super().__init__()
        self.commentsComponent = {"result": []}
        self.responseSource = None
        self.videoLink = videoLink

    def _build_base_payload(self) -> dict:
        """Return a fresh request payload with the InnerTube client context
        filled in.  Raises if the module-level API key is missing.

        NOTE(review): ``searchKey``/``requestPayload`` come from
        ``youtubesearchpython.core.constants`` via the star import.
        """
        if not searchKey:
            raise Exception("INNERTUBE API key (searchKey) is not set.")
        payload = copy.deepcopy(requestPayload)
        client = payload.setdefault("context", {}).setdefault("client", {})
        # Plain setdefault suffices: it only fills the key when absent.
        client.setdefault("clientName", "WEB")
        client.setdefault("clientVersion", "2.20210820.01.00")
        return payload

    def prepare_continuation_request(self):
        """Build the request that resolves the video's first comments
        continuation token."""
        self.data = self._build_base_payload()
        self.data["videoId"] = getVideoId(self.videoLink)
        self.url = f"https://www.youtube.com/youtubei/v1/next?{urlencode({'key': searchKey})}"

    def prepare_comments_request(self):
        """Build the request that fetches one page of comments using the
        current ``continuationKey``."""
        self.data = self._build_base_payload()
        self.data["continuation"] = self.continuationKey
        self.url = f"https://www.youtube.com/youtubei/v1/next?{urlencode({'key': searchKey})}"

    def parse_source(self):
        """Extract the list of comment-thread items from the last response.

        The first page arrives as a ``reloadContinuationItemsCommand`` at
        endpoint index 1; subsequent pages arrive as an
        ``appendContinuationItemsAction`` at index 0.
        """
        data = self._safe_load_response(self.response)
        idx = 0 if self.isNextRequest else 1
        path = [
            "onResponseReceivedEndpoints",
            idx,
            "appendContinuationItemsAction" if self.isNextRequest else "reloadContinuationItemsCommand",
            "continuationItems",
        ]
        self.responseSource = getValue(data, path)

    def parse_continuation_source(self):
        """Pull the initial comments continuation token out of the
        watch-page response (last item-section of the results column)."""
        data = self._safe_load_response(self.response)
        self.continuationKey = getValue(
            data,
            [
                "contents",
                "twoColumnWatchNextResults",
                "results",
                "results",
                "contents",
                -1,
                "itemSectionRenderer",
                "contents",
                0,
                "continuationItemRenderer",
                "continuationEndpoint",
                "continuationCommand",
                "token",
            ],
        )

    def sync_make_comment_request(self):
        """Synchronously fetch and parse one page of comments.
        Non-200 responses are silently ignored (best effort)."""
        self.prepare_comments_request()
        self.response = self.syncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_source()

    def sync_make_continuation_request(self):
        """Synchronously resolve the first continuation token.

        Raises:
            Exception: on a non-200 response or a missing token.
        """
        self.prepare_continuation_request()
        self.response = self.syncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_continuation_source()
            if not self.continuationKey:
                raise Exception("Could not retrieve continuation token")
        else:
            raise Exception("Status code is not 200")

    async def async_make_comment_request(self):
        """Async counterpart of ``sync_make_comment_request``."""
        self.prepare_comments_request()
        self.response = await self.asyncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_source()

    async def async_make_continuation_request(self):
        """Async counterpart of ``sync_make_continuation_request``.

        Raises:
            Exception: on a non-200 response or a missing token.
        """
        self.prepare_continuation_request()
        self.response = await self.asyncPostRequest()
        if hasattr(self.response, "status_code") and self.response.status_code == 200:
            self.parse_continuation_source()
            if not self.continuationKey:
                raise Exception("Could not retrieve continuation token")
        else:
            raise Exception("Status code is not 200")

    def sync_create(self):
        """Fetch the first page of comments (blocking)."""
        self.sync_make_continuation_request()
        self.sync_make_comment_request()
        self.__getComponents()

    def sync_create_next(self):
        """Fetch the next page of comments (blocking)."""
        self.isNextRequest = True
        self.sync_make_comment_request()
        self.__getComponents()

    async def async_create(self):
        """Fetch the first page of comments (async)."""
        await self.async_make_continuation_request()
        await self.async_make_comment_request()
        self.__getComponents()

    async def async_create_next(self):
        """Fetch the next page of comments (async)."""
        self.isNextRequest = True
        await self.async_make_comment_request()
        self.__getComponents()

    def __getComponents(self) -> None:
        """Convert ``responseSource`` items into plain comment dicts,
        append them to the accumulated result, and capture the token for
        the following page (``None`` when there is no further page)."""
        comments = []
        src = self.responseSource or []
        for comment in src:
            comment = getValue(comment, ["commentThreadRenderer", "comment", "commentRenderer"])
            try:
                j = {
                    "id": self.__getValue(comment, ["commentId"]),
                    "author": {
                        "id": self.__getValue(comment, ["authorEndpoint", "browseEndpoint", "browseId"]),
                        "name": self.__getValue(comment, ["authorText", "simpleText"]),
                        "thumbnails": self.__getValue(comment, ["authorThumbnail", "thumbnails"]),
                    },
                    "content": self.__getValue(comment, ["contentText", "runs", 0, "text"]),
                    "published": self.__getValue(comment, ["publishedTimeText", "runs", 0, "text"]),
                    "isLiked": self.__getValue(comment, ["isLiked"]),
                    "authorIsChannelOwner": self.__getValue(comment, ["authorIsChannelOwner"]),
                    "voteStatus": self.__getValue(comment, ["voteStatus"]),
                    "votes": {
                        "simpleText": self.__getValue(comment, ["voteCount", "simpleText"]),
                        "label": self.__getValue(comment, ["voteCount", "accessibility", "accessibilityData", "label"]),
                    },
                    "replyCount": self.__getValue(comment, ["replyCount"]),
                }
                comments.append(j)
            except Exception:
                # Best effort: skip malformed renderers rather than abort the page.
                pass

        self.commentsComponent["result"].extend(comments)
        self.continuationKey = self.__getValue(
            self.responseSource or [],
            [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"],
        )

    def __result(self, mode: int) -> Union[dict, str]:
        """Return the accumulated comments as a dict or a JSON string,
        depending on ``mode`` (``ResultMode.dict`` / ``ResultMode.json``)."""
        if mode == ResultMode.dict:
            return self.commentsComponent
        elif mode == ResultMode.json:
            return json.dumps(self.commentsComponent, indent=4)

    def __getValue(self, source: Union[dict, list, None], path: Iterable[Union[str, int]]) -> Union[str, int, dict, None]:
        """Walk ``path`` (str keys into dicts, int indices into lists —
        negatives allowed) through ``source``; return ``None`` on any miss
        instead of raising."""
        value = source
        for key in path:
            if value is None:
                return None
            if isinstance(key, str):
                if isinstance(value, dict) and key in value:
                    value = value[key]
                else:
                    return None
            elif isinstance(key, int):
                # BUGFIX: the previous check `len(value) > abs(key)` rejected
                # valid negative indices (e.g. -1 on a one-element list), which
                # silently dropped the next-page continuation token whenever a
                # page contained only the continuation renderer.  Accept any
                # in-range index, negative or positive.
                if isinstance(value, list) and -len(value) <= key < len(value):
                    value = value[key]
                else:
                    return None
            else:
                return None
        return value

    def __getAllWithKey(self, source: Iterable[Mapping[K, T]], key: K) -> Iterable[T]:
        """Yield ``item[key]`` for every mapping in ``source`` that has ``key``."""
        for item in source or []:
            if key in item:
                yield item[key]

    def __getValueEx(self, source: dict, path: List[Union[str, None]]) -> Iterable[Union[str, int, dict, None]]:
        """Recursively yield every value reachable via ``path``; a ``None``
        path element fans out over all items carrying the following key."""
        if len(path) <= 0:
            yield source
            return
        key = path[0]
        upcoming = path[1:]
        if key is None:
            if not upcoming:
                raise Exception("Invalid path")
            following_key = upcoming[0]
            upcoming = upcoming[1:]
            for val in self.__getAllWithKey(source or [], following_key):
                yield from self.__getValueEx(val, path=upcoming)
        else:
            val = self.__getValue(source, [key])
            if val is None:
                return
            yield from self.__getValueEx(val, path=upcoming)

    def __getFirstValue(self, source: dict, path: Iterable[Union[str, None]]) -> Union[str, int, dict, None]:
        """Return the first non-None value produced by ``__getValueEx``,
        or ``None`` when the path matches nothing."""
        values = self.__getValueEx(source or {}, list(path))
        for val in values:
            if val is not None:
                return val
        return None

    def _safe_load_response(self, response):
        """Best-effort JSON decode of an HTTP response object, str, or
        bytes.  Always returns a decoded object or ``{}`` — never raises.
        """
        try:
            if hasattr(response, "json"):
                return response.json()
            if hasattr(response, "text"):
                return json.loads(response.text)
            if isinstance(response, (str, bytes)):
                return json.loads(response)
        except Exception:
            return {}
        # BUGFIX: unrecognized response types previously fell through and
        # returned None implicitly; return {} for a consistent contract.
        return {}
0 commit comments