1+ import json
2+ import requests
3+ from typing import Union , Dict , Any , List
4+
5+ from ..utils import get_logger
6+ from ..exceptions import ValidationError , APIError , AuthenticationError
7+
# Module-level logger shared by everything in this file, obtained from the
# package's get_logger helper under the name 'api.chatgpt'.
logger = get_logger('api.chatgpt')
9+
10+
class ChatGPTAPI:
    """Handles ChatGPT scraping operations using Bright Data's ChatGPT dataset API.

    The class wraps a caller-supplied HTTP session (anything exposing a
    ``post(url, headers=..., params=..., json=..., timeout=...)`` method, such
    as ``requests.Session``) and an API token used for Bearer authentication.
    """

    # Endpoint that triggers a dataset collection job.
    TRIGGER_URL = "https://api.brightdata.com/datasets/v3/trigger"
    # Dataset identifier sent as the ``dataset_id`` query parameter.
    DATASET_ID = "gd_m7aof0k82r803d5bjm"
    # Target URL placed in every input record.
    CHATGPT_URL = "https://chatgpt.com/"

    def __init__(self, session, api_token, default_timeout=30, max_retries=3, retry_backoff=1.5):
        """
        Store the HTTP session, credentials and request settings.

        Parameters:
        - session: HTTP session object used to send requests
        - api_token: Token sent in the Authorization header
        - default_timeout: Timeout in seconds used when a call passes none
        - max_retries: Stored on the instance; not used by scrape_chatgpt
        - retry_backoff: Stored on the instance; not used by scrape_chatgpt
        """
        self.session = session
        self.api_token = api_token
        self.default_timeout = default_timeout
        self.max_retries = max_retries
        self.retry_backoff = retry_backoff

    def scrape_chatgpt(
        self,
        prompts: List[str],
        countries: List[str],
        additional_prompts: List[str],
        web_searches: List[bool],
        timeout: Union[int, None] = None
    ) -> Dict[str, Any]:
        """
        Internal method to handle ChatGPT scraping API requests

        Parameters:
        - prompts: List of prompts to send to ChatGPT
        - countries: List of country codes matching prompts
        - additional_prompts: List of follow-up prompts matching prompts
        - web_searches: List of web_search flags matching prompts
        - timeout: Request timeout in seconds (falls back to default_timeout
          when None or 0)

        Returns:
        - Dict containing response with snapshot_id

        Raises:
        - ValidationError: a per-prompt list does not have one entry per prompt
        - AuthenticationError: the API answered with HTTP 401
        - APIError: any other non-200 status, a timeout, a network failure,
          an unparsable response body, or an unexpected error
        """
        # Every per-prompt list must line up with `prompts`; otherwise records
        # would be built from misaligned values (or indexing would fail).
        expected = len(prompts)
        per_prompt_lists = (
            ("countries", countries),
            ("additional_prompts", additional_prompts),
            ("web_searches", web_searches),
        )
        for name, values in per_prompt_lists:
            if len(values) != expected:
                raise ValidationError(
                    f"{name} must contain one entry per prompt "
                    f"(expected {expected}, got {len(values)})"
                )

        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json"
        }
        params = {
            "dataset_id": self.DATASET_ID,
            "include_errors": "true"
        }
        data = [
            {
                "url": self.CHATGPT_URL,
                "prompt": prompt,
                "country": country,
                "additional_prompt": additional_prompt,
                "web_search": web_search
            }
            for prompt, country, additional_prompt, web_search in zip(
                prompts, countries, additional_prompts, web_searches
            )
        ]

        try:
            response = self.session.post(
                self.TRIGGER_URL,
                headers=headers,
                params=params,
                json=data,
                timeout=timeout or self.default_timeout
            )

            if response.status_code == 401:
                raise AuthenticationError("Invalid API token or insufficient permissions")
            if response.status_code != 200:
                raise APIError(
                    f"ChatGPT scraping request failed with status {response.status_code}: {response.text}"
                )

            # Parse in a dedicated try: the decode error raised by requests is
            # also a RequestException, so handling it in the outer clauses
            # would misreport a malformed body as a network error.
            try:
                result = response.json()
            except ValueError as e:
                raise APIError(f"Failed to parse ChatGPT scraping response: {str(e)}") from e

            snapshot_id = result.get('snapshot_id')
            if snapshot_id:
                logger.info(
                    "ChatGPT scraping job initiated successfully for %d prompt(s)",
                    len(prompts)
                )
                # User-facing console output, kept as-is for existing callers.
                print("")
                print("Snapshot ID:")
                print(snapshot_id)
                print("")

            return result

        except (ValidationError, AuthenticationError, APIError):
            # Already one of the package's own errors: pass through untouched.
            raise
        except requests.exceptions.Timeout as e:
            raise APIError("Timeout while initiating ChatGPT scraping") from e
        except requests.exceptions.RequestException as e:
            raise APIError(f"Network error during ChatGPT scraping: {str(e)}") from e
        except Exception as e:
            raise APIError(f"Unexpected error during ChatGPT scraping: {str(e)}") from e
0 commit comments