-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathImageDownloader.py
More file actions
116 lines (95 loc) · 4.13 KB
/
ImageDownloader.py
File metadata and controls
116 lines (95 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import random
import numpy as np
import string
import requests
import os
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
# Number of images the download loop must save before the script stops.
images_desired = 100
# Page URL prefix; a random code is appended to it for each attempt.
base_url = "https://prnt.sc/"
# Folder that receives the saved screenshots. A raw string cannot end in a
# single backslash, so this value ends in two.
save_path = r"C:\repo\ImageDownloader\PrntScnImages\\"
# Reference copies of the placeholder images served for removed screenshots.
removed_image_path = r"C:\repo\ImageDownloader\assets\screenshot_removed.png"
imgur_not_available = r"C:\repo\ImageDownloader\assets\imgur_not_available.png"
# Text file listing, one per line, every code that has already been attempted.
tried_strings_path = r"C:\repo\ImageDownloader\assets\random_strings.txt"

# Make sure the save folder exists so the listing below (and later saves)
# do not fail on a fresh checkout.
os.makedirs(save_path, exist_ok=True)

# Codes that already have a saved image: the file names without extension.
# splitext is used instead of slicing off four characters so a name with a
# longer, shorter or missing extension is still reduced correctly.
existing_screenshots = [
    os.path.splitext(screenshot)[0] for screenshot in os.listdir(save_path)
]
def generate_random_string(length):
    """Return a random string of ``length`` ASCII letters and digits.

    Characters are drawn independently, with replacement, from the upper- and
    lower-case ASCII letters plus the digits 0-9.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
def is_removed_image(downloaded_image_response):
    """Report whether a downloaded image is a known "removed" placeholder.

    Args:
        downloaded_image_response: Response object whose ``content``
            attribute holds the raw bytes of the downloaded image.

    Returns:
        True if the image is exactly 422x108 pixels, or if its pixel data
        equals either reference placeholder image stored on disk; False
        otherwise.
    """
    with Image.open(BytesIO(downloaded_image_response.content)) as downloaded_image:
        # Looking for an exact image size is unfortunately one of the only
        # reliable ways to find these, so it is checked first and spares the
        # pixel comparison entirely when it matches.
        if downloaded_image.size == (422, 108):
            return True
        downloaded_pixels = np.array(downloaded_image)

    # Compare against each reference placeholder. The context manager closes
    # the reference file after every comparison instead of leaving the handle
    # open for the rest of the run.
    for reference_path in (removed_image_path, imgur_not_available):
        with Image.open(reference_path) as reference_image:
            if np.array_equal(downloaded_pixels, np.array(reference_image)):
                return True
    return False
# Codes attempted by earlier runs, read from the history file one per line.
try:
    with open(tried_strings_path, 'r') as file:
        # strip() removes the trailing newline (and any surrounding
        # whitespace) from each stored code.
        tried_strings = [line.strip() for line in file]
except FileNotFoundError:
    # No history file yet (e.g. the very first run): start with an empty
    # history instead of crashing before any download is attempted.
    tried_strings = []
# Headers sent with every HTTP request this script makes.
_REQUEST_HEADERS = {'User-Agent': 'Chrome)'}
# Seconds to wait on a server before giving up; without a timeout a stalled
# connection would block the run forever.
_REQUEST_TIMEOUT = 30


def _fetch(url):
    """GET ``url``; return the response, or None if the request itself failed."""
    try:
        # The body is always needed in full, so streaming is not requested;
        # this also lets the connection be released on every code path.
        return requests.get(url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as error:
        print('Request failed: ', error)
        return None


def _record_tried(code):
    """Append ``code`` to the tried-strings file so later runs skip it."""
    with open(tried_strings_path, 'a') as file:
        file.write(code + '\n')


# Every code that is already saved or was already attempted. A set keeps the
# membership test cheap however long the history grows, and it is extended
# during the run so the same code is never drawn twice in one session.
_seen_codes = set(existing_screenshots)
_seen_codes.update(tried_strings)

i = 0  # images saved so far in this run
while i < images_desired:
    # Keep generating random strings until we get one we haven't done before.
    rand_text = generate_random_string(6)
    while rand_text in _seen_codes:
        rand_text = generate_random_string(6)
    _seen_codes.add(rand_text)
    print(rand_text)

    filename = os.path.join(save_path, rand_text + '.png')
    r = _fetch(base_url + rand_text)
    if r is None or r.status_code != 200:
        # Not written to the history file, so a later run may retry the code.
        print('Page could not be retrieved')
        continue

    soup = BeautifulSoup(r.content, features="html.parser")
    img_elements = soup.find_all("img", {"class": "no-click screenshot-image"})
    if not img_elements:
        print('no screenshot')
        _record_tried(rand_text)  # record that we tried this string
        continue

    for image in img_elements:
        image_url = image.get('src')
        if not image_url:
            # An <img> tag without a src attribute has nothing to download.
            print('Image could not be retrieved')
            continue
        if 'http' not in image_url:
            # The src carries no scheme, so prepend one before requesting it.
            image_url = 'http:' + image_url

        s = _fetch(image_url)
        # A Content-Length of 4267 is rejected outright; presumably that is
        # the byte size of a known placeholder image - TODO confirm. .get()
        # is used because servers are not required to send the header.
        if (s is None or s.status_code != 200
                or s.headers.get('Content-Length') == '4267'):
            print('Image could not be retrieved')
            continue

        if is_removed_image(s):  # the screenshot is the 'can't find image' image
            print('screenshot removed')
        else:
            with Image.open(BytesIO(s.content)) as img:
                img.save(filename)
            print('Image successfully Downloaded: ', filename)
            i += 1
        _record_tried(rand_text)  # record that we tried this string

print('downloaded ' + str(images_desired) + ' images!')