This repository was archived by the owner on May 29, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
67 lines (56 loc) · 2.73 KB
/
main.py
File metadata and controls
67 lines (56 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from instagram_scraper.scraper import InstagramScraper
import config
import boto3
import json
import shutil
import requests
# Account to process; kept both as a single name and as the one-element list
# handed to the scraper in main().
TARGET_INSTAGRAM_USER = config.TARGET_INSTAGRAM_USER
TARGET_INSTAGRAM_USERS = [TARGET_INSTAGRAM_USER]

# Destination bucket and the region the S3 client is created for.
S3_BUCKET_NAME = config.S3_BUCKET_NAME
AWS_REGION_NAME = config.AWS_REGION_NAME

# Local metadata file for the target user (presumably written by the scraper
# run in main() -- confirm against the scraper's output location).
local_metadata_filename = 'metadata/{}.json'.format(TARGET_INSTAGRAM_USER)

# S3 key layout: every object for a user lives under instagram/<user>/.
destination_directory = 'instagram/{}'.format(TARGET_INSTAGRAM_USER)
destination_metadata_filename = '{}/full-metadata.json'.format(destination_directory)

# Shared S3 client, created once at import time with credentials from config.
s3_client = boto3.client(
    's3',
    region_name=AWS_REGION_NAME,
    aws_access_key_id=config.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=config.AWS_SECRET_ACCESS_KEY,
)
def uploadFileToS3(s3_client, local_filename, destination_filename):
    """Upload a local file to the configured S3 bucket.

    Args:
        s3_client: Client object exposing ``put_object`` (the boto3 S3 client
            in this file).
        local_filename: Path of the file to send; it is opened in binary mode.
        destination_filename: Object key to store the file under in
            ``S3_BUCKET_NAME``.
    """
    # Open inside a context manager so the handle is closed as soon as the
    # upload returns or raises, instead of being left to the garbage collector.
    with open(local_filename, 'rb') as source_file:
        s3_client.put_object(
            Body=source_file,
            Bucket=S3_BUCKET_NAME,
            Key=destination_filename,
        )
def uploadStreamToS3(s3_client, sourcestream, destination_filename):
    """Store an in-memory payload as an object in the configured S3 bucket.

    Args:
        s3_client: Client object exposing ``put_object``.
        sourcestream: Payload handed to ``put_object`` unchanged as ``Body``.
        destination_filename: Object key within ``S3_BUCKET_NAME``.
    """
    request = {
        'Body': sourcestream,
        'Bucket': S3_BUCKET_NAME,
        'Key': destination_filename,
    }
    s3_client.put_object(**request)
def extractKeyInformation(s3_client, local_metadata_filename, destination_directory,
                          request_timeout=30):
    """Upload each metadata entry's image and a JSON summary of it to S3.

    Reads the JSON metadata file and, for every entry, stores two objects
    under ``<destination_directory>/<entry id>/``: the content fetched from
    the entry's ``display_url`` (as ``<entry id>.jpg``) and ``summary.json``
    holding the selected fields of the entry.

    Args:
        s3_client: Client passed through to ``uploadStreamToS3``.
        local_metadata_filename: Path to a JSON file whose top level iterates
            over entry dicts, each with at least ``id`` and ``display_url``.
        destination_directory: S3 key prefix for the uploaded objects.
        request_timeout: Seconds ``requests`` waits on the image download
            before raising a timeout error.

    Raises:
        KeyError: An entry has no ``id`` or ``display_url``.
        requests.exceptions.RequestException: The image download failed,
            timed out, or came back with an HTTP error status.
    """
    # Fields copied verbatim from each entry into its summary, in output order.
    copied_fields = (
        'dimensions',
        'edge_media_preview_like',
        'edge_media_to_caption',
        'edge_media_to_comment',
        'tags',
        'comments',
        'location',
    )

    # Parse the whole file up front so the handle is closed before the slow
    # download/upload loop starts.
    with open(local_metadata_filename) as metadata_file:
        entries = json.load(metadata_file)

    for entry in entries:
        # These two are required: they name the S3 objects and the download.
        entry_id = entry['id']
        display_url = entry['display_url']

        key_information = {'entryID': entry_id, 'displayURL': display_url}
        # .get() so an entry lacking one of these fields is summarised with
        # null for it, instead of a KeyError aborting the remaining uploads.
        for field in copied_fields:
            key_information[field] = entry.get(field)

        entry_directory = '{}/{}'.format(destination_directory, entry_id)

        # A timeout keeps one stalled download from hanging the whole run.
        response = requests.get(display_url, timeout=request_timeout)
        # Fail loudly on 4xx/5xx rather than storing an error page as a .jpg.
        response.raise_for_status()
        # TODO: Add support for more image formats
        image_key = '{}/{}.jpg'.format(entry_directory, entry_id)
        uploadStreamToS3(s3_client, response.content, image_key)

        summary_key = '{}/summary.json'.format(entry_directory)
        print(summary_key)
        uploadStreamToS3(s3_client, json.dumps(key_information), summary_key)
def main():
    """Log in, run the scraper for the target users, then upload results to S3.

    After the scrape, the full metadata file is uploaded first, followed by
    the per-entry image and summary objects.
    """
    scraper_options = {
        'media_types': ['none'],
        'login_user': config.INSTAGRAM_USER_ID,
        'login_pass': config.INSTAGRAM_USER_PASSWORD,
        'usernames': TARGET_INSTAGRAM_USERS,
        'comments': True,
        'include_location': True,
    }
    scraper = InstagramScraper(**scraper_options)
    scraper.login()
    scraper.scrape()

    uploadFileToS3(s3_client, local_metadata_filename, destination_metadata_filename)
    extractKeyInformation(s3_client, local_metadata_filename, destination_directory)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()