-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathsubreddit_dl.py
More file actions
44 lines (29 loc) · 1.01 KB
/
subreddit_dl.py
File metadata and controls
44 lines (29 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
'''
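Downloads the top-scoring submissions from the reddit TIL (todayilearned)
subreddit that match the search "site:wikipedia", saving each one as a JSON
file under data/reddit/. Results are limited to the top 1000; if you want
more you'll have to obtain them some other way.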
'''
import praw, os, json, codecs
from pprint import pprint
subreddit_name = "todayilearned"

# Login: no credentials are passed here, only an identifying user agent.
user_agent = "Subdownloaded 0.1 by /u/hookedon"
agent = praw.Reddit(user_agent=user_agent)
sub = agent.get_subreddit(subreddit_name)

# Create save directories. os.makedirs builds the whole data/reddit chain
# without spawning a shell, so this also works where `mkdir -p` is missing.
save_dir = os.path.join("data", "reddit")
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# Search the subreddit for wikipedia-linked submissions, best-scoring first,
# over all time; limit=None asks praw for as many results as it will return.
submissions = sub.search("site:wikipedia",
                         limit=None,
                         sort="top",
                         period="all")

for k, result in enumerate(submissions):
    # Work on a shallow copy: vars() returns the submission's live __dict__,
    # and the edits below must not alter the praw object itself.
    js = dict(vars(result))

    # These attributes hold objects rather than plain JSON values, so store
    # their string form instead.
    js["author"] = str(js["author"])
    js["subreddit"] = str(js["subreddit"])

    # Drop the back-reference to the reddit session before serializing;
    # tolerate its absence rather than failing with KeyError.
    js.pop("reddit_session", None)

    name = js["name"]
    jstr = json.dumps(js, indent=2)

    # One JSON file per submission, named after the submission's "name" field.
    f_out = os.path.join(save_dir, name + '.json')
    with codecs.open(f_out, "w", "utf-8") as FOUT:
        FOUT.write(jstr)

    # Progress line: index, score, output path, first 40 chars of the title.
    # %-formatting (not str.format) keeps non-ASCII titles working on Python 2.
    print("%s %s %s %s" % (k, js["score"], f_out, js["title"][:40]))