forked from HarshCasper/Rotten-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimdb_top.py
More file actions
37 lines (34 loc) · 1.14 KB
/
imdb_top.py
File metadata and controls
37 lines (34 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""IMDB top 250 movies.
"""
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd
URL = "http://www.imdb.com/chart/top"

# Matches the whitespace-collapsed text of one title cell, which has the shape
# "<rank>. <title> (<year>)". The year is taken from the LAST parenthesised
# group so that titles which themselves contain parentheses, or periods, are
# kept intact.
_TITLE_CELL_RE = re.compile(r"^\d+\.\s*(?P<title>.*?)\s*\((?P<year>[^()]*)\)$")


def _parse_title_cell(cell_text):
    """Split the text of one ``td.titleColumn`` cell into title and year.

    Args:
        cell_text: Raw text of the cell, i.e. the rank followed by a period,
            the movie title and the year in parentheses, possibly spread
            over several lines.

    Returns:
        A ``(movie_title, year)`` tuple of strings.

    Raises:
        ValueError: If the text does not have the expected
            ``<rank>. <title> (<year>)`` shape.
    """
    collapsed = " ".join(cell_text.split())
    match = _TITLE_CELL_RE.match(collapsed)
    if match is None:
        raise ValueError(f"Unexpected title cell format: {collapsed!r}")
    return match.group("title"), match.group("year")


# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as a chart.
RESPONSE = requests.get(URL, timeout=30)
RESPONSE.raise_for_status()
SOUP = BeautifulSoup(RESPONSE.text, features="lxml")

MOVIES = SOUP.select("td.titleColumn")
# The "title" attribute of each link inside a title cell is stored as the
# star cast of the movie.
STARS = [a.attrs.get("title") for a in SOUP.select("td.titleColumn a")]
# Ratings are read from the "data-value" attribute and rounded to one decimal.
RATINGS = [
    round(float(span.attrs.get("data-value")), 1)
    for span in SOUP.select("td.posterColumn span[name=ir]")
]

# The three lists are combined row by row below, so they must line up exactly;
# fail loudly instead of silently pairing a movie with the wrong cast/rating.
if not len(MOVIES) == len(STARS) == len(RATINGS):
    raise ValueError(
        "Scraped columns differ in length: "
        f"{len(MOVIES)} titles, {len(STARS)} casts, {len(RATINGS)} ratings"
    )

# Store each movie as a dictionary, then collect those into a list (IMDB).
IMDB = []
for cell, star_cast, rating in zip(MOVIES, STARS, RATINGS):
    movie_title, year = _parse_title_cell(cell.get_text())
    IMDB.append(
        {
            "movie_title": movie_title,
            "year": year,
            "star_cast": star_cast,
            "rating": rating,
        }
    )

# Build a data frame from the collected rows and label its index column.
DF = pd.DataFrame(IMDB)
DF.index = DF.index.rename("S.No")
# Write the data frame to a CSV file in the current working directory.
DF.to_csv("imdb.csv")
print("csv file has been created in current working directory")