Skip to content

Commit 6fec794

Browse files
committed
Add support for adv_search
1 parent b7d3ac6 commit 6fec794

7 files changed

Lines changed: 218 additions & 10 deletions

File tree

.flake8

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[flake8]
2+
max-line-length = 160
3+
ignore = F403,F405,F401

PyMDL/Infopage.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ def __init__(self, details: dict):
7474
else:
7575
if 'aired' in allkeys:
7676
self.date = self.details.pop('aired').strip()
77+
if 'episodes' in allkeys:
78+
self.episodes = int(self.details.pop('episodes'))
79+
else:
80+
self.episodes = -1
81+
if 'where_to_watch' in allkeys:
82+
self.where_to_watch = self.details.pop('where_to_watch')
83+
else:
84+
self.where_to_watch = None
85+
self.networks = "N/A"
86+
if 'networks' in allkeys:
87+
self.networks = self.details.pop('networks')
7788

7889
# Finding recommendations
7990
def get_recommendations(self):
@@ -123,10 +134,13 @@ def dumps(self) -> dict:
123134
'ratings': self.ratings,
124135
'synopsis': self.synopsis,
125136
'casts': self.casts,
137+
'where_to_watch': self.where_to_watch,
126138
'native title': self.native,
139+
'episodes': self.episodes,
127140
'genere': self.genre,
128141
'duration': self.duration,
129142
'country': self.country,
143+
"original network": self.networks,
130144
'aka': self.aka,
131145
'director': self.director,
132146
'screenwriter': self.screenwriter,
@@ -160,8 +174,7 @@ def save(self, file: str) -> bool:
160174
json.dump(self.dumps(), f, indent=4)
161175
return True
162176
except Exception as e:
163-
print("Got Exception\n", e)
164-
return False
177+
raise Exception("Got Exception\n", e)
165178

166179
def __str__(self):
167180
return str(self.dumps())
@@ -211,7 +224,7 @@ def info(link: str):
211224
details['ratings'] = details['ratings'].find("b").text
212225

213226
detailed_info = mainbox.find("div", class_="show-detailsxss").find("ul").find_all("li")
214-
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres']
227+
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres', 'original network']
215228
for item in detailed_info:
216229
try:
217230
# if item.text.split(":")[0].lower() == 'tags':
@@ -232,6 +245,24 @@ def info(link: str):
232245
for item in cast_names:
233246
casts.append(item.text)
234247
details['casts'] = casts
248+
249+
try:
250+
where_to_watch_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
251+
find("div", class_="wts").find_all("b")
252+
where_to_watch = []
253+
for item in where_to_watch_names:
254+
where_to_watch.append(item.text)
255+
details['where_to_watch'] = where_to_watch
256+
except AttributeError:
257+
details['where_to_watch'] = "N/A"
258+
259+
networks_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
260+
find("div", class_="box clear").find("div", class_="p-a-sm").find_all("b")
261+
networks = []
262+
for item in networks_names:
263+
networks.append(item.text)
264+
details['networks'] = networks
265+
235266
details_box = soup.find("div", class_="box-body light-b").ul.find_all("li")
236267
for item in details_box[1:]:
237268
details[item.text.split(":")[0].lower()] = item.text.split(":")[1].strip()

PyMDL/Search.py

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import bs4
22
import requests
3+
import warnings
34
from typing import Union, List
45
from .Infopage import info, InfoPage
6+
from datetime import datetime
7+
from .enums import *
58

69

710
class SearchResult:
@@ -49,7 +52,7 @@ def get_all(self, limit: int = 20) -> List[InfoPage]:
4952
except TypeError:
5053
limit = 20
5154
for item in list(self.urls.keys())[:limit]:
52-
print('Getting:', item)
55+
print('Getting:', item) # noqa: 201
5356
lst.append(info(self.urls[item]))
5457
return lst
5558

@@ -58,7 +61,7 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No
5861
match_all: bool = True, max_results: int = 20) -> Union[SearchResult, None]:
5962
urls = {}
6063
if max_results > 20:
61-
print("Cannot have more than 20 Results!")
64+
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
6265
max_results = 20
6366
filters_given = any([style, year, eps, score])
6467
url = f"https://mydramalist.com/search?q={name.replace(' ', '+')}&page={page}"
@@ -136,7 +139,131 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No
136139
filter_check |= 0b1000
137140

138141
# Add it to list if checks pass
139-
if match_all and filter_check == 15:
142+
elif match_all and filter_check == 15:
143+
urls[curr_title] = curr_url
144+
elif (not match_all) and filter_check != 0:
145+
urls[curr_title] = curr_url
146+
147+
else: # Directly add if no filters are given
148+
urls[curr_title] = curr_url
149+
if len(urls) >= max_results:
150+
break
151+
if len(urls) > 0:
152+
return SearchResult(urls)
153+
else:
154+
return None
155+
156+
157+
def adv_search(name: str = None, page: int = 1,
158+
search_type: SearchType = SearchType.TITLE,
159+
release_date_initial: int = 1890,
160+
release_date_final: int = datetime.now().strftime('%Y'),
161+
title_types: List[TitleType] = None,
162+
title_status: TitleStatus = None,
163+
title_sorted_by: TitleSortedByType = TitleSortedByType.MOST_POPULAR,
164+
style: str = None,
165+
year=None,
166+
eps: int = None,
167+
score: str = None,
168+
match_all: bool = True,
169+
max_results: int = 20) -> Union[SearchResult, None]:
170+
171+
urls = {}
172+
if max_results > 20:
173+
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
174+
max_results = 20
175+
176+
title_type_encoded = ""
177+
if title_types is not None:
178+
title_type_encoded = f"&ty={','.join(map(lambda t: str(t), title_types))}"
179+
180+
title_status_encoded = ""
181+
if title_status is not None:
182+
title_status_encoded = f'&st={title_status}'
183+
184+
release_date_encoded = ""
185+
if release_date_initial != 1890 or release_date_final != datetime.now().strftime('%Y'):
186+
release_date_encoded = f"&re={release_date_initial},{release_date_final}"
187+
188+
sorted_encoded = f"&so={title_sorted_by}"
189+
190+
filters_given = any([style, year, eps, score])
191+
url = f"https://mydramalist.com/search?adv={search_type}{title_type_encoded}{title_status_encoded}{release_date_encoded}{sorted_encoded}&page={page}"
192+
base = requests.get(url)
193+
soup = bs4.BeautifulSoup(base.text, 'lxml')
194+
results_box = soup.find('div', class_='col-lg-8 col-md-8').find_all('div', class_='box')
195+
for item in results_box:
196+
# Get Title
197+
try:
198+
curr_title = item.find("h6").find('a').text
199+
except AttributeError:
200+
return None
201+
202+
# Get Category
203+
curr_cateory = item.find('span', class_='text-muted')
204+
205+
# Check if Category Exists
206+
if curr_cateory:
207+
curr_cateory = curr_cateory.text
208+
else:
209+
continue
210+
211+
# Get URL
212+
curr_url = item.find("h6").find('a')['href']
213+
214+
# Apply filters
215+
if filters_given:
216+
if match_all:
217+
filter_check = 15 # Has to match all filters given
218+
else:
219+
filter_check = 0 # Has to match at least one of the filters given
220+
# In Binary from MSB [0] is style, [1] is year, [2] is eps, [3] is score
221+
222+
# Check for Score
223+
curr_score = item.find('span', class_='score').text
224+
if score:
225+
if curr_score:
226+
if score.endswith('+'):
227+
if not float(curr_score) >= float(score.rstrip('+')):
228+
filter_check &= 0b1110
229+
else:
230+
filter_check |= 0b0001
231+
elif score.endswith('-'):
232+
if not float(curr_score) <= float(score.rstrip('-')):
233+
filter_check &= 0b1110
234+
else:
235+
filter_check |= 0b0001
236+
else:
237+
if not curr_score == score:
238+
filter_check &= 0b1110
239+
else:
240+
filter_check |= 0b0001
241+
else:
242+
filter_check &= 0b1110
243+
244+
# Check for Episodes Filter
245+
if eps:
246+
if not ((curr_cateory.split(',')[-1]).startswith(f" {eps} episode")):
247+
filter_check &= 0b1101
248+
else:
249+
filter_check |= 0b0010
250+
251+
# Check for Year Filter
252+
if year:
253+
if not curr_cateory.split(',')[0].split('-')[-1].strip() == str(year):
254+
filter_check &= 0b1011
255+
else:
256+
filter_check |= 0b0100
257+
258+
# Check for Style Filter
259+
if style:
260+
if curr_cateory.find(style) == -1:
261+
filter_check &= 0b0111
262+
else:
263+
filter_check |= 0b1000
264+
265+
# Add it to list if checks pass
266+
elif match_all and filter_check == 15:
140267
urls[curr_title] = curr_url
141268
elif (not match_all) and filter_check != 0:
142269
urls[curr_title] = curr_url

PyMDL/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from .SearchPeople import search_people, PeopleSearchResult
44
from .Casts import casts, Cast
55
from .exceptions import *
6+
from .enums import *
67

78
__title__ = 'PyMDL'
89
__license__ = 'MIT'
9-
__version__ = '1.0.1'
10+
__version__ = '1.0.2'

PyMDL/enums.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from __future__ import annotations
2+
from enum import Enum
3+
4+
__all__ = ['SearchType', 'LocalEnum', 'TitleType', 'TitleStatus', 'TitleSortedByType']
5+
6+
7+
class LocalEnum(Enum):
    """Base enum whose members stringify to their raw value.

    ``str(member)`` produces the text of ``member.value`` instead of the
    default ``ClassName.MEMBER`` form, so members can be dropped straight
    into formatted strings.
    """

    def __str__(self):
        # Render the underlying value, not the qualified member name.
        return str(self.value)
10+
11+
12+
class SearchType(LocalEnum):
    """
    Enumerator representing adv search type
    * **TITLE** - search among titles (value ``'titles'``)
    * **PEOPLE** - search among people (value ``'people'``)
    * **ARTICLES** - search among articles (value ``'articles'``)

    The member value is the text placed in the ``adv`` query parameter of
    the search URL.
    """
    TITLE = 'titles'
    PEOPLE = 'people'
    ARTICLES = 'articles'
22+
23+
24+
class TitleType(LocalEnum):
    """
    Enumerator representing the title types an adv search can be limited to

    Member values are the numeric codes that get comma-joined into the
    ``ty`` query parameter of the search URL.
    """
    DRAMAS = 68
    DRAMA_SPECIAL = 83
    TV_SHOWS = 86
    MOVIES = 77
29+
30+
31+
class TitleStatus(LocalEnum):
    """
    Enumerator representing the airing status filter of an adv search

    The member value is the numeric code placed in the ``st`` query
    parameter of the search URL.
    """
    ONGOING = 1
    COMPLETED = 3
    UPCOMING = 2
35+
36+
37+
class TitleSortedByType(LocalEnum):
    """
    Enumerator representing the sort order of adv search results

    The member value is the keyword placed in the ``so`` query parameter
    of the search URL.
    """
    RELEVANCE = "relevance"
    MOST_POPULAR = "popular"
    TOP_RANKED = "top"
    TOP_RATED = "rated"
    NEWEST = "newest"
    RELEASE_DATE = "date"
    RECENTLY_ADDED = "recently"

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ pip install PyMDL
2323

2424
## Authors
2525

26-
Rocker2234
26+
Rocker2234
2727
RAiU14
2828

2929
## Last Release Changes
3030
* 1.0.1
3131
* Fix more Typos
32-
* Fix incorrect filter when style is used
32+
* Fix incorrect filter when style is used
33+
* 1.0.2
34+
* Add adv_search method
3335

3436
## License
3537

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='PyMDL',
10-
version='1.0.1',
10+
version='1.0.2',
1111
license='MIT',
1212
description='Web Scraping API to fetch data from MDL',
1313
url='https://github.com/Rocker2234/Python-MDL-API',

0 commit comments

Comments
 (0)