@@ -470,26 +470,18 @@ def parse_submission_figure(figure_tag: Tag) -> dict[str, Any]:
470470
471471
472472def parse_submission_author (author_tag : Tag ) -> dict [str , Any ]:
473- tag_author : Optional [Tag ] = author_tag .select_one ("div.submission-id-sub-container" )
474-
475- assert tag_author is not None , _raise_exception (ParsingError ("Missing author tag" ))
476-
477- tag_author_name : Optional [Tag ] = tag_author .select_one ("span.c-usernameBlockSimple__displayName" )
473+ tag_author_name : Optional [Tag ] = author_tag .select_one ('.c-usernameBlockSimple a[href^="/user/"]' )
478474 tag_author_icon : Optional [Tag ] = author_tag .select_one ("img.submission-user-icon" )
475+ tag_author_title : Optional [Tag ] = author_tag .select_one (".submission-title + div > span:nth-child(2)" )
479476
480477 assert tag_author_name is not None , _raise_exception (ParsingError ("Missing author name tag" ))
481478 assert tag_author_icon is not None , _raise_exception (ParsingError ("Missing author icon tag" ))
479+ assert tag_author_title is not None , _raise_exception (ParsingError ("Missing author title tag" ))
482480
483- author_name : str = tag_author_name . attrs [ "title" ] .strip ()
481+ author_name : str = get_attr ( tag_author_name , "href" ). removeprefix ( "/user/" ) .strip ( "/" ). lower ()
484482 author_display_name : str = tag_author_name .text .strip ()
485- author_title : str = ([* filter (
486- bool , [child .strip ()
487- for child in tag_author .children
488- if isinstance (child , NavigableString )][3 :]
489- )] or ["" ])[- 1 ]
490- author_title = author_title if tag_author .select_one ('a[href$="/#tip"]' ) is None else sub (r"\|$" , "" , author_title )
491- author_title = author_title .strip ("\xA0 " ) # NBSP
492- author_icon_url : str = "https:" + get_attr (tag_author_icon , "src" )
483+ author_title : str = tag_author_title .text .strip ()
484+ author_icon_url : str = "https:" + get_attr (tag_author_icon , "src" ).removeprefix ("https:" )
493485
494486 return {
495487 "author" : author_name ,
@@ -501,43 +493,46 @@ def parse_submission_author(author_tag: Tag) -> dict[str, Any]:
501493
502494def parse_submission_page (sub_page : BeautifulSoup ) -> dict [str , Any ]:
503495 tag_id : Optional [Tag ] = sub_page .select_one ("meta[property='og:url']" )
504- tag_sub_info : Optional [Tag ] = sub_page .select_one ("div.submission-id-sub-container" )
505-
506- assert tag_sub_info is not None , _raise_exception (ParsingError ("Missing info tag" ))
507-
508- tag_title : Optional [Tag ] = tag_sub_info .select_one ("div.submission-title" )
509- tag_author : Optional [Tag ] = sub_page .select_one ("div.submission-id-container" )
510- tag_date : Optional [Tag ] = sub_page .select_one ("div.submission-id-container span.popup_date[data-time]" )
511- tag_tags : list [Tag ] = sub_page .select ('section.tags-row a[href^="/"]' )
512- tag_views : Optional [Tag ] = sub_page .select_one ("div.views span" )
513- tag_comment_count : Optional [Tag ] = sub_page .select_one ("section.stats-container div.comments span" )
514- tag_favorites : Optional [Tag ] = sub_page .select_one ("div.favorites span" )
515- tag_rating : Optional [Tag ] = sub_page .select_one ("div.rating span.inline" )
496+ tag_title : Optional [Tag ] = sub_page .select_one (".submission-title > h2" )
497+ tag_author : Optional [Tag ] = sub_page .select_one ('.submission-description-artist' )
498+ tag_date : Optional [Tag ] = sub_page .select_one (".submission-description-header span.popup_date[data-time]" )
499+ tag_tags : list [Tag ] = sub_page .select ('.submission-tags a[data-tag-name] + a[href^="/search/"]' )
500+ tag_views : Optional [Tag ] = sub_page .select_one (".submission-page-stats > div:nth-child(1) > div:nth-child(1)" )
501+ tag_comment_count : Optional [Tag ] = sub_page .select_one (
502+ ".submission-page-stats > div:nth-child(2) > div:nth-child(1)"
503+ )
504+ tag_favorites : Optional [Tag ] = sub_page .select_one (".submission-page-stats > div:nth-child(3) > div:nth-child(1)" )
505+ tag_rating : Optional [Tag ] = sub_page .select_one (".submission-page-stats > div:nth-child(4) > div:nth-child(1)" )
516506 tag_type : Optional [Tag ] = sub_page .select_one ("div#submission_page[class^='page-content-type']" )
517- tag_fav : Optional [Tag ] = sub_page .select_one ("div.fav > a" )
518- tag_info : Optional [Tag ] = sub_page .select_one ("section.info.text" )
519- tag_user_folders : list [Tag ] = sub_page .select ("section.folder-list-container > div > a" )
520-
521- assert tag_info is not None , _raise_exception (ParsingError ("Missing info tag" ))
522-
523- tag_category1 : Optional [Tag ] = tag_info .select_one ("span.category-name" )
524- tag_category2 : Optional [Tag ] = tag_info .select_one ("span.type-name" )
525- tag_species : Optional [Tag ] = tag_info .select ("span" )[bool (tag_category1 ) + bool (tag_category2 )]
526- tag_description : Optional [Tag ] = sub_page .select_one ("div.submission-description" )
507+ tag_fav : Optional [Tag ] = sub_page .select_one ('#submission-options > a[href^="/fav/"]' )
508+ tag_category : Optional [Tag ] = sub_page .select_one (
509+ ".submission-content-stats > span:nth-child(2) > span:nth-child(1)"
510+ )
511+ tag_sub_category : Optional [Tag ] = sub_page .select_one (
512+ ".submission-content-stats > span:nth-child(2) > span:nth-child(2)"
513+ )
514+ tag_species : Optional [Tag ] = sub_page .select_one (
515+ ".submission-content-stats > span:nth-child(2) > span:nth-child(3)"
516+ )
517+ tag_user_folders : list [Tag ] = sub_page .select (".folder-list-container .submission-folder > a" )
518+ tag_description : Optional [Tag ] = sub_page .select_one (".submission-description-text" )
527519 tag_folder : Optional [Tag ] = (
528- sub_page .select_one ('.favorite-nav a[href^="/scraps/"], .favorite-nav a[href^="/gallery/"]' )
520+ sub_page .select_one ('#submission-options a[href^="/scraps/"], #submission-options a[href^="/gallery/"]' )
529521 or sub_page .select_one ('#minigallery a[href^="/scraps/"], #minigallery a[href^="/gallery/"]' )
530522 )
531- tag_file_url : Optional [Tag ] = sub_page .select_one ("div.download a" )
532- tag_thumbnail_url : Optional [Tag ] = sub_page .select_one ("img#submissionImg" )
533- tag_prev : Optional [Tag ] = (
534- sub_page .select_one ('div.submission-content div.favorite-nav a:nth-child(1)[href^="/view/"]' )
535- or sub_page .select_one (".minigallery-container > div:nth-child(1) figure:last-child a" )
536- )
537- tag_next : Optional [Tag ] = (
538- sub_page .select_one ('div.submission-content div.favorite-nav a:last-child[href^="/view/"]' )
539- or sub_page .select_one (".minigallery-container > div:last-child figure:nth-child(1) a" )
523+ tag_file_url : Optional [Tag ] = next (
524+ (a for a in sub_page .select ("#submission-options a" ) if a .text .strip ().lower () == "download" ),
525+ None
540526 )
527+ tag_thumbnail_url : Optional [Tag ] = sub_page .select_one ("img#submissionImg" )
528+ tag_newer : Optional [Tag ]
529+ tag_older : Optional [Tag ]
530+ if sub_page .select_one ("#minigallery" ):
531+ tags_prev_next : list [Tag ] = sub_page .select ('.minigallery-navigation a[href^="/view/"]' )
532+ tag_newer = next ((t for t in tags_prev_next if "newer" in t .text .strip ().lower ()), None )
533+ tag_older = next ((t for t in tags_prev_next if "older" in t .text .strip ().lower ()), None )
534+ else :
535+ raise NotImplementedError ("Requires minigallery" )
541536
542537 assert tag_id is not None , _raise_exception (ParsingError ("Missing id tag" ))
543538 assert tag_title is not None , _raise_exception (ParsingError ("Missing title tag" ))
@@ -549,6 +544,8 @@ def parse_submission_page(sub_page: BeautifulSoup) -> dict[str, Any]:
549544 assert tag_rating is not None , _raise_exception (ParsingError ("Missing rating tag" ))
550545 assert tag_type is not None , _raise_exception (ParsingError ("Missing type tag" ))
551546 assert tag_fav is not None , _raise_exception (ParsingError ("Missing fav tag" ))
547+ assert tag_category is not None , _raise_exception (ParsingError ("Missing category tag" ))
548+ assert tag_sub_category is not None , _raise_exception (ParsingError ("Missing sub category tag" ))
552549 assert tag_species is not None , _raise_exception (ParsingError ("Missing species tag" ))
553550 assert tag_description is not None , _raise_exception (ParsingError ("Missing description tag" ))
554551 assert tag_folder is not None , _raise_exception (ParsingError ("Missing folder tag" ))
@@ -560,12 +557,7 @@ def parse_submission_page(sub_page: BeautifulSoup) -> dict[str, Any]:
560557 title : str = tag_title .text .strip ()
561558 date : datetime = datetime .fromtimestamp (int (tag_date .attrs ["data-time" ]))
562559 tags : list [str ] = [t .text .strip () for t in tag_tags ]
563- category : str = ""
564- if tag_category1 :
565- category += tag_category1 .text .strip ()
566- if tag_category2 :
567- category += " / " + tag_category2 .text .strip ()
568- category .strip ()
560+ category : str = f"{ tag_category .text .strip ()} / { tag_sub_category .text .strip ()} "
569561 species : str = tag_species .text .strip ()
570562 rating : str = tag_rating .text .strip ()
571563 views : int = int (tag_views .text .strip ())
@@ -587,11 +579,11 @@ def parse_submission_page(sub_page: BeautifulSoup) -> dict[str, Any]:
587579 thumbnail_url = f"{ thumbnail_url .rsplit ('/' , 1 )[0 ]} /{ quote (thumbnail_url .rsplit ('/' , 1 )[1 ])} " \
588580 if thumbnail_url else ""
589581 prev_sub : Optional [int ] = int (
590- get_attr (tag_prev , "href" ).strip ("/" ).split ("/" )[- 1 ]
591- ) if tag_prev else None
582+ get_attr (tag_newer , "href" ).strip ("/" ).split ("/" )[- 1 ]
583+ ) if tag_newer else None
592584 next_sub : Optional [int ] = int (
593- get_attr (tag_next , "href" ).strip ("/" ).split ("/" )[- 1 ]
594- ) if tag_next else None
585+ get_attr (tag_older , "href" ).strip ("/" ).split ("/" )[- 1 ]
586+ ) if tag_older else None
595587 fav_link : Optional [str ] = f"{ root } { href } " if (href := get_attr (tag_fav , "href" )).startswith ("/fav/" ) else None
596588 unfav_link : Optional [str ] = f"{ root } { href } " if (href := get_attr (tag_fav , "href" )).startswith ("/unfav/" ) else None
597589 user_folders : list [tuple [str , str , str ]] = []
0 commit comments