Source code for moddb.boxes

from __future__ import annotations

import collections
import datetime
import logging
import re
import sys
from typing import TYPE_CHECKING, Any, Generic, List, Tuple, TypeVar

from typing_extensions import Self
from bs4 import BeautifulSoup

from .enums import (
    AddonCategory,
    ArticleCategory,
    Genre,
    GroupCategory,
    HardwareCategory,
    Licence,
    Membership,
    PlayerStyle,
    Scope,
    SearchCategory,
    SoftwareCategory,
    Status,
    TeamCategory,
    Theme,
    ThumbnailType,
)
from .utils import (
    BASE_URL,
    LOGGER,
    generate_hash,
    get,
    get_date,
    get_list_stats,
    get_page,
    get_page_type,
    get_siteareaid,
    get_views,
    join,
    normalize,
    time_mapping,
    unroll_number,
)

if TYPE_CHECKING:
    from .pages.article import Article

# Names exported by ``from moddb.boxes import *``.
# NOTE(review): CommentAuthor and ModDBList are defined in this module but are
# not listed here -- presumably intentional, confirm before adding them.
__all__ = [
    "Statistics",
    "Profile",
    "Style",
    "Thumbnail",
    "Comment",
    "MissingComment",
    "MemberProfile",
    "MemberStatistics",
    "PlatformStatistics",
    "PartialArticle",
    "Option",
    "Mirror",
    "ResultList",
    "CommentList",
]


class Statistics:
    """Numbers shown in a page's stats box, lifetime and daily values combined.

    Only the counters that are actually present on the page become attributes.

    Attributes
    ----------
    files : int
        The number of files this page has uploaded
    articles : int
        The number of articles this page has uploaded
    reviews : int
        The number of reviews this page has been given
    watchers : int
        The number of people following this page
    mods : int
        The number of mods this page is related to (only applies to games,
        members and teams)
    addons : int
        The number of addons this page has uploaded
    members : int
        The number of members a group has (only applies to groups and teams)
    visits : int
        The total number of times this page has been viewed
    today : int
        The number of times this page has been viewed today
    rank : int
        The current rank of the page against all other pages of the same type
    total : int
        The maximum rank number
    updated : datetime.datetime
        The last time this page was updated, None when the page does not say
    """

    def __init__(self, html: BeautifulSoup):
        counter_headings = (
            "Files",
            "Articles",
            "Reviews",
            "Watchers",
            "Mods",
            "Addons",
            "Members",
        )
        # One attribute per counter heading found, named after the heading.
        for heading in html.find_all("h5", string=counter_headings):
            setattr(self, heading.string.lower(), int(normalize(heading.parent.a.string)))

        visits_text = normalize(html.find("h5", string="Visits").parent.a.string)
        self.visits, self.today = get_views(visits_text)

        # The rank cell reads "<rank> of <total>".
        rank_parts = normalize(html.find("h5", string="Rank").parent.a.string).split("of")
        self.rank = int(rank_parts[0].replace(",", ""))
        self.total = int(rank_parts[1].replace(",", ""))

        modified = html.find("time", itemprop="dateModified")
        try:
            self.updated = get_date(modified["datetime"])
        except TypeError:
            # No dateModified tag on the page (find returned None).
            self.updated = None

    def __repr__(self):
        return f"<Statistics rank={self.rank}/{self.total}>"
class Profile:
    """The profile box of a page.

    The profile object is used for several models and as such its attributes
    vary based on which model the profile is attached to. Profiles are only
    present on Mod, Game, Member, Addon, Engine, Company, Hardware, Software
    and Group pages.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    -----------
    category : Union[AddonCategory, HardwareCategory, SoftwareCategory, TeamCategory, GroupCategory, SearchCategory]
        The category the page falls under within the context of what the page
        is. E.g. if the page is an Addon, category will be an AddonCategory
        enum. If the category of the page doesn't fall under any of the above
        mentioned, the attribute will be of type SearchCategory.
    contact : str
        The url to contact the page owner
    follow : str
        The url to click to follow the mod
    share : dict
        A dictionary of share links with the place they will be shared as the
        key and the url for sharing as the value. None if the share box could
        not be parsed.
    private : bool
        Exclusive to Group and Team, True if the group is private, else False
    membership : Membership
        Exclusive to Group and Team, represents the join procedure (invitation
        only, private, public)
    icon : str
        Exclusive to Game, Mod and Addon pages. URL of the icon image
    developers : dict
        Exclusive to Game, Mods, Engine and Addon pages. Dictionary of
        member/team like thumbnails as values and the role of the member/team
        as the key (creator, publisher, developer, etc...)
    release : datetime.datetime
        Exclusive to Game, Mods, Engine and Addon pages. Datetime object of
        when the page was released, can be None if the page hasn't seen a
        release yet.
    homepage : str
        Present on all pages but Group pages. URL to the page's homepage. Can
        be None
    engine : Thumbnail
        Exclusive to Game and Addon pages. Engine like thumbnails representing
        the engine the addon/game was built for.
    game : Thumbnail
        Exclusive to Mod pages. Game like thumbnail representing the game the
        mod was built for.
    licence : Licence
        Exclusive to Engine and Addon pages. Object representing the licence
        the engine operates under.
    platforms : List[Thumbnail]
        Exclusive to Game, Engine and Addon pages. List of platform like
        thumbnails representing the platforms the software was built for.
    status : Status
        Exclusive to Games, Mods, Addons, Engines, Hardware. Whether the thing
        is released, unreleased, etc...
    download_count : int
        Total count of all downloads on the page, this adds up downloads of
        all files and addons. Exclusive to mods and games.
    """

    def __init__(self, html: BeautifulSoup):
        # The page name is only used for log messages and the url fallback;
        # it lives in a different tag depending on the page type.
        try:
            _name = html.find("a", itemprop="mainEntityOfPage").string
        except AttributeError:
            try:
                _name = html.find("span", itemprop="headline").string
            except AttributeError:
                _name = html.find("div", class_="title").h2.a.string

        try:
            url = html.find("meta", property="og:url")["content"]
        except TypeError:
            # No og:url meta tag: fall back to the link carrying the page name.
            # (Profile has no ``name`` attribute, the local ``_name`` is used.)
            url = join(html.find("a", string=_name)["href"])

        # The last lowercase path segment of the url names the page type.
        regex = r"\/([a-z]+)\/"
        matches = re.findall(regex, url)
        matches.reverse()
        page_type = SearchCategory[matches[0] if matches[0].endswith("s") else matches[0] + "s"]

        self.category = page_type

        profile_raw = html.find("span", string="Profile").parent.parent.parent.find(
            "div", class_="table tablemenu"
        )
        self.contact = join(html.find("h5", string="Contact").parent.span.a["href"])
        self.follow = join(html.find("a", title="Follow")["href"])

        try:
            share = profile_raw.find("h5", string="Share").parent.span.find_all("a")
            self.share = {
                "reddit": share[0]["href"],
                "mail": share[1]["href"],
                "twitter": share[2]["href"],
                "facebook": share[3]["href"],
            }
        except (AttributeError, IndexError):
            LOGGER.info(
                "Something funky about share box of %s %s",
                page_type.name,
                _name,
                exc_info=LOGGER.level >= logging.DEBUG,
            )
            self.share = None

        if page_type in [SearchCategory.developers, SearchCategory.groups]:
            self.private = (
                profile_raw.find("h5", string="Privacy").parent.span.string.strip() != "Public"
            )

            membership = profile_raw.find("h5", string="Subscription").parent.span.string.strip()
            if membership == "Open to all members":
                self.membership = Membership(3)
            elif membership == "Must apply to join":
                self.membership = Membership(2)
            else:
                self.membership = Membership(1)

        if page_type in [
            SearchCategory.games,
            SearchCategory.mods,
            SearchCategory.addons,
        ]:
            try:
                self.icon = profile_raw.find("h5", string="Icon").parent.span.img["src"]
            except AttributeError:
                self.icon = None
                LOGGER.info(
                    "%s '%s' does not have an icon",
                    page_type,
                    _name,
                    exc_info=LOGGER.level >= logging.DEBUG,
                )

        if page_type in [
            SearchCategory.games,
            SearchCategory.mods,
            SearchCategory.engines,
            SearchCategory.addons,
            SearchCategory.hardwares,
            SearchCategory.softwares,
        ]:
            people = profile_raw.find_all(
                "h5",
                string=[
                    "Developer",
                    "Publisher",
                    "Developer & Publisher",
                    "Creator",
                    "Company",
                ],
            )
            # Only a "Creator" is an individual member; every other role is a team.
            self.developers = {
                x.string.lower(): Thumbnail(
                    url=x.parent.a["href"],
                    name=x.parent.a.string,
                    type=ThumbnailType.team if x.string != "Creator" else ThumbnailType.member,
                )
                for x in people
            }

            try:
                d = profile_raw.find("h5", string="Release date").parent.span.time
                self.release = get_date(d["datetime"])
            except KeyError:
                # The <time> tag exists but has no datetime attribute.
                LOGGER.info(
                    "%s %s has not been released",
                    page_type.name,
                    _name,
                    exc_info=LOGGER.level >= logging.DEBUG,
                )
                self.release = None

            # The visible text of the release date cell doubles as the status.
            if "Coming" in d.string:
                self.status = Status.coming_soon
            elif "Early" in d.string:
                self.status = Status.early_access
            elif "Released" in d.string:
                self.status = Status.released
            else:
                self.status = Status.unreleased

            if page_type != SearchCategory.mods:
                platforms = profile_raw.find("h5", string="Platforms").parent.span.find_all("a")
                self.platforms = [
                    Thumbnail(name=x.string, url=x["href"], type=ThumbnailType.platform)
                    for x in platforms
                ]

        if page_type != SearchCategory.groups:
            try:
                self.homepage = html.find("h5", string="Homepage").parent.span.a["href"]
            except AttributeError:
                self.homepage = None
                LOGGER.info(
                    "%s %s has no homepage",
                    page_type.name,
                    _name,
                    exc_info=LOGGER.level >= logging.DEBUG,
                )

        if page_type in [SearchCategory.games, SearchCategory.addons]:
            engine = profile_raw.find("h5", string="Engine")
            url = engine.parent.span.a["href"]
            name = engine.parent.span.a.string
            self.engine = Thumbnail(url=url, name=name, type=ThumbnailType.engine)

        if page_type == SearchCategory.mods:
            game = profile_raw.find("h5", string="Game")
            url = game.parent.span.a["href"]
            name = game.parent.span.a.string
            self.game = Thumbnail(url=url, name=name, type=ThumbnailType.game)

        # For the enums below the numeric value is the last "=" separated
        # piece of the link's href.
        if page_type in [SearchCategory.engines, SearchCategory.addons]:
            self.licence = Licence(
                int(profile_raw.find("h5", string="Licence").parent.span.a["href"].split("=")[-1])
            )

        if page_type == SearchCategory.hardwares:
            self.category = HardwareCategory(
                int(profile_raw.find("h5", string="Category").parent.span.a["href"].split("=")[-1])
            )

        if page_type == SearchCategory.softwares:
            self.category = SoftwareCategory(
                int(profile_raw.find("h5", string="Category").parent.span.a["href"].split("=")[-1])
            )

        if page_type == SearchCategory.addons:
            self.category = AddonCategory(
                int(profile_raw.find("h5", string="Category").parent.span.a["href"].split("=")[-1])
            )

        if page_type == SearchCategory.developers:
            category = html.find("h3").string.strip().lower()
            try:
                self.category = TeamCategory[category]
            except KeyError:
                # Unknown heading text: fall back to TeamCategory value 7.
                self.category = TeamCategory(7)

        if page_type == SearchCategory.groups:
            category = html.find("h3").string.strip().lower().replace(" & ", "_")
            self.category = GroupCategory[category]

        if page_type in [SearchCategory.games, SearchCategory.mods]:
            try:
                self.download_count = unroll_number(
                    html.find("a", class_="downloadautotoggle").span.string
                )
            except AttributeError:
                self.download_count = 0
                LOGGER.info(
                    "%s %s has no download count",
                    page_type.name,
                    _name,
                    exc_info=LOGGER.level >= logging.DEBUG,
                )

    def __repr__(self):
        return f"<Profile category={self.category.name}>"
class Style:
    """Represents semantic information on the page's theme.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    ----------
    theme : Theme
        fantasy, sci-fi, etc...
    genre : Genre
        fps, rpg, moba, etc...
    players : PlayerStyle
        Singleplayer, multiplayer, etc...
    scope : Scope
        Triple A games or indie. Only set when the page has a "Project" entry.
    boxart : str
        URL of the boxart for the page. Only set when the page has a "Boxart"
        entry.
    """

    def __init__(self, html: BeautifulSoup):
        misc = html.find_all("h5", string=("Theme", "Genre", "Players"))
        # Each heading's link ends with the numeric value of the matching enum.
        styles = {
            style.string.lower(): re.findall(r"(\d*)$", style.parent.a["href"])[0]
            for style in misc
        }

        self.theme = Theme(int(styles["theme"]))
        self.genre = Genre(int(styles["genre"]))
        # Built from the "players" entry (it used to reuse the theme id).
        self.players = PlayerStyle(int(styles["players"]))

        try:
            # The scope id is the last character of the "Project" link.
            self.scope = Scope(int(html.find("h5", string="Project").parent.a["href"][-1]))
        except AttributeError:
            LOGGER.info("Has no scope", exc_info=LOGGER.level >= logging.DEBUG)

        try:
            self.boxart = html.find("h5", string="Boxart").parent.span.a.img["src"]
        except AttributeError:
            LOGGER.info("Has no boxart", exc_info=LOGGER.level >= logging.DEBUG)

    def __repr__(self):
        return (
            f"<Style genre={self.genre.name} theme={self.theme.name} players={str(self.players)}>"
        )
class Thumbnail:
    """Miniature version of a ModDB model.

    A thumbnail carries just enough information to fetch and parse the full
    model it stands for.

    Attributes
    -----------
    url : str
        The url to the full model, mandatory attribute.
    name : str
        The name of the model
    image : str
        The optional thumbnail image of the model
    summary : str
        Optional bit of fluff
    date : datetime.datetime
        A date related to this thumbnail if it exists. Can be None
    type : ThumbnailType
        The type of the resource, mandatory attribute
    """

    def __init__(self, **attrs):
        fetch = attrs.get
        self.url: str = join(fetch("url"))
        self.name: str | None = fetch("name")
        self.image: str | None = fetch("image")
        self.summary: str | None = fetch("summary")
        self.date: datetime.datetime | None = fetch("date")
        self.type: ThumbnailType = fetch("type")

    def __repr__(self):
        return f"<Thumbnail name={self.name} type={self.type.name}>"

    def parse(self) -> Any:
        """Fetch the page behind this thumbnail and parse it as the full model.

        The model class is looked up on the ``moddb`` package under the
        title-cased name of the thumbnail's type.

        Returns
        --------
        Any
            The model that was parsed, can be any model from the list of the
            ThumbnailType enum.
        """
        model_name = self.type.name.title()
        model_cls = getattr(sys.modules["moddb"], model_name)
        return model_cls(get_page(self.url))
def _parse_results(html):
    """Parse a browse/search page into thumbnails plus paging information.

    Returns a 4-tuple ``(results, current_page, total_page, total_results)``.
    When the result box or its row container is missing, an empty result list
    with page 1/1 and a total of 0 is returned.
    """
    result_box = html.find("div", class_="normalbox browsebox")

    try:
        search_raws = (
            result_box.find("div", class_="inner")
            .find("div", class_="table")
            .find_all("div", class_=["rowcontent"])
        )
    except AttributeError:
        # One of the find() calls returned None: nothing to parse.
        return [], 1, 1, 0

    results = []
    try:
        # First attempt: every row is one self-contained result.
        for obj in search_raws:
            date = obj.find("time")
            summary = obj.find("p")

            results.append(
                Thumbnail(
                    name=obj.a["title"],
                    url=obj.a["href"],
                    image=obj.a.img["src"],
                    type=get_page_type(join(obj.a["href"])),
                    summary=summary.string if summary else None,
                    date=get_date(date["datetime"]) if date and date.string != "TBD" else None,
                )
            )
    except (TypeError, KeyError):
        # parse as a title-content pair of articles
        LOGGER.info(
            "Parsing articles as key-value pair list", exc_info=LOGGER.level >= logging.DEBUG
        )
        # Rows alternate: even indexes hold the title row, odd indexes the content row.
        for title, content in zip(search_raws[::2], search_raws[1::2]):
            date = title.find("time")
            url = title.find("h4").a
            results.append(
                Thumbnail(
                    name=url.text,
                    url=url["href"],
                    image=None,
                    type=get_page_type(join(url["href"])),
                    summary=content.text,
                    date=get_date(date["datetime"]) if date and date.string != "TBD" else None,
                )
            )

    current_page, total_page, total_results = get_list_stats(result_box)
    if total_results is None:
        # No total reported: fall back to the number of results parsed here.
        total_results = len(results)

    return results, current_page, total_page, total_results


def _parse_comments(html):
    """Parse the comment box of a page into a tree of comments.

    Top level comments (position 0) are returned in a list; position 1
    comments are attached to the ``children`` of the latest top level comment
    and position 2 comments to the ``children`` of the latest position 1
    comment. A :class:`MissingComment` placeholder is inserted wherever the
    expected parent is absent.

    Returns a 4-tuple ``(comments, current_page, total_page, total_results)``;
    when the page has no comment box this is ``([], 1, 1, 0)``.
    """
    comments = []
    comment_box = html.find("div", id="comments")

    if comment_box is None:
        return [], 1, 1, 0

    current_page, total_page, total_results = get_list_stats(comment_box)
    try:
        url = html.find("meta", property="og:url")["content"]
    except TypeError:
        # No og:url meta tag, use the page's main entity link instead.
        url = join(html.find("a", itemprop="mainEntityOfPage")["href"])

    comments_raw = comment_box.find("div", class_=["tablecomments"]).find_all(
        "div", class_="row", recursive=False
    )

    if total_results is None:
        total_results = len(comments_raw)

    for raw in comments_raw:
        comment = Comment(raw)
        # Remember which page of comments this one was found on.
        comment._url = f"{url}/page/{current_page}"
        if comment.position == 1:
            try:
                comments[-1].children.append(comment)
            except IndexError:
                # No top level comment yet: create a placeholder parent.
                comments.append(MissingComment(0))
                comments[-1].children.append(comment)
        elif comment.position == 2:
            try:
                comments[-1].children[-1].children.append(comment)
            except IndexError:
                # Either the parent or the grandparent is missing; try adding
                # only the parent first, then fall back to adding both.
                try:
                    comments[-1].children.append(MissingComment(1))
                    comments[-1].children[-1].children.append(comment)
                except IndexError:
                    comments.append(MissingComment(0))
                    comments[-1].children.append(MissingComment(1))
                    comments[-1].children[-1].children.append(comment)
        else:
            comments.append(comment)

    return comments, current_page, total_page, total_results


class CommentAuthor(Thumbnail):
    """Represents the thumbnail of a user having left a comment on a page. Functions
    the same as a thumbnail but with an extra attribute.

    Attributes
    -----------
    comment_count : int
        Number of comments the user has posted
    """

    def __init__(self, **attrs):
        super().__init__(**attrs)
        # Defaults to 0 when the caller does not provide a count.
        self.comment_count: int = attrs.get("comment_count", 0)

    def __repr__(self):
        return (
            f"<Thumbnail name={self.name} type={self.type.name} comment_count={self.comment_count}>"
        )
class Comment:
    """A moddb comment object.

    Parameters
    -----------
    html : bs4.Tag
        The html to parse into the object. Must be the exact div of the comment.

    Attributes
    -----------
    id : int
        The ID of the comment
    author : Thumbnail
        A member like thumbnail of the member who posted the comment
    date : datetime.datetime
        Date and time of the comment creation
    position : int
        Ranging from 0-2, represents the nested level of the comment.
    children : List[Comment]
        Comment objects replying directly to this one. Empty when the comment
        is parsed on its own; it is only populated if originating from a
        CommentList
    content : str
        Text of the comment, can be None if the comment only contains embeds
    embeds : list
        List of urls that have been embedded
    karma : int
        The current karma count
    upvote : str
        Link to upvote the comment
    downvote : str
        Link to downvote the comment
    approved : bool
        Whether or not the comment is still waiting for admin approval and is
        visible to the guest members
    developer : bool
        Whether or not the comment was posted by one of the page creators
    staff : bool
        Whether or not the comment was posted by one of moddb's staff members
    subscriber : bool
        Whether or not the comment was posted by a moddb subscriber
    guest : bool
        Whether or not the comment was posted by a guest user
    location : Thumbnail
        Thumbnail of the place the comment is, only available when getting
        comments from get_member_comments. This thumbnail does not guarantee
        that you will find the comment if you parse it, since the url does not
        contain the page number.
    """

    def __init__(self, html: BeautifulSoup):
        author = html.find("a", class_="avatar")
        self.id = int(html["id"])

        # The heading ends with "- <n> comments"; keep only the number.
        comment_count = int(
            html.find("span", class_="heading")
            .text.strip()
            .split("-")[-1]
            .replace("comments", "")
            .replace(",", "")
        )
        self.author = CommentAuthor(
            name=author["title"],
            url=author["href"],
            image=author.img["src"],
            type=ThumbnailType.member,
            comment_count=comment_count,
        )
        self.date = get_date(html.find("time")["datetime"])
        actions = html.find("span", class_="actions")
        # Used by is_stale to decide when the object can no longer be trusted.
        self._fetch_time = datetime.datetime.utcnow()

        position = html["class"]
        if "reply1" in position:
            self.position = 1
        elif "reply2" in position:
            self.position = 2
        else:
            self.position = 0

        self.children = []

        try:
            # Replace the text of every link with its target so that the url
            # survives the conversion to plain text.
            links = html.find("div", class_="comment").find_all("a")
            for link in links:
                link.string = link["href"]

            self.content = html.find("div", class_="comment").text
        except AttributeError:
            LOGGER.info(
                "Comment %s by %s has no content, likely embed",
                self.id,
                self.author.name,
                exc_info=LOGGER.level >= logging.DEBUG,
            )
            self.content = None

        # Defaults so every attribute exists whichever branch below is taken.
        self.karma = 0
        self.upvote = None
        self.downvote = None
        try:
            karma = actions.span.string
            # Capture the whole (optionally signed, comma grouped) number,
            # not just its first digit.
            self.karma = int(re.findall(r"[+-]?\d[\d,]*", karma)[0].replace(",", ""))
            self.upvote = join(actions.find_all("a")[1]["href"])
            self.downvote = join(actions.find_all("a")[2]["href"])
            self.approved = True
        except AttributeError:
            # No actions box: the comment is still awaiting approval.
            self.karma = 0
            self.upvote = None
            self.downvote = None
            self.approved = False
        except IndexError:
            # Some of the vote links are missing; the comment is still approved.
            self.downvote = None
            self.approved = True

        self.developer = bool(html.find("span", class_="developer"))
        self.staff = bool(html.find("span", class_="staff"))
        self.subscriber = bool(html.find("span", class_="subscriber"))
        self.guest = self.author.name.lower() == "guest"

        self.embeds = [x["src"] for x in html.find_all("iframe")]

        self.location = html.find("a", class_="related")
        if self.location is not None:
            url = join(self.location["href"])
            page_type = get_page_type(url)
            self.location = Thumbnail(name=self.location.string, url=url, type=page_type)

        try:
            # The token is the last "=" separated piece of the (un)delete link.
            self._hash = html.find("a", title=("Delete", "Undelete"))["href"].split("=")[-1]
        except TypeError:
            self._hash = None

    def is_stale(self):
        """Tell whether this comment object is too old to be trusted.

        Comments are very volatile. If they are pushed onto another page by
        other comments it becomes impossible to use objects with the previous
        page number. In addition, calculating the new page number is not
        possible. Pages do not have a defined size but rather grow and shrink
        based on sizes of individual comments. Finally, comments also have
        tokens that can be used to modify them. These tokens have a hard life
        of 30 minutes from the time of the request, which is what this method
        checks.

        Returns
        --------
        bool
            True, the comment is stale and you should fetch a new version,
            False you **should** be good to continue using it.
        """
        expires_at = self._fetch_time + datetime.timedelta(minutes=30)
        return expires_at < datetime.datetime.utcnow()

    def __repr__(self):
        return (
            f"<Comment author={self.author.name} position={self.position} approved={self.approved}>"
        )
class MissingComment:
    """Placeholder standing in for a comment that is no longer there.

    It is used when a parent comment with children has been deleted, so that
    the children remain reachable. It exposes the same attributes as a
    :class:`.Comment`, all of them None, False, 0 or empty, except for
    ``position`` and ``children`` which hold the nesting level and the
    replies of the deleted comment.
    """

    def __init__(self, position):
        # The only meaningful data: nesting level and attached replies.
        self.position = position
        self.children = []

        # Everything else mirrors Comment with neutral values.
        self.id = self.author = self.date = self.content = None
        self.upvote = self.downvote = self.location = None
        self.karma = 0
        self.approved = self.developer = self.staff = False
        self.subscriber = self.guest = False
        self.embeds = []

    def __repr__(self):
        return f"<MissingComment position={self.position}>"

    def is_stale(self):
        return True
class MemberProfile:
    """Member profiles are separate entities because they share nothing with
    the other profile boxes. Whereas all other profile boxes share at least 4
    attributes, a member shares none.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    -----------
    name : str
        Name of the member
    level : int
        Current level
    progress : float
        Progress to next level as a fraction (e.g. 0.45 for 45%)
    title : str
        Member title
    avatar : str
        Url of the member avatar
    online : bool
        Whether or not the member is currently online
    last_online : datetime.datetime
        None if the member is currently online, datetime the user was last
        seen online
    gender : str
        Gender of the member, can be None
    homepage : str
        URL of the member's homepage, can be None
    country : str
        The member's chosen country, can be None
    follow : str
        Link to follow a member, can be None
    """

    def __init__(self, html: BeautifulSoup):
        profile_raw = html.find("span", string="Profile").parent.parent.parent.find(
            "div", class_="table tablemenu"
        )
        level_raw = profile_raw.find("h5", string="Level").parent.span.div
        level_info = level_raw.find("span", class_="info")

        self.name = html.find("meta", property="og:title")["content"]
        self.level = int(level_raw.find("span", class_="level").string)
        # Divide instead of gluing "0." in front of the digits, which turned
        # "5%" into 0.5 and "100%" into 0.1.
        self.progress = float(level_info.strong.string.replace("%", "")) / 100
        self.title = level_info.a.string
        self.avatar = profile_raw.find("div", class_="avatarinfo").img["src"]

        # A "Status" heading marks the member as online; otherwise a
        # "Last Online" heading carries the time they were last seen.
        self.online = bool(profile_raw.find("h5", string="Status"))
        last_online = profile_raw.find("h5", string="Last Online")
        self.last_online = (
            get_date(last_online.parent.span.time["datetime"]) if last_online else None
        )

        try:
            self.gender = profile_raw.find("h5", string="Gender").parent.span.string.strip()
        except AttributeError:
            LOGGER.info(
                "Member %s has not publicized their gender",
                self.name,
                exc_info=LOGGER.level >= logging.DEBUG,
            )
            self.gender = None

        try:
            self.homepage = html.find("h5", string="Homepage").parent.span.a["href"]
        except AttributeError:
            self.homepage = None
            LOGGER.info(
                "Member %s has no homepage", self.name, exc_info=LOGGER.level >= logging.DEBUG
            )

        try:
            self.country = profile_raw.find("h5", string="Country").parent.span.string.strip()
        except AttributeError:
            self.country = None
            LOGGER.info(
                "Member %s country is not visible (happens when not logged in)",
                self.name,
                exc_info=LOGGER.level >= logging.DEBUG,
            )

        try:
            self.follow = join(html.find("a", title="Follow")["href"])
        except TypeError:
            # No follow link on the page (find returned None).
            LOGGER.info(
                "Can't watch yourself, narcissist...", exc_info=LOGGER.level >= logging.DEBUG
            )
            self.follow = None

    def __repr__(self):
        return f"<MemberProfile name={self.name}>"
class MemberStatistics:
    """Similarly, a member's statistics share no common ground with other
    stats and therefore there was a need for a separate object.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    -----------
    watchers : int
        How many members are following this member
    activity_points : int
        Activity points
    comments : int
        How many comments the member has made
    tags : int
        How many tags the member has created
    visits : int
        How many people have viewed this page
    site_visits : int
        How many times this user has visited the site
    today : int
        How many people have viewed this page today
    time : int
        How many seconds the member has spent online
    rank : int
        The member's current rank (compared to other members), 0 if unranked
    total : int
        The maximum rank, 0 if unranked
    """

    def __init__(self, html: BeautifulSoup):
        def cell_text(parent):
            # The value sits in a link when there is one, else in a span.
            return parent.a.string.strip() if parent.a else parent.span.string.strip()

        name = html.find("meta", property="og:title")["content"]

        misc = html.find_all(
            "h5",
            string=("Watchers", "Activity Points", "Comments", "Tags", "Site visits"),
        )
        # One attribute per heading found, e.g. "Site visits" -> site_visits.
        self.__dict__.update(
            {
                stat.string.lower().replace(" ", "_"): int(normalize(cell_text(stat.parent)))
                for stat in misc
            }
        )

        visits = normalize(html.find("h5", string="Visitors").parent.a.string)
        self.visits, self.today = get_views(visits)

        # The cell reads "<amount> <unit>"; only the trailing plural "s" is
        # dropped from the unit so that "seconds" is not mangled.
        time, mapping = html.find("h5", string="Time Online").parent.span.string.strip().split(" ")
        self.time = time_mapping[mapping.rstrip("s")] * int(time)

        try:
            rank = normalize(html.find("h5", string="Rank").parent.span.string).split("of")
            self.rank = int(rank[0].replace(",", ""))
            self.total = int(rank[1].replace(",", ""))
        except AttributeError:
            self.rank = 0
            self.total = 0
            LOGGER.info("Member %s has no rank", name, exc_info=LOGGER.level >= logging.DEBUG)

    def __repr__(self):
        return f"<MemberStatistics rank={self.rank}/{self.total}>"
class PlatformStatistics:
    """Stats for platform pages.

    Only the counters actually present on the page become attributes.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    -----------
    hardware : int
        Number of hardware created for this platform
    software : int
        Number of software created for this platform
    engines : int
        Number of engines created for this platform
    games : int
        Number of games created for this platform
    mods : int
        Number of mods created for this platform
    """

    def __init__(self, html: BeautifulSoup):
        headings = ("Hardware", "Software", "Engines", "Games", "Mods")
        # Name each attribute after the heading's own text rather than its
        # position among the matches, so a missing heading cannot shift the
        # labels of the ones that follow.
        self.__dict__.update(
            {
                heading.string.lower(): int(normalize(heading.parent.span.a.string))
                for heading in html.find_all("h5", string=headings)
            }
        )

    def __repr__(self):
        return "<PlatformStatistics>"
class PartialArticle:
    """An article as it appears on the front page.

    Being parsed from the front page instead of from the article page itself,
    it is missing some attributes of a full article. It is good enough for a
    preview; when the complete article (comments, profile, etc...) is needed,
    fetch it with :meth:`get_article`.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Attributes
    -----------
    name : str
        Name of the article
    url : str
        Link to the article
    date : datetime.datetime
        Date the article was published
    type : ArticleCategory
        Type of the article
    content : str
        html of the article content
    plaintext : str
        plaintext of the article content (without html)
    """

    def __init__(self, html: BeautifulSoup):
        header = html.find("div", class_="row rowcontent rownoimage clear")
        title_link = header.h4.a

        self.name = title_link.string
        self.url = join(title_link["href"])
        self.date = get_date(header.find("time")["datetime"])

        # The first word of the subheading names the article category.
        subheading = header.find("span", class_="subheading").text
        category_key = subheading.strip().split(" ")[0].lower()
        try:
            self.type = ArticleCategory[category_key]
        except KeyError:
            # Unknown category names are treated as news.
            self.type = ArticleCategory.news

        body = html.find("div", class_="row rowcontent rowcontentnext clear")
        self.content = str(body)
        self.plaintext = body.text

    def __repr__(self):
        return f"<PartialArticle title={self.name}>"

    def get_article(self) -> Article:
        """Fetch the article page and parse it into a full article object.

        Returns
        --------
        Article
            The complete article object
        """
        # Imported here rather than at module level.
        from .pages.article import Article

        page = get_page(self.url)
        return Article(page)
class Option:
    """One of the choices of a poll.

    Options should not be created manually, prefer relying on the Poll they
    are attached to.

    Attributes
    -----------
    id : int
        The id of the option, can be None and will be None in most cases.
    text : str
        The option's text
    votes : int
        The number of votes that have been cast on this option
    percent : int
        The percent of all votes that have been cast on this option
    """

    def __init__(self, **kwargs):
        # Every field is optional and defaults to None when not supplied.
        fetch = kwargs.get
        self.id = fetch("id")
        self.text = fetch("text")
        self.votes = fetch("votes")
        self.percent = fetch("percent")

    def __repr__(self):
        return f"<Option text={self.text}>"
T = TypeVar("T")


class ModDBList(collections.abc.MutableSequence[T], Generic[T]):
    """Base List type for the lib.

    Wraps one page of results and knows how to request the other pages of the
    same query. Subclasses provide ``_parse_method`` to turn a fetched page
    into results.

    Attributes
    -----------
    current_page : int
        The page of results this object represents
    total_pages : int
        The total amount of result pages available
    total_results : int
        The total amount of results available
    """

    def __init__(self, **kwargs):
        self._results: List[T] = kwargs.pop("results")
        self._params: dict = kwargs.pop("params", {})
        self._url = kwargs.pop("url")
        self.total_pages = kwargs.pop("total_pages")
        self.current_page = kwargs.pop("current_page")
        self.total_results = kwargs.pop("total_results")

    def _parse_method(self, html: BeautifulSoup):
        """Return ``(results, current_page, total_pages, total_results)`` for a page."""
        raise NotImplementedError

    def _do_request(self, **kwargs):
        """Fetch a page of the same query and wrap it in a new list object.

        ``page`` selects the page (defaults to the current one); every other
        keyword overrides or extends the stored query parameters.
        """
        page = kwargs.pop("page", self.current_page)
        params = {**self._params, **kwargs}
        html = get_page(f"{self._url}/page/{page}", params=params)
        results, current_page, total_pages, total_results = self._parse_method(html)

        return self.__class__(
            results=results,
            params=params,
            url=self._url,
            total_pages=total_pages,
            current_page=current_page,
            total_results=total_results,
        )

    def next_page(self) -> Self:
        """Returns the next page of results as either a CommentList if you
        are retrieving comments or as a ResultList if it's literally anything
        else.

        Returns
        --------
        Self
            The new search object containing a new set of results.

        Raises
        -------
        ValueError
            There is no next page
        """
        if self.current_page == self.total_pages:
            raise ValueError("Reached last page already")

        return self.to_page(self.current_page + 1)

    def previous_page(self) -> Self:
        """Returns the previous page of results as either a CommentList if
        you are retrieving comments or as a ResultList if it's literally
        anything else.

        Returns
        --------
        Self
            The new list-like object of results.

        Raises
        -------
        ValueError
            There is no previous page
        """
        if self.current_page == 1:
            raise ValueError("Reached first page already")

        return self.to_page(self.current_page - 1)

    def to_page(self, page: int) -> Self:
        """Returns the desired page of results as either a CommentList if
        you are retrieving comments or as a ResultList if it's literally
        anything else.

        Parameters
        -----------
        page : int
            A page number within the range 1 - max_page inclusive

        Returns
        --------
        Self
            The new list-like object of results.

        Raises
        -------
        ValueError
            This page does not exist
        """
        if page < 1 or page > self.total_pages:
            raise ValueError(f"Please pick a page between 1 and {self.total_pages}")

        return self._do_request(page=page)

    def get_all_results(self) -> Self:
        """An expensive method that iterates over every page of the result
        query and returns all the results. This may return more results than
        you expected if new pages have fit the criteria while iterating.

        Results seen on more than one page are only kept once: comments are
        identified by their id, other results by their name.

        Returns
        --------
        Union[CommentList[Any], ResultList[Any]]
            The list of things you were searching for
        """
        search = self.to_page(1)
        results = list(search)

        while True:
            try:
                search = search.next_page()
            except ValueError:
                # next_page raises once the last page has been reached.
                break
            else:
                results.extend(search)
                LOGGER.info("Parsed page %s/%s", search.current_page, search.total_pages)

        def key(element):
            if isinstance(element, Comment):
                return element.id
            if isinstance(element, MissingComment):
                # Placeholders have neither id nor name; give each one its own
                # key so that they are all kept instead of raising.
                return ("missing", id(element))
            return element.name

        search._results = list({key(e): e for e in results}.values())

        return search

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} pages={self.current_page}/{self.total_pages}, results={self._results}>"

    def __getitem__(self, index: int) -> T:
        return self._results.__getitem__(index)

    def __delitem__(self, index: int):
        self._results.__delitem__(index)

    def __len__(self) -> int:
        return self._results.__len__()

    def __setitem__(self, key: int, value: T):
        self._results.__setitem__(key, value)

    def insert(self, index: int, value: T):
        self._results.insert(index, value)
class ResultList(ModDBList[Thumbnail]):
    """List of results returned by one of the library's many get methods.

    It is returned instead of a regular list because it carries additional
    methods to walk through all the results. Just like on the moddb site,
    where you click a button instead of re-running the query to see the next
    page, you do not have to call the base get method again here: use one of
    the methods of this object to traverse the results.

    This emulates a list and will behave like one, so you can use any of the
    regular list operators in addition to the methods defined below.

    Attributes
    -----------
    current_page : int
        The page of results this object represents
    total_pages : int
        The total amount of result pages available
    total_results : int
        The total amount of results available
    """

    def _parse_method(self, html: BeautifulSoup):
        return _parse_results(html)

    def resort(self, new_sort: Tuple[str, str]) -> Self:
        """Run the same search again with a different sort order.

        Parameters
        -----------
        new_sort : Tuple[str, str]
            The new sorting tuple to sort by

        Returns
        -------
        ResultList
            The new set of results with the updated sort order
        """
        # The two parts of the tuple are sent joined by a dash.
        sort_value = f"{new_sort[0]}-{new_sort[1]}"
        return self._do_request(sort=sort_value)

    def __contains__(self, element: Thumbnail) -> bool:
        # A thumbnail is considered present when a result has the same name.
        match = get(self._results, name=element.name)
        return match is not None
class CommentList(ModDBList[Comment]):
    """Represents a list of comments. This emulates a list and will behave like one, so you can use any
    of the regular list operators in addition to the methods defined below.

    Attributes
    -----------
    current_page : int
        The page of results this object represents
    total_pages : int
        The total amount of result pages available
    total_results : int
        The total amount of results available
    """

    def _parse_method(self, html: BeautifulSoup):
        """Parse a page of comments by delegating to ``_parse_comments``."""
        return _parse_comments(html)

    def __contains__(self, element: Comment) -> bool:
        """Report whether a stored comment has the same ``id`` as ``element``."""
        # Comments are matched on ``id``, the same key ``get_all_results``
        # uses to de-duplicate them; the previous lookup used ``name``.
        return get(self._results, id=element.id) is not None

    def flatten(self) -> List[Comment]:
        """Returns a 'flattened' list of comments where the children of a
        comment are added right after their parent, at any nesting depth, so::

            Comment1
              |- Comment2
              |    |- Comment3
              |    `- Comment4
              `- Comment5

        would become::

            [Comment1, Comment2, Comment3, Comment4, Comment5]

        Returns
        --------
        List[Comment]
            The flattened list of comments
        """
        flat: List[Comment] = []
        # Explicit stack for a pre-order walk; items are pushed reversed so
        # that popping yields them in their original order.
        pending = list(reversed(self._results))
        while pending:
            comment = pending.pop()
            flat.append(comment)
            # getattr keeps leaves that carry no ``children`` attribute working.
            pending.extend(reversed(getattr(comment, "children", ())))

        return flat
class Mirror:
    """A download mirror from which the user can download a file.

    Attributes
    -----------
    name : str
        The name of the mirror
    index : int
        The index of the mirror, as multiple mirrors have the same name.
        Index starts at 1
    city : str
        Alpha 2 code, or full name, of the city the server is located in.
        Sometimes represents a country.
    country : str
        Alpha 2 code for the country the server is located in. Sometimes
        represents a continent.
    served : int
        How many downloads of this file this mirror has served
    capacity : float
        The current capacity of this server as a percentage. E.g. 35.5 -> 35.5%.
        Lower is better for speed.
    """

    def __init__(self, **kwargs):
        # Each public attribute is read from the keyword of the same name;
        # a keyword that was not supplied leaves the attribute as None.
        for field in ("name", "index", "city", "country", "served", "capacity"):
            setattr(self, field, kwargs.get(field))

        # The url is kept private, under a different name than its keyword.
        self._url = kwargs.get("url")

    def __repr__(self):
        name, index = self.name, self.index
        return f"<Mirror name={name} index={index} >"
# Lightweight immutable record for a tag: display name, name id and url.
PartialTag = collections.namedtuple("PartialTag", ["name", "name_id", "url"])
class Tag:
    """Represents a tag, useful to vote on stuff

    Attributes
    -----------
    id : Optional[int]
        ID of the tag. None if obtained from parsing a page
    name_id : str
        Name id of the tag
    name : str
        Name of the tag
    date : Optional[datetime.datetime]
        Creation date of the tag. None if obtained from parsing a page
    official : Optional[bool]
        Whether the tag is official or user created. None if obtained from
        parsing a page
    sitearea : int
        Site area
    siteareaid : int
        Site area id
    positive : int
        Number of positive votes
    negative : int
        Number of negative votes
    rank : Optional[int]
        Rank of the tag in trending. None if obtained from parsing a page
    url : str
        Url to the tag
    """

    def __init__(self, **kwargs):
        # The fields documented as Optional may be missing (or None) without
        # raising; every other field is still required, as before.
        self.id = self._optional_int(kwargs.pop("id", None))

        timestamp = self._optional_int(kwargs.pop("date", None))
        self.date = (
            None if timestamp is None else datetime.datetime.fromtimestamp(timestamp)
        )

        # The flag is compared against the string "1" when it is supplied.
        official = kwargs.pop("official", None)
        self.official = None if official is None else official == "1"

        self.sitearea = int(kwargs.pop("sitearea"))
        self.siteareaid = int(kwargs.pop("siteareaid"))
        self.positive = int(kwargs.pop("positive"))
        self.negative = int(kwargs.pop("negative"))
        self.name_id = kwargs.pop("tagid")
        self.name = kwargs.pop("tag")
        self.rank = self._optional_int(kwargs.pop("trending", None))
        self.url = f"{BASE_URL}/tags/{self.name_id}"

    @staticmethod
    def _optional_int(value):
        """Convert ``value`` with ``int``, passing None through untouched."""
        return None if value is None else int(value)

    def __repr__(self) -> str:
        return f"< Tag id={self.id} name_id={self.name_id} >"

    def _get_members(self):
        """Get a list of the members that have voted for this tag

        Returns
        ---------
        List[Thumbnail]
            List of member typed thumbnail
        """
        LOGGER.warning("_get_members is undocumented and unreliable for the time being")
        params = {
            "ajax": "t",
            "tag": self.name_id,
            "sitearea": get_siteareaid(self.sitearea),
            "siteareaid": self.siteareaid,
            "hash": generate_hash(),
        }

        resp = get_page(f"{BASE_URL}/tags/ajax/who", params=params)

        # One member thumbnail per link found inside the success box.
        return [
            Thumbnail(url=join(thumb["href"]), name=thumb.string, type=ThumbnailType.member)
            for thumb in resp.find("div", class_="successboxachtung").find_all("a")
        ]