Source code for moddb.pages.file

import datetime
import re
import sys

import bs4
import requests

from ..boxes import Mirror, Thumbnail
from ..enums import AddonCategory, FileCategory, MediaCategory, ThumbnailType
from ..utils import (
    BASE_URL,
    concat_docs,
    get_date,
    get_page,
    get_views,
    join,
    prepare_request,
    raise_for_status,
)
from .base import BaseMetaClass


# NOTE(review): concat_docs presumably post-processes this docstring; keep the
# numpy-style section headings (and "Attributes" as the last section) intact
# until that is confirmed against ..utils.
@concat_docs
class File(BaseMetaClass):
    """An object representing a file on ModDB. A file is something posted by the page owner
    which is directly linked to the page. It is endorsed by the page owner and they should do
    everything they can to make sure that it is safe. This is in contrast to an addon, which
    may be added by fans to the page: addons are files meant to work with the page but are not
    directly related to it. E.g. the file of a mod page would be the mod files used to install
    the mod, whereas an addon could be something like a fan-made texture pack for the mod or
    a map.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Filtering
    ----------
    category : :class:`.FileCategory`
        The type of file (audio, video, demo, full version....)
    categoryaddon : :class:`.AddonCategory`
        The type of addon (map, textures, etc...)
    game : Union[:class:`.Game`, :class:`.Object`]
        A game object or an object with an id attribute which represents the
        game the file belongs to.
    timeframe : :class:`.TimeFrame`
        The time period this was released in (last 24hr, last week, last month)

    Sorting
    --------
        * **released** - when the object was released, asc is oldest, desc is most recent
        * **id** - when it was added to moddb, asc is oldest, desc is most recent
        * **ranktoday** - order by daily ranking, asc is highest ranked, desc is lowest rank
        * **visitstotal** - order by most views, asc is highest views, desc is lowest views
        * **rating** - order by rating, asc is highest rating, desc is lowest rating
        * **name** - order alphabetically, asc is a-z, desc is z-a
        * **date** - order by upload date, asc is most recent first, desc is oldest first

    Attributes
    -----------
    filename : str
        The name of the file
    hash : str
        The MD5 hash of the file
    name : str
        The name of the page
    size : int
        The file size in bytes
    today : int
        The number of downloads today
    downloads : int
        The total number of times this file has been downloaded
    category : Union[FileCategory, AddonCategory]
        The category of the file. An AddonCategory is used when the category id
        is not a valid FileCategory.
    author : Thumbnail
        A member type thumbnail of the member who uploaded the file
    date : datetime.datetime
        The date the file was uploaded
    button : str
        html code for the embed button
    widget : str
        html code for the embed widget
    description : str
        Description of the file, as written by the author
    preview : str
        URL of the preview image for the file
    """

    def __init__(self, html: bs4.BeautifulSoup):
        # A deleted file still has a page, but with a "File Deleted" heading.
        if html.find("span", string="File Deleted", class_="heading"):
            raise ValueError("This file has been removed")

        info = html.find("div", class_="table tablemenu")
        # Map the lowercased row label ("filename", "size", "md5 hash") to its text.
        file = {
            x.string.lower(): x.parent.span.string.strip()
            for x in info.find_all("h5", string=("Filename", "Size", "MD5 Hash"))
        }

        # name and filename are assigned before the base initialiser runs,
        # exactly as in the original ordering.
        self.name = (
            html.find("a", title="Report").parent.parent.find("span", class_="heading").string
        )
        self.filename = file["filename"]

        super().__init__(html)

        self.hash = file["md5 hash"]
        # Second space-separated token of the size field, stripped of the
        # characters "(", ")", ",", and the letters of "bytes".
        self.size = int(re.sub(r"[(),bytes]", "", file["size"].split(" ")[1]))

        # First token is the total count, second is the "(N today)" part.
        downloads = html.find("h5", string="Downloads").parent.a.string
        self.today = int(re.sub(r"[(),today]", "", downloads.split(" ")[1]))
        self.downloads = int(downloads.split(" ")[0].replace(",", ""))

        # The category id is the value after the last "=" in the category link.
        # Parse it once, then try the file enum before falling back to the
        # addon enum.
        category_id = int(info.find("h5", string="Category").parent.a["href"].split("=")[-1])
        try:
            self.category = FileCategory(category_id)
        except ValueError:
            self.category = AddonCategory(category_id)

        uploader = info.find("h5", string="Uploader").parent.a
        self.author = Thumbnail(
            url=uploader["href"], name=uploader.string, type=ThumbnailType.member
        )

        self.date = get_date(info.find("h5", string="Added").parent.span.time["datetime"])
        self.button = info.find("h5", string="Embed Button").parent.span.input["value"]
        self.widget = info.find("h5", string="Embed Widget").parent.span.input["value"]

        self.description = html.find("p", id="downloadsummary").string

        # The first <img> carrying a src attribute is used as the preview.
        self.preview = html.find_all("img", src=True)[0]["src"]

    def __repr__(self):
        return f"<{self.__class__.__name__} name={self.name} type={self.category.name}>"

    def save(self, file_obj, *, mirror=None):
        """Save the file to an object. This function makes two requests. If you pass
        a valid mirror it will make only one request.

        Parameters
        -----------
        file_obj : typing.BinaryIO
            The file obj to save the file to. The binary data
            will be streamed to that object.
        mirror : Optional[Mirror]
            An optional mirror object to download the file from
            a specific moddb mirror
        """
        if mirror is None:
            # No mirror given: fetch the download start page to find the link.
            download = get_page(f"{BASE_URL}/downloads/start/{self.id}")
            url = download.find("a", string=f"download {self.filename}")["href"]
        else:
            url = mirror._url

        SESSION = sys.modules["moddb"].SESSION
        prepped = prepare_request(requests.Request("GET", join(url)), SESSION)
        # Stream the response in chunks instead of loading it all at once.
        with SESSION.send(prepped, stream=True) as r:
            raise_for_status(r)
            for chunk in r.iter_content(chunk_size=8192):
                file_obj.write(chunk)

    def get_mirrors(self):
        """Get all the mirrors from which a file can be downloaded. This can then be
        passed to File.save to download from a specific mirror.

        Returns
        --------
        List[Mirror]
            A list of Mirror objects
        """
        # Built from BASE_URL so it matches the endpoint used by save().
        html = get_page(f"{BASE_URL}/downloads/start/{self.id}/all")
        mirrors_div = html.find("div", class_="mirrors").find_all("div", recursive=False)

        # Label format: "<name> #<index> (<city>, <country>)"
        mirror_pattern = re.compile(r"(.*) #([0-9]*) \((\w+), (\w+)\)")
        # Stats format: "<served> download(s) served, <capacity>% capacity"
        stats_pattern = re.compile(r"([0-9,]*) downloads? served, ([0-9.]*)% capacity")

        mirrors = []
        for mirror in mirrors_div:
            mirror_match = mirror_pattern.match(mirror.div.p.contents[-1].strip())
            stats_match = stats_pattern.match(mirror.div.span.string)

            mirrors.append(
                Mirror(
                    name=mirror_match.group(1),
                    index=int(mirror_match.group(2)),
                    city=mirror_match.group(3),
                    country=mirror_match.group(4),
                    served=int(stats_match.group(1).replace(",", "")),
                    capacity=float(stats_match.group(2)),
                    url=mirror.div.p.a["href"],
                )
            )

        return mirrors
@concat_docs
class Addon(File):
    """Object representing an addon. The only apparent difference between an addon and a
    file is semantic. A file usually represents something official released by the page,
    e.g. the mod installation or an official guide, whereas addons are often fan made and
    might not be directly endorsed by the page owners even if they are allowed. They
    literally add on to the page's content without becoming part of it. There is a slight
    difference in their profiles but nothing beyond that.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Filtering
    ----------
    categoryaddon : :class:`.AddonCategory`
        The type of addon (map, textures, etc...)
    licence : :class:`.Licence`
        The licence of the addon
    game : Union[:class:`.Game`, :class:`.Object`]
        A game object or an object with an id attribute which represents the
        game the addon belongs to.
    timeframe : :class:`.TimeFrame`
        The time period this was released in (last 24hr, last week, last month)

    Sorting
    --------
        * **released** - when the object was released, asc is oldest, desc is most recent
        * **id** - when it was added to moddb, asc is oldest, desc is most recent
        * **ranktoday** - order by daily ranking, asc is highest ranked, desc is lowest rank
        * **visitstotal** - order by most views, asc is highest views, desc is lowest views
        * **rating** - order by rating, asc is highest rating, desc is lowest rating
        * **name** - order alphabetically, asc is a-z, desc is z-a
        * **licence** - order based on licence
        * **date** - order by upload date, asc is most recent first, desc is oldest first
    """
# NOTE(review): concat_docs presumably post-processes this docstring; keep the
# numpy-style section headings (and "Attributes" as the last section) intact
# until that is confirmed against ..utils.
@concat_docs
class Media(BaseMetaClass):
    """Represents an image, audio file or video file on ModDB.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The html to parse. Allows for finer control.

    Filtering
    -----------
    sitearea : :class:`.Category`
        The type of model the media belongs to. Category.downloads is not valid for this.

    Sorting
    --------
        * **ranktoday** - order by daily ranking, asc is highest ranked, desc is lowest rank
        * **visitstotal** - order by most views, asc is highest views, desc is lowest views
        * **name** - order alphabetically, asc is a-z, desc is z-a
        * **id** - order by upload date, asc is most recent first, desc is oldest first

        Exclusive to videos and audios

        * **duration** - order by duration, asc is shortest to longest, desc is longest first

    Attributes
    -----------
    date : datetime.datetime
        The date the media was uploaded
    name : str
        The name of the media
    author : Thumbnail
        Member type thumbnail of the media uploader
    duration : Union[datetime.timedelta, int]
        Duration of the media as a timedelta, or the integer 0 when the page
        lists no duration (e.g. an image)
    size : Tuple[str, ...]
        The "Size" field of the page split on "×" (presumably width and height,
        to be confirmed). Only set when the page lists a size.
    views : int
        Total amount of views
    today : int
        Amount of views today
    filename : str
        The name of the file for the media
    fileurl : str
        The url of the file for the media
    category : MediaCategory
        Whether the media is an image, a video or an audio
    description : str
        The description of the file as given by the file uploader.
    """

    def __init__(self, html: bs4.BeautifulSoup):
        # html.find returns None when the <meta itemprop="name"> tag is absent,
        # and subscripting None raises TypeError; fall back to the image title.
        try:
            self.name = html.find("meta", itemprop="name")["content"]
        except TypeError:
            self.name = html.find("img", id="mediaimage")["title"]

        super().__init__(html)

        # Map each lowercased row label to the element containing its value.
        medias = html.find_all("h5", string=("Date", "By", "Duration", "Size", "Views", "Filename"))
        raw_media = {media.string.lower(): media.parent for media in medias}

        self.date = get_date(raw_media["date"].span.time["datetime"])

        author = raw_media["by"].span.a
        self.author = Thumbnail(
            url=author["href"], name=author.string.strip(), type=ThumbnailType.member
        )

        if "duration" in raw_media:
            # The duration is colon separated with the least significant unit
            # last. Pair components with units by position: looking a component
            # up with list.index() returns the first match only, so repeated
            # values such as "05:05" would collapse onto a single unit.
            parts = raw_media["duration"].span.time.string.strip().split(":")
            units = ("seconds", "minutes", "hours")
            self.duration = datetime.timedelta(
                **{units[position]: int(part) for position, part in enumerate(reversed(parts))}
            )
        else:
            self.duration = 0

        if "size" in raw_media:
            self.size = tuple(raw_media["size"].span.string.strip().split("×"))

        self.views, self.today = get_views(raw_media["views"].a.string)

        # No <video id="mediaplayer"> element means the media is an image.
        media_player = html.find("video", id="mediaplayer")
        if not media_player:
            self.category = MediaCategory.image
            self.fileurl = html.find("meta", property="og:image")["content"]
        else:
            self.fileurl = media_player.source["src"]
            if "audio" in media_player.source["type"]:
                self.category = MediaCategory.audio
            else:
                self.category = MediaCategory.video

        if "filename" in raw_media:
            self.filename = raw_media["filename"].span.string.strip()
        else:
            # No explicit filename row: use the last path segment of the url.
            self.filename = self.fileurl.split("/")[-1]

        self.description = html.find("meta", {"name": "description"})["content"]

    def __repr__(self):
        return f"<Media name={self.name} type={self.category.name}>"

    def save(self, file_obj):
        """Save the media to an object.

        Parameters
        -----------
        file_obj : typing.BinaryIO
            The file obj to save the file to. The binary data
            will be streamed to that object.
        """
        SESSION = sys.modules["moddb"].SESSION
        prepped = prepare_request(requests.Request("GET", self.fileurl), SESSION)
        # Stream the response in chunks instead of loading it all at once.
        with SESSION.send(prepped, stream=True) as r:
            raise_for_status(r)
            for chunk in r.iter_content(chunk_size=8192):
                file_obj.write(chunk)