From a463742cf684d2e314c825d734c4ea5bc5c40b62 Mon Sep 17 00:00:00 2001 From: Isaac Shoebottom Date: Sat, 2 Dec 2023 23:40:27 -0400 Subject: [PATCH] Vimm's lair scraper --- vimm/.gitignore | 5 + vimm/.idea/.gitignore | 8 ++ .../inspectionProfiles/profiles_settings.xml | 6 + vimm/.idea/misc.xml | 11 ++ vimm/.idea/modules.xml | 8 ++ vimm/.idea/runConfigurations/update_reqs.xml | 17 +++ vimm/.idea/runConfigurations/vimm.xml | 24 ++++ vimm/.idea/runConfigurations/wbfs_test.xml | 17 +++ vimm/.idea/vimm.iml | 8 ++ vimm/README.md | 9 ++ vimm/requirements.txt | 2 + vimm/vimm-bulk.py | 18 +++ vimm/vimm.py | 119 ++++++++++++++++++ vimm/wbfs-test.ps1 | 1 + vimm/wii-games.txt | 35 ++++++ 15 files changed, 288 insertions(+) create mode 100644 vimm/.gitignore create mode 100644 vimm/.idea/.gitignore create mode 100644 vimm/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 vimm/.idea/misc.xml create mode 100644 vimm/.idea/modules.xml create mode 100644 vimm/.idea/runConfigurations/update_reqs.xml create mode 100644 vimm/.idea/runConfigurations/vimm.xml create mode 100644 vimm/.idea/runConfigurations/wbfs_test.xml create mode 100644 vimm/.idea/vimm.iml create mode 100644 vimm/README.md create mode 100644 vimm/requirements.txt create mode 100644 vimm/vimm-bulk.py create mode 100644 vimm/vimm.py create mode 100644 vimm/wbfs-test.ps1 create mode 100644 vimm/wii-games.txt diff --git a/vimm/.gitignore b/vimm/.gitignore new file mode 100644 index 0000000..d5ed807 --- /dev/null +++ b/vimm/.gitignore @@ -0,0 +1,5 @@ +# Don't commit virtualenv +venv + +# Ignore pycache +__pycache__ \ No newline at end of file diff --git a/vimm/.idea/.gitignore b/vimm/.idea/.gitignore new file mode 100644 index 0000000..1c2fda5 --- /dev/null +++ b/vimm/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git 
a/vimm/.idea/inspectionProfiles/profiles_settings.xml b/vimm/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/vimm/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/vimm/.idea/misc.xml b/vimm/.idea/misc.xml new file mode 100644 index 0000000..518b831 --- /dev/null +++ b/vimm/.idea/misc.xml @@ -0,0 +1,11 @@ + + + + + + + + \ No newline at end of file diff --git a/vimm/.idea/modules.xml b/vimm/.idea/modules.xml new file mode 100644 index 0000000..bd8ac6f --- /dev/null +++ b/vimm/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/vimm/.idea/runConfigurations/update_reqs.xml b/vimm/.idea/runConfigurations/update_reqs.xml new file mode 100644 index 0000000..c34a067 --- /dev/null +++ b/vimm/.idea/runConfigurations/update_reqs.xml @@ -0,0 +1,17 @@ + + + + \ No newline at end of file diff --git a/vimm/.idea/runConfigurations/vimm.xml b/vimm/.idea/runConfigurations/vimm.xml new file mode 100644 index 0000000..66d1639 --- /dev/null +++ b/vimm/.idea/runConfigurations/vimm.xml @@ -0,0 +1,24 @@ + + + + + \ No newline at end of file diff --git a/vimm/.idea/runConfigurations/wbfs_test.xml b/vimm/.idea/runConfigurations/wbfs_test.xml new file mode 100644 index 0000000..33936a5 --- /dev/null +++ b/vimm/.idea/runConfigurations/wbfs_test.xml @@ -0,0 +1,17 @@ + + + + \ No newline at end of file diff --git a/vimm/.idea/vimm.iml b/vimm/.idea/vimm.iml new file mode 100644 index 0000000..1425276 --- /dev/null +++ b/vimm/.idea/vimm.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/vimm/README.md b/vimm/README.md new file mode 100644 index 0000000..c8a9c9a --- /dev/null +++ b/vimm/README.md @@ -0,0 +1,9 @@ +# Vimm's Lair Downloader + +Functional but less than optimal Vimm's lair downloader + +Only tested on Wii games, but it could be adapted for other stuff + +Created because I wanted to download 
without clicking overnight (does not bypass more than 1 file limit)
+
+But after doing some research and remembering I made my archive.org ROM downloader, which is very simple but more fully featured
\ No newline at end of file
diff --git a/vimm/requirements.txt b/vimm/requirements.txt
new file mode 100644
index 0000000..da73b1d
--- /dev/null
+++ b/vimm/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.12.2
+Requests==2.31.0
diff --git a/vimm/vimm-bulk.py b/vimm/vimm-bulk.py
new file mode 100644
index 0000000..6ad9e28
--- /dev/null
+++ b/vimm/vimm-bulk.py
@@ -0,0 +1,29 @@
+"""Download every Vimm's Lair vault URL listed in a text file."""
+import sys
+
+from vimm import download
+
+
+def main():
+    """Download each URL listed in the file named by argv[1].
+
+    The second command-line argument is passed to download() unchanged as
+    the file type. Exits with status 1 when the argument count is wrong.
+    """
+    if len(sys.argv) != 3:
+        print("Usage: python vimm-bulk.py URL_LIST_FILE FILETYPE")
+        # sys.exit() instead of the site-provided exit() helper, which is
+        # not guaranteed to exist (e.g. when running with python -S).
+        sys.exit(1)
+
+    with open(sys.argv[1], "r") as file:
+        for line in file:
+            line = line.strip()
+            # A blank line is not a URL; requests would reject "" outright.
+            if not line:
+                continue
+            download(line, sys.argv[2])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vimm/vimm.py b/vimm/vimm.py
new file mode 100644
index 0000000..64a25fb
--- /dev/null
+++ b/vimm/vimm.py
@@ -0,0 +1,163 @@
+import json
+import logging
+import sys
+from enum import Enum
+
+import requests
+from bs4 import BeautifulSoup
+
+# User Agent
+user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
+
+# Referer
+referer = "https://vimm.net/"
+
+# Download URL
+download_url2 = "https://download2.vimm.net/download/"
+
+# Seconds to wait on a stalled connection before giving up
+request_timeout = 30
+
+# Text that precedes each media JSON object in the page's inline JavaScript
+media_marker = "var media = "
+
+
+# Enum for type of download; for rvz the value is sent as the "alt" parameter
+class WiiType(Enum):
+    wbfs = 0
+    rvz = 1
+
+
+def get_media_id(page_content):
+    # Could be used later to be a method to extract the id from passed in json
+    pass
+
+
+def download(url, filetype):
+    """Fetch a vault page and download its game in the requested format.
+
+    url is the vault page to scrape; filetype is the name of a WiiType
+    member ("wbfs" or "rvz"). Pages for other systems and unknown file
+    types are skipped with a warning.
+
+    Raises requests.HTTPError if the page request fails and ValueError if
+    the page has no "sectionTitle" div.
+    """
+    page_request = requests.get(
+        url, headers={"User-Agent": user_agent}, timeout=request_timeout
+    )
+    # Fail on 4xx/5xx here rather than trying to parse an error page
+    page_request.raise_for_status()
+    page_content = BeautifulSoup(page_request.content, 'html.parser')
+
+    # Find the first div with the class "sectionTitle"
+    section_title_div = page_content.find("div", class_="sectionTitle")
+    if section_title_div is None:
+        raise ValueError("No sectionTitle div found on " + url)
+    section_title = section_title_div.text
+
+    if section_title != "Wii":
+        logging.warning("Skipping %s: unsupported system %r", url, section_title)
+        return
+
+    # Look the member up by name instead of one branch per file type
+    wii_type = WiiType.__members__.get(filetype)
+    if wii_type is None:
+        logging.warning("Skipping %s: unknown file type %r", url, filetype)
+        return
+    download_wii(page_content, wii_type)
+
+
+def _find_media_script(page_content):
+    """Return the text of the last script containing media_marker, or None."""
+    found = None
+    for tag in page_content.find_all("script"):
+        if media_marker in tag.text:
+            found = tag.text
+    return found
+
+
+def _extract_media_entries(script_text):
+    """Parse the JSON that follows media_marker on each line of script_text."""
+    entries = []
+    for line in script_text.splitlines():
+        _, marker, payload = line.partition(media_marker)
+        if not marker:
+            continue
+        payload = payload.strip()
+        # Drop only the trailing statement terminator; semicolons inside
+        # JSON string values must survive or the data is corrupted.
+        if payload.endswith(";"):
+            payload = payload[:-1]
+        entries.append(json.loads(payload))
+    return entries
+
+
+def download_wii(page_content, filetype):
+    """Download the media entry with the highest "Version" from a vault page.
+
+    page_content is the parsed page (BeautifulSoup); filetype is a WiiType.
+    The archive is written to the current directory, named after the
+    entry's "GoodTitle" with ".iso" replaced by ".7z".
+
+    Raises ValueError if no script on the page contains media_marker and
+    requests.HTTPError if the download server returns an error status.
+    """
+    # Wish there was a better way to do this, manually parsing javascript for a variable
+    script_text = _find_media_script(page_content)
+    if script_text is None:
+        raise ValueError("No script containing '" + media_marker + "' found")
+    media_json = _extract_media_entries(script_text)
+
+    # Sort by version; the last entry is the one to download
+    media_json.sort(key=lambda x: x["Version"])
+    latest = media_json[-1]
+
+    last_id = latest["ID"]
+    logging.info("File ID of Download: %s", last_id)
+
+    # The title comes from the remote page, so keep path separators out of
+    # the local file name.
+    filename = latest["GoodTitle"].replace(".iso", ".7z")
+    filename = filename.replace("/", "_").replace("\\", "_")
+    logging.info("File Name of Download: %s", filename)
+
+    # TODO: Parse json for file size
+
+    chunk_size = 1024 * 1024 * 10  # 10 MB
+
+    params = {"mediaId": last_id}
+    if filetype == WiiType.rvz:
+        params["alt"] = filetype.value
+
+    # Context managers close the session and response even on failure
+    with requests.Session() as session:
+        session.headers.update({"User-Agent": user_agent, "Referer": referer})
+        with session.get(
+            download_url2, params=params, stream=True, timeout=request_timeout
+        ) as r:
+            # Without this an HTML error page would be saved as the archive
+            r.raise_for_status()
+            with open(filename, "wb") as file:
+                for chunk in r.iter_content(chunk_size=chunk_size):
+                    if chunk:
+                        file.write(chunk)
+
+
+def main():
+    """Command-line entry point: python vimm.py URL FILETYPE."""
+    # Comment out to disable logging
+    logging.getLogger().setLevel(logging.INFO)
+
+    if len(sys.argv) != 3:
+        print("Usage: python vimm.py URL FILETYPE")
+        sys.exit(1)
+
+    # Get arguments
+    url = sys.argv[1]
+    filetype = sys.argv[2]
+    download(url, filetype)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vimm/wbfs-test.ps1 b/vimm/wbfs-test.ps1
new file mode 100644
index 0000000..32a73d7
--- /dev/null
+++ b/vimm/wbfs-test.ps1
@@ -0,0 +1 @@
+python vimm.py https://vimm.net/vault/18170 wbfs
\ No newline at end of file
diff --git a/vimm/wii-games.txt b/vimm/wii-games.txt
new file mode 100644
index 0000000..ff0c0eb
--- /dev/null
+++ b/vimm/wii-games.txt
@@ -0,0 +1,35 @@
+https://vimm.net/vault/17392
+https://vimm.net/vault/17508
+https://vimm.net/vault/17667
+https://vimm.net/vault/17668
+https://vimm.net/vault/17669
+https://vimm.net/vault/17677
+https://vimm.net/vault/17683
+https://vimm.net/vault/17746
+https://vimm.net/vault/17874
+https://vimm.net/vault/17892
+https://vimm.net/vault/17893
+https://vimm.net/vault/17902
+https://vimm.net/vault/17873
+https://vimm.net/vault/17938
+https://vimm.net/vault/17951
+https://vimm.net/vault/17978
+https://vimm.net/vault/18170
+https://vimm.net/vault/18171
+https://vimm.net/vault/18172
+https://vimm.net/vault/18175
+https://vimm.net/vault/18177
+https://vimm.net/vault/18235
+https://vimm.net/vault/18234
+https://vimm.net/vault/18236
+https://vimm.net/vault/18275
+https://vimm.net/vault/18293
+https://vimm.net/vault/18294
+https://vimm.net/vault/18295
+https://vimm.net/vault/18296
+https://vimm.net/vault/18297
+https://vimm.net/vault/18298
+https://vimm.net/vault/18299
+https://vimm.net/vault/18292
+https://vimm.net/vault/18291
+https://vimm.net/vault/18334