Vimm's lair scraper

This commit is contained in:
Isaac Shoebottom 2023-12-02 23:40:27 -04:00
parent b352f4eff6
commit a463742cf6
15 changed files with 288 additions and 0 deletions

5
vimm/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
# Don't commit virtualenv
venv
# Ignore pycache
__pycache__

8
vimm/.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

11
vimm/.idea/misc.xml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.11" />
</component>
<component name="DiscordProjectSettings">
<option name="show" value="ASK" />
<option name="description" value="" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (vimm)" project-jdk-type="Python SDK" />
</project>

8
vimm/.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/vimm.iml" filepath="$PROJECT_DIR$/.idea/vimm.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,17 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="update reqs" type="ShConfigurationType">
<option name="SCRIPT_TEXT" value="pipreqs ." />
<option name="INDEPENDENT_SCRIPT_PATH" value="false" />
<option name="SCRIPT_PATH" value="$PROJECT_DIR$/wbfs-test.ps1" />
<option name="SCRIPT_OPTIONS" value="" />
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
<option name="INTERPRETER_PATH" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="EXECUTE_IN_TERMINAL" value="true" />
<option name="EXECUTE_SCRIPT_FILE" value="false" />
<envs />
<method v="2" />
</configuration>
</component>

View File

@ -0,0 +1,24 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="vimm" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
<module name="vimm" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/vimm.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
</component>

View File

@ -0,0 +1,17 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="wbfs test" type="ShConfigurationType">
<option name="SCRIPT_TEXT" value="" />
<option name="INDEPENDENT_SCRIPT_PATH" value="false" />
<option name="SCRIPT_PATH" value="$PROJECT_DIR$/wbfs-test.ps1" />
<option name="SCRIPT_OPTIONS" value="" />
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
<option name="INTERPRETER_PATH" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="EXECUTE_IN_TERMINAL" value="true" />
<option name="EXECUTE_SCRIPT_FILE" value="true" />
<envs />
<method v="2" />
</configuration>
</component>

8
vimm/.idea/vimm.iml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.9 (vimm)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

9
vimm/README.md Normal file
View File

@ -0,0 +1,9 @@
# Vimm's Lair Downloader
Functional but less-than-optimal Vimm's Lair downloader.
Only tested on Wii games, but it could be adapted for other platforms.
Created because I wanted to download overnight without clicking (it does not bypass the 1-file download limit).
After doing some research, I remembered that I had already made my archive.org ROM downloader, which is very simple but more fully featured.

2
vimm/requirements.txt Normal file
View File

@ -0,0 +1,2 @@
beautifulsoup4==4.12.2
Requests==2.31.0

18
vimm/vimm-bulk.py Normal file
View File

@ -0,0 +1,18 @@
import sys
from vimm import download
def main():
    """Download every game listed in a text file, one page URL per line.

    Expects two command-line arguments: the path of the list file and the
    file type passed to ``download`` for each URL. Blank lines are skipped.
    A failure on one URL is reported on stderr and the remaining URLs are
    still processed; the process exits with status 1 if any URL failed.
    """
    if len(sys.argv) != 3:
        print("Usage: python vimm-bulk.py <filename> <filetype>")
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and is meant for interactive use.
        sys.exit(1)

    list_path, filetype = sys.argv[1], sys.argv[2]
    failures = 0
    with open(list_path, "r") as file:
        for line in file:
            url = line.strip()
            if not url:
                # A trailing or separating blank line is not a URL.
                continue
            try:
                download(url, filetype)
            except Exception as err:
                # Top-level boundary of an unattended batch: report the
                # failure and keep going instead of losing the whole run.
                failures += 1
                print(f"Failed to download {url}: {err}", file=sys.stderr)

    if failures:
        sys.exit(1)


if __name__ == "__main__":
    main()

119
vimm/vimm.py Normal file
View File

@ -0,0 +1,119 @@
import json
import sys
import logging
import requests
from enum import Enum
from bs4 import BeautifulSoup
# User-Agent header value sent with the page request and the download request
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
# Referer header value sent with the download request
referer = "https://vimm.net/"
# URL the download request is sent to; the file is selected via the "mediaId" query parameter
download_url2 = "https://download2.vimm.net/download/"
# Enum for the type of Wii download
class WiiType(Enum):
    """File formats that can be requested for a Wii download."""

    wbfs = 0
    # download_wii sends this value as the "alt" query parameter
    rvz = 1
def get_media_id(page_content):
    """Placeholder for extracting the media id from the passed-in JSON.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    return None
def download(url, filetype):
    """Fetch a game page and start the download matching its platform.

    Args:
        url: Address of the game's page.
        filetype: Name of a ``WiiType`` member ("wbfs" or "rvz").

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the page has no div with the class "sectionTitle".

    Platforms other than "Wii" and unknown file types are not downloaded;
    a warning is logged so the skip is visible.
    """
    # A timeout keeps an unattended run from hanging on a stalled connection.
    page_request = requests.get(url, headers={"User-Agent": user_agent}, timeout=30)
    # Fail here rather than try to parse an error page.
    page_request.raise_for_status()
    page_content = BeautifulSoup(page_request.content, 'html.parser')

    # The first div with the class "sectionTitle" holds the platform name
    section_title_div = page_content.find("div", class_="sectionTitle")
    if section_title_div is None:
        raise ValueError(f"No section title found on page: {url}")
    section_title = section_title_div.text

    if section_title != "Wii":
        logging.warning("Unsupported platform '%s' on %s", section_title, url)
        return

    # Look the member up by name instead of one if/elif branch per format.
    try:
        wii_type = WiiType[filetype]
    except KeyError:
        logging.warning("Unsupported Wii file type: %s", filetype)
        return

    download_wii(page_content, wii_type)
def _parse_media_entries(page_content):
    """Return the JSON objects assigned to ``var media`` in the page's scripts."""
    marker = "var media = "
    entries = []
    for script in page_content.find_all("script"):
        # .text is used rather than .string, which can be None for a tag.
        for line in script.text.splitlines():
            _, found, payload = line.partition(marker)
            if not found:
                continue
            # Drop only the trailing statement terminator; semicolons inside
            # the JSON payload itself must be left alone.
            entries.append(json.loads(payload.strip().rstrip(";").rstrip()))
    return entries


def download_wii(page_content, filetype):
    """Download the highest-version Wii release described on a game page.

    The page's scripts contain ``var media = {...};`` assignments. They are
    parsed as JSON and sorted by "Version"; the last entry's "ID" is
    requested from the download server and the response is streamed to a
    file named after that entry's "GoodTitle" with ".iso" replaced by ".7z".

    Args:
        page_content: Parsed page whose ``script`` tags are searched.
        filetype: ``WiiType`` member; ``WiiType.rvz`` adds the "alt" query
            parameter to the download request.

    Raises:
        ValueError: If no media entry is found, or the entry yields an
            empty file name.
        json.JSONDecodeError: If a media assignment is not valid JSON.
        requests.HTTPError: If the download server answers with an error.
    """
    from pathlib import Path

    # Wish there was a better way to do this, manually parsing javascript for a variable
    media_json = _parse_media_entries(page_content)
    if not media_json:
        raise ValueError("No 'var media = ' entry found in the page scripts")

    # Sort by version; the sort is stable, so ties keep their page order
    media_json.sort(key=lambda entry: entry["Version"])
    latest = media_json[-1]

    # Get ID of last entry
    last_id = latest["ID"]
    logging.info("File ID of Download: %s", last_id)

    # The title comes from the remote page, so keep only its final path
    # component to make sure the file is written to the working directory.
    filename = Path(latest["GoodTitle"].replace(".iso", ".7z")).name
    if not filename:
        raise ValueError("Media entry does not provide a usable file name")
    logging.info("File Name of Download: %s", filename)

    # TODO: Parse json for file size
    chunk_size = 1024 * 1024 * 10  # 10 MB

    # Build request; the context managers close the connection afterwards
    with requests.Session() as session:
        session.headers.update({"User-Agent": user_agent, "Referer": referer})
        session.params.update({"mediaId": last_id})
        if filetype == WiiType.rvz:
            session.params.update({"alt": filetype.value})

        with session.get(download_url2, stream=True, timeout=30) as response:
            # Without this check an error page would be saved as the archive.
            response.raise_for_status()
            with open(filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
def main():
    """Command-line entry point: download one game page.

    Expects exactly two arguments, the page URL and the file type, and
    passes them to ``download``. With any other argument count the usage
    line is printed and the process exits with status 1.
    """
    # Comment out to disable logging
    logging.getLogger().setLevel(logging.INFO)

    if len(sys.argv) != 3:
        print("Usage: python vimm.py <url> <filetype>")
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and is meant for interactive use.
        sys.exit(1)

    # Get arguments
    url, filetype = sys.argv[1], sys.argv[2]
    download(url, filetype)


if __name__ == "__main__":
    main()

1
vimm/wbfs-test.ps1 Normal file
View File

@ -0,0 +1 @@
python vimm.py https://vimm.net/vault/18170 wbfs

35
vimm/wii-games.txt Normal file
View File

@ -0,0 +1,35 @@
https://vimm.net/vault/17392
https://vimm.net/vault/17508
https://vimm.net/vault/17667
https://vimm.net/vault/17668
https://vimm.net/vault/17669
https://vimm.net/vault/17677
https://vimm.net/vault/17683
https://vimm.net/vault/17746
https://vimm.net/vault/17874
https://vimm.net/vault/17892
https://vimm.net/vault/17893
https://vimm.net/vault/17902
https://vimm.net/vault/17873
https://vimm.net/vault/17938
https://vimm.net/vault/17951
https://vimm.net/vault/17978
https://vimm.net/vault/18170
https://vimm.net/vault/18171
https://vimm.net/vault/18172
https://vimm.net/vault/18175
https://vimm.net/vault/18177
https://vimm.net/vault/18235
https://vimm.net/vault/18234
https://vimm.net/vault/18236
https://vimm.net/vault/18275
https://vimm.net/vault/18293
https://vimm.net/vault/18294
https://vimm.net/vault/18295
https://vimm.net/vault/18296
https://vimm.net/vault/18297
https://vimm.net/vault/18298
https://vimm.net/vault/18299
https://vimm.net/vault/18292
https://vimm.net/vault/18291
https://vimm.net/vault/18334