Vimm's lair scraper
This commit is contained in:
parent
b352f4eff6
commit
a463742cf6
5
vimm/.gitignore
vendored
Normal file
5
vimm/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
# Don't commit virtualenv
|
||||
venv
|
||||
|
||||
# Ignore pycache
|
||||
__pycache__
|
8
vimm/.idea/.gitignore
vendored
Normal file
8
vimm/.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
6
vimm/.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
vimm/.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
11
vimm/.idea/misc.xml
Normal file
11
vimm/.idea/misc.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.11" />
|
||||
</component>
|
||||
<component name="DiscordProjectSettings">
|
||||
<option name="show" value="ASK" />
|
||||
<option name="description" value="" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (vimm)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
vimm/.idea/modules.xml
Normal file
8
vimm/.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/vimm.iml" filepath="$PROJECT_DIR$/.idea/vimm.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
17
vimm/.idea/runConfigurations/update_reqs.xml
Normal file
17
vimm/.idea/runConfigurations/update_reqs.xml
Normal file
@ -0,0 +1,17 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="update reqs" type="ShConfigurationType">
|
||||
<option name="SCRIPT_TEXT" value="pipreqs ." />
|
||||
<option name="INDEPENDENT_SCRIPT_PATH" value="false" />
|
||||
<option name="SCRIPT_PATH" value="$PROJECT_DIR$/wbfs-test.ps1" />
|
||||
<option name="SCRIPT_OPTIONS" value="" />
|
||||
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
|
||||
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
|
||||
<option name="INTERPRETER_PATH" value="" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="EXECUTE_IN_TERMINAL" value="true" />
|
||||
<option name="EXECUTE_SCRIPT_FILE" value="false" />
|
||||
<envs />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
</component>
|
24
vimm/.idea/runConfigurations/vimm.xml
Normal file
24
vimm/.idea/runConfigurations/vimm.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="vimm" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
||||
<module name="vimm" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/vimm.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
</component>
|
17
vimm/.idea/runConfigurations/wbfs_test.xml
Normal file
17
vimm/.idea/runConfigurations/wbfs_test.xml
Normal file
@ -0,0 +1,17 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="wbfs test" type="ShConfigurationType">
|
||||
<option name="SCRIPT_TEXT" value="" />
|
||||
<option name="INDEPENDENT_SCRIPT_PATH" value="false" />
|
||||
<option name="SCRIPT_PATH" value="$PROJECT_DIR$/wbfs-test.ps1" />
|
||||
<option name="SCRIPT_OPTIONS" value="" />
|
||||
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
|
||||
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
|
||||
<option name="INTERPRETER_PATH" value="" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="EXECUTE_IN_TERMINAL" value="true" />
|
||||
<option name="EXECUTE_SCRIPT_FILE" value="true" />
|
||||
<envs />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
</component>
|
8
vimm/.idea/vimm.iml
Normal file
8
vimm/.idea/vimm.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.9 (vimm)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
9
vimm/README.md
Normal file
9
vimm/README.md
Normal file
@ -0,0 +1,9 @@
|
||||
# Vimm's Lair Downloader
|
||||
|
||||
A functional but less-than-optimal Vimm's Lair downloader.
|
||||
|
||||
Only tested on Wii games, but it could be adapted for other systems.
|
||||
|
||||
Created because I wanted to download overnight without clicking (it does not bypass the limit of one file at a time).
|
||||
|
||||
However, after doing some research, I remembered that I had already made my archive.org ROM downloader, which is very simple but more fully featured.
|
2
vimm/requirements.txt
Normal file
2
vimm/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
beautifulsoup4==4.12.2
|
||||
Requests==2.31.0
|
18
vimm/vimm-bulk.py
Normal file
18
vimm/vimm-bulk.py
Normal file
@ -0,0 +1,18 @@
|
||||
import sys
|
||||
|
||||
from vimm import download
|
||||
|
||||
|
||||
def main():
    """Download every page listed in a text file.

    Expects exactly two command-line arguments: the path of a text file
    holding one page URL per line, and the file type that is passed to
    ``download`` for each URL. Prints a usage message and exits with
    status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("Usage: python vimm-bulk.py <filename> <filetype>")
        # sys.exit rather than the site-provided exit(), which is intended
        # for interactive sessions and is absent when Python runs with -S.
        sys.exit(1)

    list_path, filetype = sys.argv[1:]

    with open(list_path, "r") as url_file:
        for raw_line in url_file:
            url = raw_line.strip()
            # A blank line (for example a trailing newline at the end of the
            # list) is not a URL; handing "" to download() would fail.
            if not url:
                continue
            download(url, filetype)


if __name__ == "__main__":
    main()
|
119
vimm/vimm.py
Normal file
119
vimm/vimm.py
Normal file
@ -0,0 +1,119 @@
|
||||
import json
|
||||
import sys
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from enum import Enum
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Browser-style User-Agent header sent with both the page request and the
# file download request
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"

# Referer header sent with the file download request
referer = "https://vimm.net/"

# Address the file download request is sent to
download_url2 = "https://download2.vimm.net/download/"
|
||||
|
||||
|
||||
# Enum for type of download,
|
||||
class WiiType(Enum):
    """File formats that can be requested for a Wii download.

    The member value is what ``download_wii`` sends as the ``alt`` request
    parameter when the rvz format is selected.
    """

    wbfs = 0
    rvz = 1
|
||||
|
||||
|
||||
def get_media_id(page_content):
    """Placeholder that currently does nothing and returns None.

    Could later become the place where the ID is extracted from the JSON
    that is passed in.
    """
    return None
|
||||
|
||||
|
||||
def download(url, filetype):
    """Fetch a page and download the file it offers.

    Only pages whose first "sectionTitle" div reads "Wii" are handled, and
    only the "wbfs" and "rvz" file types. Anything else, as well as a failed
    page request, is reported through logging and skipped so that a caller
    looping over many URLs can carry on with the next one.

    :param url: address of the page to fetch
    :param filetype: requested format as a string, "wbfs" or "rvz"
    """
    # The timeout keeps an unresponsive server from hanging the run forever.
    page_request = requests.get(url, headers={"User-Agent": user_agent}, timeout=30)
    if not page_request.ok:
        # Parsing an error page would only fail later in a less obvious way.
        logging.error("Could not fetch %s (HTTP %s)", url, page_request.status_code)
        return

    page_content = BeautifulSoup(page_request.content, 'html.parser')

    # The first div with the class "sectionTitle" decides which handler runs
    section_titles = page_content.find_all("div", class_="sectionTitle")
    if not section_titles:
        logging.error("No sectionTitle div found on %s", url)
        return
    section_title = section_titles[0].text

    if section_title != "Wii":
        logging.warning("Unsupported section %r on %s, nothing downloaded", section_title, url)
        return

    # Member names of WiiType double as the accepted filetype strings
    wii_type = WiiType.__members__.get(filetype)
    if wii_type is None:
        logging.warning("Unsupported file type %r for %s, nothing downloaded", filetype, url)
        return

    download_wii(page_content, wii_type)
|
||||
|
||||
|
||||
def _parse_media_entries(script_text):
    """Return the JSON values assigned to ``var media`` in a script's text."""
    entries = []
    for line in script_text.splitlines():
        _, marker, value = line.partition("var media = ")
        if not marker:
            continue
        value = value.strip()
        # Remove only the statement terminator; a ";" inside the JSON payload
        # (for example in a title) must survive.
        if value.endswith(";"):
            value = value[:-1]
        entries.append(json.loads(value))
    return entries


def download_wii(page_content, filetype):
    """Download a Wii title described by a parsed page.

    Looks for the script that contains ``var media = ``, parses every such
    assignment as JSON, sorts the entries by their "Version" value and
    downloads the entry that sorts last. The file is written to the current
    directory under the entry's "GoodTitle" with ".iso" replaced by ".7z".
    Problems finding the script or fetching the file are logged and the
    function returns without writing anything.

    :param page_content: parsed page exposing ``find_all("script")``
    :param filetype: WiiType member; rvz adds the ``alt`` request parameter
    :raises json.JSONDecodeError: if a ``var media`` value is not valid JSON
    """
    # The media metadata only exists as a JavaScript variable, so the script
    # text has to be searched for it by hand.
    script_text = None
    for script in page_content.find_all("script"):
        # .text is always a string, whereas .string can be None
        if "var media = " in script.text:
            script_text = script.text  # the last matching script wins

    if script_text is None:
        logging.error("No script containing 'var media = ' was found")
        return

    media_json = _parse_media_entries(script_text)

    # Sort by version and take the entry that sorts last
    media_json.sort(key=lambda entry: entry["Version"])
    newest = media_json[-1]

    last_id = newest["ID"]
    logging.info("File ID of Download: %s", last_id)

    filename = newest["GoodTitle"].replace(".iso", ".7z")
    # The title comes from the remote page; neutralise path separators so it
    # cannot name a location outside the current directory.
    filename = filename.replace("/", "_").replace("\\", "_")
    logging.info("File Name of Download: %s", filename)

    # TODO: Parse json for file size

    chunk_size = 1024 * 1024 * 10  # 10 MB

    params = {"mediaId": last_id}
    if filetype == WiiType.rvz:
        params["alt"] = filetype.value

    # Context managers make sure the connection is released even on errors
    with requests.Session() as session:
        session.headers.update({"User-Agent": user_agent, "Referer": referer})
        with session.get(download_url2, params=params, stream=True, timeout=30) as response:
            if not response.ok:
                # Without this check an error page would be saved as the archive
                logging.error("Download of %s failed (HTTP %s)", filename, response.status_code)
                return
            with open(filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
|
||||
|
||||
|
||||
def main():
    """Download a single page given on the command line.

    Expects exactly two command-line arguments, the page URL and the file
    type, and hands them to ``download``. Prints a usage message and exits
    with status 1 when the argument count is wrong.
    """
    # Comment out to disable logging
    logging.getLogger().setLevel(logging.INFO)

    if len(sys.argv) != 3:
        print("Usage: python vimm.py <url> <filetype>")
        # sys.exit rather than the site-provided exit(), which is intended
        # for interactive sessions and is absent when Python runs with -S.
        sys.exit(1)

    url, filetype = sys.argv[1:]
    download(url, filetype)


if __name__ == "__main__":
    main()
|
1
vimm/wbfs-test.ps1
Normal file
1
vimm/wbfs-test.ps1
Normal file
@ -0,0 +1 @@
|
||||
python vimm.py https://vimm.net/vault/18170 wbfs
|
35
vimm/wii-games.txt
Normal file
35
vimm/wii-games.txt
Normal file
@ -0,0 +1,35 @@
|
||||
https://vimm.net/vault/17392
|
||||
https://vimm.net/vault/17508
|
||||
https://vimm.net/vault/17667
|
||||
https://vimm.net/vault/17668
|
||||
https://vimm.net/vault/17669
|
||||
https://vimm.net/vault/17677
|
||||
https://vimm.net/vault/17683
|
||||
https://vimm.net/vault/17746
|
||||
https://vimm.net/vault/17874
|
||||
https://vimm.net/vault/17892
|
||||
https://vimm.net/vault/17893
|
||||
https://vimm.net/vault/17902
|
||||
https://vimm.net/vault/17873
|
||||
https://vimm.net/vault/17938
|
||||
https://vimm.net/vault/17951
|
||||
https://vimm.net/vault/17978
|
||||
https://vimm.net/vault/18170
|
||||
https://vimm.net/vault/18171
|
||||
https://vimm.net/vault/18172
|
||||
https://vimm.net/vault/18175
|
||||
https://vimm.net/vault/18177
|
||||
https://vimm.net/vault/18235
|
||||
https://vimm.net/vault/18234
|
||||
https://vimm.net/vault/18236
|
||||
https://vimm.net/vault/18275
|
||||
https://vimm.net/vault/18293
|
||||
https://vimm.net/vault/18294
|
||||
https://vimm.net/vault/18295
|
||||
https://vimm.net/vault/18296
|
||||
https://vimm.net/vault/18297
|
||||
https://vimm.net/vault/18298
|
||||
https://vimm.net/vault/18299
|
||||
https://vimm.net/vault/18292
|
||||
https://vimm.net/vault/18291
|
||||
https://vimm.net/vault/18334
|
Loading…
Reference in New Issue
Block a user