import sys
import time
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
|
# Made for a friend with love
# Tested with Python 3.10.7 on Windows 10

# TODO: Look into using requests, urllib3, and/or selenium to scrape the page
# https://pypi.org/project/selenium/
# https://pypi.org/project/urllib3/
# https://pypi.org/project/requests/
def scrape():
|
|
|
|
contents = urllib.request.urlopen("https://kingslanding.nb.ca/employment/").read()
|
|
|
|
soup = BeautifulSoup(contents, "html.parser")
|
|
|
|
|
|
|
|
# Other ways to parse the HTML to check for differences
|
|
|
|
|
|
|
|
# div_id = "wrapper"
|
|
|
|
# whole_page = soup.find("div", {"id": div_id})
|
|
|
|
|
|
|
|
# main_id = "main"
|
|
|
|
# main_content = whole_page.find("main", {"id": main_id})
|
|
|
|
|
|
|
|
text_id = "content-holder"
|
|
|
|
text_content = soup.find("div", {"class": text_id})
|
|
|
|
|
|
|
|
return text_content.text
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
2023-01-31 19:55:09 -04:00
|
|
|
testing = False
|
2023-01-24 15:39:53 -04:00
|
|
|
will_exit = False
|
|
|
|
|
2023-01-24 15:17:49 -04:00
|
|
|
hours = 3
|
|
|
|
|
|
|
|
arg_count = len(sys.argv)
|
2023-01-24 15:39:53 -04:00
|
|
|
if arg_count == 1: # No arguments
|
2023-01-24 15:17:49 -04:00
|
|
|
print("No arguments given, using default value of 3 hours")
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
hours = int(sys.argv[1]) # First argument is the script name, second is the first argument
|
|
|
|
print("Using argument value of " + str(hours) + " hours")
|
|
|
|
except ValueError:
|
|
|
|
print("Invalid argument given, using default value of 3 hours")
|
|
|
|
|
2023-01-24 15:01:58 -04:00
|
|
|
# start = time.time()
|
|
|
|
|
|
|
|
# 60 seconds * 60 minutes * 3 hours
|
2023-01-24 15:17:49 -04:00
|
|
|
timer = 60 * 60 * hours
|
2023-01-24 15:01:58 -04:00
|
|
|
text = scrape()
|
2023-01-24 15:17:49 -04:00
|
|
|
print("Started checking for new jobs")
|
2023-01-24 15:01:58 -04:00
|
|
|
|
|
|
|
while True:
|
|
|
|
try:
|
|
|
|
tmp = scrape()
|
2023-01-24 15:39:53 -04:00
|
|
|
if text != tmp or testing:
|
|
|
|
print("\a")
|
2023-01-24 15:01:58 -04:00
|
|
|
print("New job posting! Here's the text:")
|
|
|
|
print(tmp)
|
|
|
|
print("And here's the link: https://kingslanding.nb.ca/employment/")
|
|
|
|
print("Do you want to keep checking? (y/n): ", end="")
|
|
|
|
choice = input()
|
|
|
|
if choice == "n" or choice == "N":
|
2023-01-24 15:39:53 -04:00
|
|
|
will_exit = True
|
2023-01-24 15:01:58 -04:00
|
|
|
else:
|
|
|
|
text = tmp
|
|
|
|
except urllib.error.URLError as err:
|
|
|
|
print("An error occurred when scraping: {}".format(err.reason))
|
|
|
|
|
|
|
|
except Exception as err:
|
|
|
|
print("An unknown error has occurred: {}".format(err))
|
|
|
|
|
|
|
|
finally:
|
2023-01-24 15:39:53 -04:00
|
|
|
if will_exit:
|
|
|
|
exit(0)
|
|
|
|
else:
|
|
|
|
time.sleep(timer)
|