Initial commit

2023-01-24 15:01:58 -04:00
commit 1f7d2974b4
1 changed files with 57 additions and 0 deletions
--- a/scraper.py
+++ b/scraper.py
@@ -0,0 +1,57 @@
+import time
+import urllib.request
+import urllib.error
+
+from bs4 import BeautifulSoup
+
+
+# Made for a friend with love
+# Tested with Python 3.10.7 on Windows 10
+
+def scrape(url="https://kingslanding.nb.ca/employment/", timeout=30):
+    """Fetch the employment page and return the text of its content holder.
+
+    The default ``url`` is the page this script watches. ``timeout`` (seconds)
+    keeps a stalled connection from blocking forever. Raises
+    urllib.error.URLError on network failure and ValueError if the page has
+    no ``div`` element with the ``content-holder`` class.
+    """
+    # Close the response deterministically instead of leaking the socket.
+    with urllib.request.urlopen(url, timeout=timeout) as response:
+        soup = BeautifulSoup(response.read(), "html.parser")
+
+    text_content = soup.find("div", {"class": "content-holder"})
+    if text_content is None:
+        raise ValueError("No 'content-holder' div found on {}".format(url))
+    return text_content.text
+
+
+if __name__ == '__main__':
+    # 60 seconds * 60 minutes * 3 hours
+    timer = 60 * 60 * 3
+    # Baseline snapshot; later scrapes are compared against it.
+    text = scrape()
+    print("Started checking for new jobs, will check every 3 hours.")
+
+    while True:
+        # Wait here rather than in a ``finally`` clause: a ``finally`` sleep
+        # also runs on ``break`` and Ctrl+C, delaying exit by three hours.
+        # Sleeping first also avoids re-scraping right after the baseline.
+        time.sleep(timer)
+        try:
+            tmp = scrape()
+            if text == tmp:
+                continue
+            print("New job posting! Here's the text:")
+            print(tmp)
+            print("And here's the link: https://kingslanding.nb.ca/employment/")
+            print("Do you want to keep checking? (y/n): ", end="")
+            choice = input()
+            if choice.strip().lower() == "n":
+                break
+            text = tmp
+        except urllib.error.URLError as err:
+            print("An error occurred when scraping: {}".format(err.reason))
+        except Exception as err:
+            # Top-level boundary: report and keep polling on the next cycle.
+            print("An unknown error has occurred: {}".format(err))