Commit V1.0

Initial Release Of This Program

davidteather committed Sep 17, 2019
1 parent 3e5c215 commit c498e00
Showing 5 changed files with 203 additions and 69 deletions.
2 changes: 0 additions & 2 deletions .gitignore
@@ -1,2 +0,0 @@

geckodriver.log
87 changes: 85 additions & 2 deletions README.md
@@ -1,2 +1,85 @@
# Hotukdeals-Discord-Notifier
A commissioned project to notify a user on discord about new deals matching specific criteria.
# HotUkDeals-Discord-Notifier

This is a web scraping program designed to scrape [this](https://www.hotukdeals.com/) website. You give it specific criteria, and when it finds a deal that matches, it notifies you on Discord.

## Getting Started

The following instructions will help you get this software up and running.

### Prerequisites

To install the Python requirements, run the command below (main.py also imports requests, discord.py, and selenium, so they are included as well).

```
pip install beautifulsoup4 requests discord.py selenium
```

### Installing

Have Python 3.x installed. This was tested with 3.7.3.

## Running the program

To run the software, you first need to add your information to proxies.txt and settings.json.

### **Example of settings.json**
```
{
"min_upvotes": "500",
"max_upvotes": "1000",
"base_url": "https://www.hotukdeals.com",
"pages_to_index": "10",
"discord_api_token": "1234567890",
"min_price": "0",
"max_price": "500",
"discord_channel_id": "1234567890",
"time_interval_seconds": "1800"
}
```

**min_upvotes** - The minimum number of upvotes / degrees a deal needs for you to be notified.

**max_upvotes** - The maximum number of upvotes / degrees a deal may have for you to be notified.

**base_url** - The base URL to be scanned. The default works fine.

**pages_to_index** - The number of pages you want to index. The default is 10.

**discord_api_token** - Your Discord API token for your bot. [Here](https://www.writebots.com/discord-bot-token/) is a good article on how to get your bot's API token.

**min_price** - The minimum price of the deals you want to be notified of.

**max_price** - The maximum price of the deals you want to be notified of.

**discord_channel_id** - The ID of the Discord channel you want your bot to talk in and notify you in. [Here](https://support.discordapp.com/hc/en-us/articles/206346498-Where-can-I-find-my-User-Server-Message-ID-) is a good article on how to find your Discord channel ID.

**time_interval_seconds** - The number of seconds to wait after all the pages have been scraped before scraping again. I recommend at least 30 minutes (1800 seconds).
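For reference, main.py loads this file with the standard json module; every value is stored as a string and cast before use. A condensed sketch of what happens at startup:

```
import json

# All settings values are stored as strings in settings.json.
with open('settings.json') as data:
    settings = json.load(data)

# Numeric fields are cast before the comparisons in checkDealsBeautifulSoup.
min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])
min_price = float(settings["min_price"])
max_price = float(settings["max_price"])
```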

### **Example of proxies.txt**
```
123.123.123.123:1010
```

Each line should contain one proxy with its port. The proxies must be able to handle SSL.
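main.py picks one proxy at random for each page request and hands it to requests. A condensed sketch of what checkDealsBeautifulSoup does (the .strip() guards against the trailing newline that readlines() keeps):

```
import random
import requests

# One proxy per line in proxies.txt.
with open('proxies.txt', 'r') as f:
    proxy_lines = f.readlines()

# Pick a random proxy and strip the trailing newline.
proxy = random.choice(proxy_lines).strip()

# requests expects a dict mapping scheme to proxy URL.
proxies = {
    "http": "http://" + proxy,
    "https": "https://" + proxy,
}

page = requests.get("https://www.hotukdeals.com", proxies=proxies)
```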

### Executing the program

Once you have the configuration files set up as you would like, simply run the command below.

```
python main.py
```

The bot will then notify you on Discord whenever a deal matches your criteria.
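Going by the print statements in main.py, the console output on startup should look roughly like this (the bot name and ID will be your own):

```
Logged in as
YourBotName
123456789012345678
------
checking page 0
checking page 1
```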

## Built With

* [Python 3.7](https://www.python.org/) - The language used

## Authors

* **David Teather** - *Initial work* - [davidteather](https://github.com/davidteather)

## License

This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details
171 changes: 112 additions & 59 deletions main.py
@@ -5,6 +5,7 @@
from selenium.webdriver.firefox.options import Options
import json


with open('settings.json') as data:
settings = json.load(data)

@@ -32,96 +33,148 @@ def __init__(self, channel, *args, **kwargs):

# create the background task and run it in the background
self.bg_task = self.loop.create_task(self.my_background_task())


def checkDeals(self, url):
# Selenium stuff
options = Options()
options.headless = False
driver = webdriver.Firefox(options=options)
driver.set_window_position(0, 0)
driver.set_window_size(1920, 1080)

# Check deals
def checkDealsBeautifulSoup(self, url):
# Imports
import requests
from bs4 import BeautifulSoup
import json
import random

# Loads JSON and vars
with open('settings.json') as data:
settings = json.load(data)

min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])

min_price = float(settings["min_price"])
max_price = float(settings["max_price"])

# Loads proxies
with open('proxies.txt', 'r') as proxies:
proxies = proxies.readlines()

# Picks random proxy
proxy = random.choice(proxies).strip()  # strip the trailing newline readlines() leaves

returnMsgs = []
newArray = []

# Reads already used things
with open('data/usedLinks.txt', 'r') as data:
usedArray = data.readlines()

# Gets webpage
driver.get(url)



deals = driver.find_elements_by_xpath('//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span')


print(len(deals))
for index in range(0,len(deals)):
print(index)
# '//div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span'
# '//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span'
upvotes = int(driver.find_elements_by_xpath('//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span')[index].text.strip().replace(" ", "").replace("°", "").replace("\n", ""))
priceString = driver.find_elements_by_xpath('//span[@class="thread-price text--b vAlign--all-tt cept-tp size--all-l size--fromW3-xl"]')[index].text.strip().replace("£", "")
url = driver.find_elements_by_xpath('//a[@class="cept-tt thread-link linkPlain thread-title--list"]')[index].get_attribute('href')


if priceString != "FREE":
price = float(priceString)
else:
price = 0

if min_price <= price <= max_price and min_upvotes <= upvotes <= max_upvotes:
if url not in usedArray:
# Return Message
message = url + " Satisfies your deal criteria. It is at " + str(upvotes) + " degrees and costs " + str(priceString)
returnMsgs.append(message)
usedArray.append(url)
newArray.append(url)

# Sets up proxy
proxies = {
"http": "http://" + proxy,
"https": "https://" + proxy,
}

page = requests.get(url, proxies=proxies)  # use the proxies dict built above, not the raw string
soup = BeautifulSoup(page.text, 'html.parser')
var = False

# Tries to get things
try:
listings = soup.find_all(
'article', attrs={'data-handler': 'history'})
upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'})
pricing = soup.find_all('span', attrs={'class': 'thread-price'})
urls = soup.find_all(
'a', attrs={'class': 'cept-thread-image-link'})
var = True
except:
var = False

if var == True:
upvotesIndex = 0
index = 0
for x in range(0, len(listings)):

try:
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")
if "Deal" in upvote or "alerts" in upvote:
upvotesIndex += 1
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")

except:
upvote = 0

try:
price = pricing[index].text.strip().replace("£", "")
except:
price = 0
try:
url = urls[index].get('href')
except:
url = None
if price != "FREE":
try:
price = float(price.replace(",", ""))
except:
price = 0
else:
price = 0

if min_price <= price <= max_price:
if min_upvotes <= int(upvote) <= max_upvotes:
if url != None:
if url + "\n" not in usedArray:
# Return Message
message = url + " Satisfies your deal criteria. It is at " + \
str(upvote) + \
" degrees and costs £" + str(price)
returnMsgs.append(message)
usedArray.append(url)
newArray.append(url)

upvotesIndex += 1
index += 1

# Saves new logged files
with open('data/usedLinks.txt', 'a') as fileObj:
for line in newArray:
fileObj.write(line)



driver.quit()
fileObj.write(line + "\n")

# Returns stuff
return returnMsgs


# On start
async def on_ready(self):
print('Logged in as')
print(self.user.name)
print(self.user.id)
print('------')


# On message
async def on_message(self, message):
if message.author.id == self.user.id:
return

if message.content.startswith('!add-url'):
text = message.content
self.checkUrls.append(text.split("!add-url ")[1])
await message.channel.send(text.split("!add-url ")[1] + " added to the program.")

if message.content.startswith('!remove-url'):
text = message.content.split("!remove-url ")[1]
self.checkUrls.remove(text)
await message.channel.send(text + " removed from the program.")


# Background manager
async def my_background_task(self):
await self.wait_until_ready()
channel = self.get_channel(int(channel_id)) # channel ID goes here
channel = self.get_channel(int(channel_id))
while not self.is_closed():
for page in range(0,int(pages_to_index)):
res = self.checkDeals(base_url + "?page=" + str(page))
for page in range(0, int(pages_to_index)):
print('checking page ' + str(page))
res = self.checkDealsBeautifulSoup(
base_url + "?page=" + str(page))
if res != []:
for msg in res:
await channel.send(msg)
await asyncio.sleep(int(time_interval_seconds))



# Main
client = MyClient(channel_id)
client.run(discord_api_key)
client.run(discord_api_key)
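The on_message handler above also gives the bot two runtime chat commands that add to and remove from its self.checkUrls list, so URLs can be adjusted without restarting. Typed into a channel the bot can read (the tag URL here is just an illustration):

```
!add-url https://www.hotukdeals.com/tag/laptop
!remove-url https://www.hotukdeals.com/tag/laptop
```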
Empty file added proxies.txt
Empty file.
12 changes: 6 additions & 6 deletions settings.json
@@ -1,11 +1,11 @@
{
"min_upvotes": "100",
"max_upvotes": "500",
"min_upvotes": "500",
"max_upvotes": "1000",
"base_url": "https://www.hotukdeals.com",
"pages_to_index": "10",
"discord_api_token": "1123231",
"min_price": "0.99",
"discord_api_token": "1234567890",
"min_price": "0",
"max_price": "500",
"discord_channel_id": "123456789",
"time_interval_seconds": "10"
"discord_channel_id": "1234567890",
"time_interval_seconds": "1800"
}
