diff --git a/.gitignore b/.gitignore
index 09cbb65..e69de29 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +0,0 @@
-
-geckodriver.log
diff --git a/README.md b/README.md
index 3f2133c..712f966 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,85 @@
-# Hotukdeals-Discord-Notifier
-A commissioned project to notify a user on discord about new deals matching specific criteria.
+# HotUkDeals-Discord-Notifier
+
+This is a web scraping program designed to scrape the [hotukdeals](https://www.hotukdeals.com/) website. You give it specific criteria, and when it finds a deal that matches, it notifies you on Discord.
+
+## Getting Started
+
+The following instructions will help you get this software running.
+
+### Prerequisites
+
+To install the Python requirements, run the command below.
+
+```
+pip install beautifulsoup4 requests discord.py selenium
+```
+
+### Installing
+
+Have Python 3.x installed. This was tested with 3.7.3.
+
+## Running the program
+
+To run the software, you first need to add your information to proxies.txt and settings.json. You should also create an empty data/usedLinks.txt file; the program uses it to remember deals it has already posted.
+
+### **Example of settings.json**
+```
+{
+    "min_upvotes": "500",
+    "max_upvotes": "1000",
+    "base_url": "https://www.hotukdeals.com",
+    "pages_to_index": "10",
+    "discord_api_token": "1234567890",
+    "min_price": "0",
+    "max_price": "500",
+    "discord_channel_id": "1234567890",
+    "time_interval_seconds": "1800"
+}
+```
+
+**min_upvotes** - The minimum number of upvotes (degrees) a deal needs for you to be notified.
+
+**max_upvotes** - The maximum number of upvotes (degrees) a deal can have for you to be notified.
+
+**base_url** - The base URL to scan. The default works fine.
+
+**pages_to_index** - The number of pages you want to index. The default is 10.
+
+**discord_api_token** - Your Discord API token for your bot. [Here](https://www.writebots.com/discord-bot-token/) is a good article on how to get your bot's API token.
+
+**min_price** - The minimum price of the deals you want to be notified of.
+
+**max_price** - The maximum price of the deals you want to be notified of.
+
+**discord_channel_id** - The ID of the Discord channel you want your bot to talk in and notify you through. [Here](https://support.discordapp.com/hc/en-us/articles/206346498-Where-can-I-find-my-User-Server-Message-ID-) is a good article on how to find your Discord channel ID.
+
+**time_interval_seconds** - The number of seconds to wait after all the pages have been scraped before scraping them again. I recommend at least 30 minutes (1800 seconds).
+
+### **Example of proxies.txt**
+```
+123.123.123.123:1010
+```
+
+Each line should be a separate proxy in ip:port form. Proxies must be able to handle SSL.
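+
+If you want to check that a proxy can handle SSL before adding it, a quick test along the lines below should work. This snippet is not part of the program; swap in your own proxy address.
+
+```
+import requests
+
+proxy = "123.123.123.123:1010"  # a proxy from your proxies.txt
+proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
+
+# A 200 status code means the proxy fetched the page over HTTPS
+response = requests.get("https://www.hotukdeals.com", proxies=proxies, timeout=10)
+print(response.status_code)
+```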
+
+### Executing the program
+
+Once you have both files configured as you would like, simply run the command below.
+
+```
+python main.py
+```
+
+The bot will then notify you on Discord when deals match your criteria.
+
+## Built With
+
+* [Python 3.7](https://www.python.org/) - The language used
+
+## Authors
+
+* **David Teather** - *Initial work* - [davidteather](https://github.com/davidteather)
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details
\ No newline at end of file
diff --git a/main.py b/main.py
index 073e4b8..03b740f 100644
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@ from selenium.webdriver.firefox.options import Options
 import json
 
+
 with open('settings.json') as data:
     settings = json.load(data)
 
@@ -32,64 +33,119 @@ def __init__(self, channel, *args, **kwargs):
         # create the background task and run it in the background
         self.bg_task = self.loop.create_task(self.my_background_task())
-
-    def checkDeals(self, url):
-        # Selenium stuff
-        options = Options()
-        options.headless = False
-        driver = webdriver.Firefox(options=options)
-        driver.set_window_position(0, 0)
-        driver.set_window_size(1920, 1080)
+
+    # Check deals
+    def checkDealsBeautifulSoup(self, url):
+        # Imports
+        import requests
+        from bs4 import BeautifulSoup
+        import json
+        import random
+
+        # Loads settings
+        with open('settings.json') as data:
+            settings = json.load(data)
+
+        min_upvotes = int(settings["min_upvotes"])
+        max_upvotes = int(settings["max_upvotes"])
+
+        min_price = float(settings["min_price"])
+        max_price = float(settings["max_price"])
+
+        # Loads proxies
+        with open('proxies.txt', 'r') as proxies:
+            proxies = proxies.readlines()
+
+        # Picks a random proxy, stripping the trailing newline from readlines()
+        proxy = random.choice(proxies).strip()
 
         returnMsgs = []
         newArray = []
 
+        # Reads the links we have already notified about
         with open('data/usedLinks.txt', 'r') as data:
             usedArray = data.readlines()
 
-        # Gets webpage
-        driver.get(url)
-
-
-        deals = driver.find_elements_by_xpath('//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span')
-
-
-        print(len(deals))
-        for index in range(0,len(deals)):
-            print(index)
-            # '//div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span'
-            # '//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span'
-            upvotes = int(driver.find_elements_by_xpath('//article[@data-handler="history"]/div[@class="threadGrid"]/div[@class="threadGrid-headerMeta"]/div[@class="flex boxAlign-ai--all-c boxAlign-jc--all-sb space--b-2"]/div[@class="cept-vote-box vote-box overflow--hidden border border--color-borderGrey bRad--a"]/span')[index].text.strip().replace(" ", "").replace("°", "").replace("\n", ""))
-            priceString = driver.find_elements_by_xpath('//span[@class="thread-price text--b vAlign--all-tt cept-tp size--all-l size--fromW3-xl"]')[index].text.strip().replace("£", "")
-            url = driver.find_elements_by_xpath('//a[@class="cept-tt thread-link linkPlain thread-title--list"]')[index].get_attribute('href')
-
-
-            if priceString != "FREE":
-                price = float(priceString)
-            else:
-                price = 0
-
-            if min_price <= price <= max_price and min_upvotes <= upvotes <= max_upvotes:
-                if url not in usedArray:
-                    # Return Message
-                    message = url + " Satisfies your deal criteria. It is at " + str(upvotes) + " degrees and costs " + str(priceString)
-                    returnMsgs.append(message)
-                    usedArray.append(url)
-                    newArray.append(newArray)
-
-            print('here')
+        # Sets up the proxy mapping for requests
+        proxy_dict = {
+            "http": "http://" + proxy,
+            "https": "https://" + proxy,
+        }
+
+        page = requests.get(url, proxies=proxy_dict)
+        soup = BeautifulSoup(page.text, 'html.parser')
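+        # Each deal on the page is an <article data-handler="history"> element;
+        # vote counts, prices and deal links are read from spans and anchors by
+        # CSS class below. These class names match the hotukdeals markup at the
+        # time of writing and may need updating if the site layout changes.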
It is at " + str(upvotes) + " degrees and costs " + str(priceString) - returnMsgs.append(message) - usedArray.append(url) - newArray.append(newArray) - - print('here') + # Sets up proxy + proxies = { + "http": "http://" + proxy, + "https": "https://" + proxy, + } + + page = requests.get(url, proxies=proxy) + soup = BeautifulSoup(page.text, 'html.parser') + var = False + + # Tries to get things + try: + listings = soup.find_all( + 'article', attrs={'data-handler': 'history'}) + upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'}) + pricing = soup.find_all('span', attrs={'class': 'thread-price'}) + urls = soup.find_all( + 'a', attrs={'class': 'cept-thread-image-link'}) + var = True + except: + var = False + + if var == True: + upvotesIndex = 0 + index = 0 + for x in range(0, len(listings)): + + try: + upvote = upvotes[upvotesIndex].text.strip().replace( + " ", "").replace("°", "").replace("\n", "") + if "Deal" in upvote or "alerts" in upvote: + upvotesIndex += 1 + upvote = upvotes[upvotesIndex].text.strip().replace( + " ", "").replace("°", "").replace("\n", "") + + except: + upvote = 0 + + try: + price = pricing[index].text.strip().replace("£", "") + except: + price = 0 + try: + url = urls[index].get('href') + except: + url = None + if price != "FREE": + try: + price = float(price.replace(",", "")) + except: + price = 0 + else: + price = 0 + + if min_price <= price <= max_price: + if min_upvotes <= int(upvote) <= max_upvotes: + if url != None: + if url + "\n" not in usedArray: + # Return Message + message = url + " Satisfies your deal criteria. It is at " + \ + str(upvote) + \ + " degrees and costs £" + str(price) + returnMsgs.append(message) + usedArray.append(url) + newArray.append(url) + + upvotesIndex += 1 + index += 1 + + # Saves new logged files with open('data/usedLinks.txt', 'a') as fileObj: for line in newArray: - fileObj.write(line) - - - - driver.quit() + fileObj.write(line + "\n") + # Returns stuff return returnMsgs + + # On start async def on_ready(self): print('Logged in as') print(self.user.name) @@ -97,31 +153,28 @@ async def on_ready(self): print('------') + # On message async def on_message(self, message): if message.author.id == self.user.id: return - if message.content.startswith('!add-url'): - text = message.content - self.checkUrls.append(text.split("!add-url ")[1]) - await message.channel.send(text.split("!add-url ")[1] + " added to the program.") - - if message.content.startswith('!remove-url'): - text = message.content.split("!remove-url ")[1] - self.checkUrls.remove(text) - await message.channel.send(text + " removed from the program.") - + # Background manager async def my_background_task(self): await self.wait_until_ready() - channel = self.get_channel(int(channel_id)) # channel ID goes here + channel = self.get_channel(int(channel_id)) while not self.is_closed(): - for page in range(0,int(pages_to_index)): - res = self.checkDeals(base_url + "?page=" + str(page)) + for page in range(0, int(pages_to_index)): + print('checking page ' + str(page)) + res = self.checkDealsBeautifulSoup( + base_url + "?page=" + str(page)) if res != []: for msg in res: await channel.send(msg) await asyncio.sleep(int(time_interval_seconds)) + + +# Main client = MyClient(channel_id) -client.run(discord_api_key) \ No newline at end of file +client.run(discord_api_key) diff --git a/proxies.txt b/proxies.txt new file mode 100644 index 0000000..e69de29 diff --git a/settings.json b/settings.json index f0f8a7f..dd44ab8 100644 --- a/settings.json +++ b/settings.json @@ -1,11 +1,11 
     async def my_background_task(self):
         await self.wait_until_ready()
-        channel = self.get_channel(int(channel_id))  # channel ID goes here
+        channel = self.get_channel(int(channel_id))
         while not self.is_closed():
-            for page in range(0,int(pages_to_index)):
-                res = self.checkDeals(base_url + "?page=" + str(page))
+            for page in range(0, int(pages_to_index)):
+                print('checking page ' + str(page))
+                res = self.checkDealsBeautifulSoup(
+                    base_url + "?page=" + str(page))
                 if res != []:
                     for msg in res:
                         await channel.send(msg)
             await asyncio.sleep(int(time_interval_seconds))
+
+
+# Main
 client = MyClient(channel_id)
-client.run(discord_api_key)
\ No newline at end of file
+client.run(discord_api_key)
diff --git a/proxies.txt b/proxies.txt
new file mode 100644
index 0000000..e69de29
diff --git a/settings.json b/settings.json
index f0f8a7f..dd44ab8 100644
--- a/settings.json
+++ b/settings.json
@@ -1,11 +1,11 @@
 {
-    "min_upvotes": "100",
-    "max_upvotes": "500",
+    "min_upvotes": "500",
+    "max_upvotes": "1000",
     "base_url": "https://www.hotukdeals.com",
     "pages_to_index": "10",
-    "discord_api_token": "1123231",
-    "min_price": "0.99",
+    "discord_api_token": "1234567890",
+    "min_price": "0",
     "max_price": "500",
-    "discord_channel_id": "123456789",
-    "time_interval_seconds": "10"
+    "discord_channel_id": "1234567890",
+    "time_interval_seconds": "1800"
 }
\ No newline at end of file