diff --git a/README.md b/README.md
index 712f966..7af7d7f 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Have python 3.x installed. This was tested with 3.7.3
 
 ## Running the program
 
-To run the software you first need to add information into the proxies.txt and settings.json
+To run the software, you first need to add your information to proxies.txt and settings.json.
 
 ### **Example of settings.json**
 ```
@@ -55,13 +55,15 @@ To run the software you first need to add information into the proxies.txt and s
 
 **time_interval_seconds** - The amount of time in seconds that you want to delay after all the pages are scraped. I recommend over 30 minutes.
 
-### **Example of proxies.txt**
+### **Example of proxies.txt and httpsProxy.txt**
 ```
 1234.1234.1234:1010
 ```
 Each line should be a new proxy with a port, must be able to manage SSL.
 
+HTTP proxy addresses go in proxies.txt, and HTTPS proxy addresses go in httpsProxy.txt.
+
 ### Executing the program
 
 Once you have all of the json files configured as you would like simpily run the command below.
diff --git a/data/usedLinks.txt b/data/usedLinks.txt
index e69de29..b3a4252 100644
--- a/data/usedLinks.txt
+++ b/data/usedLinks.txt
@@ -0,0 +1 @@
+placeholder
\ No newline at end of file
diff --git a/httpsProxy.txt b/httpsProxy.txt
new file mode 100644
index 0000000..027573a
--- /dev/null
+++ b/httpsProxy.txt
@@ -0,0 +1 @@
+103.129.195.163:8080
\ No newline at end of file
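For context on the new httpsProxy.txt file: at request time the bot pairs one random line from each file into the scheme-keyed `proxies` mapping that `requests` expects. A minimal sketch of that lookup, assuming one `host:port` entry per line; the `pick_proxies` helper is illustrative and not part of main.py:

```python
import random
import requests

def pick_proxies(http_path="proxies.txt", https_path="httpsProxy.txt"):
    """Pick one random proxy per scheme (helper name is hypothetical)."""
    with open(http_path) as f:
        http_hosts = [line.strip() for line in f if line.strip()]
    with open(https_path) as f:
        https_hosts = [line.strip() for line in f if line.strip()]
    # requests expects a {scheme: proxy_url} mapping
    return {
        "http": "http://" + random.choice(http_hosts),
        "https": "https://" + random.choice(https_hosts),
    }

page = requests.get("https://www.hotukdeals.com", proxies=pick_proxies(), timeout=10)
```

Note the `strip()` calls: `readlines()` keeps the trailing newline, which would otherwise end up inside the proxy URL. The same fix is applied to main.py below.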
diff --git a/main.py b/main.py
index 03b740f..824eb9d 100644
--- a/main.py
+++ b/main.py
@@ -34,13 +34,13 @@ def __init__(self, channel, *args, **kwargs):
         # create the background task and run it in the background
         self.bg_task = self.loop.create_task(self.my_background_task())
 
-    # Check deals
+
     def checkDealsBeautifulSoup(self, url):
         # Imports
         import requests
         from bs4 import BeautifulSoup
         import json
         import random
 
         # Loads JSON and vars
@@ -53,12 +53,19 @@ def checkDealsBeautifulSoup(self, url):
         min_price = float(settings["min_price"])
         max_price = float(settings["max_price"])
 
+        proxyvar = settings['proxy']
+
         # Loads proxies
-        with open('proxies.txt', 'r') as proxies:
-            proxies = proxies.readlines()
+        with open('proxies.txt', 'r') as proxiesf:
+            proxies = proxiesf.readlines()
+
+        with open('httpsProxy.txt', 'r') as proxieshttps:
+            httpsProxies = proxieshttps.readlines()
 
         # Picks random proxy
-        proxy = random.choice(proxies)
+        proxy1 = random.choice(proxies).strip()  # strip the newline readlines() keeps
+        httpsProxy = random.choice(httpsProxies).strip()
+
         returnMsgs = []
         newArray = []
 
@@ -68,32 +75,33 @@ def checkDealsBeautifulSoup(self, url):
             usedArray = data.readlines()
 
         # Sets up proxy
-        proxies = {
-            "http": "http://" + proxy,
-            "https": "https://" + proxy,
-        }
 
-        page = requests.get(url, proxies=proxy)
-        soup = BeautifulSoup(page.text, 'html.parser')
-        var = False
+        # settings.json may store the flag as a bool or a string
+        if proxyvar in (True, "True"):
+            proxy = {"http": "http://" + proxy1,
+                     "https": "https://" + httpsProxy}
+
+            page = requests.get(url, proxies=proxy)
+        else:
+            page = requests.get(url)
+
+        soup = BeautifulSoup(page.text, 'html.parser')
 
         # Tries to get things
-        try:
-            listings = soup.find_all(
-                'article', attrs={'data-handler': 'history'})
-            upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'})
-            pricing = soup.find_all('span', attrs={'class': 'thread-price'})
-            urls = soup.find_all(
-                'a', attrs={'class': 'cept-thread-image-link'})
-            var = True
-        except:
-            var = False
+        listings = soup.find_all(
+            'article', attrs={'data-handler': 'history'})
+        upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'})
+        pricing = soup.find_all('span', attrs={'class': 'thread-price'})
+        urls = soup.find_all(
+            'a', attrs={'class': 'cept-thread-image-link'})
+        thumbnails = soup.find_all('img', attrs={'class': 'thread-image'})
+        titles = soup.find_all('a', attrs={'class': 'thread-link'})
+        var = True
 
         if var == True:
             upvotesIndex = 0
             index = 0
             for x in range(0, len(listings)):
-
                 try:
                     upvote = upvotes[upvotesIndex].text.strip().replace(
                         " ", "").replace("°", "").replace("\n", "")
@@ -105,12 +113,15 @@ def checkDealsBeautifulSoup(self, url):
                 except:
                     upvote = 0
 
+
                 try:
                     price = pricing[index].text.strip().replace("£", "")
                 except:
                     price = 0
                 try:
                     url = urls[index].get('href')
+                    title = titles[index].get('title')
                 except:
                     url = None
+                    title = None
                 if price != "FREE":
@@ -121,18 +132,28 @@ def checkDealsBeautifulSoup(self, url):
                     else:
                         price = 0
 
+                # Fall back to a placeholder image when the listing has no thumbnail
+                try:
+                    thumbnail = thumbnails[index].get('src')
+                except:
+                    thumbnail = "https://proxy.duckduckgo.com/iu/?u=https%3A%2F%2Fsitechecker.pro%2Fwp-content%2Fuploads%2F2017%2F12%2F404.png&f=1&nofb=1"
+
                 if min_price <= price <= max_price:
                     if min_upvotes <= int(upvote) <= max_upvotes:
                         if url != None:
-                            if url + "\n" not in usedArray:
+                            # Only report unseen deal pages
+                            if url + "\n" not in usedArray and "/deals/" in url:
-                                # Return Message
-                                message = url + " Satisfies your deal criteria. It is at " + \
-                                    str(upvote) + \
-                                    " degrees and costs £" + str(price)
-                                returnMsgs.append(message)
+                                # Build the embed payload; fall back to "Error" and a
+                                # placeholder image when a field could not be scraped
+                                payload = {
+                                    "title": title if title != None else "Error",
+                                    "url": url,
+                                    "temp": int(upvote),
+                                    "price": float(price),
+                                    "thumbnail": thumbnail if thumbnail != None else "https://proxy.duckduckgo.com/iu/?u=http%3A%2F%2Fegyptianstreets.com%2Fwp-content%2Fuploads%2F2017%2F07%2F404.jpg&f=1&nofb=1"
+                                }
+                                returnMsgs.append(payload)
                                 usedArray.append(url)
                                 newArray.append(url)
-
                 upvotesIndex += 1
                 index += 1
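The hunk above changes the contract of checkDealsBeautifulSoup: it now returns a list of dicts rather than plain strings. A sketch of the shape each entry takes, with illustrative values that echo the sample deal from the draft comments in the original patch:

```python
# Shape of each returnMsgs entry after this change; values are illustrative only.
deal = {
    "title": "Apple iPad Air 3 10.5 2019 model, 64 GB 256 GB",
    "url": "https://www.hotukdeals.com/deals/ipad-air-2019-3293151",
    "temp": 250,      # "temperature" (net upvotes) as an int
    "price": 399.0,   # price in GBP as a float
    "thumbnail": "https://...",  # scraped image URL, or a 404 placeholder
}
```

The four-branch if/elif chain in the original patch built this same dict with per-field fallbacks; the replacement above collapses it into one dict with inline fallbacks, which also avoids shadowing the imported `json` module with a local variable named `json`.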
name="Price", value=msg['price'], inline=True) + embed.add_field(name="Temperature", + value=msg['temp'], inline=True) + # await self.bot.say(embed=embed) + + # await self.bot.say(embed=embed) + await channel.send(embed=embed) + except: + print('Failed on') + print(msg) await asyncio.sleep(int(time_interval_seconds)) - # Main client = MyClient(channel_id) client.run(discord_api_key) diff --git a/proxies.txt b/proxies.txt index e69de29..2d6689f 100644 --- a/proxies.txt +++ b/proxies.txt @@ -0,0 +1 @@ +143.208.154.11:3128 \ No newline at end of file