Commit cd5f666

automated_login_and_browsing, scrape stackoverflow
1 parent: c562f28

7 files changed: +3398 -0 lines changed
`automated_login_and_browsing/1_google_bot.py` (36 additions, 0 deletions)

```python
import os
import time

from selenium import webdriver

BASE_PATH = os.path.dirname(__file__)
driver_path = os.path.join(BASE_PATH, 'geckodriver')


# Firefox browser
browser = webdriver.Firefox(executable_path=driver_path)
# browser.get("http://dscrecbijnor.com")


# Chrome browser
# chromium_driver_path = os.path.join(BASE_PATH, "<path>")
# browser = webdriver.Chrome(executable_path=chromium_driver_path)
# browser.get("http://dscrecbijnor.com")


# Automated Google search
url = "https://google.com/"
browser.get(url)
time.sleep(2)
name = 'q'
search_el = browser.find_element_by_name(name)

# print(search_el)
search_el.send_keys("selenium python")
submit_btn_el = browser.find_element_by_css_selector("input[type=submit]")
print(submit_btn_el.get_attribute('name'))
time.sleep(1)
submit_btn_el.click()

# now scrape the content using web scraping
```
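The final comment leaves the scraping step open. A minimal sketch of one way to continue, appended to the end of the script above (so `browser` and `time` are already available) and using the same Selenium 3 style API; the `h3` selector is an assumption about Google's results markup, not part of this commit:

```python
time.sleep(2)  # wait for the results page to load

# Google typically renders result titles as <h3> elements (assumption)
for title_el in browser.find_elements_by_css_selector("h3"):
    if title_el.text:
        print(title_el.text)
```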
`automated_login_and_browsing/2_instaBot.py` (191 additions, 0 deletions)

```python
# Import libraries
import os
import time
from getpass import getpass
from urllib.parse import urlparse

import requests
from selenium import webdriver

# Set paths for files
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
driver_path = os.path.join(BASE_PATH, 'geckodriver')

# Credentials
username = 'its_yours_kumar'
my_pass = getpass("What is your password?")


# Firefox browser
browser = webdriver.Firefox(executable_path=driver_path)
url = "https://instagram.com/"
browser.get(url)

# Login
time.sleep(2)
username_el = browser.find_element_by_name("username")
username_el.send_keys(username)

password_el = browser.find_element_by_name("password")
password_el.send_keys(my_pass)

time.sleep(1.5)
submit_btn_el = browser.find_element_by_css_selector("button[type='submit']")
submit_btn_el.click()


body_el = browser.find_element_by_css_selector("body")
html_text = body_el.get_attribute("innerHTML")
# print(html_text)
# use web scraping here to scrape the html


# Automatic follow
# follow = browser.find_element_by_css_selector("button")

# XPath
# my_button_xpath = "//button"
# browser.find_element_by_xpath(my_button_xpath)


def click_to_follow(browser):
    # "Follow" buttons only, skipping ones that already read "Following"
    my_follow_btn_xpath = "//button[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    follow_btn_elements = browser.find_elements_by_xpath(my_follow_btn_xpath)
    for btn in follow_btn_elements:
        time.sleep(2)
        try:
            btn.click()
        except Exception:
            pass


new_user = "https://instagram.com/ted/"
browser.get(new_user)
click_to_follow(browser)


# Scraping content from any post
time.sleep(50)
user_profile_url = "https://www.instagram.com/dscrecbijnor/"
browser.get(user_profile_url)

post_url_pattern = "https://www.instagram.com/p/<post-slug-id>"  # reference only, unused
post_xpath_str = "//a[contains(@href, '/p/')]"
post_links = browser.find_elements_by_xpath(post_xpath_str)
post_link_el = None

if len(post_links) > 0:
    post_link_el = post_links[0]

if post_link_el is not None:
    post_href = post_link_el.get_attribute("href")
    browser.get(post_href)

video_els = browser.find_elements_by_xpath("//video")
image_els = browser.find_elements_by_xpath("//img")

"""
Earlier one-off version of the image downloader, kept commented out:

img_dir = os.path.join(BASE_PATH, "images")
os.makedirs(img_dir, exist_ok=True)
for img in image_els:
    # print(img.get_attribute('src'))
    url = img.get_attribute('src')
    base_url = urlparse(url).path
    filename = os.path.basename(base_url)
    filepath = os.path.join(img_dir, filename)
    with requests.get(url, stream=True) as r:
        try:
            r.raise_for_status()
        except Exception:
            continue
        with open(filepath, "wb") as f:
            for chunk in r.iter_content():
                if chunk:
                    f.write(chunk)
"""


def scrape_and_save(elements):
    data_dir = os.path.join(BASE_PATH, "data")
    os.makedirs(data_dir, exist_ok=True)
    for el in elements:
        url = el.get_attribute('src')
        base_url = urlparse(url).path
        filename = os.path.basename(base_url)
        filepath = os.path.join(data_dir, filename)
        if os.path.exists(filepath):
            continue
        with requests.get(url, stream=True) as r:
            try:
                r.raise_for_status()
            except Exception:
                continue
            with open(filepath, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)


scrape_and_save(image_els)
# scrape_and_save(video_els)


# Automatic like and comments on posts
"""
LONG TERM GOAL
Use machine learning to classify the post's
images or videos
and then comment in a relevant fashion
"""

# Automate comment


def automated_comment(browser, content="that's cool!"):
    time.sleep(3)
    comment_xpath_str = "//textarea[contains(@placeholder, 'Add a comment')]"
    comment_el = browser.find_element_by_xpath(comment_xpath_str)
    # print(comment_el)
    comment_el.send_keys(content)
    submit_btns = "button[type='submit']"
    submit_btn_els = browser.find_elements_by_css_selector(submit_btns)
    time.sleep(2)
    for btn in submit_btn_els:
        try:
            btn.click()
        except Exception:
            pass


automated_comment(browser)

# Automate like
"""
The like button is actually not a <button>; it's an SVG.
Click its parent element instead.
"""


def automated_like(browser):
    like_heart_svg_xpath = "//*[contains(@aria-label, 'Like')]"
    all_like_heart_els = browser.find_elements_by_xpath(like_heart_svg_xpath)

    # The tallest "Like" heart is assumed to belong to the post itself
    max_heart_h = -1
    for heart_el in all_like_heart_els:
        h = heart_el.get_attribute("height")
        if h is not None:
            max_heart_h = max(max_heart_h, int(h))

    all_like_heart_els = browser.find_elements_by_xpath(like_heart_svg_xpath)
    for heart_el in all_like_heart_els:
        h = heart_el.get_attribute("height")
        # print(h)
        if h == f"{max_heart_h}":
            # the SVG itself isn't clickable; click its parent
            parent_button = heart_el.find_element_by_xpath('..')
            time.sleep(2)
            try:
                parent_button.click()
            except Exception:
                pass


automated_like(browser)
```
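Earlier in the file, `html_text` is captured with only a placeholder comment for the scraping step. A minimal sketch of that step, assuming the standard-library `re` module; the regex is an assumption about Instagram's markup, though the `/p/` pattern mirrors the XPath used later in the file:

```python
import re

# Pull candidate post paths out of the captured innerHTML
post_paths = re.findall(r'href="(/p/[^"]+)"', html_text)
print(post_paths[:10])
```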
`README.md` (39 additions, 0 deletions)

# automated_login_and_browsing

This project is about automating login to an account and browsing the internet: searching for content, scraping it, and saving it to the local machine.


## Automated Google search and scrape

The file [1_google_bot.py](automated_login_and_browsing/1_google_bot.py) contains the code for an automated Google web search.

## Instagram Bot

This is an [Instagram bot](automated_login_and_browsing/2_instaBot.py) which automatically logs in to your Instagram account, opens a user's profile, and downloads their posts. It also provides automated follow, automated like, and automated comments.

### Setup

1. Install the requirements from the `requirements.txt` file (*selenium*, *requests*): `pip install -r requirements.txt`. The code uses the Selenium 3 style API (`executable_path`, `find_element_by_*`), so a Selenium 3 release is assumed.
2. After installing the requirements, download the webdriver for your browser:

   | Browser | Supported OS | Maintained by | Download | Issue Tracker |
   | --- | --- | --- | --- | --- |
   | Chromium/Chrome | Windows/macOS/Linux | Google | [Downloads](https://chromedriver.storage.googleapis.com/index.html) | [Issues](https://bugs.chromium.org/p/chromedriver/issues/list) |
   | Firefox | Windows/macOS/Linux | Mozilla | [Downloads](https://github.com/mozilla/geckodriver/releases) | [Issues](https://github.com/mozilla/geckodriver/issues) |
   | Edge | Windows 10 | Microsoft | [Downloads](https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/) | [Issues](https://developer.microsoft.com/en-us/microsoft-edge/platform/issues/?page=1&q=webdriver) |
   | Internet Explorer | Windows | Selenium Project | [Downloads](https://selenium-release.storage.googleapis.com/index.html) | [Issues](https://github.com/SeleniumHQ/selenium/labels/D-IE) |
   | Safari | macOS El Capitan and newer | Apple | Built in | [Issues](https://bugreport.apple.com/logon) |
   | Opera | Windows/macOS/Linux | Opera | [Downloads](https://github.com/operasoftware/operachromiumdriver/releases) | [Issues](https://github.com/operasoftware/operachromiumdriver/issues) |

3. Now import `webdriver` from the selenium package and enjoy:
```python
import os

from selenium import webdriver

BASE_PATH = os.path.dirname(os.path.abspath(__file__))

# if Firefox browser
driver_path = os.path.join(BASE_PATH, 'geckodriver')
browser = webdriver.Firefox(executable_path=driver_path)

# if Chrome browser
# chromium_driver_path = os.path.join(BASE_PATH, "<path>")
# browser = webdriver.Chrome(executable_path=chromium_driver_path)

url = "https://google.com/"
browser.get(url)
```
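From there, the automated search in [1_google_bot.py](automated_login_and_browsing/1_google_bot.py) takes only a few more calls. A minimal sketch, appended to the snippet above and assuming Google's search box is still the input named `q`:

```python
import time

time.sleep(2)  # give the page a moment to render

search_el = browser.find_element_by_name('q')  # Google's search box
search_el.send_keys("selenium python")

time.sleep(1)
submit_btn_el = browser.find_element_by_css_selector("input[type=submit]")
submit_btn_el.click()
```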
### Author

[Kumar Shanu](https://its-kumar.herokuapp.com/)
`requirements.txt` (2 additions, 0 deletions)

```
selenium
requests
```
