Skip to content

Commit

Permalink
Add scraper/ to gitignore
Browse files (browse the repository at this point in the history)
  • Loading branch information
yeexunwei committed Sep 30, 2021
1 parent c61f409 commit 6e79638
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
data/
*.wav
*.txt
A-Hackers-AI-Voice-Assistant-master/
speech_command*
SpeechCommands/
train-clean*
Libri*
scraper/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
7 changes: 1 addition & 6 deletions scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
logging.basicConfig(level="INFO", format="%(levelname)s: %(filename)s: %(message)s")

# Storage locations
- DATA_STORAGE_ROOT = os.path.join(os.getcwd(), "scraper", "data")
- print('======test', os.getcwd())
+ DATA_STORAGE_ROOT = os.path.join(os.getcwd(), "data")

# Scraper constants
TED_URL_HOMEPAGE = "https://www.ted.com"
Expand Down Expand Up @@ -54,10 +53,6 @@ def get_all_video_urls(base_url):
# i.e. search from "page=1", "page=2", "page=3"...
page_number += 1

- # Only doing proof of concept
- if page_number > 1:
-     break

response = requests.get(base_url + str(page_number))

page_soup = BeautifulSoup(response.text, "html.parser")
Expand Down

0 comments on commit 6e79638

Please sign in to comment.