andysingal
diff --git a/‎.DS_Store
6 KB b/‎.DS_Store
6 KB
diff --git a/‎3 - API_Engineer_Peer_Review_Assignment_Extract.ipynb ‎web_scraping/3 - API_Engineer_Peer_Review_Assignment_Extract.ipynb b/‎3 - API_Engineer_Peer_Review_Assignment_Extract.ipynb ‎web_scraping/3 - API_Engineer_Peer_Review_Assignment_Extract.ipynb
diff --git a/‎Section 3 - Additional API functionalities.ipynb ‎web_scraping/Section 3 - Additional API functionalities.ipynb b/‎Section 3 - Additional API functionalities.ipynb ‎web_scraping/Section 3 - Additional API functionalities.ipynb
diff --git a/‎Section 3 - Creating a simple currency converter.ipynb ‎web_scraping/Section 3 - Creating a simple currency converter.ipynb b/‎Section 3 - Creating a simple currency converter.ipynb ‎web_scraping/Section 3 - Creating a simple currency converter.ipynb
diff --git a/‎web_scraping/Section 3 - Downloading files with requests.ipynb
+288 b/‎web_scraping/Section 3 - Downloading files with requests.ipynb
+288
diff --git a/‎Section 3 - EDAMAM API - Initial setup and registration.ipynb ‎web_scraping/Section 3 - EDAMAM API - Initial setup and registration.ipynb b/‎Section 3 - EDAMAM API - Initial setup and registration.ipynb ‎web_scraping/Section 3 - EDAMAM API - Initial setup and registration.ipynb
@@ -0,0 +1,288 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Downloading Files with Requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The requests package can also be used to download files from the web.\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Naive downloading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One way to 'download' a file is to send a request to it.\n",
+    "# Then, export the content of the response to a local file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's use an image from wikipedia for this purpose\n",
+    "file_url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/d9/Collage_of_Nine_Dogs.jpg/1024px-Collage_of_Nine_Dogs.jpg\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "200"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response = requests.get(file_url)\n",
+    "response.status_code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x01\\x00H\\x00H\\x00\\x00\\xff\\xfe\\x00OFile source: https://commons.wikimedia.org/wiki/File:Collage_of_Nine_Dogs.jpg\\xff\\xe2\\x02\\x1cICC_PROFILE\\x00\\x01\\x01\\x00\\x00\\x02\\x0clcms\\x02\\x10\\x00\\x00mntrRGB XYZ \\x07\\xdc\\x00\\x01\\x00\\x19\\x00\\x03\\x00)\\x009acspAPPL\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3-lcms\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\ndesc\\x00\\x00\\x00\\xfc\\x00\\x00\\x00^cprt\\x00\\x00\\x01\\\\\\x00\\x00\\x00\\x0bwtpt\\x00\\x00\\x01h\\x00\\x00\\x00\\x14bkpt\\x00\\x00\\x01|\\x00\\x00\\x00\\x14rXYZ\\x00\\x00\\x01\\x90\\x00\\x00\\x00\\x14gXYZ\\x00\\x00\\x01\\xa4\\x00\\x00\\x00\\x14bXYZ\\x00\\x00\\x01\\xb8\\x00\\x00\\x00\\x14rTRC\\x00\\x00\\x01\\xcc\\x00\\x00\\x00@gTRC\\x00\\x00\\x01\\xcc\\x00\\x00\\x00@bTRC\\x00\\x00\\x01\\xcc\\x00\\x00\\x00@desc\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x03c2\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00text\\x00\\x00\\x00\\x00FB\\x00\\x00XYZ \\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3-X'"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Printing out the begining of the content of the response\n",
+    "# It is in a binary-encoded format, thus it looks like gibberish\n",
+    "response.content[:500]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We need to export this to an image file (jpg, png, gif...)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Writing to a file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We open/create a file with the function 'open()'\n",
+    "file = open(\"dog_image.jpg\", \"wb\")\n",
+    "\n",
+    "# Then, write to it\n",
+    "file.write(response.content)\n",
+    "\n",
+    "# And close the file after finishing\n",
+    "file.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The two parameters in the function open() are:\n",
+    "# - the name of the file (along with a path to it if it is not in the same directory as our program)\n",
+    "# - the mode in wich we want to edit the file\n",
+    "\n",
+    "# Some popular modes are:\n",
+    "# - 'r' : Opens the file in read-only mode;\n",
+    "# - 'rb' : Opens the file as read-only in binary format;\n",
+    "# - 'w' : Creates a file in write-only mode. If the file already exists, it will overwrite it;\n",
+    "# - 'wb': Write-only mode in binary format;\n",
+    "# - 'a' : Opens the file for appending new information to the end;\n",
+    "# - 'w+' : Opens the file for writing and reading;\n",
+    "\n",
+    "# We have used 'wb' in this example, since we want to export the data to a file (thus, write to it)\n",
+    "# and response.content is in bytes\n",
+    "\n",
+    "# Never forget to close the file!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# To ensure the file will always be closed, use the 'with' statement\n",
+    "# This automatically calls file.close() at the end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(\"dog_image_2.jpg\", \"wb\") as file:\n",
+    "    file.write(response.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Here, we first receive the whole file and store it in the RAM, then export it to the hard disk\n",
+    "# This method is really inefficient, especially for bigger files\n",
+    "# In effect we download the file to the RAM\n",
+    "\n",
+    "# We can fix that with a couple of small changes to our code"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Streaming the download to a file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instead of reading the whole response immidiatelly, \n",
+    "# we can signal the program to only read part of the response when we tell it to.\n",
+    "\n",
+    "# This is achieved with the 'stream' parameter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# I will use test video files provided by file-examples.com\n",
+    "url = \"https://file-examples.com/wp-content/uploads/2017/04/file_example_MP4_480_1_5MG.mp4\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r = requests.get(url, stream = True)\n",
+    "\n",
+    "with open(\"Sample_video_1,5_MB.mp4\", \"wb\") as f:\n",
+    "    \n",
+    "    # Now we iterate over the response in chunks\n",
+    "    for chunk in r.iter_content(chunk_size = 16*1024):\n",
+    "        f.write(chunk)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can change the chunk size to optimize the fastest download speed for your system"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# However, when using 'stream=True' requests will not close the connection to the server until all data has been read\n",
+    "# Thus, sometimes the connection needs to be closed manually\n",
+    "\n",
+    "# Again, that is best done using the 'with' statement"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# So, the final code for file download is\n",
+    "url = \"https://file-examples.com/wp-content/uploads/2017/04/file_example_MP4_1920_18MG.mp4\"\n",
+    "\n",
+    "with requests.get(url, stream = True) as r:\n",
+    "    with open(\"Sample_video_18_MB.mp4\", \"wb\") as f:\n",
+    "        for chunk in r.iter_content(chunk_size = 16*1024):\n",
+    "            f.write(chunk)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}