Commit 281e537

async webscraping
1 parent 9b83506 commit 281e537

File tree

10 files changed: +8319 −0 lines changed


async_webscraping/Pipfile

+13
@@ -0,0 +1,13 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
aiohttp = "*"

[requires]
python_version = "3.8"
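
pipenv resolves this file into a virtualenv; a throwaway check like the sketch below (not part of the commit) can confirm the environment matches the pins above. The version assertion mirrors the python_version pin, nothing more.

import sys

import aiohttp  # pulled in by `pipenv install` from the [packages] section

# the Pipfile pins python_version = "3.8"
assert sys.version_info[:2] == (3, 8), "expected the pinned Python 3.8"
print(f"aiohttp {aiohttp.__version__} on Python {sys.version.split()[0]}")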

async_webscraping/ascrape.py

+18
@@ -0,0 +1,18 @@
import asyncio
import pathlib

from aiohttp import ClientSession


async def main():
    url = 'https://www.boxofficemojo.com/year/2019'
    async with ClientSession() as session:
        async with session.get(url) as response:
            html_body = await response.read()
            return html_body

html_data = asyncio.run(main())
OUTPUT_DIR = pathlib.Path().resolve() / "snapshots"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_FILE = OUTPUT_DIR / "2019.html"
OUTPUT_FILE.write_text(html_data.decode())
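
A minimal follow-up sketch (not part of the commit) for inspecting the saved snapshot with only the standard library. A real scraper would likely reach for a dedicated HTML parser, but this is enough to confirm the download contains parseable markup; the path assumes you run it from the same directory as ascrape.py.

import pathlib
from html.parser import HTMLParser


class LinkCounter(HTMLParser):
    """Counts anchor tags as a cheap sanity check on the snapshot."""

    def __init__(self):
        super().__init__()
        self.links = 0

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.links += 1


snapshot = pathlib.Path("snapshots/2019.html").read_text()
counter = LinkCounter()
counter.feed(snapshot)
print(f"{counter.links} links in the 2019 snapshot")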

async_webscraping/ascrape_multi.py

+43
@@ -0,0 +1,43 @@
import asyncio
import pathlib

from aiohttp import ClientSession


async def fetch(url, session, year=None):
    async with session.get(url) as response:
        html_body = await response.read()
        return {"body": html_body, "year": year}


async def fetch_with_sem(url, session, year, sem):
    async with sem:
        return await fetch(url, session, year)


async def main(start_year=2020, years_ago=5):
    pages_content = {}
    tasks = []
    # semaphore limits concurrency to 10 simultaneous requests
    sem = asyncio.Semaphore(10)
    async with ClientSession() as session:
        for i in range(0, years_ago):
            year = start_year - i
            url = f'https://www.boxofficemojo.com/year/{year}/'
            print(year, url)
            tasks.append(
                asyncio.create_task(fetch_with_sem(url, session, year, sem))
            )
        pages_content = await asyncio.gather(*tasks)
    return pages_content


results = asyncio.run(main())
# print(results)
OUTPUT_DIR = pathlib.Path().resolve() / "snapshots"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
for result in results:
    current_year = result.get("year")
    html_data = result.get("body")
    OUTPUT_FILE = OUTPUT_DIR / f"{current_year}.html"
    OUTPUT_FILE.write_text(html_data.decode())
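
One hedged extension (not in the commit): the same fetch with a client timeout and simple retries, so a single slow or failing year cannot stall gather() indefinitely. ClientTimeout, ClientError, and raise_for_status are real aiohttp names; fetch_safe, the retry count, and the backoff are illustrative choices.

import asyncio

from aiohttp import ClientError, ClientSession, ClientTimeout


async def fetch_safe(url, session, year=None, retries=3):
    for attempt in range(retries):
        try:
            async with session.get(url) as response:
                response.raise_for_status()
                return {"body": await response.read(), "year": year}
        except (ClientError, asyncio.TimeoutError):
            # back off briefly before retrying
            await asyncio.sleep(2 ** attempt)
    return {"body": b"", "year": year}


async def main():
    timeout = ClientTimeout(total=30)  # cap each request at 30 seconds
    async with ClientSession(timeout=timeout) as session:
        return await fetch_safe(
            "https://www.boxofficemojo.com/year/2019/", session, 2019
        )


print(asyncio.run(main())["year"])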

async_webscraping/async.py

+37
@@ -0,0 +1,37 @@
import asyncio
import time

iteration_times = [1, 3, 2, 4]


async def sleeper(seconds, i=-1):
    start_time = time.time()
    if i != -1:
        print(f"{i}\t{seconds}s")
    await asyncio.sleep(seconds)
    return time.time() - start_time

run_time = 0
total_compute_run_time = 0


async def main():  # coroutine
    global run_time
    global total_compute_run_time
    # await sleeper(1, i=0)
    tasks = []
    for i, second in enumerate(iteration_times):
        tasks.append(
            asyncio.create_task(
                sleeper(second, i=i)
            )
        )
    results = await asyncio.gather(*tasks)
    for run_time_result in results:
        total_compute_run_time += run_time_result
        if run_time_result > run_time:
            run_time = run_time_result

# main()
asyncio.run(main())
print(f"Ran for {run_time:.2f}s of wall time against {total_compute_run_time:.2f}s of total compute time ({run_time / total_compute_run_time:.2%} of the summed task time)")
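
Since sleeper() only awaits, the wall time reported above should land near max(iteration_times) = 4s while the summed per-task times approach sum(iteration_times) = 10s. A standalone sketch (not part of the commit) that measures the same effect from the outside:

import asyncio
import time

iteration_times = [1, 3, 2, 4]


async def timed_gather():
    # run all the sleeps concurrently and time the whole batch
    start = time.perf_counter()
    await asyncio.gather(*(asyncio.sleep(s) for s in iteration_times))
    return time.perf_counter() - start


wall = asyncio.run(timed_gather())
print(f"wall {wall:.2f}s vs serial {sum(iteration_times)}s")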

async_webscraping/snapshots/2016.html

+1,728
Large diffs are not rendered by default.

async_webscraping/snapshots/2017.html

+1,724
Large diffs are not rendered by default.

async_webscraping/snapshots/2018.html

+1,998
Large diffs are not rendered by default.

async_webscraping/snapshots/2019.html

+1,828
Large diffs are not rendered by default.

async_webscraping/snapshots/2020.html

+906
Large diffs are not rendered by default.

async_webscraping/sync.py

+24
@@ -0,0 +1,24 @@
import time

iteration_times = [1, 3, 2, 4]


def sleeper(seconds, i=-1):
    start_time = time.time()
    if i != -1:
        print(f"{i}\t{seconds}s")
    time.sleep(seconds)
    return time.time() - start_time


run_time = 0


def main():
    global run_time
    for i, second in enumerate(iteration_times):
        run_time += sleeper(second, i=i)


main()
print(f"Ran for {run_time} seconds")
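
For contrast (not part of the commit), the same blocking workload on a thread pool recovers most of the concurrency without rewriting sleeper() as a coroutine, since time.sleep releases the GIL while blocked:

import time
from concurrent.futures import ThreadPoolExecutor

iteration_times = [1, 3, 2, 4]


def sleeper(seconds):
    start_time = time.time()
    time.sleep(seconds)
    return time.time() - start_time


start = time.time()
# one worker per sleep, so all four run at once
with ThreadPoolExecutor(max_workers=len(iteration_times)) as pool:
    total_compute = sum(pool.map(sleeper, iteration_times))
print(f"wall {time.time() - start:.2f}s, compute {total_compute:.2f}s")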

0 commit comments
