
Commit

resume downloading
hwfan committed Jun 26, 2022
1 parent 5ea12a5 commit 31dd7ff
Showing 9 changed files with 142 additions and 96 deletions.
41 changes: 22 additions & 19 deletions DriveDownloader/downloader.py
@@ -22,11 +22,11 @@
MAJOR_VERSION = 1
MINOR_VERSION = 6
POST_VERSION = 0
__version__ = "{MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION}"
console = Console()
__version__ = f"{MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION}"
console = Console(width=72)
single_progress = Progress(
TextColumn("[bold blue]Downloading: ", justify="left"),
BarColumn(bar_width=None),
BarColumn(bar_width=15),
"[progress.percentage]{task.percentage:>3.1f}%",
"|",
DownloadColumn(),
@@ -38,7 +38,7 @@
)
multi_progress = Progress(
TextColumn("[bold blue]Thread {task.fields[proc_id]}: ", justify="left"),
BarColumn(bar_width=None),
BarColumn(bar_width=15),
"[progress.percentage]{task.percentage:>3.1f}%",
"|",
DownloadColumn(),
@@ -55,10 +55,11 @@ def parse_args():
parser.add_argument('--filename', '-o', help='Target file name.', default='', type=str)
parser.add_argument('--thread-number', '-n', help='thread number of multithread.', type=int, default=1)
parser.add_argument('--version', '-v', action='version', version=__version__, help='Version.')
parser.add_argument('--force-back-google','-F',help='Force to use the backup downloader for GoogleDrive.', action='store_true')
args = parser.parse_args()
return args

def download_single_file(url, filename="", thread_number=1, list_suffix=None):
def download_single_file(url, filename="", thread_number=1, force_back_google=False, list_suffix=None):
scheme = judge_scheme(url)
if scheme == 'http':
if len(os.environ["http_proxy"]) > 0:
@@ -77,28 +78,30 @@ def download_single_file(url, filename="", thread_number=1, list_suffix=None):
session_name = judge_session(url)
session_func = get_session(session_name)
google_fix_logic = False
if session_name == 'GoogleDrive' and thread_number > 1:
if session_name == 'GoogleDrive' and thread_number > 1 and not force_back_google:
thread_number = 1
google_fix_logic = True
progress_applied = multi_progress if thread_number > 1 else single_progress
download_session = session_func(used_proxy)
download_session.connect(url, filename)
download_session.connect(url, filename, force_backup=force_back_google if session_name == 'GoogleDrive' else False)
final_filename = download_session.filename
download_session.show_info(progress_applied, list_suffix)
if google_fix_logic:
console.print('[yellow]Warning: Google Drive URL detected. Only one thread will be created.')

if thread_number > 1:
download_session = MultiThreadDownloader(progress_applied, session_func, used_proxy, download_session.filesize, thread_number)
interrupted = download_session.get(url, final_filename)
interrupted = download_session.get(url, final_filename, force_back_google)
if interrupted:
return
download_session.concatenate(final_filename)
else:
interrupted = download_session.save_response_content(progress_bar=progress_applied)
if interrupted:
return
console.print('[green]Finished.')
with progress_applied:
task_id = progress_applied.add_task("download", filename=final_filename, proc_id=0, start=False)
interrupted = download_session.save_response_content(progress_bar=progress_applied)
if interrupted:
return
console.print('[green]Bye.')

def download_filelist(args):
lines = [line for line in open(args.url, 'r')]
@@ -108,19 +111,19 @@ def download_filelist(args):
download_single_file(*splitted_line, args.thread_number, list_suffix)

def simple_cli():
console.print(f"*******************************************************")
console.print(f"* *")
console.print(f"* DriveDownloader {MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION} *")
console.print(f"* Homesite: https://github.com/hwfan/DriveDownloader *")
console.print(f"* *")
console.print(f"*******************************************************")
console.print(f"***********************************************************************")
console.print(f"* *")
console.print(f"* DriveDownloader {MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION} *")
console.print(f"* Homesite: https://github.com/hwfan/DriveDownloader *")
console.print(f"* *")
console.print(f"***********************************************************************")
args = parse_args()
assert len(args.url) > 0, "Please input your URL or filelist path!"
if os.path.exists(args.url):
console.print('Downloading filelist: {:s}'.format(os.path.basename(args.url)))
download_filelist(args)
else:
download_single_file(args.url, args.filename, args.thread_number)
download_single_file(args.url, args.filename, args.thread_number, args.force_back_google)

if __name__ == '__main__':
simple_cli()
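
Note on the new call shape: save_response_content (changed in basedrive.py below) now takes explicit start/end byte offsets, which is what makes both the per-thread split and resuming possible. The MultiThreadDownloader itself is not part of this diff, so the range-splitting sketch below is an illustrative assumption, not the repository's actual implementation:

    # Hypothetical helper: split `filesize` bytes into per-thread (start, end) ranges.
    # Names and behaviour are assumptions for illustration only.
    def split_ranges(filesize, thread_number):
        chunk = filesize // thread_number
        ranges = []
        for i in range(thread_number):
            start = i * chunk
            # the last thread absorbs the remainder up to the final byte
            end = filesize - 1 if i == thread_number - 1 else (i + 1) * chunk - 1
            ranges.append((start, end))
        return ranges

    # e.g. split_ranges(10 * 1024 * 1024, 4) ->
    # [(0, 2621439), (2621440, 5242879), (5242880, 7864319), (7864320, 10485759)]
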
122 changes: 76 additions & 46 deletions DriveDownloader/netdrives/basedrive.py
@@ -12,9 +12,15 @@
from DriveDownloader.utils.misc import *
from threading import Event
import signal
from rich.console import Console
from googleapiclient.http import _retry_request, DEFAULT_CHUNK_SIZE
import time
import random

console = Console(width=71)
done_event = Event()
def handle_sigint(signum, frame):
console.print("\n[yellow]Interrupted. Will shutdown after the latest chunk is downloaded.\n")
done_event.set()
signal.signal(signal.SIGINT, handle_sigint)

@@ -32,13 +38,14 @@ def __init__(self, proxy=None, chunk_size=32768):
self.filesize = None
self.response = None
self.file_handler = None

self.base_url = None

def generate_url(self, url):
raise NotImplementedError

def set_range(self, start, end):
self.session.headers['Range'] = 'bytes={:s}-{:s}'.format(start, end)

self.session.headers['Range'] = 'bytes={:s}-{:s}'.format(str(start), str(end))
def parse_response_header(self):
try:
pattern = re.compile(r'filename=\"(.*?)\"')
@@ -53,42 +60,13 @@ def parse_response_header(self):

return filename, header_size

def save_response_content(self, start=None, proc_id=-1, progress_bar=None):
def save_response_content(self, start=None, end=None, proc_id=-1, progress_bar=None):
dirname = os.path.dirname(self.filename)
if len(dirname) > 0:
os.makedirs(dirname, exist_ok=True)
interrupted = False
if proc_id == -1:
self.file_handler = open(self.filename, "wb")
with progress_bar:
progress_bar.add_task("download", start=False, filename=self.filename)
progress_bar.update(0, total=self.filesize)
progress_bar.start_task(0)
if 'googleapiclient' in str(type(self.response)):
from googleapiclient.http import MediaIoBaseDownload, DEFAULT_CHUNK_SIZE
self.chunk_size = DEFAULT_CHUNK_SIZE
downloader = MediaIoBaseDownload(self.file_handler, self.response, self.chunk_size)
done = False
prev_state = 0
cur_state = 0
while done is False:
status, done = downloader.next_chunk()
cur_state = status.resumable_progress
progress_bar.update(0, advance=cur_state - prev_state)
prev_state = status.resumable_progress
if done_event.is_set():
interrupted = True
return interrupted
else:
for chunk in self.response.iter_content(self.chunk_size):
if chunk:
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(0, advance=chunk_num)
if done_event.is_set():
interrupted = True
return interrupted
else:

if proc_id >= 0:
name, ext = os.path.splitext(self.filename)
name = name + '_{}'.format(proc_id)
sub_filename = name + ext
@@ -97,19 +75,71 @@ def save_response_content(self, start=None, proc_id=-1, progress_bar=None):
sub_tmp_dirname = os.path.join(sub_dirname, 'tmp')
os.makedirs(sub_tmp_dirname, exist_ok=True)
sub_filename = os.path.join(sub_tmp_dirname, sub_basename)
self.file_handler = open(sub_filename, "wb")
progress_bar.update(proc_id, total=self.filesize)
progress_bar.start_task(proc_id)
used_filename = sub_filename
else:
proc_id = 0
used_filename = self.filename
start = 0
end = self.filesize-1

ori_filesize = os.path.getsize(used_filename) if os.path.exists(used_filename) else 0
self.file_handler = open(used_filename, 'ab' if ori_filesize > 0 else 'wb' )
progress_bar.update(proc_id, total=end+1-start)
progress_bar.start_task(proc_id)
progress_bar.update(proc_id, advance=ori_filesize)

if 'googleapiclient' in str(type(self.response)):
self.chunk_size = 1 * 1024 * 1024
_headers = {}
for k, v in self.response.headers.items():
if not k.lower() in ("accept", "accept-encoding", "user-agent"):
_headers[k] = v
cur_state = start + ori_filesize
while cur_state < end + 1:
headers = _headers.copy()
remained = end + 1 - cur_state
chunk_size = self.chunk_size if remained >= self.chunk_size else remained
headers["range"] = "bytes=%d-%d" % (
cur_state,
cur_state + chunk_size - 1,
)
http = self.response.http
resp, content = _retry_request(
http,
0,
"media download",
time.sleep,
random.random,
self.response.uri,
"GET",
headers=headers,
)
self.file_handler.write(content)
progress_bar.update(proc_id, advance=len(content))
cur_state += len(content)
if done_event.is_set():
interrupted = True
return interrupted
else:
if ori_filesize > 0:
self.set_range(start + ori_filesize, end)
self.response = self.session.get(self.base_url, params=self.params, proxies=self.proxies, stream=True)
else:
self.set_range(start, end)
cur_state = start + ori_filesize
for chunk in self.response.iter_content(self.chunk_size):
if chunk: # filter out keep-alive new chunks
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(proc_id, advance=chunk_num)
if done_event.is_set():
interrupted = True
return interrupted
return interrupted
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(proc_id, advance=chunk_num)
cur_state += chunk_num
if done_event.is_set():
interrupted = True
return interrupted
if cur_state >= end + 1:
break

def connect(self, url, custom_filename=''):
self.base_url = url
self.response = self.session.get(url, params=self.params, proxies=self.proxies, stream=True)
if self.response.status_code // 100 >= 4:
raise RuntimeError("Bad status code {}. Please check your connection.".format(self.response.status_code))
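
The essence of the resume logic added above: measure how much of the (possibly partial) file already exists on disk, reopen it in append mode, and request only the remaining bytes through an HTTP Range header, advancing the progress bar by the amount already present. A minimal standalone sketch of that pattern with plain requests (URL and filename are placeholders; the server must honour Range requests):

    import os
    import requests

    def resume_download(url, filename, chunk_size=32768):
        # Bytes written by a previous, interrupted run (0 if the file is new).
        done = os.path.getsize(filename) if os.path.exists(filename) else 0
        headers = {"Range": f"bytes={done}-"}  # ask only for the missing tail
        with requests.get(url, headers=headers, stream=True) as resp:
            resp.raise_for_status()
            # 206 Partial Content means the Range header was honoured;
            # a plain 200 means the server resent the body from byte 0.
            mode = "ab" if done > 0 and resp.status_code == 206 else "wb"
            with open(filename, mode) as fh:
                for chunk in resp.iter_content(chunk_size):
                    if chunk:  # skip keep-alive chunks
                        fh.write(chunk)
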
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/directlink.py
@@ -23,6 +23,6 @@ def parse_response_header(self):
def generate_url(self, url):
return url

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/dropbox.py
@@ -22,6 +22,6 @@ def generate_url(self, url):
resultUrl = urlparse.urlunparse(parsed_url)
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
53 changes: 33 additions & 20 deletions DriveDownloader/netdrives/googledrive.py
@@ -12,7 +12,8 @@
import sys
from rich.console import Console

googleauthdata = '''
googleauthdata = \
'''
client_config_backend: settings
client_config:
client_id: 367116221053-7n0vf5akeru7on6o2fjinrecpdoe99eg.apps.googleusercontent.com
@@ -27,7 +28,17 @@
- https://www.googleapis.com/auth/drive
'''

console = Console()
info = \
'''
+-------------------------------------------------------------------+
|Warning: DriveDownloader is using the backup downloader due to the |
|forbiddance or manual setting. If this is the first time you meet |
|the notice, please follow the instructions to login your Google |
|Account. This operation only needs to be done once. |
+-------------------------------------------------------------------+
'''

console = Console(width=71)
class GoogleDriveSession(DriveSession):
def __init__(self, *args, **kwargs):
DriveSession.__init__(self, *args, **kwargs)
@@ -46,28 +57,30 @@ def generate_url(self, url):
replaced_url = "https://drive.google.com/u/0/uc?export=download"
return replaced_url, id_str

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', force_backup=False, proc_id=-1):
replaced_url, id_str = self.generate_url(url)
if force_backup:
self.backup_connect(url, custom_filename, id_str, proc_id=proc_id)
return
try:
self.params["id"] = id_str
self.params["confirm"] = "t"
DriveSession.connect(self, replaced_url, custom_filename=custom_filename)
except:
info = '''+-------------------------------------------------------------------------------------------+
|Warning: The default request is forbidden by GoogleDrive due to the frequent downloading, |
|and DriveDownloader is now using the backup downloader. If this is the first time you meet |
|the problem, please follow the instructions to login your Google Account. Once this action |
|is performed, the downloading procedure will automatically start for all the time. |
+-------------------------------------------------------------------------------------------+'''
self.backup_connect(url, custom_filename, id_str, proc_id=proc_id)

def backup_connect(self, url, custom_filename, id_str, proc_id=-1):
if proc_id == -1:
console.print(info)
settings_file_path = os.path.join(os.path.dirname(__file__), 'settings.yaml')
if not os.path.exists(settings_file_path):
with open(settings_file_path, "w") as f:
f.write(googleauthdata)
gauth = GoogleAuth(settings_file=settings_file_path)
gauth.CommandLineAuth()
drive = GoogleDrive(gauth)
file = drive.CreateFile({"id": id_str})
self.filename = file['title'] if len(custom_filename) == 0 else custom_filename
self.filesize = float(file['fileSize'])
self.response = gauth.service.files().get_media(fileId=id_str)
settings_file_path = os.path.join(os.path.dirname(__file__), 'settings.yaml')
if not os.path.exists(settings_file_path):
with open(settings_file_path, "w") as f:
f.write(googleauthdata)
self.gauth = GoogleAuth(settings_file=settings_file_path)
self.gauth.CommandLineAuth()
self.gid_str = id_str
drive = GoogleDrive(self.gauth)
file = drive.CreateFile({"id": id_str})
self.filename = file['title'] if len(custom_filename) == 0 else custom_filename
self.filesize = float(file['fileSize'])
self.response = self.gauth.service.files().get_media(fileId=id_str)
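
The backup path now stores the authenticated gauth object and the file id on the session instead of handing a one-shot downloader to save_response_content, so the ranged, resumable requests in basedrive.py can be issued (and retried) against the same credentials. A condensed sketch of that flow, assuming the GoogleAuth/GoogleDrive classes come from PyDrive2 (which is what this settings format corresponds to) and using only the calls that appear in the hunk above:

    from pydrive2.auth import GoogleAuth
    from pydrive2.drive import GoogleDrive

    def backup_connect_sketch(settings_file_path, file_id):
        # One-time interactive login; afterwards the credentials are reused,
        # which is why the notice says it only has to be done once.
        gauth = GoogleAuth(settings_file=settings_file_path)
        gauth.CommandLineAuth()
        drive = GoogleDrive(gauth)
        meta = drive.CreateFile({"id": file_id})  # metadata handle, no content yet
        filename = meta["title"]
        filesize = float(meta["fileSize"])
        # This request object is what save_response_content later downloads
        # in ranged chunks (and retries via _retry_request).
        request = gauth.service.files().get_media(fileId=file_id)
        return filename, filesize, request
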
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/onedrive.py
@@ -20,6 +20,6 @@ def generate_url(self, url):
resultUrl = f"https://api.onedrive.com/v1.0/shares/u!{data_bytes64_String}/root/content"
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
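
For reference, data_bytes64_String in the hunk above is the share link encoded as an unpadded, URL-safe Base64 string, which is how the OneDrive /shares/u!{...} endpoint addresses a sharing URL. The encoding itself sits in the collapsed part of this file, so treat the sketch below as an assumption about how it is produced rather than a copy of the repository's code:

    import base64

    def encode_share_url(share_url):
        data_bytes64 = base64.b64encode(share_url.encode("utf-8"))
        # unpadded, URL-safe form expected by the shares API
        data_bytes64_String = (
            data_bytes64.decode("utf-8").rstrip("=").replace("/", "_").replace("+", "-")
        )
        return f"https://api.onedrive.com/v1.0/shares/u!{data_bytes64_String}/root/content"
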
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/sharepoint.py
@@ -23,6 +23,6 @@ def generate_url(self, url):
resultUrl = f"https://{netloc}/{personal_attr}/{domain}/_layouts/52/download.aspx?share={sharelink}"
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
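
The signature changes in these four small netdrives files (directlink, dropbox, onedrive, sharepoint) add proc_id and force_backup to every connect so the dispatcher in downloader.py can forward the same keyword arguments regardless of which drive was detected; the non-Google backends simply ignore them. A tiny sketch of that calling pattern (the flags mirror names used in this commit, but the wrapper function itself is illustrative):

    def connect_any(session, url, filename, is_google, force_back_google):
        # Non-Google sessions accept and ignore force_backup, so the caller
        # needs no per-backend branching beyond computing the flag.
        session.connect(
            url,
            filename,
            force_backup=force_back_google if is_google else False,
        )
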