
Commit

resume downloading
hwfan committed Jun 26, 2022
1 parent 5ea12a5 commit 31dd7ff
Showing 9 changed files with 142 additions and 96 deletions.
41 changes: 22 additions & 19 deletions DriveDownloader/downloader.py
@@ -22,11 +22,11 @@
MAJOR_VERSION = 1
MINOR_VERSION = 6
POST_VERSION = 0
__version__ = "{MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION}"
console = Console()
__version__ = f"{MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION}"
console = Console(width=72)
single_progress = Progress(
TextColumn("[bold blue]Downloading: ", justify="left"),
BarColumn(bar_width=None),
BarColumn(bar_width=15),
"[progress.percentage]{task.percentage:>3.1f}%",
"|",
DownloadColumn(),
@@ -38,7 +38,7 @@
)
multi_progress = Progress(
TextColumn("[bold blue]Thread {task.fields[proc_id]}: ", justify="left"),
BarColumn(bar_width=None),
BarColumn(bar_width=15),
"[progress.percentage]{task.percentage:>3.1f}%",
"|",
DownloadColumn(),
@@ -55,10 +55,11 @@ def parse_args():
parser.add_argument('--filename', '-o', help='Target file name.', default='', type=str)
parser.add_argument('--thread-number', '-n', help='thread number of multithread.', type=int, default=1)
parser.add_argument('--version', '-v', action='version', version=__version__, help='Version.')
parser.add_argument('--force-back-google','-F',help='Force to use the backup downloader for GoogleDrive.', action='store_true')
args = parser.parse_args()
return args

def download_single_file(url, filename="", thread_number=1, list_suffix=None):
def download_single_file(url, filename="", thread_number=1, force_back_google=False, list_suffix=None):
scheme = judge_scheme(url)
if scheme == 'http':
if len(os.environ["http_proxy"]) > 0:
@@ -77,28 +78,30 @@ def download_single_file(url, filename="", thread_number=1, list_suffix=None):
session_name = judge_session(url)
session_func = get_session(session_name)
google_fix_logic = False
if session_name == 'GoogleDrive' and thread_number > 1:
if session_name == 'GoogleDrive' and thread_number > 1 and not force_back_google:
thread_number = 1
google_fix_logic = True
progress_applied = multi_progress if thread_number > 1 else single_progress
download_session = session_func(used_proxy)
download_session.connect(url, filename)
download_session.connect(url, filename, force_backup=force_back_google if session_name == 'GoogleDrive' else False)
final_filename = download_session.filename
download_session.show_info(progress_applied, list_suffix)
if google_fix_logic:
console.print('[yellow]Warning: Google Drive URL detected. Only one thread will be created.')

if thread_number > 1:
download_session = MultiThreadDownloader(progress_applied, session_func, used_proxy, download_session.filesize, thread_number)
interrupted = download_session.get(url, final_filename)
interrupted = download_session.get(url, final_filename, force_back_google)
if interrupted:
return
download_session.concatenate(final_filename)
else:
interrupted = download_session.save_response_content(progress_bar=progress_applied)
if interrupted:
return
console.print('[green]Finished.')
with progress_applied:
task_id = progress_applied.add_task("download", filename=final_filename, proc_id=0, start=False)
interrupted = download_session.save_response_content(progress_bar=progress_applied)
if interrupted:
return
console.print('[green]Bye.')

def download_filelist(args):
lines = [line for line in open(args.url, 'r')]
@@ -108,19 +111,19 @@ def download_filelist(args):
download_single_file(*splitted_line, args.thread_number, list_suffix)

def simple_cli():
console.print(f"*******************************************************")
console.print(f"* *")
console.print(f"* DriveDownloader {MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION} *")
console.print(f"* Homesite: https://github.com/hwfan/DriveDownloader *")
console.print(f"* *")
console.print(f"*******************************************************")
console.print(f"***********************************************************************")
console.print(f"* *")
console.print(f"* DriveDownloader {MAJOR_VERSION}.{MINOR_VERSION}.{POST_VERSION} *")
console.print(f"* Homesite: https://github.com/hwfan/DriveDownloader *")
console.print(f"* *")
console.print(f"***********************************************************************")
args = parse_args()
assert len(args.url) > 0, "Please input your URL or filelist path!"
if os.path.exists(args.url):
console.print('Downloading filelist: {:s}'.format(os.path.basename(args.url)))
download_filelist(args)
else:
download_single_file(args.url, args.filename, args.thread_number)
download_single_file(args.url, args.filename, args.thread_number, args.force_back_google)

if __name__ == '__main__':
simple_cli()
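
Note on the new call shape: save_response_content (changed in basedrive.py below) now takes explicit start/end byte offsets, which is what makes both the per-thread split and resuming possible. The MultiThreadDownloader itself is not part of this diff, so the range-splitting sketch below is an illustrative assumption, not the repository's actual implementation:

    # Hypothetical helper: split `filesize` bytes into per-thread (start, end) ranges.
    # Names and behaviour are assumptions for illustration only.
    def split_ranges(filesize, thread_number):
        chunk = filesize // thread_number
        ranges = []
        for i in range(thread_number):
            start = i * chunk
            # the last thread absorbs the remainder up to the final byte
            end = filesize - 1 if i == thread_number - 1 else (i + 1) * chunk - 1
            ranges.append((start, end))
        return ranges

    # e.g. split_ranges(10 * 1024 * 1024, 4) ->
    # [(0, 2621439), (2621440, 5242879), (5242880, 7864319), (7864320, 10485759)]
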
122 changes: 76 additions & 46 deletions DriveDownloader/netdrives/basedrive.py
@@ -12,9 +12,15 @@
from DriveDownloader.utils.misc import *
from threading import Event
import signal
from rich.console import Console
from googleapiclient.http import _retry_request, DEFAULT_CHUNK_SIZE
import time
import random

console = Console(width=71)
done_event = Event()
def handle_sigint(signum, frame):
console.print("\n[yellow]Interrupted. Will shutdown after the latest chunk is downloaded.\n")
done_event.set()
signal.signal(signal.SIGINT, handle_sigint)

@@ -32,13 +38,14 @@ def __init__(self, proxy=None, chunk_size=32768):
self.filesize = None
self.response = None
self.file_handler = None

self.base_url = None

def generate_url(self, url):
raise NotImplementedError

def set_range(self, start, end):
self.session.headers['Range'] = 'bytes={:s}-{:s}'.format(start, end)

self.session.headers['Range'] = 'bytes={:s}-{:s}'.format(str(start), str(end))
def parse_response_header(self):
try:
pattern = re.compile(r'filename=\"(.*?)\"')
@@ -53,42 +60,13 @@ def parse_response_header(self):

return filename, header_size

def save_response_content(self, start=None, proc_id=-1, progress_bar=None):
def save_response_content(self, start=None, end=None, proc_id=-1, progress_bar=None):
dirname = os.path.dirname(self.filename)
if len(dirname) > 0:
os.makedirs(dirname, exist_ok=True)
interrupted = False
if proc_id == -1:
self.file_handler = open(self.filename, "wb")
with progress_bar:
progress_bar.add_task("download", start=False, filename=self.filename)
progress_bar.update(0, total=self.filesize)
progress_bar.start_task(0)
if 'googleapiclient' in str(type(self.response)):
from googleapiclient.http import MediaIoBaseDownload, DEFAULT_CHUNK_SIZE
self.chunk_size = DEFAULT_CHUNK_SIZE
downloader = MediaIoBaseDownload(self.file_handler, self.response, self.chunk_size)
done = False
prev_state = 0
cur_state = 0
while done is False:
status, done = downloader.next_chunk()
cur_state = status.resumable_progress
progress_bar.update(0, advance=cur_state - prev_state)
prev_state = status.resumable_progress
if done_event.is_set():
interrupted = True
return interrupted
else:
for chunk in self.response.iter_content(self.chunk_size):
if chunk:
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(0, advance=chunk_num)
if done_event.is_set():
interrupted = True
return interrupted
else:

if proc_id >= 0:
name, ext = os.path.splitext(self.filename)
name = name + '_{}'.format(proc_id)
sub_filename = name + ext
@@ -97,19 +75,71 @@ def save_response_content(self, start=None, proc_id=-1, progress_bar=None):
sub_tmp_dirname = os.path.join(sub_dirname, 'tmp')
os.makedirs(sub_tmp_dirname, exist_ok=True)
sub_filename = os.path.join(sub_tmp_dirname, sub_basename)
self.file_handler = open(sub_filename, "wb")
progress_bar.update(proc_id, total=self.filesize)
progress_bar.start_task(proc_id)
used_filename = sub_filename
else:
proc_id = 0
used_filename = self.filename
start = 0
end = self.filesize-1

ori_filesize = os.path.getsize(used_filename) if os.path.exists(used_filename) else 0
self.file_handler = open(used_filename, 'ab' if ori_filesize > 0 else 'wb' )
progress_bar.update(proc_id, total=end+1-start)
progress_bar.start_task(proc_id)
progress_bar.update(proc_id, advance=ori_filesize)

if 'googleapiclient' in str(type(self.response)):
self.chunk_size = 1 * 1024 * 1024
_headers = {}
for k, v in self.response.headers.items():
if not k.lower() in ("accept", "accept-encoding", "user-agent"):
_headers[k] = v
cur_state = start + ori_filesize
while cur_state < end + 1:
headers = _headers.copy()
remained = end + 1 - cur_state
chunk_size = self.chunk_size if remained >= self.chunk_size else remained
headers["range"] = "bytes=%d-%d" % (
cur_state,
cur_state + chunk_size - 1,
)
http = self.response.http
resp, content = _retry_request(
http,
0,
"media download",
time.sleep,
random.random,
self.response.uri,
"GET",
headers=headers,
)
self.file_handler.write(content)
progress_bar.update(proc_id, advance=len(content))
cur_state += len(content)
if done_event.is_set():
interrupted = True
return interrupted
else:
if ori_filesize > 0:
self.set_range(start + ori_filesize, end)
self.response = self.session.get(self.base_url, params=self.params, proxies=self.proxies, stream=True)
else:
self.set_range(start, end)
cur_state = start + ori_filesize
for chunk in self.response.iter_content(self.chunk_size):
if chunk: # filter out keep-alive new chunks
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(proc_id, advance=chunk_num)
if done_event.is_set():
interrupted = True
return interrupted
return interrupted
self.file_handler.write(chunk)
chunk_num = len(chunk)
progress_bar.update(proc_id, advance=chunk_num)
cur_state += chunk_num
if done_event.is_set():
interrupted = True
return interrupted
if cur_state >= end + 1:
break

def connect(self, url, custom_filename=''):
self.base_url = url
self.response = self.session.get(url, params=self.params, proxies=self.proxies, stream=True)
if self.response.status_code // 100 >= 4:
raise RuntimeError("Bad status code {}. Please check your connection.".format(self.response.status_code))
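
The essence of the resume logic added above: measure how much of the (possibly partial) file already exists on disk, reopen it in append mode, and request only the remaining bytes through an HTTP Range header, advancing the progress bar by the amount already present. A minimal standalone sketch of that pattern with plain requests (URL and filename are placeholders; the server must honour Range requests):

    import os
    import requests

    def resume_download(url, filename, chunk_size=32768):
        # Bytes written by a previous, interrupted run (0 if the file is new).
        done = os.path.getsize(filename) if os.path.exists(filename) else 0
        headers = {"Range": f"bytes={done}-"}  # ask only for the missing tail
        with requests.get(url, headers=headers, stream=True) as resp:
            resp.raise_for_status()
            # 206 Partial Content means the Range header was honoured;
            # a plain 200 means the server resent the body from byte 0.
            mode = "ab" if done > 0 and resp.status_code == 206 else "wb"
            with open(filename, mode) as fh:
                for chunk in resp.iter_content(chunk_size):
                    if chunk:  # skip keep-alive chunks
                        fh.write(chunk)
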
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/directlink.py
@@ -23,6 +23,6 @@ def parse_response_header(self):
def generate_url(self, url):
return url

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/dropbox.py
@@ -22,6 +22,6 @@ def generate_url(self, url):
resultUrl = urlparse.urlunparse(parsed_url)
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
53 changes: 33 additions & 20 deletions DriveDownloader/netdrives/googledrive.py
@@ -12,7 +12,8 @@
import sys
from rich.console import Console

googleauthdata = '''
googleauthdata = \
'''
client_config_backend: settings
client_config:
client_id: 367116221053-7n0vf5akeru7on6o2fjinrecpdoe99eg.apps.googleusercontent.com
@@ -27,7 +28,17 @@
- https://www.googleapis.com/auth/drive
'''

console = Console()
info = \
'''
+-------------------------------------------------------------------+
|Warning: DriveDownloader is using the backup downloader due to the |
|forbiddance or manual setting. If this is the first time you meet |
|the notice, please follow the instructions to login your Google |
|Account. This operation only needs to be done once. |
+-------------------------------------------------------------------+
'''

console = Console(width=71)
class GoogleDriveSession(DriveSession):
def __init__(self, *args, **kwargs):
DriveSession.__init__(self, *args, **kwargs)
@@ -46,28 +57,30 @@ def generate_url(self, url):
replaced_url = "https://drive.google.com/u/0/uc?export=download"
return replaced_url, id_str

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', force_backup=False, proc_id=-1):
replaced_url, id_str = self.generate_url(url)
if force_backup:
self.backup_connect(url, custom_filename, id_str, proc_id=proc_id)
return
try:
self.params["id"] = id_str
self.params["confirm"] = "t"
DriveSession.connect(self, replaced_url, custom_filename=custom_filename)
except:
info = '''+-------------------------------------------------------------------------------------------+
|Warning: The default request is forbidden by GoogleDrive due to the frequent downloading, |
|and DriveDownloader is now using the backup downloader. If this is the first time you meet |
|the problem, please follow the instructions to login your Google Account. Once this action |
|is performed, the downloading procedure will automatically start for all the time. |
+-------------------------------------------------------------------------------------------+'''
self.backup_connect(url, custom_filename, id_str, proc_id=proc_id)

def backup_connect(self, url, custom_filename, id_str, proc_id=-1):
if proc_id == -1:
console.print(info)
settings_file_path = os.path.join(os.path.dirname(__file__), 'settings.yaml')
if not os.path.exists(settings_file_path):
with open(settings_file_path, "w") as f:
f.write(googleauthdata)
gauth = GoogleAuth(settings_file=settings_file_path)
gauth.CommandLineAuth()
drive = GoogleDrive(gauth)
file = drive.CreateFile({"id": id_str})
self.filename = file['title'] if len(custom_filename) == 0 else custom_filename
self.filesize = float(file['fileSize'])
self.response = gauth.service.files().get_media(fileId=id_str)
settings_file_path = os.path.join(os.path.dirname(__file__), 'settings.yaml')
if not os.path.exists(settings_file_path):
with open(settings_file_path, "w") as f:
f.write(googleauthdata)
self.gauth = GoogleAuth(settings_file=settings_file_path)
self.gauth.CommandLineAuth()
self.gid_str = id_str
drive = GoogleDrive(self.gauth)
file = drive.CreateFile({"id": id_str})
self.filename = file['title'] if len(custom_filename) == 0 else custom_filename
self.filesize = float(file['fileSize'])
self.response = self.gauth.service.files().get_media(fileId=id_str)
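
The backup path now stores the authenticated gauth object and the file id on the session instead of handing a one-shot downloader to save_response_content, so the ranged, resumable requests in basedrive.py can be issued (and retried) against the same credentials. A condensed sketch of that flow, assuming the GoogleAuth/GoogleDrive classes come from PyDrive2 (which is what this settings format corresponds to) and using only the calls that appear in the hunk above:

    from pydrive2.auth import GoogleAuth
    from pydrive2.drive import GoogleDrive

    def backup_connect_sketch(settings_file_path, file_id):
        # One-time interactive login; afterwards the credentials are reused,
        # which is why the notice says it only has to be done once.
        gauth = GoogleAuth(settings_file=settings_file_path)
        gauth.CommandLineAuth()
        drive = GoogleDrive(gauth)
        meta = drive.CreateFile({"id": file_id})  # metadata handle, no content yet
        filename = meta["title"]
        filesize = float(meta["fileSize"])
        # This request object is what save_response_content later downloads
        # in ranged chunks (and retries via _retry_request).
        request = gauth.service.files().get_media(fileId=file_id)
        return filename, filesize, request
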
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/onedrive.py
@@ -20,6 +20,6 @@ def generate_url(self, url):
resultUrl = f"https://api.onedrive.com/v1.0/shares/u!{data_bytes64_String}/root/content"
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
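
For reference, data_bytes64_String in the hunk above is the share link encoded as an unpadded, URL-safe Base64 string, which is how the OneDrive /shares/u!{...} endpoint addresses a sharing URL. The encoding itself sits in the collapsed part of this file, so treat the sketch below as an assumption about how it is produced rather than a copy of the repository's code:

    import base64

    def encode_share_url(share_url):
        data_bytes64 = base64.b64encode(share_url.encode("utf-8"))
        # unpadded, URL-safe form expected by the shares API
        data_bytes64_String = (
            data_bytes64.decode("utf-8").rstrip("=").replace("/", "_").replace("+", "-")
        )
        return f"https://api.onedrive.com/v1.0/shares/u!{data_bytes64_String}/root/content"
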
2 changes: 1 addition & 1 deletion DriveDownloader/netdrives/sharepoint.py
@@ -23,6 +23,6 @@ def generate_url(self, url):
resultUrl = f"https://{netloc}/{personal_attr}/{domain}/_layouts/52/download.aspx?share={sharelink}"
return resultUrl

def connect(self, url, custom_filename=''):
def connect(self, url, custom_filename='', proc_id=-1, force_backup=False):
generated_url = self.generate_url(url)
DriveSession.connect(self, generated_url, custom_filename=custom_filename)
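
The signature changes in these four small netdrives files (directlink, dropbox, onedrive, sharepoint) add proc_id and force_backup to every connect so the dispatcher in downloader.py can forward the same keyword arguments regardless of which drive was detected; the non-Google backends simply ignore them. A tiny sketch of that calling pattern (the flags mirror names used in this commit, but the wrapper function itself is illustrative):

    def connect_any(session, url, filename, is_google, force_back_google):
        # Non-Google sessions accept and ignore force_backup, so the caller
        # needs no per-backend branching beyond computing the flag.
        session.connect(
            url,
            filename,
            force_backup=force_back_google if is_google else False,
        )
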