@@ -440,6 +440,12 @@ def parse_args(args=None):
440440 dest = "include_assets" ,
441441 help = "include assets alongside release information; only applies if including releases" ,
442442 )
443+ parser .add_argument (
444+ "--skip-assets-on" ,
445+ dest = "skip_assets_on" ,
446+ nargs = "*" ,
447+ help = "skip asset downloads for these repositories" ,
448+ )
443449 parser .add_argument (
444450 "--attachments" ,
445451 action = "store_true" ,
@@ -561,7 +567,7 @@ def get_github_host(args):
561567
562568
563569def read_file_contents (file_uri ):
564- return open (file_uri [len (FILE_URI_PREFIX ):], "rt" ).readline ().strip ()
570+ return open (file_uri [len (FILE_URI_PREFIX ) :], "rt" ).readline ().strip ()
565571
566572
567573def get_github_repo_url (args , repository ):
@@ -631,7 +637,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
631637 pass
632638 raise RepositoryUnavailableError (
633639 "Repository unavailable due to legal reasons (HTTP 451)" ,
634- dmca_url = dmca_url
640+ dmca_url = dmca_url ,
635641 )
636642
637643 # Check if we got correct data
@@ -709,7 +715,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
709715 # Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next"
710716 for link in link_header .split ("," ):
711717 if 'rel="next"' in link :
712- next_url = link [link .find ("<" ) + 1 : link .find (">" )]
718+ next_url = link [link .find ("<" ) + 1 : link .find (">" )]
713719 break
714720 if not next_url :
715721 break
@@ -763,9 +769,7 @@ def _get_response(request, auth, template):
763769 return r , errors
764770
765771
766- def _construct_request (
767- per_page , query_args , template , auth , as_app = None , fine = False
768- ):
772+ def _construct_request (per_page , query_args , template , auth , as_app = None , fine = False ):
769773 # If template is already a full URL with query params (from Link header), use it directly
770774 if "?" in template and template .startswith ("http" ):
771775 request_url = template
@@ -1480,9 +1484,11 @@ def download_attachments(
14801484 manifest = {
14811485 "issue_number" : number ,
14821486 "issue_type" : item_type ,
1483- "repository" : f"{ args .user } /{ args .repository } "
1484- if hasattr (args , "repository" ) and args .repository
1485- else args .user ,
1487+ "repository" : (
1488+ f"{ args .user } /{ args .repository } "
1489+ if hasattr (args , "repository" ) and args .repository
1490+ else args .user
1491+ ),
14861492 "manifest_updated_at" : datetime .now (timezone .utc ).isoformat (),
14871493 "attachments" : attachment_metadata_list ,
14881494 }
@@ -1538,9 +1544,7 @@ def retrieve_repositories(args, authenticated_user):
15381544 else :
15391545 repo_path = "{0}/{1}" .format (args .user , args .repository )
15401546 single_request = True
1541- template = "https://{0}/repos/{1}" .format (
1542- get_github_api_host (args ), repo_path
1543- )
1547+ template = "https://{0}/repos/{1}" .format (get_github_api_host (args ), repo_path )
15441548
15451549 repos = retrieve_data (args , template , single_request = single_request )
15461550
@@ -1565,7 +1569,10 @@ def retrieve_repositories(args, authenticated_user):
15651569 repos .extend (gists )
15661570
15671571 if args .include_starred_gists :
1568- if not authenticated_user .get ("login" ) or args .user .lower () != authenticated_user ["login" ].lower ():
1572+ if (
1573+ not authenticated_user .get ("login" )
1574+ or args .user .lower () != authenticated_user ["login" ].lower ()
1575+ ):
15691576 logger .warning (
15701577 "Cannot retrieve starred gists for '%s'. GitHub only allows access to the authenticated user's starred gists." ,
15711578 args .user ,
@@ -1673,9 +1680,11 @@ def backup_repositories(args, output_directory, repositories):
16731680
16741681 include_gists = args .include_gists or args .include_starred_gists
16751682 include_starred = args .all_starred and repository .get ("is_starred" )
1676- if (args .include_repository or args .include_everything ) or (
1677- include_gists and repository .get ("is_gist" )
1678- ) or include_starred :
1683+ if (
1684+ (args .include_repository or args .include_everything )
1685+ or (include_gists and repository .get ("is_gist" ))
1686+ or include_starred
1687+ ):
16791688 repo_name = (
16801689 repository .get ("name" )
16811690 if not repository .get ("is_gist" )
@@ -1735,7 +1744,9 @@ def backup_repositories(args, output_directory, repositories):
17351744 include_assets = args .include_assets or args .include_everything ,
17361745 )
17371746 except RepositoryUnavailableError as e :
1738- logger .warning (f"Repository { repository ['full_name' ]} is unavailable (HTTP 451)" )
1747+ logger .warning (
1748+ f"Repository { repository ['full_name' ]} is unavailable (HTTP 451)"
1749+ )
17391750 if e .dmca_url :
17401751 logger .warning (f"DMCA notice: { e .dmca_url } " )
17411752 logger .info (f"Skipping remaining resources for { repository ['full_name' ]} " )
@@ -1795,7 +1806,11 @@ def backup_issues(args, repo_cwd, repository, repos_template):
17951806 modified = os .path .getmtime (issue_file )
17961807 modified = datetime .fromtimestamp (modified ).strftime ("%Y-%m-%dT%H:%M:%SZ" )
17971808 if modified > issue ["updated_at" ]:
1798- logger .info ("Skipping issue {0} because it wasn't modified since last backup" .format (number ))
1809+ logger .info (
1810+ "Skipping issue {0} because it wasn't modified since last backup" .format (
1811+ number
1812+ )
1813+ )
17991814 continue
18001815
18011816 if args .include_issue_comments or args .include_everything :
@@ -1811,7 +1826,9 @@ def backup_issues(args, repo_cwd, repository, repos_template):
18111826
18121827 with codecs .open (issue_file + ".temp" , "w" , encoding = "utf-8" ) as f :
18131828 json_dump (issue , f )
1814- os .rename (issue_file + ".temp" , issue_file ) # Unlike json_dump, this is atomic
1829+ os .rename (
1830+ issue_file + ".temp" , issue_file
1831+ ) # Unlike json_dump, this is atomic
18151832
18161833
18171834def backup_pulls (args , repo_cwd , repository , repos_template ):
@@ -1869,7 +1886,11 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
18691886 modified = os .path .getmtime (pull_file )
18701887 modified = datetime .fromtimestamp (modified ).strftime ("%Y-%m-%dT%H:%M:%SZ" )
18711888 if modified > pull ["updated_at" ]:
1872- logger .info ("Skipping pull request {0} because it wasn't modified since last backup" .format (number ))
1889+ logger .info (
1890+ "Skipping pull request {0} because it wasn't modified since last backup" .format (
1891+ number
1892+ )
1893+ )
18731894 continue
18741895 if args .include_pull_comments or args .include_everything :
18751896 template = comments_regular_template .format (number )
@@ -1886,7 +1907,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
18861907
18871908 with codecs .open (pull_file + ".temp" , "w" , encoding = "utf-8" ) as f :
18881909 json_dump (pull , f )
1889- os .rename (pull_file + ".temp" , pull_file ) # Unlike json_dump, this is atomic
1910+ os .rename (
1911+ pull_file + ".temp" , pull_file
1912+ ) # Unlike json_dump, this is atomic
18901913
18911914
18921915def backup_milestones (args , repo_cwd , repository , repos_template ):
@@ -1919,9 +1942,11 @@ def backup_milestones(args, repo_cwd, repository, repos_template):
19191942 elif written_count == 0 :
19201943 logger .info ("{0} milestones unchanged, skipped write" .format (total ))
19211944 else :
1922- logger .info ("Saved {0} of {1} milestones to disk ({2} unchanged)" .format (
1923- written_count , total , total - written_count
1924- ))
1945+ logger .info (
1946+ "Saved {0} of {1} milestones to disk ({2} unchanged)" .format (
1947+ written_count , total , total - written_count
1948+ )
1949+ )
19251950
19261951
19271952def backup_labels (args , repo_cwd , repository , repos_template ):
@@ -1975,6 +2000,20 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
19752000 )
19762001 releases = releases [: args .number_of_latest_releases ]
19772002
2003+ # Check if this repo should skip asset downloads (case-insensitive)
2004+ skip_assets = False
2005+ if include_assets :
2006+ repo_name = repository .get ("name" , "" ).lower ()
2007+ repo_full_name = repository .get ("full_name" , "" ).lower ()
2008+ skip_repos = [r .lower () for r in (args .skip_assets_on or [])]
2009+ skip_assets = repo_name in skip_repos or repo_full_name in skip_repos
2010+ if skip_assets :
2011+ logger .info (
2012+ "Skipping assets for {0} ({1} releases) due to --skip-assets-on" .format (
2013+ repository .get ("name" ), len (releases )
2014+ )
2015+ )
2016+
19782017 # for each release, store it
19792018 written_count = 0
19802019 for release in releases :
@@ -1986,7 +2025,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
19862025 if json_dump_if_changed (release , output_filepath ):
19872026 written_count += 1
19882027
1989- if include_assets :
2028+ if include_assets and not skip_assets :
19902029 assets = retrieve_data (args , release ["assets_url" ])
19912030 if len (assets ) > 0 :
19922031 # give release asset files somewhere to live & download them (not including source archives)
@@ -2008,9 +2047,11 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F
20082047 elif written_count == 0 :
20092048 logger .info ("{0} releases unchanged, skipped write" .format (total ))
20102049 else :
2011- logger .info ("Saved {0} of {1} releases to disk ({2} unchanged)" .format (
2012- written_count , total , total - written_count
2013- ))
2050+ logger .info (
2051+ "Saved {0} of {1} releases to disk ({2} unchanged)" .format (
2052+ written_count , total , total - written_count
2053+ )
2054+ )
20142055
20152056
20162057def fetch_repository (
@@ -2024,9 +2065,12 @@ def fetch_repository(
20242065):
20252066 if bare_clone :
20262067 if os .path .exists (local_dir ):
2027- clone_exists = subprocess .check_output (
2028- ["git" , "rev-parse" , "--is-bare-repository" ], cwd = local_dir
2029- ) == b"true\n "
2068+ clone_exists = (
2069+ subprocess .check_output (
2070+ ["git" , "rev-parse" , "--is-bare-repository" ], cwd = local_dir
2071+ )
2072+ == b"true\n "
2073+ )
20302074 else :
20312075 clone_exists = False
20322076 else :
@@ -2047,7 +2091,9 @@ def fetch_repository(
20472091 )
20482092 else :
20492093 logger .info (
2050- "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)" .format (name )
2094+ "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)" .format (
2095+ name
2096+ )
20512097 )
20522098 return
20532099
0 commit comments