From 5454ffed362cbc59876eff0160113adf021d240e Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 6 Dec 2017 14:48:17 +0100 Subject: [PATCH 01/12] Make the downloader retrieve both the ad and the keyword performance reports --- CHANGELOG.md | 4 ++ bingads_downloader/cli.py | 5 +- bingads_downloader/config.py | 14 +++-- bingads_downloader/downloader.py | 95 ++++++++++++++++++++++++++++---- setup.py | 2 +- 5 files changed, 101 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f78111b..39ab326 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 2.0 +2017-12-06 +- Download both the keywords and the ad performance reports as two separate files (major version bump as the file name is different) + ## 1.1.0 *2017-09-21 diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index a6ae1f2..7dfbf4a 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -51,11 +51,12 @@ def refresh_oauth2_token(**kwargs): @config_option(config.oauth2_client_secret) @config_option(config.oauth2_refresh_token) @config_option(config.data_dir) -@config_option(config.data_file) +@config_option(config.ad_performance_data_file) +@config_option(config.keyword_performance_data_file) @config_option(config.first_date) @config_option(config.environment) @config_option(config.timeout) -@config_option(config.total_attempts_for_single_file) +@config_option(config.total_attempts_for_single_day) @config_option(config.retry_timeout_interval) def download_data(**kwargs): """ diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index ab9ecd0..a2857b5 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -8,11 +8,15 @@ def data_dir() -> str: return '/tmp/bingads/' -def data_file() -> str: - """The name of the file the result is written to""" +def ad_performance_data_file() -> str: + """The name of the file the ad performance result is written to""" return 'ad_performance.csv.gz' +def 
keyword_performance_data_file() -> str: + """The name of the file the keyword performance result is written to""" + return 'keyword_performance.csv.gz' + def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' @@ -47,11 +51,11 @@ def timeout() -> int: return 3600000 -def total_attempts_for_single_file() -> int: - """The attempts to download a single file in case of HTTP errors or timeouts""" +def total_attempts_for_single_day() -> int: + """The attempts to download a single day (ad and keyword performance) in case of HTTP errors or timeouts""" return 5 def retry_timeout_interval() -> int: """number of seconds to wait before trying again to download a single day""" - return 10 + return 10 \ No newline at end of file diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 9157d4d..98d8f2a 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -47,12 +47,12 @@ def download_data_sets(api_client: BingReportClient): """ authenticate_with_oauth(api_client) - download_ad_performance_data(api_client) + download_performance_data(api_client) -def download_ad_performance_data(api_client: BingReportClient): +def download_performance_data(api_client: BingReportClient): """ - Downloads BingAds performance reports by creating report objects + Downloads BingAds Ads performance reports by creating report objects for every day since config.first_date() till today Args: api_client: BingAdsApiClient @@ -60,7 +60,7 @@ def download_ad_performance_data(api_client: BingReportClient): first_date = datetime.datetime.strptime(config.first_date(), '%Y-%m-%d') last_date = datetime.datetime.now() - datetime.timedelta(days=1) current_date = last_date - remaining_attempts = config.total_attempts_for_single_file + remaining_attempts = config.total_attempts_for_single_day while current_date >= first_date: print(current_date) relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( @@ -68,18 
+68,24 @@ def download_ad_performance_data(api_client: BingReportClient): filepath = ensure_data_directory(relative_filepath) if not filepath.is_dir() or (last_date - current_date).days < 31: - report_request = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) + with tempfile.TemporaryDirectory() as tmp_dir: tmp_filepath = Path(tmp_dir, relative_filepath) tmp_filepath.parent.mkdir(exist_ok=True, parents=True) try: start_time = time.time() - submit_and_download(report_request, api_client, str(filepath)) - print('Successfully downloaded data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + submit_and_download(report_request_ad, api_client, str(filepath), config.ad_performance_data_file()) + print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) + start_time = time.time() + submit_and_download(report_request_keyword, api_client, str(filepath), config.keyword_performance_data_file()) + print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) # date is decreased only if the download above does not fail current_date -= datetime.timedelta(days=1) - remaining_attempts = config.total_attempts_for_single_file + remaining_attempts = config.total_attempts_for_single_day except urllib.error.URLError as url_error: if remaining_attempts == 0: print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) @@ -93,10 +99,76 @@ def download_ad_performance_data(api_client: BingReportClient): current_date -= datetime.timedelta(days=1) + + + def build_ad_performance_request_for_single_day(api_client: BingReportClient, current_date: datetime): """ - Creates a report 
report request object with hard coded parameters for a give date. + Creates an Ad report request object with hard coded parameters for a give date. + Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('AdPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Ad Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Aggregation = 'Daily' + report_request.Language = 'English' + + report_time = api_client.factory.create('ReportTime') + + # You may either use a custom date range + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfAdPerformanceReportColumn') + report_columns.AdPerformanceReportColumn.append([ + "AccountName", + "AccountNumber", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "AdGroupName", + "AdId", + "AdGroupId", + "AdTitle", + "AdDescription", + "AdType", + "Impressions", + "Clicks", + "Ctr", + "Spend", + "AveragePosition", + "Conversions", + "ConversionRate", + "CostPerConversion", + "DeviceType", + "AccountStatus", + "CampaignStatus", + "AdGroupStatus", + "AdLabels" + ]) + report_request.Columns = report_columns + + return report_request + + +def build_keyword_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Keyword report request object with hard coded parameters for a give date. 
Args: api_client: BingApiClient object current_date: date for which the report object will be created @@ -158,7 +230,7 @@ def build_ad_performance_request_for_single_day(api_client: BingReportClient, return report_request -def submit_and_download(report_request, api_client, data_dir): +def submit_and_download(report_request, api_client, data_dir, data_file): """ Submit the download request and then use the ReportingDownloadOperation result to track status until the report is complete. @@ -166,6 +238,7 @@ def submit_and_download(report_request, api_client, data_dir): report_request: report_request object e.g. created by get_ad_performance_for_single_day api_client: BingApiClient object data_dir: target directory of the files containing the reports + data_file: the name of the file containing the data """ current_reporting_service_manager = \ @@ -195,7 +268,7 @@ def submit_and_download(report_request, api_client, data_dir): result_file_path = reporting_download_operation.download_result_file( result_file_directory=data_dir, - result_file_name=config.data_file(), + result_file_name=data_file, decompress=False, overwrite=True, # Set this value true if you want to overwrite the same file. 
timeout_in_milliseconds=config.timeout() diff --git a/setup.py b/setup.py index c9be4c3..5e5eb9c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='bingads-performance-downloader', - version='1.1.0', + version='2.0.0', description="Downloads data from the BingAds Api to local files for usage in a data warehouse", From dcf33b93461600a12ba724ae6398c3983c3d9fed Mon Sep 17 00:00:00 2001 From: jacopofar Date: Fri, 8 Dec 2017 15:23:14 +0100 Subject: [PATCH 02/12] Use a new path for bing data --- bingads_downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 98d8f2a..77ece3e 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -63,7 +63,7 @@ def download_performance_data(api_client: BingReportClient): remaining_attempts = config.total_attempts_for_single_day while current_date >= first_date: print(current_date) - relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( + relative_filepath = Path('{date:%Y/%m/%d}/bing_v2/'.format( date=current_date)) filepath = ensure_data_directory(relative_filepath) From 0ebcb02248c3bb73849043b1fc3a41a224d33700 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 20 Dec 2017 13:32:43 +0100 Subject: [PATCH 03/12] Download campaign data as well Typo Remove sorting altogether from capaign data Remove request metadata, orde rcolumn according to the doc order Reintroduce mandatory language parameter Reintroduce mandatory parameter for aggregation Add a measure column as it's mandatory Log the beginning of each file download Adjust comment to refer to the actual project --- bingads_downloader/cli.py | 2 +- bingads_downloader/config.py | 4 +++ bingads_downloader/downloader.py | 60 ++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index 7dfbf4a..fd4501f 100644 --- a/bingads_downloader/cli.py 
+++ b/bingads_downloader/cli.py @@ -1,4 +1,4 @@ -"""Command line interface for adwords downloader""" +"""Command line interface for Bing downloader""" import sys diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index a2857b5..e5a78b4 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -17,6 +17,10 @@ def keyword_performance_data_file() -> str: """The name of the file the keyword performance result is written to""" return 'keyword_performance.csv.gz' +def campaign_performance_data_file() -> str: + """The name of the file the campaign performance result is written to""" + return 'campaign_performance.csv.gz' + def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 77ece3e..e803ae1 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -70,19 +70,30 @@ def download_performance_data(api_client: BingReportClient): if not filepath.is_dir() or (last_date - current_date).days < 31: report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) + report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) + with tempfile.TemporaryDirectory() as tmp_dir: tmp_filepath = Path(tmp_dir, relative_filepath) tmp_filepath.parent.mkdir(exist_ok=True, parents=True) try: start_time = time.time() + print('About to download ad data for {date:%Y-%m-%d}' + .format(date=current_date)) submit_and_download(report_request_ad, api_client, str(filepath), config.ad_performance_data_file()) print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) start_time = time.time() + print('About to download keyword data for 
{date:%Y-%m-%d}' + .format(date=current_date)) submit_and_download(report_request_keyword, api_client, str(filepath), config.keyword_performance_data_file()) print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) + print('About to download campaign data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_campaign, api_client, str(filepath), config.campaign_performance_data_file()) + print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) # date is decreased only if the download above does not fail current_date -= datetime.timedelta(days=1) remaining_attempts = config.total_attempts_for_single_day @@ -96,6 +107,13 @@ def download_performance_data(api_client: BingReportClient): time.sleep(config.retry_timeout_interval) remaining_attempts -= 1 else: + if not filepath.is_dir(): + print(f'Skipping the day since directory {str(filepath)} already exists') + + if (last_date - current_date).days < 31: + print(f'Skipping the day since {str(last_date)} is more than 31 days before {str(current_date)}') + + current_date -= datetime.timedelta(days=1) @@ -230,6 +248,48 @@ def build_keyword_performance_request_for_single_day(api_client: BingReportClien return report_request +def build_campaign_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Campaign report request object with hard coded parameters for a give date. 
+ Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('CampaignPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Campaign Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Language = 'English' + report_request.Aggregation = 'Daily' + report_time = api_client.factory.create('ReportTime') + + # You may either use a custom date range + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfCampaignPerformanceReportColumn') + report_columns.CampaignPerformanceReportColumn.append([ + "AccountName", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "Spend", + "CampaignLabels" + ]) + report_request.Columns = report_columns + return report_request + def submit_and_download(report_request, api_client, data_dir, data_file): """ Submit the download request and then use the ReportingDownloadOperation result to From c2dbf1243990fbb2f22bd597f40463c46ed9290c Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 6 Dec 2017 14:48:17 +0100 Subject: [PATCH 04/12] Make the downloader retrieve both the ad and the keyword performance reports --- CHANGELOG.md | 5 ++ bingads_downloader/cli.py | 5 +- bingads_downloader/config.py | 14 +++-- bingads_downloader/downloader.py | 95 ++++++++++++++++++++++++++++---- setup.py | 4 +- 5 files changed, 103 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 0c1a4aa..fd0ca6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 2.0 +2017-12-06 +- Download both the keywords and the ad performance reports as two separate files (major version bump as the file name is different) + ## 1.2.1 *2017-01-10 @@ -10,6 +14,7 @@ - Uses v11 reporting + ## 1.1.0 *2017-09-21 diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index a6ae1f2..7dfbf4a 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -51,11 +51,12 @@ def refresh_oauth2_token(**kwargs): @config_option(config.oauth2_client_secret) @config_option(config.oauth2_refresh_token) @config_option(config.data_dir) -@config_option(config.data_file) +@config_option(config.ad_performance_data_file) +@config_option(config.keyword_performance_data_file) @config_option(config.first_date) @config_option(config.environment) @config_option(config.timeout) -@config_option(config.total_attempts_for_single_file) +@config_option(config.total_attempts_for_single_day) @config_option(config.retry_timeout_interval) def download_data(**kwargs): """ diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index ab9ecd0..a2857b5 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -8,11 +8,15 @@ def data_dir() -> str: return '/tmp/bingads/' -def data_file() -> str: - """The name of the file the result is written to""" +def ad_performance_data_file() -> str: + """The name of the file the ad performance result is written to""" return 'ad_performance.csv.gz' +def keyword_performance_data_file() -> str: + """The name of the file the keyword performance result is written to""" + return 'keyword_performance.csv.gz' + def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' @@ -47,11 +51,11 @@ def timeout() -> int: return 3600000 -def total_attempts_for_single_file() -> int: - """The attempts to download a single file in case of HTTP errors 
or timeouts""" +def total_attempts_for_single_day() -> int: + """The attempts to download a single day (ad and keyword performance) in case of HTTP errors or timeouts""" return 5 def retry_timeout_interval() -> int: """number of seconds to wait before trying again to download a single day""" - return 10 + return 10 \ No newline at end of file diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 9157d4d..98d8f2a 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -47,12 +47,12 @@ def download_data_sets(api_client: BingReportClient): """ authenticate_with_oauth(api_client) - download_ad_performance_data(api_client) + download_performance_data(api_client) -def download_ad_performance_data(api_client: BingReportClient): +def download_performance_data(api_client: BingReportClient): """ - Downloads BingAds performance reports by creating report objects + Downloads BingAds Ads performance reports by creating report objects for every day since config.first_date() till today Args: api_client: BingAdsApiClient @@ -60,7 +60,7 @@ def download_ad_performance_data(api_client: BingReportClient): first_date = datetime.datetime.strptime(config.first_date(), '%Y-%m-%d') last_date = datetime.datetime.now() - datetime.timedelta(days=1) current_date = last_date - remaining_attempts = config.total_attempts_for_single_file + remaining_attempts = config.total_attempts_for_single_day while current_date >= first_date: print(current_date) relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( @@ -68,18 +68,24 @@ def download_ad_performance_data(api_client: BingReportClient): filepath = ensure_data_directory(relative_filepath) if not filepath.is_dir() or (last_date - current_date).days < 31: - report_request = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_keyword = 
build_keyword_performance_request_for_single_day(api_client, current_date) + with tempfile.TemporaryDirectory() as tmp_dir: tmp_filepath = Path(tmp_dir, relative_filepath) tmp_filepath.parent.mkdir(exist_ok=True, parents=True) try: start_time = time.time() - submit_and_download(report_request, api_client, str(filepath)) - print('Successfully downloaded data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + submit_and_download(report_request_ad, api_client, str(filepath), config.ad_performance_data_file()) + print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) + start_time = time.time() + submit_and_download(report_request_keyword, api_client, str(filepath), config.keyword_performance_data_file()) + print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) # date is decreased only if the download above does not fail current_date -= datetime.timedelta(days=1) - remaining_attempts = config.total_attempts_for_single_file + remaining_attempts = config.total_attempts_for_single_day except urllib.error.URLError as url_error: if remaining_attempts == 0: print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) @@ -93,10 +99,76 @@ def download_ad_performance_data(api_client: BingReportClient): current_date -= datetime.timedelta(days=1) + + + def build_ad_performance_request_for_single_day(api_client: BingReportClient, current_date: datetime): """ - Creates a report report request object with hard coded parameters for a give date. + Creates an Ad report request object with hard coded parameters for a give date. 
+ Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('AdPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Ad Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Aggregation = 'Daily' + report_request.Language = 'English' + + report_time = api_client.factory.create('ReportTime') + + # You may either use a custom date range + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfAdPerformanceReportColumn') + report_columns.AdPerformanceReportColumn.append([ + "AccountName", + "AccountNumber", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "AdGroupName", + "AdId", + "AdGroupId", + "AdTitle", + "AdDescription", + "AdType", + "Impressions", + "Clicks", + "Ctr", + "Spend", + "AveragePosition", + "Conversions", + "ConversionRate", + "CostPerConversion", + "DeviceType", + "AccountStatus", + "CampaignStatus", + "AdGroupStatus", + "AdLabels" + ]) + report_request.Columns = report_columns + + return report_request + + +def build_keyword_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Keyword report request object with hard coded parameters for a give date. 
Args: api_client: BingApiClient object current_date: date for which the report object will be created @@ -158,7 +230,7 @@ def build_ad_performance_request_for_single_day(api_client: BingReportClient, return report_request -def submit_and_download(report_request, api_client, data_dir): +def submit_and_download(report_request, api_client, data_dir, data_file): """ Submit the download request and then use the ReportingDownloadOperation result to track status until the report is complete. @@ -166,6 +238,7 @@ def submit_and_download(report_request, api_client, data_dir): report_request: report_request object e.g. created by get_ad_performance_for_single_day api_client: BingApiClient object data_dir: target directory of the files containing the reports + data_file: the name of the file containing the data """ current_reporting_service_manager = \ @@ -195,7 +268,7 @@ def submit_and_download(report_request, api_client, data_dir): result_file_path = reporting_download_operation.download_result_file( result_file_directory=data_dir, - result_file_name=config.data_file(), + result_file_name=data_file, decompress=False, overwrite=True, # Set this value true if you want to overwrite the same file. 
timeout_in_milliseconds=config.timeout() diff --git a/setup.py b/setup.py index 55154a9..0181e0b 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,12 @@ setup( name='bingads-performance-downloader', - version='1.2.1', + version='2.0.0', description="Downloads data from the BingAds Api to local files for usage in a data warehouse", install_requires=[ - 'bingads==11.5.5.1', + 'bingads==11.5.8', 'click>=6.0' ], From a2aea115aecc072d1635b65d6e1312e953ea0ff1 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Fri, 8 Dec 2017 15:23:14 +0100 Subject: [PATCH 05/12] Use a new path for bing data --- bingads_downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 98d8f2a..77ece3e 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -63,7 +63,7 @@ def download_performance_data(api_client: BingReportClient): remaining_attempts = config.total_attempts_for_single_day while current_date >= first_date: print(current_date) - relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( + relative_filepath = Path('{date:%Y/%m/%d}/bing_v2/'.format( date=current_date)) filepath = ensure_data_directory(relative_filepath) From f3c37b542eae6c5ed5d0e3ff19bcae14630c9ab2 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 20 Dec 2017 13:32:43 +0100 Subject: [PATCH 06/12] Download campaign data as well Typo Remove sorting altogether from capaign data Remove request metadata, orde rcolumn according to the doc order Reintroduce mandatory language parameter Reintroduce mandatory parameter for aggregation Add a measure column as it's mandatory Log the beginning of each file download Adjust comment to refer to the actual project --- bingads_downloader/cli.py | 2 +- bingads_downloader/config.py | 4 +++ bingads_downloader/downloader.py | 60 ++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/bingads_downloader/cli.py 
b/bingads_downloader/cli.py index 7dfbf4a..fd4501f 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -1,4 +1,4 @@ -"""Command line interface for adwords downloader""" +"""Command line interface for Bing downloader""" import sys diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index a2857b5..e5a78b4 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -17,6 +17,10 @@ def keyword_performance_data_file() -> str: """The name of the file the keyword performance result is written to""" return 'keyword_performance.csv.gz' +def campaign_performance_data_file() -> str: + """The name of the file the campaign performance result is written to""" + return 'campaign_performance.csv.gz' + def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 77ece3e..e803ae1 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -70,19 +70,30 @@ def download_performance_data(api_client: BingReportClient): if not filepath.is_dir() or (last_date - current_date).days < 31: report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) + report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) + with tempfile.TemporaryDirectory() as tmp_dir: tmp_filepath = Path(tmp_dir, relative_filepath) tmp_filepath.parent.mkdir(exist_ok=True, parents=True) try: start_time = time.time() + print('About to download ad data for {date:%Y-%m-%d}' + .format(date=current_date)) submit_and_download(report_request_ad, api_client, str(filepath), config.ad_performance_data_file()) print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - 
start_time)) start_time = time.time() + print('About to download keyword data for {date:%Y-%m-%d}' + .format(date=current_date)) submit_and_download(report_request_keyword, api_client, str(filepath), config.keyword_performance_data_file()) print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) + print('About to download campaign data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_campaign, api_client, str(filepath), config.campaign_performance_data_file()) + print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) # date is decreased only if the download above does not fail current_date -= datetime.timedelta(days=1) remaining_attempts = config.total_attempts_for_single_day @@ -96,6 +107,13 @@ def download_performance_data(api_client: BingReportClient): time.sleep(config.retry_timeout_interval) remaining_attempts -= 1 else: + if not filepath.is_dir(): + print(f'Skipping the day since directory {str(filepath)} already exists') + + if (last_date - current_date).days < 31: + print(f'Skipping the day since {str(last_date)} is more than 31 days before {str(current_date)}') + + current_date -= datetime.timedelta(days=1) @@ -230,6 +248,48 @@ def build_keyword_performance_request_for_single_day(api_client: BingReportClien return report_request +def build_campaign_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Campaign report request object with hard coded parameters for a give date. 
+ Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('CampaignPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Campaign Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Language = 'English' + report_request.Aggregation = 'Daily' + report_time = api_client.factory.create('ReportTime') + + # You may either use a custom date range + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfCampaignPerformanceReportColumn') + report_columns.CampaignPerformanceReportColumn.append([ + "AccountName", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "Spend", + "CampaignLabels" + ]) + report_request.Columns = report_columns + return report_request + def submit_and_download(report_request, api_client, data_dir, data_file): """ Submit the download request and then use the ReportingDownloadOperation result to From 313b574ffba71ec5cc777f856ecbb1cc3b628402 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 14 Mar 2018 12:02:03 +0100 Subject: [PATCH 07/12] Download three reports, update README and config accordingly --- CHANGELOG.md | 7 ++- README.md | 88 ++++++++++++++++++++++++++++-------- bingads_downloader/cli.py | 3 +- bingads_downloader/config.py | 9 ++-- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6abc141..46fb500 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,14 @@ # Changelog ## 2.0 -2018-03-08 +2018-03-14 - Download both the keywords and the ad performance reports as two separate files (major version bump as the file name is different) +## 1.2.2 +*2018-03-13 + +- cosmetic changes, upgrade bingads dependency + ## 1.2.1 *2017-01-10 diff --git a/README.md b/README.md index 28e315d..e3efd31 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ # BingAds Performance Downloader -A Python script for downloading performance data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://github.com/BingAds/BingAds-Python-SDK). +A Python script for downloading performance and keyword data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://github.com/BingAds/BingAds-Python-SDK). ## Resulting data -**BingAds Performance Downloader** gives measures such as impressions, clicks and cost. The script creates one csv file per day in a specified time range: +**BingAds Performance Downloader** gives measures such as impressions, clicks and cost. The script creates three csv files per day in a specified time range, for example: - /tmp/bingads/2016/05/02/bing/ad_performance.csv.gz /tmp/bingads/2016/05/03/bing/ad_performance.csv.gz + /tmp/bingads/2016/05/03/bing/keyword_performance.csv.gz + /tmp/bingads/2016/05/03/bing/campaign_performance.csv.gz - - Each line contains one ad for one day: + Each line of `keyword_performance` contains one ad for one day: GregorianDate | 2/12/2016 AccountId | 17837800573 @@ -34,6 +34,44 @@ A Python script for downloading performance data from the [BingAds API version 1 Revenue | 0 Network | Bing and Yahoo! 
search +While `ad_performance` has these columns: + + AccountName | Name + AccountNumber | X001342 + AccountId | 67688 + TimePeriod | 2018-02-03 + CampaignName | 12. Campaign name + CampaignId | 1234567 + AdGroupName | Name of the group + AdId | 8123456789 + AdGroupId | 123456789011 + AdTitle | title of the ad, optional + AdDescription | Description of the ad + AdType | Expanded text ad + Impressions | 2 + Clicks | 3 + Ctr | 45.78% + Spend | 4.34 + AveragePosition | 1.00 + Conversions | 12 + ConversionRate | 23.8% + CostPerConversion | 2.34 + DeviceType | Computer + AccountStatus | Active + CampaignStatus | Active + AdGroupStatus | Active + AdLabels | "(label1=value)(label2=value2)" + + +And `Campaign performance` is: + + AccountName | Name of the account + AccountId | 123456 + TimePeriod | 2018-01-10 + CampaignName | 1. Banner 03 + CampaignId | 1234567 + Spend | 1.23 + CampaignLabel | {channel=display} ## Getting Started @@ -42,9 +80,9 @@ A Python script for downloading performance data from the [BingAds API version 1 The Bing AdWords Performance Downloader requires: - Python (>= 3.5) - bingads (==11.5.5.1) - click (>=6.0) + Python (>= 3.6) + bingads (automatically installed by setup.py) + click (automatically installed by setup.py) The easiest way to install bing-adwords-downloader is using pip @@ -86,7 +124,7 @@ In order to access the BingAds API you have to obtain the OAuth2 credentials fro --oauth2_client_id 123456789 \ --oauth2_client_secret aBcDeFg -This will open a webbrowser to allow the OAuth2 credentials to access the API on your behalf. +This will open a web browser to allow the OAuth2 credentials to access the API on your behalf.
![](docs/oauth1.png) ![](docs/oauth2.png) @@ -107,7 +145,7 @@ To run the BingAds Performance Downloader call `download-bingsads-performance-da --oauth2_refresh_token MCQL58pByMOdq*sU7 \ --data_dir /tmp/bingads -For all options, see +For all options, see the _help_ $ download-bingsads-performance-data --help Usage: download-bingsads-performance-data [OPTIONS] @@ -116,7 +154,7 @@ For all options, see config.py are used. Options: - + --developer_token TEXT The developer token that is used to access the BingAds API. Default: "012345679ABCDEF" --oauth2_client_id TEXT The Oauth client id obtained from the @@ -133,22 +171,32 @@ For all options, see ABCDefgh!1234567890" --data_dir TEXT The directory where result data is written to. Default: "/tmp/bingads/" - --data_file TEXT The name of the file the result is written - to. Default: "ad_performance.csv.gz" + --ad_performance_data_file TEXT + The name of the file the ad performance + result is written to. Default: + "ad_performance.csv.gz" + --keyword_performance_data_file TEXT + The name of the file the keyword performance + result is written to. Default: + "keyword_performance.csv.gz" + --campaign_performance_data_file TEXT + The name of the file the campaign + performance result is written to. Default: + "campaign_performance.csv.gz" --first_date TEXT The first day from which on data will be downloaded. Default: "2015-01-01" --environment TEXT The deployment environment. Default: "production" - --timeout INTEGER The maximum amount of time (in milliseconds) + --timeout TEXT The maximum amount of time (in milliseconds) that you want to wait for the report download. Default: "3600000" - --total_attempts_for_single_file INTEGER - The attempts to download a single file in - case of HTTP errors or timeouts. 
Default: - "5" - --retry_timeout_interval INTEGER - number of seconds to wait before trying + --total_attempts_for_single_day TEXT + The attempts to download a single day (ad + and keyword performance) in case of HTTP + errors or timeouts. Default: "5" + --retry_timeout_interval TEXT number of seconds to wait before trying again to download a single day. Default: "10" --help Show this message and exit. + diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index fd4501f..a9761b5 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -53,6 +53,7 @@ def refresh_oauth2_token(**kwargs): @config_option(config.data_dir) @config_option(config.ad_performance_data_file) @config_option(config.keyword_performance_data_file) +@config_option(config.campaign_performance_data_file) @config_option(config.first_date) @config_option(config.environment) @config_option(config.timeout) @@ -65,4 +66,4 @@ def download_data(**kwargs): """ apply_options(kwargs) show_version() - downloader.download_data() \ No newline at end of file + downloader.download_data() diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index e5a78b4..977d38e 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -17,14 +17,17 @@ def keyword_performance_data_file() -> str: """The name of the file the keyword performance result is written to""" return 'keyword_performance.csv.gz' + def campaign_performance_data_file() -> str: - """The name of the file the campaign performance result is written to""" - return 'campaign_performance.csv.gz' + """The name of the file the campaign performance result is written to""" + return 'campaign_performance.csv.gz' + def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' + def developer_token() -> str: """The developer token that is used to access the BingAds API""" return '012345679ABCDEF' @@ -62,4 +65,4 @@ def total_attempts_for_single_day() -> int: def 
retry_timeout_interval() -> int: """number of seconds to wait before trying again to download a single day""" - return 10 \ No newline at end of file + return 10 From e639122015aa9ad4f8caa5c14e4f780ef6c48970 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Wed, 14 Mar 2018 12:21:31 +0100 Subject: [PATCH 08/12] Remove Bing_v2 folder, use filenames for versioning --- bingads_downloader/config.py | 8 ++++---- bingads_downloader/downloader.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index 977d38e..89fe2f7 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -10,17 +10,17 @@ def data_dir() -> str: def ad_performance_data_file() -> str: """The name of the file the ad performance result is written to""" - return 'ad_performance.csv.gz' + return 'ad_performance_v2.csv.gz' def keyword_performance_data_file() -> str: """The name of the file the keyword performance result is written to""" - return 'keyword_performance.csv.gz' + return 'keyword_performance_v2.csv.gz' def campaign_performance_data_file() -> str: - """The name of the file the campaign performance result is written to""" - return 'campaign_performance.csv.gz' + """The name of the file the campaign performance result is written to""" + return 'campaign_performance_v2.csv.gz' def first_date() -> str: diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index e803ae1..ed49966 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -63,7 +63,7 @@ def download_performance_data(api_client: BingReportClient): remaining_attempts = config.total_attempts_for_single_day while current_date >= first_date: print(current_date) - relative_filepath = Path('{date:%Y/%m/%d}/bing_v2/'.format( + relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( date=current_date)) filepath = ensure_data_directory(relative_filepath) From 
92173c6a00cc43c2fbb9fc6907c91b3127ad935d Mon Sep 17 00:00:00 2001 From: jacopofar Date: Thu, 15 Mar 2018 14:25:41 +0100 Subject: [PATCH 09/12] Use a single config for the version instead of many vor different files --- README.md | 22 +++++++--------------- bingads_downloader/cli.py | 4 +--- bingads_downloader/config.py | 20 +++++--------------- bingads_downloader/downloader.py | 6 +++--- 4 files changed, 16 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index e3efd31..004ed51 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # BingAds Performance Downloader -A Python script for downloading performance and keyword data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://github.com/BingAds/BingAds-Python-SDK). +A Python script for downloading performance and keyword data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://docs.microsoft.com/en-us/bingads/guides/?view=bingads-11). ## Resulting data @@ -60,7 +60,7 @@ While `ad_performance` has these columns: AccountStatus | Active CampaignStatus | Active AdGroupStatus | Active - AdLabels | "(label1=value)(label2=value2)" + AdLabels | {label1=value1};{label2=value2} And `Campaign performance` is: @@ -154,7 +154,7 @@ For all options, see the _help_ config.py are used. Options: - + --developer_token TEXT The developer token that is used to access the BingAds API. Default: "012345679ABCDEF" --oauth2_client_id TEXT The Oauth client id obtained from the @@ -171,18 +171,9 @@ For all options, see the _help_ ABCDefgh!1234567890" --data_dir TEXT The directory where result data is written to. Default: "/tmp/bingads/" - --ad_performance_data_file TEXT - The name of the file the ad performance - result is written to. 
Default: - "ad_performance.csv.gz" - --keyword_performance_data_file TEXT - The name of the file the keyword performance - result is written to. Default: - "keyword_performance.csv.gz" - --campaign_performance_data_file TEXT - The name of the file the campaign - performance result is written to. Default: - "campaign_performance.csv.gz" + --output_file_version TEXT A suffix that is added to output files, + denoting a version of the data format. + Default: "v2" --first_date TEXT The first day from which on data will be downloaded. Default: "2015-01-01" --environment TEXT The deployment environment. Default: @@ -200,3 +191,4 @@ For all options, see the _help_ --help Show this message and exit. + diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index a9761b5..a295230 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -51,9 +51,7 @@ def refresh_oauth2_token(**kwargs): @config_option(config.oauth2_client_secret) @config_option(config.oauth2_refresh_token) @config_option(config.data_dir) -@config_option(config.ad_performance_data_file) -@config_option(config.keyword_performance_data_file) -@config_option(config.campaign_performance_data_file) +@config_option(config.output_file_version) @config_option(config.first_date) @config_option(config.environment) @config_option(config.timeout) diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index 89fe2f7..aca5197 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -8,21 +8,6 @@ def data_dir() -> str: return '/tmp/bingads/' -def ad_performance_data_file() -> str: - """The name of the file the ad performance result is written to""" - return 'ad_performance_v2.csv.gz' - - -def keyword_performance_data_file() -> str: - """The name of the file the keyword performance result is written to""" - return 'keyword_performance_v2.csv.gz' - - -def campaign_performance_data_file() -> str: - """The name of the file the campaign performance result is 
written to""" - return 'campaign_performance_v2.csv.gz' - - def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' @@ -66,3 +51,8 @@ def total_attempts_for_single_day() -> int: def retry_timeout_interval() -> int: """number of seconds to wait before trying again to download a single day""" return 10 + + +def output_file_version() -> str: + """A suffix that is added to output files, denoting a version of the data format""" + return 'v2' diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index ed49966..8f5be2d 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -80,18 +80,18 @@ def download_performance_data(api_client: BingReportClient): start_time = time.time() print('About to download ad data for {date:%Y-%m-%d}' .format(date=current_date)) - submit_and_download(report_request_ad, api_client, str(filepath), config.ad_performance_data_file()) + submit_and_download(report_request_ad, api_client, str(filepath), f'ad_performance_{config.output_file_version()}.csv.gz') print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) start_time = time.time() print('About to download keyword data for {date:%Y-%m-%d}' .format(date=current_date)) - submit_and_download(report_request_keyword, api_client, str(filepath), config.keyword_performance_data_file()) + submit_and_download(report_request_keyword, api_client, str(filepath), f'keyword_performance_{config.output_file_version()}.csv.gz') print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) print('About to download campaign data for {date:%Y-%m-%d}' .format(date=current_date)) - submit_and_download(report_request_campaign, api_client, str(filepath), config.campaign_performance_data_file()) + 
submit_and_download(report_request_campaign, api_client, str(filepath), f'campaign_performance_{config.output_file_version()}.csv.gz') print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) # date is decreased only if the download above does not fail From d3064eeb2647fc75f8a8a312be2f4ded6877bbb0 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Thu, 15 Mar 2018 15:39:22 +0100 Subject: [PATCH 10/12] Avoid downloading again files that are already present --- bingads_downloader/downloader.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 8f5be2d..259a77e 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -2,6 +2,7 @@ import errno import sys import tempfile +import os import urllib import webbrowser from pathlib import Path @@ -67,12 +68,11 @@ def download_performance_data(api_client: BingReportClient): date=current_date)) filepath = ensure_data_directory(relative_filepath) - if not filepath.is_dir() or (last_date - current_date).days < 31: + if (last_date - current_date).days < 31: report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) - with tempfile.TemporaryDirectory() as tmp_dir: tmp_filepath = Path(tmp_dir, relative_filepath) tmp_filepath.parent.mkdir(exist_ok=True, parents=True) @@ -290,16 +290,22 @@ def build_campaign_performance_request_for_single_day(api_client: BingReportClie report_request.Columns = report_columns return report_request + def submit_and_download(report_request, api_client, data_dir, data_file): """ Submit the download request and then use the ReportingDownloadOperation result 
to track status until the report is complete. + If the file already exists, do nothing Args: report_request: report_request object e.g. created by get_ad_performance_for_single_day api_client: BingApiClient object data_dir: target directory of the files containing the reports data_file: the name of the file containing the data """ + target_file = data_dir + '/' + data_file + if os.path.exists(target_file): + print(f'The file {target_file} already exists, skipping it') + return current_reporting_service_manager = \ ReportingServiceManager( From 6ae6f956716dc1694799767a28a4eb69247f3623 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Thu, 15 Mar 2018 16:02:09 +0100 Subject: [PATCH 11/12] Download again and overwrite the files more recent that 31 days --- bingads_downloader/downloader.py | 90 +++++++++++++++----------------- 1 file changed, 42 insertions(+), 48 deletions(-) diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 259a77e..df0425f 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -68,58 +68,51 @@ def download_performance_data(api_client: BingReportClient): date=current_date)) filepath = ensure_data_directory(relative_filepath) - if (last_date - current_date).days < 31: - report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) - report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) - report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) - - with tempfile.TemporaryDirectory() as tmp_dir: - tmp_filepath = Path(tmp_dir, relative_filepath) - tmp_filepath.parent.mkdir(exist_ok=True, parents=True) - try: - start_time = time.time() - print('About to download ad data for {date:%Y-%m-%d}' - .format(date=current_date)) - submit_and_download(report_request_ad, api_client, str(filepath), f'ad_performance_{config.output_file_version()}.csv.gz') - print('Successfully
downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' - .format(date=current_date, elapsed=time.time() - start_time)) - start_time = time.time() - print('About to download keyword data for {date:%Y-%m-%d}' - .format(date=current_date)) - submit_and_download(report_request_keyword, api_client, str(filepath), f'keyword_performance_{config.output_file_version()}.csv.gz') - print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + overwrite_if_exists = (last_date - current_date).days < 31 + if overwrite_if_exists: + print(f'The data for {current_date:%Y-%m-%d} will be downloaded even if the files are already present, will be overwritten') + report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) + report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_filepath = Path(tmp_dir, relative_filepath) + tmp_filepath.parent.mkdir(exist_ok=True, parents=True) + try: + start_time = time.time() + print('About to download ad data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_ad, api_client, str(filepath), f'ad_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' .format(date=current_date, elapsed=time.time() - start_time)) - print('About to download campaign data for {date:%Y-%m-%d}' - .format(date=current_date)) - submit_and_download(report_request_campaign, api_client, str(filepath), f'campaign_performance_{config.output_file_version()}.csv.gz') - print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' - .format(date=current_date, elapsed=time.time() - start_time)) - # date is decreased only if the download above does 
not fail - current_date -= datetime.timedelta(days=1) - remaining_attempts = config.total_attempts_for_single_day - except urllib.error.URLError as url_error: - if remaining_attempts == 0: - print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) - raise - print('ERROR WHILE DOWNLOADING REPORT, RETRYING in {} seconds, attempt {}#...' - .format(config.retry_timeout_interval, remaining_attempts), file=sys.stderr) - print(url_error, file=sys.stderr) - time.sleep(config.retry_timeout_interval) - remaining_attempts -= 1 - else: - if not filepath.is_dir(): - print(f'Skipping the day since directory {str(filepath)} already exists') - - if (last_date - current_date).days < 31: - print(f'Skipping the day since {str(last_date)} is more than 31 days before {str(current_date)}') + start_time = time.time() + print('About to download keyword data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_keyword, api_client, str(filepath), f'keyword_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) + print('About to download campaign data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_campaign, api_client, str(filepath), f'campaign_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) + # date is decreased only if the download above does not fail + current_date -= datetime.timedelta(days=1) + remaining_attempts = config.total_attempts_for_single_day + except urllib.error.URLError as url_error: + if remaining_attempts == 0: + print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) + raise + 
print('ERROR WHILE DOWNLOADING REPORT, RETRYING in {} seconds, attempt {}#...' + .format(config.retry_timeout_interval, remaining_attempts), file=sys.stderr) + print(url_error, file=sys.stderr) + time.sleep(config.retry_timeout_interval) + remaining_attempts -= 1 current_date -= datetime.timedelta(days=1) - - - def build_ad_performance_request_for_single_day(api_client: BingReportClient, current_date: datetime): """ @@ -291,7 +284,7 @@ def build_campaign_performance_request_for_single_day(api_client: BingReportClie return report_request -def submit_and_download(report_request, api_client, data_dir, data_file): +def submit_and_download(report_request, api_client, data_dir, data_file, overwrite_if_exists): """ Submit the download request and then use the ReportingDownloadOperation result to track status until the report is complete. @@ -301,9 +294,10 @@ def submit_and_download(report_request, api_client, data_dir, data_file): api_client: BingApiClient object data_dir: target directory of the files containing the reports data_file: the name of the file containing the data + overwrite_if_exists: if True, overwrite the file """ target_file = data_dir + '/' + data_file - if os.path.exists(target_file): + if os.path.exists(target_file) and not overwrite_if_exists: print(f'The file {target_file} already exists, skipping it') return From 51fc25162d790e268d2040233f75c134afc9cf25 Mon Sep 17 00:00:00 2001 From: jacopofar Date: Fri, 16 Mar 2018 10:30:44 +0100 Subject: [PATCH 12/12] Avoid skipping days --- bingads_downloader/downloader.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index df0425f..477a67d 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -110,8 +110,6 @@ def download_performance_data(api_client: BingReportClient): remaining_attempts -= 1 - current_date -= datetime.timedelta(days=1) - def build_ad_performance_request_for_single_day(api_client: 
BingReportClient, current_date: datetime):