diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cbe7ac..46fb500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 2.0 +2018-03-14 +- Download the ad, keyword and campaign performance reports as three separate files (major version bump as the file names are different) + ## 1.2.2 *2018-03-13 @@ -15,6 +19,7 @@ - Uses v11 reporting + ## 1.1.0 *2017-09-21 diff --git a/README.md b/README.md index d347542..fecfe84 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ # BingAds Performance Downloader -A Python script for downloading performance data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://github.com/BingAds/BingAds-Python-SDK). +A Python script for downloading performance and keyword data from the [BingAds API version 11](https://msdn.microsoft.com/en-us/library/bing-ads-overview(v=msads.100).aspx) to local files. The code is largely based on [Bing Ads Python SDK](https://docs.microsoft.com/en-us/bingads/guides/?view=bingads-11). ## Resulting data -**BingAds Performance Downloader** gives measures such as impressions, clicks and cost. The script creates one csv file per day in a specified time range: +**BingAds Performance Downloader** gives measures such as impressions, clicks and cost. The script creates three csv files per day in a specified time range, for example: - /tmp/bingads/2016/05/02/bing/ad_performance.csv.gz /tmp/bingads/2016/05/03/bing/ad_performance.csv.gz + /tmp/bingads/2016/05/03/bing/keyword_performance.csv.gz + /tmp/bingads/2016/05/03/bing/campaign_performance.csv.gz - - Each line contains one ad for one day: + Each line of `keyword_performance` contains one ad for one day: GregorianDate | 2/12/2016 AccountId | 17837800573 @@ -34,6 +34,44 @@ A Python script for downloading performance data from the [BingAds API version 1 Revenue | 0 Network | Bing and Yahoo! 
search +While `ad_performance` has these columns: + + AccountName | Name + AccountNumber | X001342 + AccountId | 67688 + TimePeriod | 2018-02-03 + CampaignName | 12. Campaign name + CampaignId | 1234567 + AdGroupName | Name of the ad group + AdId | 8123456789 + AdGroupId | 123456789011 + AdTitle | title of the ad, optional + AdDescription | Description of the ad + AdType | Expanded text ad + Impressions | 2 + Clicks | 3 + Ctr | 45.78% + Spend | 4.34 + AveragePosition | 1.00 + Conversions | 12 + ConversionRate | 23.8% + CostPerConversion | 2.34 + DeviceType | Computer + AccountStatus | Active + CampaignStatus | Active + AdGroupStatus | Active + AdLabels | {label1=value1};{label2=value2} + + +And `campaign_performance` has these columns: + + AccountName | Name of the account + AccountId | 123456 + TimePeriod | 2018-01-10 + CampaignName | 1. Banner 03 + CampaignId | 1234567 + Spend | 1.23 + CampaignLabels | {channel=display} ## Getting Started @@ -84,9 +122,9 @@ In order to access the BingAds API you have to obtain the OAuth2 credentials fro $ refresh-bingsads-api-oauth2-token \ --developer_token ABCDEFEGHIJKL \ --oauth2_client_id 123456789 \ - --oauth2_client_secret aBcDeFg This will open a web browser to allow the OAuth2 credentials to access the API on your behalf. + ![](docs/oauth1.png) ![](docs/oauth2.png) @@ -107,7 +145,7 @@ To run the BingAds Performance Downloader call `download-bingsads-performance-da --oauth2_refresh_token MCQL58pByMOdq*sU7 \ --data_dir /tmp/bingads -For all options, see +For all options, see the _help_ $ download-bingsads-performance-data --help Usage: download-bingsads-performance-data [OPTIONS] @@ -116,7 +154,7 @@ For all options, see config.py are used. Options: - + --developer_token TEXT The developer token that is used to access the BingAds API. 
Default: "012345679ABCDEF" --oauth2_client_id TEXT The Oauth client id obtained from the @@ -133,22 +171,24 @@ For all options, see ABCDefgh!1234567890" --data_dir TEXT The directory where result data is written to. Default: "/tmp/bingads/" - --data_file TEXT The name of the file the result is written - to. Default: "ad_performance.csv.gz" + --output_file_version TEXT A suffix that is added to output files, + denoting a version of the data format. + Default: "v2" --first_date TEXT The first day from which on data will be downloaded. Default: "2015-01-01" --environment TEXT The deployment environment. Default: "production" - --timeout INTEGER The maximum amount of time (in milliseconds) + --timeout TEXT The maximum amount of time (in milliseconds) that you want to wait for the report download. Default: "3600000" - --total_attempts_for_single_file INTEGER - The attempts to download a single file in - case of HTTP errors or timeouts. Default: - "5" - --retry_timeout_interval INTEGER - number of seconds to wait before trying + --total_attempts_for_single_day TEXT + The attempts to download a single day (ad + and keyword performance) in case of HTTP + errors or timeouts. Default: "5" + --retry_timeout_interval TEXT number of seconds to wait before trying again to download a single day. Default: "10" --help Show this message and exit. 
+ + diff --git a/bingads_downloader/cli.py b/bingads_downloader/cli.py index 217c51a..a295230 100644 --- a/bingads_downloader/cli.py +++ b/bingads_downloader/cli.py @@ -51,11 +51,11 @@ def refresh_oauth2_token(**kwargs): @config_option(config.oauth2_client_secret) @config_option(config.oauth2_refresh_token) @config_option(config.data_dir) -@config_option(config.data_file) +@config_option(config.output_file_version) @config_option(config.first_date) @config_option(config.environment) @config_option(config.timeout) -@config_option(config.total_attempts_for_single_file) +@config_option(config.total_attempts_for_single_day) @config_option(config.retry_timeout_interval) def download_data(**kwargs): """ @@ -64,4 +64,4 @@ def download_data(**kwargs): """ apply_options(kwargs) show_version() - downloader.download_data() \ No newline at end of file + downloader.download_data() diff --git a/bingads_downloader/config.py b/bingads_downloader/config.py index ab9ecd0..aca5197 100644 --- a/bingads_downloader/config.py +++ b/bingads_downloader/config.py @@ -8,15 +8,11 @@ def data_dir() -> str: return '/tmp/bingads/' -def data_file() -> str: - """The name of the file the result is written to""" - return 'ad_performance.csv.gz' - - def first_date() -> str: """The first day from which on data will be downloaded""" return '2015-01-01' + def developer_token() -> str: """The developer token that is used to access the BingAds API""" return '012345679ABCDEF' @@ -47,11 +43,16 @@ def timeout() -> int: return 3600000 -def total_attempts_for_single_file() -> int: - """The attempts to download a single file in case of HTTP errors or timeouts""" +def total_attempts_for_single_day() -> int: + """The attempts to download a single day (ad and keyword performance) in case of HTTP errors or timeouts""" return 5 def retry_timeout_interval() -> int: """number of seconds to wait before trying again to download a single day""" return 10 + + +def output_file_version() -> str: + """A suffix that is added 
to output files, denoting a version of the data format""" + return 'v2' diff --git a/bingads_downloader/downloader.py b/bingads_downloader/downloader.py index 9157d4d..477a67d 100644 --- a/bingads_downloader/downloader.py +++ b/bingads_downloader/downloader.py @@ -2,6 +2,7 @@ import errno import sys import tempfile +import os import urllib import webbrowser from pathlib import Path @@ -47,12 +48,12 @@ def download_data_sets(api_client: BingReportClient): """ authenticate_with_oauth(api_client) - download_ad_performance_data(api_client) + download_performance_data(api_client) -def download_ad_performance_data(api_client: BingReportClient): +def download_performance_data(api_client: BingReportClient): """ - Downloads BingAds performance reports by creating report objects + Downloads BingAds ad, keyword and campaign performance reports by creating report objects for every day since config.first_date() till today Args: api_client: BingAdsApiClient @@ -60,43 +61,123 @@ def download_ad_performance_data(api_client: BingReportClient): first_date = datetime.datetime.strptime(config.first_date(), '%Y-%m-%d') last_date = datetime.datetime.now() - datetime.timedelta(days=1) current_date = last_date - remaining_attempts = config.total_attempts_for_single_file + remaining_attempts = config.total_attempts_for_single_day()  # config values are functions and must be called while current_date >= first_date: print(current_date) relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format( date=current_date)) filepath = ensure_data_directory(relative_filepath) - if not filepath.is_dir() or (last_date - current_date).days < 31: - report_request = build_ad_performance_request_for_single_day(api_client, current_date) - with tempfile.TemporaryDirectory() as tmp_dir: - tmp_filepath = Path(tmp_dir, relative_filepath) - tmp_filepath.parent.mkdir(exist_ok=True, parents=True) - try: - start_time = time.time() - submit_and_download(report_request, api_client, str(filepath)) - print('Successfully downloaded data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' - 
.format(date=current_date, elapsed=time.time() - start_time)) - # date is decreased only if the download above does not fail - current_date -= datetime.timedelta(days=1) - remaining_attempts = config.total_attempts_for_single_file - except urllib.error.URLError as url_error: - if remaining_attempts == 0: - print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) - raise - print('ERROR WHILE DOWNLOADING REPORT, RETRYING in {} seconds, attempt {}#...' - .format(config.retry_timeout_interval, remaining_attempts), file=sys.stderr) - print(url_error, file=sys.stderr) - time.sleep(config.retry_timeout_interval) - remaining_attempts -= 1 - else: - current_date -= datetime.timedelta(days=1) + overwrite_if_exists = (last_date - current_date).days < 31 + if overwrite_if_exists: + print(f'The data for {current_date:%Y-%m-%d} will be downloaded even if the files are already present, will be overwritten') + report_request_ad = build_ad_performance_request_for_single_day(api_client, current_date) + report_request_keyword = build_keyword_performance_request_for_single_day(api_client, current_date) + report_request_campaign = build_campaign_performance_request_for_single_day(api_client, current_date) + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_filepath = Path(tmp_dir, relative_filepath) + tmp_filepath.parent.mkdir(exist_ok=True, parents=True) + try: + start_time = time.time() + print('About to download ad data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_ad, api_client, str(filepath), f'ad_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) + start_time = time.time() + print('About to download keyword data for {date:%Y-%m-%d}' + .format(date=current_date)) + submit_and_download(report_request_keyword, api_client, 
str(filepath), f'keyword_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) + start_time = time.time()  # restart the timer so the campaign timing excludes the keyword download + print(f'About to download campaign data for {current_date:%Y-%m-%d}') + submit_and_download(report_request_campaign, api_client, str(filepath), f'campaign_performance_{config.output_file_version()}.csv.gz', overwrite_if_exists) + print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds' + .format(date=current_date, elapsed=time.time() - start_time)) + # date is decreased only if the download above does not fail + current_date -= datetime.timedelta(days=1) + remaining_attempts = config.total_attempts_for_single_day()  # fresh retry budget for the next day + except urllib.error.URLError as url_error: + if remaining_attempts == 0: + print('Too many failed attempts while downloading this day, quitting', file=sys.stderr) + raise + print('ERROR WHILE DOWNLOADING REPORT, RETRYING in {} seconds, attempt {}#...' + .format(config.retry_timeout_interval(), remaining_attempts), file=sys.stderr) + print(url_error, file=sys.stderr) + time.sleep(config.retry_timeout_interval()) + remaining_attempts -= 1 + def build_ad_performance_request_for_single_day(api_client: BingReportClient, current_date: datetime): """ - Creates a report report request object with hard coded parameters for a give date. + Creates an Ad report request object with hard coded parameters for a given date. 
+ Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('AdPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Ad Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Aggregation = 'Daily' + report_request.Language = 'English' + + report_time = api_client.factory.create('ReportTime') + + # Use a custom date range whose start and end are both current_date (a single day) + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfAdPerformanceReportColumn') + report_columns.AdPerformanceReportColumn.append([ + "AccountName", + "AccountNumber", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "AdGroupName", + "AdId", + "AdGroupId", + "AdTitle", + "AdDescription", + "AdType", + "Impressions", + "Clicks", + "Ctr", + "Spend", + "AveragePosition", + "Conversions", + "ConversionRate", + "CostPerConversion", + "DeviceType", + "AccountStatus", + "CampaignStatus", + "AdGroupStatus", + "AdLabels" + ]) + report_request.Columns = report_columns + + return report_request + + +def build_keyword_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Keyword report request object with hard coded parameters for a given date. 
Args: api_client: BingApiClient object current_date: date for which the report object will be created @@ -158,15 +239,65 @@ def build_ad_performance_request_for_single_day(api_client: BingReportClient, return report_request -def submit_and_download(report_request, api_client, data_dir): +def build_campaign_performance_request_for_single_day(api_client: BingReportClient, + current_date: datetime): + """ + Creates a Campaign report request object with hard coded parameters for a given date. + Args: + api_client: BingApiClient object + current_date: date for which the report object will be created + + Returns: + A report request object with our specific hard coded settings for a given date + """ + report_request = api_client.factory.create('CampaignPerformanceReportRequest') + report_request.Format = 'Csv' + report_request.ReportName = 'My Campaign Performance Report' + report_request.ReturnOnlyCompleteData = False + report_request.Language = 'English' + report_request.Aggregation = 'Daily' + report_time = api_client.factory.create('ReportTime') + + # Use a custom date range whose start and end are both current_date (a single day) + custom_date_range_start = api_client.factory.create('Date') + custom_date_range_start.Day = current_date.day + custom_date_range_start.Month = current_date.month + custom_date_range_start.Year = current_date.year + report_time.CustomDateRangeStart = custom_date_range_start + report_time.CustomDateRangeEnd = custom_date_range_start + report_time.PredefinedTime = None + report_request.Time = report_time + + report_columns = api_client.factory.create('ArrayOfCampaignPerformanceReportColumn') + report_columns.CampaignPerformanceReportColumn.append([ + "AccountName", + "AccountId", + "TimePeriod", + "CampaignName", + "CampaignId", + "Spend", + "CampaignLabels" + ]) + report_request.Columns = report_columns + return report_request + + +def submit_and_download(report_request, api_client, data_dir, data_file, overwrite_if_exists): """ Submit the download request and then use the 
ReportingDownloadOperation result to track status until the report is complete. + If the file already exists and overwrite_if_exists is False, do nothing Args: report_request: report_request object e.g. created by get_ad_performance_for_single_day api_client: BingApiClient object data_dir: target directory of the files containing the reports + data_file: the name of the file containing the data + overwrite_if_exists: if True, download and overwrite the file even if it already exists """ + target_file = data_dir + '/' + data_file + if os.path.exists(target_file) and not overwrite_if_exists: + print(f'The file {target_file} already exists, skipping it') + return current_reporting_service_manager = \ ReportingServiceManager( @@ -195,7 +326,7 @@ def submit_and_download(report_request, api_client, data_dir): result_file_path = reporting_download_operation.download_result_file( result_file_directory=data_dir, - result_file_name=config.data_file(), + result_file_name=data_file, decompress=False, overwrite=True, # Set this value true if you want to overwrite the same file. timeout_in_milliseconds=config.timeout() diff --git a/setup.py b/setup.py index 865f87e..461e2af 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,8 @@ setup( name='bingads-performance-downloader', - version='1.2.2', + + version='2.0.0', description="Downloads data from the BingAds Api to local files for usage in a data warehouse",