diff --git a/tests/base.py b/tests/base.py index 1be2c74..4c50d18 100644 --- a/tests/base.py +++ b/tests/base.py @@ -12,6 +12,19 @@ from tap_tester import connections, menagerie, runner, LOGGER from tap_tester.base_case import BaseCase +from tap_tester.jira_client import JiraClient as jira_client +from tap_tester.jira_client import CONFIGURATION_ENVIRONMENT as jira_config + +JIRA_CLIENT = jira_client({**jira_config}) + +def skipUntilDone(jira_ticket): + + def wrap(test_method): + is_done = JIRA_CLIENT.get_status_category(jira_ticket) == "done" + return BaseCase.skipUnless(is_done, jira_ticket)(test_method) + + return wrap + def backoff_wait_times(): """Create a generator of wait times as [30, 60, 120, 240, 480, ...]""" return backoff.expo(factor=30) @@ -41,6 +54,9 @@ class BingAdsBaseTest(BaseCase): DEFAULT_CONVERSION_WINDOW = -30 # days REQUIRED_KEYS = "required_keys" + # respect tap-bing-ads data retention window by looking back a maximum of about 3 years + start_date = dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z") + @staticmethod def tap_name(): """The name of the tap""" @@ -54,7 +70,8 @@ def get_type(): def get_properties(self, original: bool = True): """Configuration properties required for the tap.""" return_value = { - 'start_date': '2020-10-01T00:00:00Z', + # 'start_date': '2020-10-01T00:00:00Z', # original start_date + 'start_date': self.start_date, 'customer_id': '163875182', 'account_ids': '163078754,140168565,71086605', # 'conversion_window': '-15', # advanced option @@ -63,13 +80,6 @@ def get_properties(self, original: bool = True): # cid=42183085 aid=163078754 uid=71069166 (Stitch) # cid=42183085 aid=140168565 uid=71069166 (TestAccount) - if original: - return return_value - - # This test needs the new connections start date to be larger than the default - assert self.start_date > return_value["start_date"] - - return_value["start_date"] = self.start_date return return_value @staticmethod diff --git a/tests/base_new_framework.py b/tests/base_new_framework.py index 6f971c0..f1cfa6d 100644 --- a/tests/base_new_framework.py +++ b/tests/base_new_framework.py @@ -5,7 +5,6 @@ from datetime import datetime as dt from datetime import timezone as tz -#from tap_tester import connections, menagerie, runner, LOGGER from tap_tester.base_suite_tests.base_case import BaseCase def backoff_wait_times(): @@ -25,6 +24,9 @@ class BingAdsBaseTest(BaseCase): """ REQUIRED_KEYS = "required_keys" + # respect tap-bing-ads data retention window by looking back a maximum of about 3 years + start_date = dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z") + @staticmethod def tap_name(): """The name of the tap""" @@ -35,10 +37,10 @@ def get_type(): """the expected url route ending""" return "platform.bing-ads" - def get_properties(self, original: bool = True): + def get_properties(self): """Configuration properties required for the tap.""" return_value = { - 'start_date': '2020-10-01T00:00:00Z', + 'start_date': self.start_date, 'customer_id': '163875182', 'account_ids': '163078754,140168565,71086605', # 'conversion_window': '-15', # advanced option @@ -47,13 +49,6 @@ def get_properties(self, original: bool = True): # cid=42183085 aid=163078754 uid=71069166 (Stitch) # cid=42183085 aid=140168565 uid=71069166 (TestAccount) - if original: - return return_value - - # This test needs the new connections start date to be larger than the default - assert self.start_date > return_value["start_date"] - - return_value["start_date"] = self.start_date return return_value 
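Note on the skipUntilDone decorator introduced in tests/base.py above: it resolves the JIRA ticket's status category once, when the decorator is applied at import time, and wraps the test with BaseCase.skipUnless so the test only runs once the ticket is done. A minimal usage sketch, assuming BaseCase.skipUnless behaves like unittest.skipUnless (the test class and method below are hypothetical, not part of this diff):

    from base import skipUntilDone, BingAdsBaseTest

    class ExampleReportTest(BingAdsBaseTest):

        # skipped, with the ticket id as the reason, until the
        # ticket's status category is "done"
        @skipUntilDone("TDL-24648")
        def test_run(self):
            ...

Because is_done is evaluated at import time, a ticket moving to "done" mid-run will not un-skip tests that were already collected.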
@staticmethod @@ -77,7 +72,7 @@ def expected_metadata(): default_report = { BaseCase.PRIMARY_KEYS: set(), # "_sdc_report_datetime" is added by tap BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, - BaseCase.REPLICATION_KEYS: {"TimePeriod"}, # It used in sync but not mentioned in catalog. Bug: TDL-15816 + BaseCase.REPLICATION_KEYS: {"TimePeriod"}, # in sync but not in catalog TDL-15816 BaseCase.FOREIGN_KEYS: {"AccountId"} } accounts_meta = { @@ -106,12 +101,16 @@ def expected_metadata(): } age_gender_report = copy.deepcopy(default_report) - age_gender_report[BingAdsBaseTest.REQUIRED_KEYS] = {'AccountName', 'AdGroupName', 'AgeGroup', 'Gender'} + age_gender_report[BingAdsBaseTest.REQUIRED_KEYS] = {'AccountName', + 'AdGroupName', + 'AgeGroup', + 'Gender'} return { "accounts": accounts_meta, - "ad_extension_detail_report": extension_report, # BUG_DOC-1504 | https://stitchdata.atlassian.net/browse/DOC-1504 - "ad_group_performance_report": default_report, # BUG_DOC-1567 https://stitchdata.atlassian.net/browse/DOC-1567 + "ad_extension_detail_report": extension_report, # BUG_DOC-1504 + # https://stitchdata.atlassian.net/browse/DOC-1504 + "ad_group_performance_report": default_report, # BUG_DOC-1567 "ad_groups": default, "ad_performance_report": default_report, "ads": default, @@ -129,9 +128,8 @@ def expected_metadata(): def setUpClass(cls): super().setUpClass(logging="Ensuring environment variables are sourced.") missing_envs = [ - x for x in [ - 'TAP_BING_ADS_OAUTH_CLIENT_ID','TAP_BING_ADS_OAUTH_CLIENT_SECRET','TAP_BING_ADS_REFRESH_TOKEN', - 'TAP_BING_ADS_DEVELOPER_TOKEN', + x for x in ['TAP_BING_ADS_OAUTH_CLIENT_ID', 'TAP_BING_ADS_OAUTH_CLIENT_SECRET', + 'TAP_BING_ADS_REFRESH_TOKEN', 'TAP_BING_ADS_DEVELOPER_TOKEN', ] if os.getenv(x) is None ] @@ -139,10 +137,9 @@ def setUpClass(cls): raise Exception("Missing environment variables: {}".format(missing_envs)) def expected_replication_method(self,stream=None): - """return a dictionary with key of table name nd value of replication method - TDL-15816 - Currently, in tap, all streams are FULL_TABLE except accounts. - But as per the doc https://www.stitchdata.com/docs/integrations/saas/microsoft-advertising, + """ Return a dictionary with key of table name and value of replication method + TDL-15816 - Currently, in tap, all streams are FULL_TABLE except accounts. But as per + the doc https://www.stitchdata.com/docs/integrations/saas/microsoft-advertising, only the below streams are FULL TABLE, all other streams are INCREMENTAL.
ads ad_groups @@ -153,48 +150,49 @@ def expected_replication_method(self,stream=None): for table, properties in self.expected_metadata().items(): rep_method[table] = properties.get(self.REPLICATION_METHOD, None) for streams in rep_method.keys(): - if streams in [ 'ad_extension_detail_report', 'ad_group_performance_report', 'ad_performance_report', - 'age_gender_audience_report', 'audience_performance_report', 'campaign_performance_report', 'geographic_performance_report', 'goals_and_funnels_report', 'keyword_performance_report', - 'search_query_performance_report']: + if streams in [ 'ad_extension_detail_report', 'ad_group_performance_report', + 'ad_performance_report', 'age_gender_audience_report', + 'audience_performance_report', 'campaign_performance_report', + 'geographic_performance_report', 'goals_and_funnels_report', + 'keyword_performance_report', 'search_query_performance_report']: rep_method[streams] = 'FULL_TABLE' if not stream: return rep_method return rep_method[stream] - + def expected_replication_keys(self,stream=None): """ - return a dictionary with key of table name - and value as a set of replication key fields + Return a dictionary with key of table name and value as a set of replication key fields """ - """ - As all streams are FULL TABLE according to the tap, there is no replication key specified for any of - the streams.TDL-15816, hence removing the "TimePeriod" key from expected replication keys. - Need to determine the correct replication menthod and replication keys accordingly. - """ - - replication_keys = {table: properties.get(self.REPLICATION_KEYS, set())-{"TimePeriod"} + + # As all streams are FULL TABLE according to the tap, there is no replication key specified + # for any of the streams (TDL-15816), hence removing the "TimePeriod" key from expected + # replication keys. Need to determine the correct replication method and replication keys + # accordingly.
+ + replication_keys = {table: properties.get(self.REPLICATION_KEYS, set()) - {"TimePeriod"} for table, properties in self.expected_metadata().items()} if not stream: return replication_keys return replication_keys[stream] - + def expected_automatic_fields(self,stream=None): """ - return a dictionary with key of table name - and value as a set of automatic fields - """ - """ - Sdc_report_datetime is mentioned as primary key for most of the stream in docs, - but is not returned as primary key by the tap, hence adding it explicitly to automatic fields TDL-15816 + Return a dictionary with key of table name and value as a set of automatic fields """ + + # _sdc_report_datetime is mentioned as primary key for most streams in docs, but is not + # returned as pk by the tap, hence adding it explicitly to automatic fields TDL-15816 + auto_fields = {} for k, v in self.expected_metadata().items(): - auto_fields[k] = v.get(self.PRIMARY_KEYS, set())|v.get(self.REPLICATION_KEYS, set()) \ - |v.get(self.FOREIGN_KEYS, set())|v.get(self.REQUIRED_KEYS, set())|{'_sdc_report_datetime'} + auto_fields[k] = v.get(self.PRIMARY_KEYS, set()) | v.get(self.REPLICATION_KEYS, set()) \ + | v.get(self.FOREIGN_KEYS, set()) | v.get(self.REQUIRED_KEYS, set()) | \ + {'_sdc_report_datetime'} for streams in auto_fields.keys(): if streams in ['ads', 'ad_groups', 'campaigns', 'accounts']: - auto_fields[streams] = auto_fields[stream]-{'_sdc_report_datetime'} + auto_fields[streams] = auto_fields[streams] - {'_sdc_report_datetime'} if not stream: return auto_fields return auto_fields[stream] diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index 4a7ce65..ecfe402 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -1,62 +1,78 @@ - - from tap_tester.base_suite_tests.all_fields_test import AllFieldsTest from base_new_framework import BingAdsBaseTest +from tap_tester.base_case import BaseCase as base +from tap_tester.jira_client import JiraClient as jira_client +from tap_tester.jira_client import CONFIGURATION_ENVIRONMENT as jira_config + +JIRA_CLIENT = jira_client({**jira_config}) + class AllFieldsTest(AllFieldsTest,BingAdsBaseTest): """ Test the tap all_fields """ + start_date = '2021-01-01T00:00:00Z' + @staticmethod def name(): return "tap_tester_bing_ads_all_fields_test" + + # update all tests in repo when JIRA cards are complete + TDL_23223_is_done = JIRA_CLIENT.get_status_category("TDL-23223") == "done" + assert TDL_23223_is_done == False, "TDL-23223 is done, Re-add streams with fixed exclusions" + TDL_24648_is_done = JIRA_CLIENT.get_status_category("TDL-24648") == "done" + assert TDL_24648_is_done == False, "TDL-24648 is done, Re-add streams that have data" + def streams_to_test(self): - streams_to_exclude={'ad_group_performance_report','campaign_performance_report','goals_and_funnels_report'} - """ - TODO - Excluded the ad_group and campaign report streams, has the Exclusion's file doesn't have the latest exclusions, - to be removed after TDL-23223 is fixed - Goals stream has no active data - """ + # TODO Excluded ad_group and campaign report streams due to exclusions file errors; the + # current file doesn't appear to have the latest exclusions, to be removed after TDL-23223 + # is fixed. Data has aged out of the 3 year retention window for other report streams. + # Work with marketing / dev to see if new data can be generated.
+ streams_to_exclude = {'ad_extension_detail_report', + 'ad_performance_report', + 'ad_group_performance_report', # TDL-23223 + 'age_gender_audience_report', + 'audience_performance_report', + 'campaign_performance_report', # TDL-23223 + 'geographic_performance_report', + 'goals_and_funnels_report', + 'keyword_performance_report', + 'search_query_performance_report'} + return self.expected_stream_names().difference(streams_to_exclude) - def missing_fields(self): - return { - 'accounts':{ - 'TaxCertificate', - 'AccountMode' - }, - 'ads':{ - 'Descriptions', - 'LongHeadlineString', - 'BusinessName', - 'Videos', - 'LongHeadlines', - 'Images', - 'LongHeadline', - 'PromotionalText', - 'CallToAction', - 'AppStoreId', - 'Headlines', - 'ImpressionTrackingUrls', - 'CallToActionLanguage', - 'Headline', - 'AppPlatform' - }, - 'campaigns':{ - 'MultimediaAdsBidAdjustment', - 'AdScheduleUseSearcherTimeZone', - 'BidStrategyId' - }, - 'ad_groups':{ - 'CpvBid', - 'AdGroupType',#Talend Data Loader TDL-23228 -- data present in fronend but not returned in synced records - 'MultimediaAdsBidAdjustment', - 'AdScheduleUseSearcherTimeZone', - 'CpmBid' - } - } - def test_all_fields_for_streams_are_replicated(self): - self.selected_fields = {k:v - self.missing_fields().get(k, set()) - for k,v in AllFieldsTest.selected_fields.items()} - super().test_all_fields_for_streams_are_replicated() + MISSING_FIELDS = { + 'accounts':{ + 'TaxCertificate', + 'AccountMode' + }, + 'ads':{ + 'Descriptions', + 'LongHeadlineString', + 'BusinessName', + 'Videos', + 'LongHeadlines', + 'Images', + 'LongHeadline', + 'PromotionalText', + 'CallToAction', + 'AppStoreId', + 'Headlines', + 'ImpressionTrackingUrls', + 'CallToActionLanguage', + 'Headline', + 'AppPlatform' + }, + 'campaigns':{ + 'MultimediaAdsBidAdjustment', + 'AdScheduleUseSearcherTimeZone', + 'BidStrategyId' + }, + 'ad_groups':{ + 'CpvBid', + 'AdGroupType', # TDL-23228 -- data present in frontend but not returned in synced records + 'MultimediaAdsBidAdjustment', + 'AdScheduleUseSearcherTimeZone', + 'CpmBid' + } + } diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index 7474e20..aa272fd 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -6,6 +6,8 @@ from base import BingAdsBaseTest +import base + class MinimumSelectionTest(BingAdsBaseTest): """Test that with no fields selected for a stream automatic fields are still replicated""" @@ -15,26 +17,27 @@ def name(): return "tap_tester_bing_ads_minimum_fields_test" def expected_sync_streams(self): - # BUG_SRCE-4313 To reproduce grep jira id across tests dir and comment/uncomment correpsonding lines. - # You will need to comment out all _report streams prior to running test as well. - # You will probably get an OperationError during the sync, that's because we aren't selecting any fields - # prior to the sync, this means only automatic fields will be replicated. Except we aren't marking the + # BUG_SRCE-4313 To reproduce grep jira id across tests dir and comment/uncomment + # corresponding lines. You will need to comment out all _report streams prior + # to running test as well. You will probably get an OperationError during the + # sync, that's because we aren't selecting any fields prior to the sync, this + # means only automatic fields will be replicated. Except we aren't marking the # necessary fields as automatic. Hence this bug.
return { 'accounts', - 'ad_extension_detail_report', - 'ad_group_performance_report', + # 'ad_extension_detail_report', + # 'ad_group_performance_report', 'ad_groups', - 'ad_performance_report', + # 'ad_performance_report', 'ads', - 'age_gender_audience_report', - 'audience_performance_report', - 'campaign_performance_report', + # 'age_gender_audience_report', + # 'audience_performance_report', + # 'campaign_performance_report', 'campaigns', - 'geographic_performance_report', - 'goals_and_funnels_report', # Unable to generate data for this stream, workaround in test - 'keyword_performance_report', - 'search_query_performance_report', + # 'geographic_performance_report', + # 'goals_and_funnels_report', # Unable to generate data for this stream, workaround in test + # 'keyword_performance_report', + # 'search_query_performance_report', } def report_measure_fields(self): @@ -65,8 +68,9 @@ def report_automatic_fields(self): for stream in self.expected_sync_streams() if stream.endswith('_report') } - stream_to_fields['goals_and_funnels_report'].remove('Clicks') - stream_to_fields['goals_and_funnels_report'].update({'Assists'}) + # TODO uncomment below two lines when TDL-24648 is resolved + # stream_to_fields['goals_and_funnels_report'].remove('Clicks') + # stream_to_fields['goals_and_funnels_report'].update({'Assists'}) return stream_to_fields @@ -93,18 +97,26 @@ def test_run(self): fetch of data. For instance if you have a limit of 250 records ensure that 251 (or more) records have been posted for that stream. """ - self.start_date = '2020-11-10T00:00:00Z' - conn_id = self.create_connection(original_properties=False) + + TDL_24648_is_done = base.JIRA_CLIENT.get_status_category("TDL-24648") == "done" + assert TDL_24648_is_done == False, ("TDL-24648 is done, Re-add report streams to " + "expected_sync_streams and report_automatic_fields") + + conn_id = self.create_connection() # Select all parent streams and no fields within streams - # Select all (testable) report streams and only fields which are automatic and/or required by bing to genereate a report + # Select all (testable) report streams and only fields which are automatic and/or required + # by bing to generate a report found_catalogs = menagerie.get_catalogs(conn_id) test_catalogs = [catalog for catalog in found_catalogs if catalog.get('tap_stream_id') in self.expected_sync_streams()] - # BUG_SRCE-4313 (https://stitchdata.atlassian.net/browse/SRCE-4313) streams missing automatic fields - specific_fields = {**self.report_automatic_fields(), **self.parent_automatic_fields()} # COMMENT to reproduce - # specific_fields = {**self.report_measure_fields(), **self.parent_automatic_fields()} # UNCOMMENT to reproduce + # BUG_SRCE-4313 (https://stitchdata.atlassian.net/browse/SRCE-4313) + # streams missing automatic fields + # COMMENT out line below to reproduce + specific_fields = {**self.report_automatic_fields(), **self.parent_automatic_fields()} + # UNCOMMENT 2 lines below to reproduce + # specific_fields = {**self.report_measure_fields(), **self.parent_automatic_fields()} # specific_fields = self.report_measure_fields() # TODO Use this line once bugs addressed.
self.perform_and_verify_adjusted_selection( @@ -123,22 +135,25 @@ def test_run(self): with self.subTest(stream=stream): if stream == 'goals_and_funnels_report': # SKIP TESTING FOR THIS STREAM - continue # There is no data available, since we would need to implement a tracking script on singer's site + # no data available, would need to implement a tracking script on singer's site + continue # verify that you get some records for each stream self.assertGreater( record_count_by_stream.get(stream, -1), 0, msg="The number of records is not over the stream max limit") - # verify that only the automatic fields are sent to the target for parent streams, and that - # automatic fields, _sdc_report_datetime, AND specific measure fields are sent to target for report streams + # verify that only the automatic fields are sent to the target for parent streams, + # and that automatic fields, _sdc_report_datetime, AND specific measure fields are + # sent to target for report streams actual = actual_fields_by_stream.get(stream) or set() expected = self.expected_automatic_fields().get(stream, set()) if stream.endswith('_report'): # update expectations for report streams expected_measure = 'Assists' if stream.startswith('goals') else 'Clicks' expected.update({ '_sdc_report_datetime', # tap applies sdc value as pk for all reports - expected_measure # reports require a perf measure (which is intentionally not automatic) + # reports require a perf measure (which is intentionally not automatic) + expected_measure }) self.assertSetEqual(expected, actual) diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py index 37f1668..c16977a 100644 --- a/tests/test_bookmarks.py +++ b/tests/test_bookmarks.py @@ -104,7 +104,8 @@ def test_run(self): # UPDATE STATE BETWEEN SYNCS new_state = {'bookmarks': dict()} for stream, bookmark in self.calculated_states_by_stream(first_sync_bookmarks).items(): - new_state['bookmarks'][stream] = {self.get_bookmark_key(stream): bookmark} # BUG_SRCE-4609 + # BUG_SRCE-4609 + new_state['bookmarks'][stream] = {self.get_bookmark_key(stream): bookmark} # new_state['bookmarks'][stream] = {self.expected_replication_key(stream): bookmark} menagerie.set_state(conn_id, new_state) @@ -125,8 +126,10 @@ def test_run(self): second_sync_count = second_sync_record_count.get(stream, 0) # record messages - first_sync_messages = first_sync_records.get(stream, {'messages': []}).get('messages') - second_sync_messages = second_sync_records.get(stream, {'messages': []}).get('messages') + first_sync_messages = first_sync_records.get(stream, {'messages': []}).get( + 'messages') + second_sync_messages = second_sync_records.get(stream, {'messages': []}).get( + 'messages') # bookmarked states (top level objects) first_sync_bookmark_key_value = first_sync_bookmarks.get('bookmarks').get(stream) @@ -146,36 +149,44 @@ def test_run(self): first_sync_bookmark_value = first_sync_bookmark_key_value.get(bookmark_key) second_sync_bookmark_value = second_sync_bookmark_key_value.get(bookmark_key) # bookmarked values as utc for comparing against records - first_sync_bookmark_value_utc = self.convert_state_to_utc(first_sync_bookmark_value) - second_sync_bookmark_value_utc = self.convert_state_to_utc(second_sync_bookmark_value) + first_sync_bookmark_value_utc = self.convert_state_to_utc( + first_sync_bookmark_value) + second_sync_bookmark_value_utc = self.convert_state_to_utc( + second_sync_bookmark_value) simulated_bookmark_value = new_state['bookmarks'][stream][bookmark_key] # Verify the second sync bookmark is Equal to the 
first sync bookmark - self.assertEqual(second_sync_bookmark_value, first_sync_bookmark_value) # assumes no changes to data during test + # assumes no changes to data during test + self.assertEqual(second_sync_bookmark_value, first_sync_bookmark_value) - # Verify the first sync bookmark value is the max replication key value for a given stream + # Verify first sync bookmark value is max replication key value for given stream for message in first_sync_messages: replication_key_value = message.get('data').get(replication_key) self.assertLessEqual(replication_key_value, first_sync_bookmark_value_utc, - msg="First sync bookmark was set incorrectly, a record with a greater rep key value was synced") + msg=("First sync bookmark was set incorrectly, a " + "record with a greater rep key value was synced")) for message in second_sync_messages: replication_key_value = message.get('data').get(replication_key) - # Verify the second sync records respect the previous (simulated) bookmark value + # Verify second sync records respect the previous (simulated) bookmark value self.assertGreaterEqual(replication_key_value, simulated_bookmark_value, - msg="Second sync records do not repect the previous bookmark.") + msg=("Second sync records do not respect the " + "previous bookmark.")) - # Verify the second sync bookmark value is the max replication key value for a given stream + # Verify the second sync bookmark value is the max replication key value + # for a given stream self.assertLessEqual(replication_key_value, second_sync_bookmark_value_utc, - msg="Second sync bookmark was set incorrectly, a record with a greater rep key value was synced") + msg=("Second sync bookmark was set incorrectly, a " + "record with a greater rep key value was synced")) # Verify the number of records in the 2nd sync is less then the first self.assertLess(second_sync_count, first_sync_count) # Verify at least 1 record was replicated in the second sync - self.assertGreater(second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) + self.assertGreater(second_sync_count, 0, + msg="Not fully testing bookmarking for {}".format(stream)) elif replication_method == self.FULL_TABLE: # Verify the first sync sets a bookmark of the expected form @@ -185,4 +196,5 @@ def test_run(self): self.assertIsNone(second_sync_bookmark_key_value) else: - raise NotImplementedError("invalid replication method: {}".format(replication_method)) + raise NotImplementedError("invalid replication method: {}".format( + replication_method)) diff --git a/tests/test_bookmarks_reports.py b/tests/test_bookmarks_reports.py index 8c3df3e..40e2cb8 100644 --- a/tests/test_bookmarks_reports.py +++ b/tests/test_bookmarks_reports.py @@ -1,10 +1,9 @@ +import base import datetime import dateutil.parser import pytz -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections, menagerie, runner from base import BingAdsBaseTest @@ -178,6 +177,7 @@ def calculated_states_by_stream(self, current_state): return stream_to_calculated_bookmark_value + @base.skipUntilDone("TDL-24648") def test_run(self): """ Test is parametrized to account for the exclusions in some report streams.
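Two JIRA gating styles appear in this diff: the @base.skipUntilDone("TDL-24648") decorator above silently skips a test until the ticket is done, while the inline asserts in test_all_fields.py, test_automatic_fields.py, and test_start_date.py fail loudly once a ticket is resolved, forcing the excluded streams to be re-added. A sketch of that second pattern as a reusable helper; assert_not_done is hypothetical and not part of tap_tester or this diff:

    def assert_not_done(jira_ticket, reminder):
        # fail loudly once the ticket is resolved so test expectations get updated
        is_done = JIRA_CLIENT.get_status_category(jira_ticket) == "done"
        assert not is_done, "{} is done, {}".format(jira_ticket, reminder)

    # mirrors the inline gate in test_automatic_fields.py:
    # assert_not_done("TDL-24648", "Re-add report streams to expected_sync_streams")

The decorator suits tests that cannot pass at all until the fix lands; the assert suits tests that still run but carry temporary workarounds that must be unwound.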
diff --git a/tests/test_start_date.py b/tests/test_start_date.py index eea74bb..83d7302 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -1,13 +1,11 @@ from datetime import datetime as dt from datetime import timedelta -import tap_tester.connections as connections -import tap_tester.runner as runner -import tap_tester.menagerie as menagerie -from tap_tester import LOGGER - +from tap_tester import connections, menagerie, runner, LOGGER from base import BingAdsBaseTest +import base + class BingAdsStartDateTest(BingAdsBaseTest): @@ -21,22 +19,21 @@ def name(): def expected_sync_streams(self): return { 'accounts', - 'ad_extension_detail_report', - 'ad_group_performance_report', + # 'ad_extension_detail_report', + # 'ad_group_performance_report', 'ad_groups', - 'ad_performance_report', + # 'ad_performance_report', 'ads', - 'age_gender_audience_report', - 'audience_performance_report', - 'campaign_performance_report', + # 'age_gender_audience_report', + # 'audience_performance_report', + # 'campaign_performance_report', 'campaigns', - 'geographic_performance_report', + # 'geographic_performance_report', # 'goals_and_funnels_report', # cannot test no data available - 'keyword_performance_report', - 'search_query_performance_report', + # 'keyword_performance_report', + # 'search_query_performance_report', } - def test_run(self): """ Test is parametrized to account for the exclusions in some report streams. @@ -52,22 +49,28 @@ def test_run(self): Both runs account for uncategorized exclusion fields. See method in base.py. """ - # Test start date selecting all fields for standard streams, and all statistic fields for streams with exclusions + TDL_24648_is_done = base.JIRA_CLIENT.get_status_category("TDL-24648") == "done" + assert TDL_24648_is_done == False, ("TDL-24648 is done, Re-add report streams to " + "expected_sync_streams") + + # Test start date selecting all fields for standard streams, and all statistic fields for + # streams with exclusions streams_to_fields_with_statistics = dict() for stream in self.expected_streams_with_exclusions(): - streams_to_fields_with_statistics[stream] = self.get_as_many_fields_as_possbible_excluding_attributes(stream) + streams_to_fields_with_statistics[stream] = \ + self.get_as_many_fields_as_possbible_excluding_attributes(stream) self.start_date_test(streams_to_fields_with_statistics) - - # Test start date selecting all fields for standard streams and all attribute fields for streams with exclusions + # Test start date selecting all fields for standard streams and all attribute fields for + # streams with exclusions streams_to_fields_with_attributes = dict() for stream in self.expected_streams_with_exclusions(): - streams_to_fields_with_attributes[stream] = self.get_as_many_fields_as_possbible_excluding_statistics(stream) + streams_to_fields_with_attributes[stream] = \ + self.get_as_many_fields_as_possbible_excluding_statistics(stream) self.start_date_test(streams_to_fields_with_attributes) - def start_date_test(self, streams_to_fields_with_exclusions): """Instantiate start date according to the desired data set and run the test""" @@ -87,28 +90,43 @@ def start_date_test(self, streams_to_fields_with_exclusions): found_catalogs_1 = menagerie.get_catalogs(conn_id_1) # ensure our expectations are consistent for streams with exclusions - self.assertSetEqual(self.expected_streams_with_exclusions(), set(self.get_all_attributes().keys())) - self.assertSetEqual(self.expected_streams_with_exclusions(), set(self.get_all_statistics().keys())) + 
self.assertSetEqual(self.expected_streams_with_exclusions(), + set(self.get_all_attributes().keys())) + self.assertSetEqual(self.expected_streams_with_exclusions(), + set(self.get_all_statistics().keys())) # table and field selection - test_catalogs_1_all_fields = [catalog for catalog in found_catalogs_1 - if catalog.get('tap_stream_id') in self.expected_sync_streams() - and catalog.get('tap_stream_id') not in self.expected_streams_with_exclusions()] + test_catalogs_1_all_fields = [ + catalog for catalog in found_catalogs_1 + if catalog.get('tap_stream_id') in self.expected_sync_streams() + and catalog.get('tap_stream_id') not in self.expected_streams_with_exclusions()] + # BUG (https://stitchdata.atlassian.net/browse/SRCE-4304) - # self.perform_and_verify_and_field_selection(conn_id_1, test_catalogs_1_all_fields, select_all_fields=True) - self.select_all_streams_and_fields(conn_id_1, test_catalogs_1_all_fields, select_all_fields=True) # BUG_SRCE-4304 - test_catalogs_1_specific_fields = [catalog for catalog in found_catalogs_1 - if catalog.get('tap_stream_id') in self.expected_sync_streams() - and catalog.get('tap_stream_id') in self.expected_streams_with_exclusions()] - self.perform_and_verify_adjusted_selection(conn_id_1, test_catalogs_1_specific_fields, - select_all_fields=False, specific_fields=streams_to_fields_with_exclusions) + # self.perform_and_verify_and_field_selection(conn_id_1, + # test_catalogs_1_all_fields, + # select_all_fields=True) + self.select_all_streams_and_fields(conn_id_1, + test_catalogs_1_all_fields, + select_all_fields=True) # BUG_SRCE-4304 + + test_catalogs_1_specific_fields = [ + catalog for catalog in found_catalogs_1 + if catalog.get('tap_stream_id') in self.expected_sync_streams() + and catalog.get('tap_stream_id') in self.expected_streams_with_exclusions()] + + self.perform_and_verify_adjusted_selection( + conn_id_1, + test_catalogs_1_specific_fields, + select_all_fields=False, + specific_fields=streams_to_fields_with_exclusions) # run initial sync state = menagerie.get_state(conn_id_1) record_count_by_stream_1 = self.run_and_verify_sync(conn_id_1, state) replicated_row_count_1 = sum(record_count_by_stream_1.values()) - self.assertGreater(replicated_row_count_1, 0, msg="failed to replicate any data: {}".format(record_count_by_stream_1)) + self.assertGreater(replicated_row_count_1, 0, + msg="failed to replicate any data: {}".format(record_count_by_stream_1)) LOGGER.info("total replicated row count: %s", replicated_row_count_1) synced_records_1 = runner.get_records_from_target_output() @@ -116,7 +134,8 @@ def start_date_test(self, streams_to_fields_with_exclusions): ### Update START DATE Between Syncs ########################################################################## - LOGGER.info("REPLICATION START DATE CHANGE: %s ===>>> %s ", self.start_date, self.start_date_2) + LOGGER.info("REPLICATION START DATE CHANGE: %s ===>>> %s ", self.start_date, + self.start_date_2) self.start_date = self.start_date_2 ########################################################################## @@ -124,23 +143,32 @@ def start_date_test(self, streams_to_fields_with_exclusions): ########################################################################## # create a new connection with the new start_date - conn_id_2 = self.create_connection(original_properties=False) + conn_id_2 = self.create_connection() # run check mode found_catalogs_2 = menagerie.get_catalogs(conn_id_2) # table and field selection - test_catalogs_2_all_fields = [catalog for catalog in found_catalogs_2 - if 
catalog.get('tap_stream_id') in self.expected_sync_streams() - and catalog.get('tap_stream_id') not in self.expected_streams_with_exclusions()] + test_catalogs_2_all_fields = [ + catalog for catalog in found_catalogs_2 + if catalog.get('tap_stream_id') in self.expected_sync_streams() + and catalog.get('tap_stream_id') not in self.expected_streams_with_exclusions()] # BUG (https://stitchdata.atlassian.net/browse/SRCE-4304) - # self.perform_and_verify_and_field_selection(conn_id_2, test_catalogs_2_all_fields, select_all_fields=True) - self.select_all_streams_and_fields(conn_id_2, test_catalogs_2_all_fields, select_all_fields=True) # BUG_SRCE-4304 - test_catalogs_2_specific_fields = [catalog for catalog in found_catalogs_2 - if catalog.get('tap_stream_id') in self.expected_sync_streams() - and catalog.get('tap_stream_id') in self.expected_streams_with_exclusions()] - self.perform_and_verify_adjusted_selection(conn_id_2, test_catalogs_2_specific_fields, - select_all_fields=False, specific_fields=streams_to_fields_with_exclusions) + # self.perform_and_verify_and_field_selection(conn_id_2, + # test_catalogs_2_all_fields, + # select_all_fields=True) + self.select_all_streams_and_fields(conn_id_2, + test_catalogs_2_all_fields, + select_all_fields=True) # BUG_SRCE-4304 + test_catalogs_2_specific_fields = [ + catalog for catalog in found_catalogs_2 + if catalog.get('tap_stream_id') in self.expected_sync_streams() + and catalog.get('tap_stream_id') in self.expected_streams_with_exclusions()] + self.perform_and_verify_adjusted_selection( + conn_id_2, + test_catalogs_2_specific_fields, + select_all_fields=False, + specific_fields=streams_to_fields_with_exclusions) # run sync state = menagerie.get_state(conn_id_2) @@ -163,32 +191,36 @@ def start_date_test(self, streams_to_fields_with_exclusions): if self.is_report(stream): # Verify replication key is greater or equal to start_date for sync 1 - replication_dates_1 =[row.get('data').get(replication_key) - for row in synced_records_1.get(stream, []).get('messages', [])] + replication_dates_1 = [ + row.get('data').get(replication_key) + for row in synced_records_1.get(stream, []).get('messages', [])] for replication_date in replication_dates_1: self.assertGreaterEqual( - self.parse_date(replication_date), self.parse_date(self.start_date_1), - msg="Report pertains to a date prior to our start date.\n" + + self.parse_date(replication_date), + self.parse_date(self.start_date_1), + msg="Report pertains to a date prior to our start date.\n" + "Sync start_date: {}\n".format(self.start_date_1) + "Record date: {} ".format(replication_date) ) # Verify replication key is greater or equal to start_date for sync 2 - replication_dates_2 =[row.get('data').get(replication_key) - for row in synced_records_2.get(stream, []).get('messages', [])] + replication_dates_2 = [ + row.get('data').get(replication_key) + for row in synced_records_2.get(stream, []).get('messages', [])] for replication_date in replication_dates_2: self.assertGreaterEqual( - self.parse_date(replication_date), self.parse_date(self.start_date_2), - msg="Report pertains to a date prior to our start date.\n" + + self.parse_date(replication_date), + self.parse_date(self.start_date_2), + msg="Report pertains to a date prior to our start date.\n" + "Sync start_date: {}\n".format(self.start_date_2) + "Record date: {} ".format(replication_date) ) elif stream == 'accounts': - # Verify that the 2nd sync with a later start date replicates the same number of - records as the 1st sync.
+ # Verify that the 2nd sync with a later start date replicates the same + # number of records as the 1st sync. self.assertEqual( record_count_2, record_count_1, msg="Second sync should result in fewer records\n" + @@ -198,7 +230,8 @@ def start_date_test(self, streams_to_fields_with_exclusions): "Sync 2 record_count: {}".format(record_count_2)) else: - raise NotImplementedError("Stream is not report-based and incremental. Must add assertion for it.") + raise NotImplementedError("Stream is not report-based and incremental. " + "Must add assertion for it.") elif replication_type == self.FULL_TABLE: @@ -214,8 +247,6 @@ def start_date_test(self, streams_to_fields_with_exclusions): else: - raise Exception( - "Expectations are set incorrectly. {} cannot have a replication method of {}".format( - stream, replication_type - ) + raise Exception("Expectations are set incorrectly. {} cannot have a " + "replication method of {}".format(stream, replication_type) ) diff --git a/tests/test_sync_rows.py b/tests/test_sync_rows.py index 3015f38..6b37255 100644 --- a/tests/test_sync_rows.py +++ b/tests/test_sync_rows.py @@ -44,7 +44,8 @@ def expected_pks(self): def test_run(self): # Select our catalogs # found_catalogs = menagerie.get_catalogs(conn_id) - # our_catalogs = [c for c in found_catalogs if c.get('tap_stream_id') in self.expected_sync_streams()] + # our_catalogs = [c for c in found_catalogs + # if c.get('tap_stream_id') in self.expected_sync_streams()] # for c in our_catalogs: # c_annotated = menagerie.get_annotated_schema(conn_id, c['stream_id']) # c_metadata = metadata.to_map(c_annotated['metadata']) @@ -56,7 +57,8 @@ def test_run(self): menagerie.set_state(conn_id, {}) # Select a stream found_catalogs = menagerie.get_catalogs(conn_id) - our_catalogs = [catalog for catalog in found_catalogs if catalog.get('tap_stream_id') in self.expected_sync_streams()] + our_catalogs = [catalog for catalog in found_catalogs + if catalog.get('tap_stream_id') in self.expected_sync_streams()] self.select_all_streams_and_fields(conn_id, our_catalogs, select_all_fields=False) # Run a sync job using orchestrator @@ -86,6 +88,6 @@ def test_run(self): self.assertTrue(stream not in bookmarks) else: - raise NotImplementedError( - "stream {} has an invalid replication method {}".format(stream, replication_method) + raise NotImplementedError("stream {} has an invalid replication " + "method {}".format(stream, replication_method) )
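A note on the rolling start_date shared by tests/base.py and tests/base_new_framework.py: dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z") keeps the configured window just inside the roughly three-year Bing Ads report retention, and it relies on timedelta being imported in each base module. A small sketch of the computation, with a UTC-based variant shown as an assumption (the diff itself uses local time via dt.now()):

    from datetime import datetime as dt
    from datetime import timedelta, timezone

    # as in the diff: local "now", pinned to midnight by the format string
    start_date = dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z")

    # hypothetical UTC variant, avoiding local-timezone drift near day boundaries
    start_date_utc = dt.strftime(dt.now(timezone.utc) - timedelta(days=365*3),
                                 "%Y-%m-%dT00:00:00Z")

Either way the value lands on midnight of the computed day, because the format string hard-codes T00:00:00Z instead of formatting the actual clock time.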