Skip to content

Commit

Permalink
Update tests due to aged out report data (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
bhtowles authored Jan 22, 2024
1 parent 8c8d90f commit e1f418c
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 209 deletions.
26 changes: 18 additions & 8 deletions tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@
from tap_tester import connections, menagerie, runner, LOGGER
from tap_tester.base_case import BaseCase

from tap_tester.jira_client import JiraClient as jira_client
from tap_tester.jira_client import CONFIGURATION_ENVIRONMENT as jira_config

JIRA_CLIENT = jira_client({**jira_config})

def skipUntilDone(jira_ticket):
    """Build a decorator that skips a test until a JIRA ticket is done.

    The ticket's status category is fetched through the module-level
    JIRA_CLIENT at the moment the returned decorator is applied to a test
    method. The test is skipped, with the ticket id as the reason, unless
    that category equals "done".

    NOTE(review): BaseCase.skipUnless presumably mirrors unittest.skipUnless;
    confirm against tap_tester.
    """

    def decorate(test_method):
        status_category = JIRA_CLIENT.get_status_category(jira_ticket)
        skip_unless_done = BaseCase.skipUnless(status_category == "done", jira_ticket)
        return skip_unless_done(test_method)

    return decorate

def backoff_wait_times():
    """Create a generator of wait times as [30, 60, 120, 240, 480, ...]

    The generator comes from backoff.expo with a factor of 30.
    """
    wait_times = backoff.expo(factor=30)
    return wait_times
Expand Down Expand Up @@ -41,6 +54,9 @@ class BingAdsBaseTest(BaseCase):
DEFAULT_CONVERSION_WINDOW = -30 # days
REQUIRED_KEYS = "required_keys"

# respect tap-bing-ads data retention window by looking back a maximum of about 3 years
start_date = dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z")

@staticmethod
def tap_name():
"""The name of the tap"""
Expand All @@ -54,7 +70,8 @@ def get_type():
def get_properties(self, original: bool = True):
"""Configuration properties required for the tap."""
return_value = {
'start_date': '2020-10-01T00:00:00Z',
# 'start_date': '2020-10-01T00:00:00Z', # original start_date
'start_date': self.start_date,
'customer_id': '163875182',
'account_ids': '163078754,140168565,71086605',
# 'conversion_window': '-15', # advanced option
Expand All @@ -63,13 +80,6 @@ def get_properties(self, original: bool = True):
# cid=42183085 aid=163078754 uid=71069166 (Stitch)
# cid=42183085 aid=140168565 uid=71069166 (TestAccount)

if original:
return return_value

# This test needs the new connections start date to be larger than the default
assert self.start_date > return_value["start_date"]

return_value["start_date"] = self.start_date
return return_value

@staticmethod
Expand Down
86 changes: 42 additions & 44 deletions tests/base_new_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from datetime import datetime as dt
from datetime import timezone as tz

#from tap_tester import connections, menagerie, runner, LOGGER
from tap_tester.base_suite_tests.base_case import BaseCase

def backoff_wait_times():
Expand All @@ -25,6 +24,9 @@ class BingAdsBaseTest(BaseCase):
"""
REQUIRED_KEYS = "required_keys"

# respect tap-bing-ads data retention window by looking back a maximum of about 3 years
start_date = dt.strftime(dt.now() - timedelta(days=365*3), "%Y-%m-%dT00:00:00Z")

@staticmethod
def tap_name():
"""The name of the tap"""
Expand All @@ -35,10 +37,10 @@ def get_type():
"""the expected url route ending"""
return "platform.bing-ads"

def get_properties(self, original: bool = True):
def get_properties(self):
"""Configuration properties required for the tap."""
return_value = {
'start_date': '2020-10-01T00:00:00Z',
'start_date': self.start_date,
'customer_id': '163875182',
'account_ids': '163078754,140168565,71086605',
# 'conversion_window': '-15', # advanced option
Expand All @@ -47,13 +49,6 @@ def get_properties(self, original: bool = True):
# cid=42183085 aid=163078754 uid=71069166 (Stitch)
# cid=42183085 aid=140168565 uid=71069166 (TestAccount)

if original:
return return_value

# This test needs the new connections start date to be larger than the default
assert self.start_date > return_value["start_date"]

return_value["start_date"] = self.start_date
return return_value

@staticmethod
Expand All @@ -77,7 +72,7 @@ def expected_metadata():
default_report = {
BaseCase.PRIMARY_KEYS: set(), # "_sdc_report_datetime" is added by tap
BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
BaseCase.REPLICATION_KEYS: {"TimePeriod"}, # It used in sync but not mentioned in catalog. Bug: TDL-15816
BaseCase.REPLICATION_KEYS: {"TimePeriod"}, # in sync but not in catalog TDL-15816
BaseCase.FOREIGN_KEYS: {"AccountId"}
}
accounts_meta = {
Expand Down Expand Up @@ -106,12 +101,16 @@ def expected_metadata():
}

age_gender_report = copy.deepcopy(default_report)
age_gender_report[BingAdsBaseTest.REQUIRED_KEYS] = {'AccountName', 'AdGroupName', 'AgeGroup', 'Gender'}
age_gender_report[BingAdsBaseTest.REQUIRED_KEYS] = {'AccountName',
'AdGroupName',
'AgeGroup',
'Gender'}

return {
"accounts": accounts_meta,
"ad_extension_detail_report": extension_report, # BUG_DOC-1504 | https://stitchdata.atlassian.net/browse/DOC-1504
"ad_group_performance_report": default_report, # BUG_DOC-1567 https://stitchdata.atlassian.net/browse/DOC-1567
"ad_extension_detail_report": extension_report, # BUG_DOC-1504
# https://stitchdata.atlassian.net/browse/DOC-1504
"ad_group_performance_report": default_report, # BUG_DOC-1567
"ad_groups": default,
"ad_performance_report": default_report,
"ads": default,
Expand All @@ -129,20 +128,18 @@ def expected_metadata():
def setUpClass(cls):
    """Run the parent class setup, then verify the tap credentials are set.

    Raises:
        Exception: if any required TAP_BING_ADS_* environment variable is
            unset; the message lists the missing names.
    """
    super().setUpClass(logging="Ensuring environment variables are sourced.")

    required_envs = (
        'TAP_BING_ADS_OAUTH_CLIENT_ID',
        'TAP_BING_ADS_OAUTH_CLIENT_SECRET',
        'TAP_BING_ADS_REFRESH_TOKEN',
        'TAP_BING_ADS_DEVELOPER_TOKEN',
    )
    missing_envs = []
    for env_name in required_envs:
        if os.getenv(env_name) is None:
            missing_envs.append(env_name)

    if missing_envs:
        raise Exception("Missing environment variables: {}".format(missing_envs))

def expected_replication_method(self,stream=None):
"""return a dictionary with key of table name nd value of replication method
TDL-15816
Currently, in tap, all streams are FULL_TABLE except accounts.
But as per the doc https://www.stitchdata.com/docs/integrations/saas/microsoft-advertising,
""" Return a dictionary with key of table name and value of replication method
TDL-15816 - Currently, in tap, all streams are FULL_TABLE except accounts. But as per
the doc https://www.stitchdata.com/docs/integrations/saas/microsoft-advertising,
only the below streams are FULL TABLE, all other streams are INCREMENTAL.
ads
ad_groups
Expand All @@ -153,48 +150,49 @@ def expected_replication_method(self,stream=None):
for table, properties in self.expected_metadata().items():
rep_method[table] = properties.get(self.REPLICATION_METHOD, None)
for streams in rep_method.keys():
if streams in [ 'ad_extension_detail_report', 'ad_group_performance_report', 'ad_performance_report',
'age_gender_audience_report', 'audience_performance_report', 'campaign_performance_report', 'geographic_performance_report', 'goals_and_funnels_report', 'keyword_performance_report',
'search_query_performance_report']:
if streams in [ 'ad_extension_detail_report', 'ad_group_performance_report',
'ad_performance_report', 'age_gender_audience_report',
'audience_performance_report', 'campaign_performance_report',
'geographic_performance_report', 'goals_and_funnels_report',
'keyword_performance_report', 'search_query_performance_report']:
rep_method[streams] = 'FULL_TABLE'
if not stream:
return rep_method
return rep_method[stream]

def expected_replication_keys(self, stream=None):
    """
    Return the expected replication keys.

    With no stream given, return a dictionary keyed by table name whose
    values are sets of replication key fields. With a stream given, return
    only that stream's set.
    """
    # TDL-15816: the tap treats every stream as FULL TABLE, so no replication
    # key is specified for any stream. "TimePeriod" is therefore removed from
    # the expected replication keys until the correct replication method and
    # keys are determined.
    keys_by_table = {}
    for table, properties in self.expected_metadata().items():
        declared_keys = properties.get(self.REPLICATION_KEYS, set())
        keys_by_table[table] = declared_keys - {"TimePeriod"}

    if stream:
        return keys_by_table[stream]
    return keys_by_table

def expected_automatic_fields(self, stream=None):
    """
    Return the expected automatic fields.

    With no stream given, return a dictionary keyed by table name whose
    values are sets of automatic fields. With a stream given, return only
    that stream's set.

    A table's automatic fields are the union of its primary, replication,
    foreign and required keys. '_sdc_report_datetime' is added for every
    table except 'ads', 'ad_groups', 'campaigns' and 'accounts'.
    """
    # _sdc_report_datetime is mentioned as primary key for most streams in docs, but is not
    # returned as pk by the tap, hence adding it explicitly to automatic fields TDL-15816
    streams_without_report_datetime = {'ads', 'ad_groups', 'campaigns', 'accounts'}

    auto_fields = {}
    for table, properties in self.expected_metadata().items():
        fields = (properties.get(self.PRIMARY_KEYS, set())
                  | properties.get(self.REPLICATION_KEYS, set())
                  | properties.get(self.FOREIGN_KEYS, set())
                  | properties.get(self.REQUIRED_KEYS, set()))
        # Decide per table. Indexing by the `stream` argument here would raise
        # KeyError when stream is None and would copy one stream's fields onto
        # the four non-report streams otherwise.
        if table not in streams_without_report_datetime:
            fields = fields | {'_sdc_report_datetime'}
        auto_fields[table] = fields

    if not stream:
        return auto_fields
    return auto_fields[stream]
114 changes: 65 additions & 49 deletions tests/test_all_fields.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,78 @@


from tap_tester.base_suite_tests.all_fields_test import AllFieldsTest
from base_new_framework import BingAdsBaseTest

from tap_tester.base_case import BaseCase as base
from tap_tester.jira_client import JiraClient as jira_client
from tap_tester.jira_client import CONFIGURATION_ENVIRONMENT as jira_config

JIRA_CLIENT = jira_client({**jira_config})


class AllFieldsTest(AllFieldsTest,BingAdsBaseTest):
""" Test the tap all_fields """

start_date = '2021-01-01T00:00:00Z'

@staticmethod
def name():
return "tap_tester_bing_ads_all_fields_test"

# update all tests in repo when JIRA cards are complete
TDL_23223_is_done = JIRA_CLIENT.get_status_category("TDL-23223") == "done"
assert TDL_23223_is_done == False, "TDL-23223 is done, Re-add streams with fixed exclusions"
TDL_24648_is_done = JIRA_CLIENT.get_status_category("TDL-24648") == "done"
assert TDL_24648_is_done == False, "TDL-24648 is done, Re-add streams that have data"

def streams_to_test(self):
streams_to_exclude={'ad_group_performance_report','campaign_performance_report','goals_and_funnels_report'}
"""
TODO
Excluded the ad_group and campaign report streams, has the Exclusion's file doesn't have the latest exclusions,
to be removed after TDL-23223 is fixed
Goals stream has no active data
"""
# TODO Excluded ad_group and campaign report streams due to exclusions file errors: the
# current file doesn't appear to have the latest exclusions. To be removed after TDL-23223
# is fixed. Data has aged out of the 3 year retention window for the other report streams.
# Work with marketing / dev to see if new data can be generated.
streams_to_exclude = {'ad_extension_detail_report',
'ad_performance_report',
'ad_group_performance_report', # TDL-23223
'age_gender_audience_report',
'audience_performance_report',
'campaign_performance_report', # TDL-23223
'geographic_performance_report',
'goals_and_funnels_report',
'keyword_performance_report',
'search_query_performance_report'}

return self.expected_stream_names().difference(streams_to_exclude)

def missing_fields(self):
return {
'accounts':{
'TaxCertificate',
'AccountMode'
},
'ads':{
'Descriptions',
'LongHeadlineString',
'BusinessName',
'Videos',
'LongHeadlines',
'Images',
'LongHeadline',
'PromotionalText',
'CallToAction',
'AppStoreId',
'Headlines',
'ImpressionTrackingUrls',
'CallToActionLanguage',
'Headline',
'AppPlatform'
},
'campaigns':{
'MultimediaAdsBidAdjustment',
'AdScheduleUseSearcherTimeZone',
'BidStrategyId'
},
'ad_groups':{
'CpvBid',
'AdGroupType',#Talend Data Loader TDL-23228 -- data present in fronend but not returned in synced records
'MultimediaAdsBidAdjustment',
'AdScheduleUseSearcherTimeZone',
'CpmBid'
}
}
def test_all_fields_for_streams_are_replicated(self):
self.selected_fields = {k:v - self.missing_fields().get(k, set())
for k,v in AllFieldsTest.selected_fields.items()}
super().test_all_fields_for_streams_are_replicated()
MISSING_FIELDS = {
'accounts':{
'TaxCertificate',
'AccountMode'
},
'ads':{
'Descriptions',
'LongHeadlineString',
'BusinessName',
'Videos',
'LongHeadlines',
'Images',
'LongHeadline',
'PromotionalText',
'CallToAction',
'AppStoreId',
'Headlines',
'ImpressionTrackingUrls',
'CallToActionLanguage',
'Headline',
'AppPlatform'
},
'campaigns':{
'MultimediaAdsBidAdjustment',
'AdScheduleUseSearcherTimeZone',
'BidStrategyId'
},
'ad_groups':{
'CpvBid',
'AdGroupType', # TDL-23228 -- data present in frontend but not returned in synced records
'MultimediaAdsBidAdjustment',
'AdScheduleUseSearcherTimeZone',
'CpmBid'
}
}
Loading

0 comments on commit e1f418c

Please sign in to comment.