From e9e415650b2b8fc07e4ae68c741e692b538e4a2c Mon Sep 17 00:00:00 2001 From: Alex Kort Date: Mon, 19 Apr 2021 14:45:43 +0900 Subject: [PATCH] Remove support for gtin optimizer. PiperOrigin-RevId: 369155418 --- README.md | 1 - docs/developer-guide.md | 5 +- .../optimizers_builtin/gtin_optimizer.py | 203 ------------------ .../optimizers_builtin/gtin_optimizer_test.py | 104 --------- 4 files changed, 2 insertions(+), 311 deletions(-) delete mode 100644 shoptimizer_api/optimizers_builtin/gtin_optimizer.py delete mode 100644 shoptimizer_api/optimizers_builtin/gtin_optimizer_test.py diff --git a/README.md b/README.md index cf8c81b..376d814 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,6 @@ color-length-optimizer | _Optimization_ | Fixes the length o condition-optimizer | _Sanitization_ | If the condition field is specified as "new", but other fields in the product imply that the condition is otherwise, this optimizer will set the condition value to "used". This will lead to the product avoiding disapproval. (It is also possible that accounts that misrepresent condition can be suspended.) description-optimizer | _Optimization_ | Appends the following product attributes to the product description if they could be mined: brand, color, sizes, gender. It will also create the description from these attributes if it does not exist. These mined fields being in the description have a possibility to increase ad performance. (This optimizer will also perform attribute mining: brand, color, sizes, and gender attributes will be added to the product fields if they do not exist and could be mined.) free-shipping-optimizer | _Sanitization_ | Removes promotional text related with free shipping from title. This will lead to the product avoiding disapproval. It must be run before title-optimizer. 
-gtin-optimizer | _Sanitization_ | For 14-digit GTIN values, the gtin must not make use of the bulk indicator digit (9) as specified [here](https://www.gs1.org/1/gtinrules/en/rule/167/bulk-items). It must also not make use of the restricted ranges in the company prefix as specified [here](https://support.google.com/merchants/answer/6286302), nor make use of coupon ranges as specified [here](https://support.google.com/merchants/answer/6324461?hl=en). Finally, the last digit of the gtin must match the formula defined by the GS1 standard as described [here](https://www.gs1.org/services/how-calculate-check-digit-manually). If the gtin fails any of these checks, this optimizer will delete the gtin field from the product. This will lead to the product avoiding disapproval. identifier-exists-optimizer | _Sanitization_ | Removes invalid identifierExists fields. Items that have a brand, mpn, or gtin set and identifierExists as "false" cause disapproval, so this optimizer will delete the identifierExists value in these cases, which defaults the value to true via Content API. This will lead to the product avoiding disapproval. invalid-chars-optimizer | _Sanitization/Optimization_ | Removes invalid chars from the product title and description. Invalid chars are those with code points that map to the Unicode private use area (0xE000-0xF8FF). This will lead to the product avoiding disapproval if the invalid char is in the title, and lead to the description not being rejected if the invalid char is in the description. mpn-optimizer | _Sanitization_ | Removes invalid MPN fields. Certain MPN values will cause products to be disapproved. If an invalid MPN value is detected, this optimizer will delete it. This will lead to the product avoiding disapproval. The list of invalid MPNs can be found in `mpn_optimizer.py`. 
diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 6a75546..f74317a 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -96,7 +96,6 @@ _**Optional:**_ * `condition-optimizer=(true/false)` * `description-optimizer=(true/false)` * `free-shipping-optimizer=(true/false)` -* `gtin-optimizer=(true/false)` * `identifier-exists-optimizer=(true/false)` * `invalid-chars-optimizer=(true/false)` * `mpn-optimizer=(true/false)` @@ -486,10 +485,10 @@ def shoptimize(original_product_batch_dictionary: Dict[str, Any]) -> Dict[str, A Append `optimizer-key=true` as a URL parameter in your call to Shoptimizer for each optimizer you want to run. -For example, to run the mpn-optimizer and gtin-optimizer, use the following +For example, to run the mpn-optimizer and title-optimizer, use the following endpoint: -`.../shoptimizer/v1/batch/optimize?mpn-optimizer=true&gtin-optimizer=true` +`.../shoptimizer/v1/batch/optimize?mpn-optimizer=true&title-optimizer=true` ## 6. Writing a Plugin diff --git a/shoptimizer_api/optimizers_builtin/gtin_optimizer.py b/shoptimizer_api/optimizers_builtin/gtin_optimizer.py deleted file mode 100644 index 92fc2b6..0000000 --- a/shoptimizer_api/optimizers_builtin/gtin_optimizer.py +++ /dev/null @@ -1,203 +0,0 @@ -# coding=utf-8 -# Copyright 2021 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""A module for Shoptimizer API that fixes invalid gtin values. 
- -Reference: https://support.google.com/merchants/answer/6324461 - -This optimizer does several validations on the gtin value: - -1. The gtin must consist of integers, and fall within a range of valid lengths. - -2. The gtin must not start with the bulk indicator digit (9): -https://support.google.com/merchants/answer/6286298?hl=en - -3. The gtin must not use the reserved range for its prefix: -https://support.google.com/merchants/answer/7000684?hl=en - -4. The gtin must not use the coupon range for its prefix: -https://support.google.com/merchants/answer/6286302?hl=en - -5. The last digit of the gtin must match the formula defined here: -https://www.gs1.org/services/how-calculate-check-digit-manually - -If it fails these checks, this optimizer will remove the gtin field from the -product to prevent the product from being disapproved in Merchant Center. -""" -import logging -import math -from typing import Any, Dict - -from optimizers_abstract import base_optimizer - -_VALID_GTIN_LENGTHS = [8, 12, 13, 14] -_COUPON_PREFIXES = ['981', '982', '983', '984', '99', '05'] -_RESTRICTED_PREFIXES = ['020-029', '040-049', '200-299'] -_INVALID_BULK_INDICATOR = '9' - - -class GTINOptimizer(base_optimizer.BaseOptimizer): - """"An optimizer that fixes invalid gtin values.""" - - _OPTIMIZER_PARAMETER = 'gtin-optimizer' - - def _optimize(self, product_batch: Dict[str, Any], language: str, - country: str, currency: str) -> int: - """Runs the optimization. - - Fixes invalid gtin fields. - See above for the definition of an invalid gtin field. - - Args: - product_batch: A batch of product data. - language: The language to use for this optimizer. - country: The country to use for this optimizer. - currency: The currency to use for this optimizer. 
- - Returns: - The number of products affected by this optimization: int - """ - num_of_products_optimized = 0 - for entry in product_batch['entries']: - product = entry['product'] - if 'gtin' in product: - gtin = product.get('gtin', '') - - violates_any_gtin_check = (_gtin_fails_format_check(gtin) or - _gtin_uses_bulk_indicator(gtin) or - _gtin_uses_reserved_range(gtin) or - _gtin_uses_coupon_range(gtin) or - _gtin_fails_checksum(gtin)) - if violates_any_gtin_check: - _remove_gtin(product) - num_of_products_optimized += 1 - - return num_of_products_optimized - - -def _remove_gtin(product: Dict[str, Any]) -> None: - """Clears the gtin value from the product. - - Args: - product: A dictionary representing a single shopping product. - """ - violating_gtin = product.get('gtin', '') - del product['gtin'] - logging.info( - 'Modified item %s: Cleared invalid gtin: %s to ' - 'prevent disapproval', product.get('offerId', ''), violating_gtin) - base_optimizer.set_optimization_tracking(product, base_optimizer.SANITIZED) - - -def _gtin_uses_bulk_indicator(gtin: str) -> bool: - """Determines if the provided gtin violates the bulk indicator digit check. - - Args: - gtin: a string representing the product's GTIN. - - Returns: - True if the indicator digit is 9, otherwise False. - """ - return len(gtin) == 14 and gtin[0] == _INVALID_BULK_INDICATOR - - -def _gtin_uses_reserved_range(gtin: str) -> str: - """Determines if the provided gtin violates the reserved prefix check. - - Args: - gtin: a string representing the product's GTIN. - - Returns: - True if the prefix is in a reserved prefix range, otherwise False. - """ - company_prefix = int(gtin[1:4]) - for restricted_prefix in _RESTRICTED_PREFIXES: - if company_prefix >= int( - restricted_prefix.split('-')[0]) and company_prefix <= int( - restricted_prefix.split('-')[1]): - return True - return False - - -def _gtin_uses_coupon_range(gtin: str) -> bool: - """Determines if the provided gtin violates the coupon prefix check. 
- - Args: - gtin: a string representing the product's GTIN. - - Returns: - True if the prefix is in a coupon prefix range, otherwise False. - """ - return gtin[1:].startswith(tuple(_COUPON_PREFIXES)) - - -def _gtin_fails_format_check(gtin: str) -> bool: - """Determines if the provided gtin violates basic sanity checks. - - Args: - gtin: a string representing the product's GTIN - - Returns: - True if the gtin fails the validations, otherwise False. - """ - if not gtin.isdigit() or len( - gtin) not in _VALID_GTIN_LENGTHS or _contains_repeating_digits( - gtin[:-1]) or _contains_sequential_digits(gtin): - return True - return False - - -def _gtin_fails_checksum(gtin: str) -> bool: - """Determines if the provided gtin violates the check digit calculation. - - Args: - gtin: a string representing the product's GTIN - - Returns: - True if the gtin fails check digit validation, otherwise False. - """ - padded_gtin = gtin.zfill(14) - existing_check_digit = int(padded_gtin[-1]) - target_check_digit = _calculate_check_digit(padded_gtin[:-1]) - return target_check_digit != existing_check_digit - - -def _calculate_check_digit(partial_gtin: str) -> int: - """Calculates the expected check digit of a GTIN (without the last digit). - - Args: - partial_gtin: a string representing a product GTIN without the check digit. - - Returns: - the calculated expected check digit of the input GTIN. 
- """ - odds = list(partial_gtin[::2]) - evens = [int(x) for x in list(partial_gtin[1::2])] - odds_times_three = [int(x) * 3 for x in odds] - sum_mults = sum(evens) + sum(odds_times_three) - check_digit = _round_up(sum_mults) - sum_mults - return check_digit - - -def _round_up(x) -> int: - return int(math.ceil(x / 10.0)) * 10 - - -def _contains_repeating_digits(gtin: str) -> bool: - return gtin.count(gtin[0]) == len(gtin) - - -def _contains_sequential_digits(gtin: str) -> bool: - return gtin.startswith('123456789') diff --git a/shoptimizer_api/optimizers_builtin/gtin_optimizer_test.py b/shoptimizer_api/optimizers_builtin/gtin_optimizer_test.py deleted file mode 100644 index fec6349..0000000 --- a/shoptimizer_api/optimizers_builtin/gtin_optimizer_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# coding=utf-8 -# Copyright 2021 Google LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Unit tests for gtin_optimizer.py.""" - -from absl.testing import parameterized - -from optimizers_builtin import gtin_optimizer -from test_data import requests_bodies - - -class GTINOptimizerTest(parameterized.TestCase): - - def setUp(self) -> None: - super(GTINOptimizerTest, self).setUp() - self.optimizer = gtin_optimizer.GTINOptimizer() - - @parameterized.named_parameters([{ - 'testcase_name': 'empty', - 'test_gtin': '', - }, { - 'testcase_name': 'invalid 7-digit GTIN', - 'test_gtin': '9504000', - }, { - 'testcase_name': 'invalid 15-digit GTIN', - 'test_gtin': '009781594741753', - }, { - 'testcase_name': 'invalid 8-digit GTIN', - 'test_gtin': '12345678', - }, { - 'testcase_name': 'invalid 12-digit GTIN', - 'test_gtin': '978159474175', - }, { - 'testcase_name': 'invalid 13-digit GTIN', - 'test_gtin': '9781594741754', - }, { - 'testcase_name': 'invalid 14-digit GTIN', - 'test_gtin': '12345678901234', - }, { - 'testcase_name': 'invalid GTIN with letters', - 'test_gtin': '123456789z1234', - }, { - 'testcase_name': 'invalid repeating-number GTIN', - 'test_gtin': '1111111111116', - }, { - 'testcase_name': 'invalid sequential-number GTIN', - 'test_gtin': '123456789999', - }, { - 'testcase_name': 'invalid coupon prefix', - 'test_gtin': '19834567890123', - }, { - 'testcase_name': 'invalid restricted prefix', - 'test_gtin': '10424567890123', - }, { - 'testcase_name': 'invalid bulk indicator', - 'test_gtin': '90424567890123', - }]) - def test_gtin_optimizer_removes_gtin_from_request_on_invalid_gtins( - self, test_gtin): - original_data = requests_bodies.build_request_body( - properties_to_be_updated={'gtin': test_gtin}) - - optimized_data, optimization_result = self.optimizer.process(original_data) - product = optimized_data['entries'][0]['product'] - - self.assertNotIn('gtin', product) - self.assertEqual(1, optimization_result.num_of_products_optimized) - - @parameterized.named_parameters([{ - 'testcase_name': 'valid 13-digit GTIN', - 'test_gtin': '9504000059422', - 
}, { - 'testcase_name': 'another valid 13-digit GTIN', - 'test_gtin': '9781594741753', - }]) - def test_gtin_optimizer_does_not_transform_valid_data(self, test_gtin): - original_data = requests_bodies.build_request_body( - properties_to_be_updated={'gtin': test_gtin}) - - optimized_data, optimization_result = self.optimizer.process(original_data) - - self.assertEqual(original_data, optimized_data) - self.assertEqual(0, optimization_result.num_of_products_optimized) - - def test_gtin_optimizer_does_not_transform_data_when_gtin_field_missing(self): - original_data = requests_bodies.build_request_body( - properties_to_be_removed=['gtin']) - - optimized_data, optimization_result = self.optimizer.process(original_data) - - self.assertEqual(original_data, optimized_data) - self.assertEqual(0, optimization_result.num_of_products_optimized)