diff --git a/scripts/.gitkeep b/scripts/.gitkeep deleted file mode 100644 index fb5fe755..00000000 --- a/scripts/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -./data/* diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/gdpr/create_prices_from_gdpr_csv.py b/scripts/gdpr/create_prices_from_gdpr_csv.py index d1ce2d6b..34053f74 100644 --- a/scripts/gdpr/create_prices_from_gdpr_csv.py +++ b/scripts/gdpr/create_prices_from_gdpr_csv.py @@ -1,14 +1,11 @@ -import csv import datetime import os import sys import time -import requests -from utils import get_picard_product_from_subcode +import utils as gdpr_utils -OPEN_PRICES_CREATE_PRICE_ENDPOINT = f'{os.environ.get("API_ENDPOINT")}/prices' -OPEN_PRICES_TOKEN = os.environ.get("API_TOKEN") +from scripts.utils import create_price, read_csv GDPR_FIELD_MAPPING_FILEPATH = "scripts/gdpr/gdpr_field_mapping.csv" @@ -28,6 +25,7 @@ REQUIRED_ENV_PARAMS = [ # "FILEPATH" + # "DELIMITER" (optional) "SOURCE", "LOCATION", "LOCATION_OSM_ID", @@ -35,7 +33,7 @@ "PROOF_ID", "API_ENDPOINT", "API_TOKEN", - # DRY_MODE + # DRY_RUN ] @@ -151,7 +149,7 @@ def gdpr_source_filter_rules(op_price_list, gdpr_source=""): elif gdpr_source == "INTERMARCHE": pass elif gdpr_source == "PICARD": - full_product_code = get_picard_product_from_subcode(op_price) + full_product_code = gdpr_utils.get_picard_product_from_subcode(op_price) if full_product_code: op_price["product_code"] = full_product_code else: @@ -182,26 +180,9 @@ def gdpr_source_location_rules(op_price_list): return op_price_list_filtered -def read_gdpr_field_mapping_csv(): - with open(GDPR_FIELD_MAPPING_FILEPATH, newline="") as csvfile: - reader = csv.DictReader(csvfile) - return list(reader) - - -def read_gdpr_csv(filepath): - price_list = list() - - with open(filepath, newline="") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - price_list.append(row) - - return price_list - - def 
map_gdpr_price_list_to_open_prices(gdpr_price_list, gdpr_source="", extra_data={}): # get mapping file - gdpr_field_mapping = read_gdpr_field_mapping_csv() + gdpr_field_mapping = read_csv(GDPR_FIELD_MAPPING_FILEPATH) # map source fields to op fields open_prices_price_list_1 = list() @@ -230,16 +211,6 @@ def map_gdpr_price_list_to_open_prices(gdpr_price_list, gdpr_source="", extra_da return open_prices_price_list_2 -def create_price(price): - headers = {"Authorization": f"Bearer {OPEN_PRICES_TOKEN}"} - response = requests.post( - OPEN_PRICES_CREATE_PRICE_ENDPOINT, json=price, headers=headers - ) - if not response.status_code == 201: - print(response.json()) - print(price) - - if __name__ == "__main__": """ How-to run: @@ -251,7 +222,10 @@ def create_price(price): sys.exit("Error: missing FILEPATH env") filepath = os.environ.get("FILEPATH") print(f"===== Reading {filepath}") - gdpr_price_list = read_gdpr_csv(filepath) + if os.environ.get("DELIMITER"): + gdpr_price_list = read_csv(filepath, delimiter=os.environ.get("DELIMITER")) + else: + gdpr_price_list = read_csv(filepath) print(len(gdpr_price_list)) print("===== Input example:") @@ -271,9 +245,8 @@ def create_price(price): "currency": DEFAULT_PRICE_CURRENCY, "location_osm_id": int(os.environ.get("LOCATION_OSM_ID")), "location_osm_type": os.environ.get("LOCATION_OSM_TYPE"), - "proof_id": int( - os.environ.get("PROOF_ID") - ), # must be of type "GDPR_REQUEST" :) + # proof_id must be of type "GDPR_REQUEST" :) + "proof_id": int(os.environ.get("PROOF_ID")), } open_prices_price_list = map_gdpr_price_list_to_open_prices( gdpr_price_list, gdpr_source=source, extra_data=extra_data @@ -299,10 +272,12 @@ def create_price(price): # Step 5: send prices to backend via API if os.environ.get("DRY_RUN") == "False": - print(f"===== Uploading data to {OPEN_PRICES_CREATE_PRICE_ENDPOINT}") + print(f"===== Uploading data to {os.environ.get('API_ENDPOINT')}") progress = 0 for index, price in enumerate(open_prices_price_list_filtered_2): 
- create_price(price) + create_price( + price, os.environ.get("API_ENDPOINT"), os.environ.get("API_TOKEN") + ) # some pauses to be safe progress += 1 if (progress % 10) == 0: @@ -310,4 +285,4 @@ def create_price(price): if (progress % 50) == 0: print(f"{progress}/{len(open_prices_price_list_filtered_2)}...") else: - sys.exit("No prices uploaded (DRY_RUN env missing or set to 'True')") + sys.exit("===== No prices uploaded (DRY_RUN env missing or set to 'True')") diff --git a/scripts/shop_import/README.md b/scripts/shop_import/README.md new file mode 100644 index 00000000..14a50b0d --- /dev/null +++ b/scripts/shop_import/README.md @@ -0,0 +1,42 @@ +# Uploading shop price data + +## Context + +One of our data sources is shop imports (supermarkets uploading data directly). + +Currently, some members are active volunteers in food co-ops, and get authorization to extract and upload data from their shops. + +## Usage + +### Step 0: prerequisites + +* have a .csv file of the prices +* upload these prices to a dedicated shop account please! See other shops for examples: `elefan-grenoble`, `400coop-paris11` + +### Step 1: get your API token from Open Prices + +https://prices.openfoodfacts.org/api/docs#/auth/auth_create + +### Step 2: upload a proof + +Use the token returned in Step 1. + +You can upload your proof via Postman (change the key to "File"). + +### Step 3: get your file ready + +The file must be a `.csv`. + +### Step 4: upload your file + +#### Upload command + +Use the token returned in Step 1. 
def _parse_price(price_str):
    """Convert a raw CSV price cell to a float, or None when unusable.

    Accepts a decimal comma ("3,50") as well as a decimal point ("3.50").
    Empty cells and cells that are not a number (e.g. "N/A") give None so
    that one bad row cannot abort the whole import; such rows are dropped
    later by filter_rules().
    """
    if not price_str:
        return None
    try:
        return float(price_str.replace(",", "."))
    except ValueError:
        return None


def map_gdpr_price_list_to_open_prices(price_list, extra_data=None):
    """Map raw CSV rows to Open Prices price dicts.

    The name is inherited from the GDPR import script; nothing in this
    function is GDPR-specific.

    The source column names are read from the environment:
    - PRODUCT_NAME_FIELD: column copied to "product_name" (skipped if unset)
    - PRODUCT_CODE_FIELD: column copied to "product_code"
    - PRICE_FIELD: column parsed to a float and stored in "price"

    Args:
        price_list: iterable of dicts, one per CSV row.
        extra_data: optional dict merged into every output price. Its keys
            win over the mapped fields on collision.

    Returns:
        A new list with one dict per input row.
    """
    # None instead of a mutable {} default, which would be shared by all calls
    extra_data = {} if extra_data is None else extra_data

    # the column names do not change between rows: read them once
    name_field = os.environ.get("PRODUCT_NAME_FIELD")
    code_field = os.environ.get("PRODUCT_CODE_FIELD")
    price_field = os.environ.get("PRICE_FIELD")

    open_prices_price_list = []
    for price in price_list:
        open_prices_price = {}
        if name_field:
            open_prices_price["product_name"] = price.get(name_field)
        open_prices_price["product_code"] = price.get(code_field)
        open_prices_price["price"] = _parse_price(price.get(price_field))
        open_prices_price_list.append({**open_prices_price, **extra_data})

    return open_prices_price_list


def _passes_filter_rules(op_price):
    """Return True when a mapped price has a plausible barcode and a price."""
    product_code = op_price.get("product_code")
    # a missing/empty code or a missing/zero price is never uploaded
    if not product_code or not op_price.get("price"):
        return False
    # ASCII digits only: str.isnumeric() would also accept characters such
    # as "²", "½" or Arabic-Indic digits, which are not valid barcodes
    if not (product_code.isascii() and product_code.isdigit()):
        return False
    if len(product_code) < 6:
        return False
    # presumably validates the GTIN check digit - see the openfoodfacts SDK
    return bool(openfoodfacts.barcode.has_valid_check_digit(product_code))


def filter_rules(op_price_list):
    """
    Rules to skip some prices (on code, name...)

    A price is kept only when its "product_code" is made of at least 6
    ASCII digits, is accepted by
    openfoodfacts.barcode.has_valid_check_digit(), and its "price" is
    truthy (neither None nor 0).

    Returns a new list; the input list is left untouched.
    """
    return [op_price for op_price in op_price_list if _passes_filter_rules(op_price)]
def read_csv(filepath, delimiter=",", encoding="utf-8-sig"):
    """Read a CSV file that has a header row into a list of dicts.

    Args:
        filepath: path of the CSV file.
        delimiter: field separator, "," by default.
        encoding: text encoding of the file. The "utf-8-sig" default reads
            plain UTF-8 and also drops a leading byte-order mark, so the
            first column name is not silently prefixed with "\\ufeff"
            (which would make lookups on that column return None).

    Returns:
        One dict per data row, keyed by the header names.
    """
    # an explicit encoding keeps non-ASCII headers (e.g. "Prix Vente (€)")
    # identical on every platform instead of depending on the locale
    with open(filepath, newline="", encoding=encoding) as csvfile:
        return list(csv.DictReader(csvfile, delimiter=delimiter))


def create_price(price, API_ENDPOINT, API_TOKEN, timeout=30):
    """POST one price to the Open Prices API.

    Args:
        price: JSON-serializable dict describing the price.
        API_ENDPOINT: base URL of the API; "/prices" is appended.
        API_TOKEN: token sent as "Authorization: Bearer <token>".
        timeout: seconds to wait for the server before giving up, so that
            a stalled connection cannot hang a long import forever.

    Any status code other than 201 is reported on stdout together with
    the rejected price; the function itself returns nothing.

    Raises:
        requests.exceptions.RequestException: on network errors/timeouts.
    """
    # tolerate a trailing slash on the endpoint (avoids ".../v1//prices")
    create_price_endpoint = f"{API_ENDPOINT}".rstrip("/") + "/prices"
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    response = requests.post(
        create_price_endpoint, json=price, headers=headers, timeout=timeout
    )
    if response.status_code != 201:
        try:
            print(response.json())
        except ValueError:
            # error bodies are not always JSON (proxy/HTML error pages):
            # show the raw answer rather than crash while reporting
            print(response.status_code, response.text)
        print(price)