Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(Shop import): new script to upload prices from a shop csv #574

Merged
merged 3 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion scripts/.gitkeep

This file was deleted.

Empty file added scripts/__init__.py
Empty file.
59 changes: 17 additions & 42 deletions scripts/gdpr/create_prices_from_gdpr_csv.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
import csv
import datetime
import os
import sys
import time

import requests
from utils import get_picard_product_from_subcode
import utils as gdpr_utils

OPEN_PRICES_CREATE_PRICE_ENDPOINT = f'{os.environ.get("API_ENDPOINT")}/prices'
OPEN_PRICES_TOKEN = os.environ.get("API_TOKEN")
from scripts.utils import create_price, read_csv

GDPR_FIELD_MAPPING_FILEPATH = "scripts/gdpr/gdpr_field_mapping.csv"

Expand All @@ -28,14 +25,15 @@

REQUIRED_ENV_PARAMS = [
# "FILEPATH"
# "DELIMITER" (optional)
"SOURCE",
"LOCATION",
"LOCATION_OSM_ID",
"LOCATION_OSM_TYPE",
"PROOF_ID",
"API_ENDPOINT",
"API_TOKEN",
# DRY_MODE
# DRY_RUN
]


Expand Down Expand Up @@ -151,7 +149,7 @@ def gdpr_source_filter_rules(op_price_list, gdpr_source=""):
elif gdpr_source == "INTERMARCHE":
pass
elif gdpr_source == "PICARD":
full_product_code = get_picard_product_from_subcode(op_price)
full_product_code = gdpr_utils.get_picard_product_from_subcode(op_price)
if full_product_code:
op_price["product_code"] = full_product_code
else:
Expand Down Expand Up @@ -182,26 +180,9 @@ def gdpr_source_location_rules(op_price_list):
return op_price_list_filtered


def read_gdpr_field_mapping_csv():
    """Load the GDPR field mapping CSV and return its rows as a list of dicts.

    The file location comes from the module constant
    GDPR_FIELD_MAPPING_FILEPATH; each row is keyed by the CSV header.
    """
    with open(GDPR_FIELD_MAPPING_FILEPATH, newline="") as mapping_file:
        mapping_rows = list(csv.DictReader(mapping_file))
    return mapping_rows


def read_gdpr_csv(filepath):
    """Read the CSV file at *filepath* and return its rows as a list of dicts.

    The first line of the file is used as the header; every following line
    becomes one dict keyed by the header names.
    """
    with open(filepath, newline="") as gdpr_file:
        # DictReader is consumed inside the `with` so the file is still open.
        return list(csv.DictReader(gdpr_file))


def map_gdpr_price_list_to_open_prices(gdpr_price_list, gdpr_source="", extra_data={}):
# get mapping file
gdpr_field_mapping = read_gdpr_field_mapping_csv()
gdpr_field_mapping = read_csv(GDPR_FIELD_MAPPING_FILEPATH)

# map source fields to op fields
open_prices_price_list_1 = list()
Expand Down Expand Up @@ -230,16 +211,6 @@ def map_gdpr_price_list_to_open_prices(gdpr_price_list, gdpr_source="", extra_da
return open_prices_price_list_2


def create_price(price):
    """POST one price to the Open Prices API and report any failure.

    The endpoint and token come from the module constants
    OPEN_PRICES_CREATE_PRICE_ENDPOINT and OPEN_PRICES_TOKEN.

    Args:
        price: dict describing the price, sent as the JSON request body.

    Returns:
        True when the API answered 201 (created), False otherwise. On
        failure the error (or the response body) and the price are printed
        so that the caller's upload loop can carry on with the next price.
    """
    headers = {"Authorization": f"Bearer {OPEN_PRICES_TOKEN}"}
    try:
        # A timeout keeps a stalled connection from hanging the whole import.
        response = requests.post(
            OPEN_PRICES_CREATE_PRICE_ENDPOINT,
            json=price,
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as error:
        print(error)
        print(price)
        return False

    if response.status_code == 201:
        return True

    try:
        print(response.json())
    except ValueError:
        # Error pages (e.g. from a proxy) are not always JSON.
        print(response.text)
    print(price)
    return False


if __name__ == "__main__":
"""
How-to run:
Expand All @@ -251,7 +222,10 @@ def create_price(price):
sys.exit("Error: missing FILEPATH env")
filepath = os.environ.get("FILEPATH")
print(f"===== Reading {filepath}")
gdpr_price_list = read_gdpr_csv(filepath)
if os.environ.get("DELIMITER"):
gdpr_price_list = read_csv(filepath, delimiter=os.environ.get("DELIMITER"))
else:
gdpr_price_list = read_csv(filepath)
print(len(gdpr_price_list))

print("===== Input example:")
Expand All @@ -271,9 +245,8 @@ def create_price(price):
"currency": DEFAULT_PRICE_CURRENCY,
"location_osm_id": int(os.environ.get("LOCATION_OSM_ID")),
"location_osm_type": os.environ.get("LOCATION_OSM_TYPE"),
"proof_id": int(
os.environ.get("PROOF_ID")
), # must be of type "GDPR_REQUEST" :)
# proof_id must be of type "GDPR_REQUEST" :)
"proof_id": int(os.environ.get("PROOF_ID")),
}
open_prices_price_list = map_gdpr_price_list_to_open_prices(
gdpr_price_list, gdpr_source=source, extra_data=extra_data
Expand All @@ -299,15 +272,17 @@ def create_price(price):

# Step 5: send prices to backend via API
if os.environ.get("DRY_RUN") == "False":
print(f"===== Uploading data to {OPEN_PRICES_CREATE_PRICE_ENDPOINT}")
print(f"===== Uploading data to {os.environ.get('API_ENDPOINT')}")
progress = 0
for index, price in enumerate(open_prices_price_list_filtered_2):
create_price(price)
create_price(
price, os.environ.get("API_ENDPOINT"), os.environ.get("API_TOKEN")
)
# some pauses to be safe
progress += 1
if (progress % 10) == 0:
time.sleep(1)
if (progress % 50) == 0:
print(f"{progress}/{len(open_prices_price_list_filtered_2)}...")
else:
sys.exit("No prices uploaded (DRY_RUN env missing or set to 'True')")
sys.exit("===== No prices uploaded (DRY_RUN env missing or set to 'True')")
42 changes: 42 additions & 0 deletions scripts/shop_import/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Uploading shop price data

## Context

One of our data sources is shop imports (supermarkets uploading data directly).

Currently, some members are active volunteers in food co-ops, and get authorization to extract and upload data from their shops.

## Usage

### Step 0: prerequisites

* have a .csv file of the prices
* please upload these prices from a dedicated shop account (see existing shop accounts for examples: `elefan-grenoble`, `400coop-paris11`)

### Step 1: get your API token from Open Prices

https://prices.openfoodfacts.org/api/docs#/auth/auth_create

### Step 2: upload a proof

Use the token returned in Step 1.

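You can upload your proof via Postman (in the request body, change the type of the key to "File"). The proof must be of type `SHOP_IMPORT`. Note the id of the created proof: it is the `PROOF_ID` to pass in Step 4.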

### Step 3: get your file ready

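The file must be a `.csv` with a header row (the column names are what you pass as `PRODUCT_CODE_FIELD`, `PRODUCT_NAME_FIELD` and `PRICE_FIELD`). If the file uses a delimiter other than a comma, pass it with the optional `DELIMITER` env variable (e.g. `DELIMITER=";"`).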

### Step 4: upload your file

#### Upload command

Use the token returned in Step 1.

```
FILEPATH=../data/Elefan/20241208_articles_actif.csv PRODUCT_CODE_FIELD=Code PRODUCT_NAME_FIELD=Designation PRICE_FIELD="Prix Vente (€)" CURRENCY=EUR LOCATION_OSM_ID=1392117416 LOCATION_OSM_TYPE=NODE DATE=2024-12-08 PROOF_ID=1234 API_ENDPOINT=https://prices.openfoodfacts.net/api/v1 API_TOKEN=username_token-hash poetry run python scripts/shop_import/create_prices_from_csv.py
```

By default the script runs in dry-run mode and nothing is uploaded. Once you're ready, make these final changes:
- replace the API_ENDPOINT with `https://prices.openfoodfacts.org/api/v1`
- add `DRY_RUN=False` to actually upload your data
147 changes: 147 additions & 0 deletions scripts/shop_import/create_prices_from_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import os
import sys
import time

import openfoodfacts

from scripts.utils import create_price, read_csv

# NOTE(review): these two constants are evaluated at import time and are not
# referenced by the visible part of this script (the upload loop passes
# API_ENDPOINT / API_TOKEN straight to create_price) — confirm before removing.
OPEN_PRICES_CREATE_PRICE_ENDPOINT = f'{os.environ.get("API_ENDPOINT")}/prices'
OPEN_PRICES_TOKEN = os.environ.get("API_TOKEN")

# Env variables that must be set (non-empty) before the mapping step runs.
# The commented-out entries are handled separately by the script.
REQUIRED_ENV_PARAMS = [
    # "FILEPATH": path of the input CSV, checked on its own before reading
    # "DELIMITER" (optional): CSV delimiter, read_csv's default is used if unset
    "PRODUCT_CODE_FIELD",
    "PRODUCT_NAME_FIELD",
    "PRICE_FIELD",
    "CURRENCY",
    "LOCATION_OSM_ID",
    "LOCATION_OSM_TYPE",
    "DATE",
    "PROOF_ID",
    "API_ENDPOINT",
    "API_TOKEN",
    # DRY_RUN: must be exactly "False" for the prices to be uploaded
]


def _parse_price(price_str):
    """Convert a price cell to a float, or None when empty or not a number."""
    if not price_str:
        return None
    try:
        # Accept a decimal comma ("1,50") as well as a decimal point.
        return float(price_str.replace(",", "."))
    except ValueError:
        return None


def map_gdpr_price_list_to_open_prices(price_list, extra_data=None):
    """Map raw shop CSV rows to Open Prices price dicts.

    The source column names are read from the PRODUCT_NAME_FIELD,
    PRODUCT_CODE_FIELD and PRICE_FIELD env variables.

    Args:
        price_list: list of dicts, one per CSV row, keyed by column name.
        extra_data: optional dict of fields merged into every output price
            (its keys take precedence over the mapped fields).

    Returns:
        A list of dicts with "product_code", "price" and, when
        PRODUCT_NAME_FIELD is set, "product_name", plus the extra_data
        fields. "price" is a float, or None when the cell is empty or is
        not a number, so that one malformed row does not abort the import.
    """
    # Avoid sharing one mutable default dict between calls.
    extra_data = {} if extra_data is None else extra_data
    # The column names are the same for every row: look them up once.
    name_field = os.environ.get("PRODUCT_NAME_FIELD")
    code_field = os.environ.get("PRODUCT_CODE_FIELD")
    price_field = os.environ.get("PRICE_FIELD")

    open_prices_price_list = []
    for price in price_list:
        open_prices_price = {}
        if name_field:
            open_prices_price["product_name"] = price.get(name_field)
        open_prices_price["product_code"] = price.get(code_field)
        open_prices_price["price"] = _parse_price(price.get(price_field))
        open_prices_price_list.append({**open_prices_price, **extra_data})

    return open_prices_price_list


def filter_rules(op_price_list):
    """Keep only the prices that are worth uploading.

    A price is kept when its "product_code" is a non-empty, numeric string
    of at least 6 characters with a valid barcode check digit, and its
    "price" is truthy. Everything else is skipped.
    """

    def _code_is_acceptable(code):
        # Cheap checks first; the check-digit validation runs last.
        if not code:
            return False
        if not code.isnumeric():
            return False
        if len(code) < 6:
            return False
        return bool(openfoodfacts.barcode.has_valid_check_digit(code))

    kept_prices = []
    for candidate in op_price_list:
        code_ok = _code_is_acceptable(candidate["product_code"])
        price_ok = bool(candidate["price"])
        if code_ok and price_ok:
            kept_prices.append(candidate)

    return kept_prices


if __name__ == "__main__":
    # How-to run:
    # > FILEPATH= poetry run python scripts/shop_import/create_prices_from_csv.py
    # Required params: see REQUIRED_ENV_PARAMS
    # Nothing is uploaded unless DRY_RUN is set to "False".

    # Step 1: read input file
    filepath = os.environ.get("FILEPATH")
    if not filepath:
        sys.exit("Error: missing FILEPATH env")
    print(f"===== Reading {filepath}")
    delimiter = os.environ.get("DELIMITER")
    if delimiter:
        price_list = read_csv(filepath, delimiter=delimiter)
    else:
        price_list = read_csv(filepath)
    print(len(price_list))
    # An empty file would otherwise crash on the example print below.
    if not price_list:
        sys.exit(f"Error: no rows found in {filepath}")

    print("===== Input example:")
    print(price_list[0])

    # Step 2: check env params are all present
    print("===== Checking env params")
    for env_param in REQUIRED_ENV_PARAMS:
        if not os.environ.get(env_param):
            sys.exit(f"Error: missing {env_param} env")
    print("All good :)")

    # Step 3: transform input into OP format
    print("===== Mapping source file to Open Prices format")
    extra_data = {
        "currency": os.environ.get("CURRENCY"),
        "location_osm_id": int(os.environ.get("LOCATION_OSM_ID")),
        "location_osm_type": os.environ.get("LOCATION_OSM_TYPE"),
        "date": os.environ.get("DATE"),
        # proof_id must be of type "SHOP_IMPORT" :)
        "proof_id": int(os.environ.get("PROOF_ID")),
    }
    open_prices_price_list = map_gdpr_price_list_to_open_prices(
        price_list, extra_data=extra_data
    )
    print(len(open_prices_price_list))

    # Step 4: filter prices depending on specific rules
    print("===== Applying source filtering rules")
    open_prices_price_list_filtered = filter_rules(open_prices_price_list)
    print(len(open_prices_price_list_filtered))
    # If every row was filtered out there is no example to show and
    # nothing to upload.
    if not open_prices_price_list_filtered:
        sys.exit("===== No prices left after filtering, nothing to upload")

    print("===== Output example (extra fields will be ignored)")
    print(open_prices_price_list_filtered[0])

    # Step 5: send prices to backend via API
    if os.environ.get("DRY_RUN") == "False":
        api_endpoint = os.environ.get("API_ENDPOINT")
        api_token = os.environ.get("API_TOKEN")
        total = len(open_prices_price_list_filtered)
        print(f"===== Uploading data to {api_endpoint}")
        for progress, price in enumerate(open_prices_price_list_filtered, start=1):
            create_price(price, api_endpoint, api_token)
            # some pauses to be safe
            if (progress % 10) == 0:
                time.sleep(1)
            if (progress % 50) == 0:
                print(f"{progress}/{total}...")
    else:
        sys.exit("===== No prices uploaded (DRY_RUN env missing or set to 'True')")
20 changes: 20 additions & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import csv

import requests


def read_csv(filepath, delimiter=",", encoding=None):
    """Read a CSV file and return its rows as a list of dicts.

    Args:
        filepath: path of the CSV file; its first line is the header.
        delimiter: one-character field separator (default: comma).
        encoding: text encoding passed to open(). The default (None) keeps
            the platform default. Pass "utf-8-sig" for files that start
            with a byte-order mark, otherwise the first column name is
            prefixed with the BOM and lookups on it fail.

    Returns:
        A list with one dict per data row, keyed by the header names.
    """
    with open(filepath, newline="", encoding=encoding) as csvfile:
        reader = csv.DictReader(csvfile, delimiter=delimiter)
        return list(reader)


def create_price(price, API_ENDPOINT, API_TOKEN, timeout=30):
    """POST one price to the Open Prices API and report any failure.

    Args:
        price: dict describing the price, sent as the JSON request body.
        API_ENDPOINT: base URL of the API; "/prices" is appended to it.
        API_TOKEN: token sent as a Bearer Authorization header.
        timeout: seconds to wait for the server before giving up, so that a
            stalled connection cannot hang a bulk upload forever.

    Returns:
        True when the API answered 201 (created), False otherwise. On
        failure the error (or the response body) and the price are printed
        so that the caller's upload loop can carry on with the next price.
    """
    endpoint = f"{API_ENDPOINT}/prices"
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    try:
        response = requests.post(
            endpoint, json=price, headers=headers, timeout=timeout
        )
    except requests.RequestException as error:
        # Network failure or timeout: report it and let the caller go on.
        print(error)
        print(price)
        return False

    if response.status_code == 201:
        return True

    try:
        print(response.json())
    except ValueError:
        # Error pages (e.g. from a proxy) are not always JSON.
        print(response.text)
    print(price)
    return False
Loading