# search.py — enrich supermarket dataset records with Google image-search URLs.
from googleapiclient.discovery import build
import pandas as pd
import time
from googleapiclient.errors import HttpError
# Your Google Custom Search API key and search engine ID
# SECURITY: these credentials are committed in plain text. Anyone with this
# file can spend the quota. Move them to environment variables (e.g.
# os.environ["GOOGLE_CSE_API_KEY"]) and revoke the exposed keys.
# API_KEY = "AIzaSyBHQ2BDclENB8zguc_E4QTyIID_HbBw1eE" # Update with your valid API key
# SEARCH_ENGINE_ID = "b4ba369391c7e4e91"
API_KEY = "AIzaSyB8GR91OR-fCQxLqvloAGbxFRR42XQv2kg"
SEARCH_ENGINE_ID = "34168807f6c364d1e"
# Build the custom search service
# NOTE(review): constructed at import time; any later API call requires
# network access and a valid key.
service = build("customsearch", "v1", developerKey=API_KEY)
# Cache to store previously fetched image URLs to avoid duplicate API calls
# Maps "product brand" query string -> image URL returned by the API.
image_url_cache = {}
def fetch_image_url(product_name, brand_name, retries=3, delay=5):
    """Return the first Google image-search result URL for a product.

    Combines *product_name* and *brand_name* into a single query, consults
    the module-level ``image_url_cache`` first, and retries with exponential
    backoff when the API reports quota exhaustion (HTTP 429).

    Args:
        product_name: Product name used in the search query.
        brand_name: Brand name appended to the query for more accurate hits.
        retries: Maximum number of API attempts when rate-limited.
        delay: Initial backoff delay in seconds; doubled after each 429.

    Returns:
        The first image URL as a string, or ``None`` if no image was found,
        a non-retryable HTTP error occurred, or all retries were exhausted.
    """
    query = f"{product_name} {brand_name}"
    # Cache hit covers both successful lookups and known misses (None),
    # so duplicate products never cost a second API call.
    if query in image_url_cache:
        print(f"Using cached image for: {query}")
        return image_url_cache[query]
    for attempt in range(retries):
        try:
            print(f"Fetching image for: {query}")
            result = (
                service.cse()
                .list(
                    q=query,  # Search with combined product and brand name
                    cx=SEARCH_ENGINE_ID,
                    searchType="image",
                    num=1,  # Get only one result
                )
                .execute()
            )
            items = result.get("items")
            if items:
                image_url = items[0]["link"]  # First image link
                image_url_cache[query] = image_url  # Save to cache
                return image_url
            print(f"No image found for: {query}")
            # Fix: cache the miss as well; previously every duplicate of a
            # no-image product triggered a fresh (wasted) API request.
            image_url_cache[query] = None
            return None
        except HttpError as e:
            if e.resp.status == 429:
                print(f"Quota exceeded. Retrying in {delay} seconds...")
                time.sleep(delay)  # Wait before retrying
                delay *= 2  # Exponential backoff
            else:
                # Any other HTTP failure is not retryable here.
                print(f"HttpError fetching image for {query}: {e}")
                return None
    print(f"Failed to fetch image for {query} after {retries} attempts.")
    return None
# Load the original dataset.
df = pd.read_csv("supermarket_dataset_4floors_4sections.csv")

# Load the dataset already enriched with image URLs by previous runs.
existing_df = pd.read_csv("updated_supermarket_dataset_limited.csv")

# Select the next batch to enrich: positional rows 717..800.
# .copy() is required — df.iloc[...] returns a view, and adding a column to
# a view raises SettingWithCopyWarning and may silently fail to assign.
new_records_df = df.iloc[717:801].copy()

# Look up an image URL for each record in the batch.
new_records_df["Image URL"] = new_records_df.apply(
    lambda row: fetch_image_url(row["Product Name"], row["Brand"]), axis=1
)

# Append the freshly enriched batch to the previously saved records;
# ignore_index rebuilds a clean 0..N-1 index for the combined frame
# (the index is not written out below, so this is cosmetic-safe).
updated_df = pd.concat([existing_df, new_records_df], ignore_index=True)

# Persist the combined dataset back to the same CSV file.
updated_df.to_csv("updated_supermarket_dataset_limited.csv", index=False)
print(
    "Image URLs for records 717 to 800 have been added and saved to "
    "updated_supermarket_dataset_limited.csv"
)