# translate_ecoregions.py
# Fills in missing Dutch names in the ecoregion_name table using OpenAI chat completions.
import os
from pathlib import Path
import psycopg2
from dotenv import load_dotenv
from supabase import create_client
import openai
import asyncio
from supabase import create_client, Client
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
retry_if_exception_type,
retry_if_exception
)
import time
# Pull variables from a local .env file into the process environment
# before any of them are read below.
load_dotenv()

# OpenAI credential; None when OPENAI_API_KEY is not set.
api_key = os.environ.get("OPENAI_API_KEY")
def dutch_translations_missing_ids(conn):
    """Return the ecoregions that do not have a Dutch name yet.

    Args:
        conn: An open DB-API connection (``main`` passes a psycopg2
            connection) to the database holding ``ecoregion_name``.

    Returns:
        The result of ``cursor.fetchall()``: one ``(eco_code, english)`` row
        for every ecoregion whose ``dutch`` column is NULL or empty.

    Raises:
        Any exception raised by the cursor while executing or fetching; the
        cursor is closed before the exception propagates.
    """
    query = """
        SELECT eco_code, english
        FROM ecoregion_name
        WHERE dutch IS NULL OR dutch = '';
    """
    cur = conn.cursor()
    try:
        # The query takes no parameters: it selects every untranslated row.
        cur.execute(query)
        return cur.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() fails.
        cur.close()
def main():
    """Translate every ecoregion name that lacks a Dutch name and store it.

    Reads the untranslated rows from ``ecoregion_name``, asks the OpenAI chat
    model for a Dutch translation of each English name, and writes every
    translation back in its own transaction.

    Raises:
        Exception: Any error from the OpenAI request is printed and re-raised,
            which stops the run. Errors while writing a single translation are
            printed and rolled back, and the loop continues with the next
            ecoregion. If the rollback itself fails, that error propagates.
    """
    # Set up the OpenAI API client
    openai.api_key = api_key

    # Connect directly to the Postgres database.
    # NOTE(review): USER is usually already set by the shell to the OS login
    # name, and load_dotenv() does not override existing variables by
    # default -- confirm the intended database user is actually picked up.
    conn_supabase = psycopg2.connect(
        dbname=os.getenv("DB"),
        user=os.getenv("USER"),
        password=os.getenv("PW"),
        host=os.getenv("HOST"),
        port=os.getenv("PORT"),
    )

    # Loop-invariant, so built once; values are bound through placeholders.
    update_query = """
        UPDATE ecoregion_name
        SET dutch = %s
        WHERE eco_code = %s;
    """

    try:
        # Find the ecoregions for which a Dutch name still has to be generated
        ecoregions = dutch_translations_missing_ids(conn_supabase)

        for eco_code, eco_english in ecoregions:
            # The name is wrapped in square brackets because the instruction
            # text refers to "the square brackets".
            prompt = f'Translate the ecoregion name between the square brackets to dutch: [{eco_english}]. Only return the dutch text, no brackets or other signs.'
            params = {
                'model': 'gpt-3.5-turbo',
                'messages': [
                    {"role": "system", "content": "You are a translator"},
                    {"role": "user", "content": prompt},
                ],
                'temperature': 0.3,
                'max_tokens': 400,
                'presence_penalty': 0.1,
                'frequency_penalty': 0.1,
            }

            try:
                translation = openai.ChatCompletion.create(**params)
            except Exception as e:
                print(f"Unexpected error occurred: {e}")
                # Abort the run; the finally clause below closes the connection.
                raise

            if not translation:
                continue

            dutch_translation = translation.choices[0].message.content

            try:
                # Write the translation and commit it on its own, so one
                # failure does not discard earlier results.
                with conn_supabase.cursor() as cur:
                    cur.execute(update_query, (dutch_translation, eco_code))
                conn_supabase.commit()
                print(f"Wrote dutch translation into the database for eco: {eco_code, eco_english, dutch_translation}")
            except Exception as e:
                # Without a rollback the connection stays in an aborted
                # transaction and every following statement fails as well.
                conn_supabase.rollback()
                print(f'An error occured writing to the database for eco: {eco_code, eco_english, dutch_translation}', e)

        print('all done!')
    finally:
        # Always release the database connection, including on the error path.
        conn_supabase.close()
# Run the translation job only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()