Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎨Unlossy AI translation #479

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ and this project adheres to

## [Unreleased]

## Added

- ✨(backend) annotate number of accesses on documents in list view #411
- ✨(backend) allow users to mark/unmark documents as favorite #411

## Changes

- ♻️(frontend) Improve Ai translations #478
- 🐛(frontend) Fix hidden menu on Firefox #468
- ⚡️(backend) optimize number of queries on document list view #411


## [1.8.2] - 2024-11-28

Expand All @@ -28,8 +39,6 @@ and this project adheres to

## Added

- ✨(backend) annotate number of accesses on documents in list view #411
- ✨(backend) allow users to mark/unmark documents as favorite #411
- 🌐(backend) add German translation #259
- 🌐(frontend) add German translation #255
- ✨(frontend) add a broadcast store #387
Expand All @@ -41,7 +50,6 @@ and this project adheres to

## Changed

- ⚡️(backend) optimize number of queries on document list view #411
- 🚸(backend) improve users similarity search and sort results #391
- ♻️(frontend) simplify stores #402
- ✨(frontend) update $css Box props type to add styled components RuleSet #423
Expand Down
7 changes: 4 additions & 3 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,12 @@ class AITranslateSerializer(serializers.Serializer):
language = serializers.ChoiceField(
choices=tuple(enums.ALL_LANGUAGES.items()), required=True
)
text = serializers.CharField(required=True)
text = serializers.JSONField(required=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: wouldn't call this text in this context


def validate_text(self, value):
"""Ensure the text field is not empty."""

if len(value.strip()) == 0:
raise serializers.ValidationError("Text field cannot be empty.")
if not isinstance(value, dict):
raise serializers.ValidationError("Text field must be a json object.")

return value
4 changes: 2 additions & 2 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,9 +680,9 @@ def ai_translate(self, request, *args, **kwargs):
"""
POST /api/v1.0/documents/<resource_id>/ai-translate
with expected data:
- text: str
- text: json
- language: str [settings.LANGUAGES]
Return JSON response with the translated text.
Return the same json but with the value updated, keep the keys accordingly.
"""
# Check permissions first
self.get_object()
Expand Down
10 changes: 7 additions & 3 deletions src/backend/core/services/ai_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,33 @@
"Answer the prompt in markdown format. Return JSON: "
'{"answer": "Your markdown answer"}. '
"Do not provide any other information."
"Preserve the language."
),
"correct": (
"Correct grammar and spelling of the markdown text, "
"preserving language and markdown formatting. "
'Return JSON: {"answer": "your corrected markdown text"}. '
"Do not provide any other information."
"Preserve the language."
),
"rephrase": (
"Rephrase the given markdown text, "
"preserving language and markdown formatting. "
'Return JSON: {"answer": "your rephrased markdown text"}. '
"Do not provide any other information."
"Preserve the language."
),
"summarize": (
"Summarize the markdown text, preserving language and markdown formatting. "
'Return JSON: {"answer": "your markdown summary"}. '
"Do not provide any other information."
"Preserve the language."
),
}

AI_TRANSLATE = (
"Translate the markdown text to {language:s}, preserving markdown formatting. "
'Return JSON: {{"answer": "your translated markdown text in {language:s}"}}. '
"Translate to {language:s} for every value of the json provided."
"Keep the same json but with the value updated, keep the keys accordingly."
"Do not provide any other information."
)

Expand All @@ -62,7 +66,7 @@ def call_ai_api(self, system_content, text):
response_format={"type": "json_object"},
messages=[
{"role": "system", "content": system_content},
{"role": "user", "content": json.dumps({"markdown_input": text})},
{"role": "user", "content": json.dumps({"answer": text})},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you keep using the answer keyword, although it is not the answer in this context but the input ?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also it feels weird than when we already have structured json output in the translate case, we still pass it to the model within the answer json.
We could get rid of the anwser altogether such that we don't have another indentation layer in the json.

],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ def test_api_documents_ai_transform_anonymous_success(mock_create):
"Summarize the markdown text, preserving language and markdown formatting. "
'Return JSON: {"answer": "your markdown summary"}. Do not provide any other '
"information."
"Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": "Hello"}'},
],
)

Expand Down Expand Up @@ -163,9 +164,10 @@ def test_api_documents_ai_transform_authenticated_success(mock_create, reach, ro
"content": (
'Answer the prompt in markdown format. Return JSON: {"answer": '
'"Your markdown answer"}. Do not provide any other information.'
"Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": "Hello"}'},
],
)

Expand Down Expand Up @@ -239,9 +241,10 @@ def test_api_documents_ai_transform_success(mock_create, via, role, mock_user_te
"content": (
'Answer the prompt in markdown format. Return JSON: {"answer": '
'"Your markdown answer"}. Do not provide any other information.'
"Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": "Hello"}'},
],
)

Expand Down
70 changes: 42 additions & 28 deletions src/backend/core/tests/documents/test_api_documents_ai_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,29 +86,31 @@ def test_api_documents_ai_translate_anonymous_success(mock_create):
"""
document = factories.DocumentFactory(link_reach="public", link_role="editor")

answer = '{"answer": "Salut"}'
answer = '{"answer": {"tid-1": "Ola"}}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
)

url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = APIClient().post(url, {"text": "Hello", "language": "es"})
response = APIClient().post(
url, {"text": {"tid-1": "Hello"}, "language": "es"}, format="json"
)

assert response.status_code == 200
assert response.json() == {"answer": "Salut"}
assert response.json() == {"answer": {"tid-1": "Ola"}}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Spanish, preserving markdown formatting. "
'Return JSON: {"answer": "your translated markdown text in Spanish"}. '
"Translate to Spanish for every value of the json provided."
"Keep the same json but with the value updated, keep the keys accordingly."
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": {"tid-1": "Hello"}}'},
],
)

Expand Down Expand Up @@ -164,30 +166,31 @@ def test_api_documents_ai_translate_authenticated_success(mock_create, reach, ro

document = factories.DocumentFactory(link_reach=reach, link_role=role)

answer = '{"answer": "Salut"}'
answer = '{"answer": {"tid-1": "Ola"}}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
)

url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = client.post(url, {"text": "Hello", "language": "es-co"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es-co"}, format="json"
)

assert response.status_code == 200
assert response.json() == {"answer": "Salut"}
assert response.json() == {"answer": {"tid-1": "Ola"}}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Colombian Spanish, "
"preserving markdown formatting. Return JSON: "
'{"answer": "your translated markdown text in Colombian Spanish"}. '
"Translate to Colombian Spanish for every value of the json provided."
"Keep the same json but with the value updated, keep the keys accordingly."
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": {"tid-1": "Hello"}}'},
],
)

Expand Down Expand Up @@ -242,36 +245,37 @@ def test_api_documents_ai_translate_success(mock_create, via, role, mock_user_te
document=document, team="lasuite", role=role
)

answer = '{"answer": "Salut"}'
answer = '{"answer": {"tid-1": "Ola"}}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
)

url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = client.post(url, {"text": "Hello", "language": "es-co"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es-co"}, format="json"
)

assert response.status_code == 200
assert response.json() == {"answer": "Salut"}
assert response.json() == {"answer": {"tid-1": "Ola"}}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Colombian Spanish, "
"preserving markdown formatting. Return JSON: "
'{"answer": "your translated markdown text in Colombian Spanish"}. '
"Translate to Colombian Spanish for every value of the json provided."
"Keep the same json but with the value updated, keep the keys accordingly."
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": '{"answer": {"tid-1": "Hello"}}'},
],
)


def test_api_documents_ai_translate_empty_text():
"""The text should not be empty when requesting AI translate."""
def test_api_documents_ai_translate_should_be_json():
"""The text should be a json when requesting AI translate."""
user = factories.UserFactory()

client = APIClient()
Expand All @@ -283,7 +287,7 @@ def test_api_documents_ai_translate_empty_text():
response = client.post(url, {"text": " ", "language": "es"})

assert response.status_code == 400
assert response.json() == {"text": ["This field may not be blank."]}
assert response.json() == {"text": ["Text field must be a json object."]}


def test_api_documents_ai_translate_invalid_action():
Expand All @@ -296,7 +300,9 @@ def test_api_documents_ai_translate_invalid_action():
document = factories.DocumentFactory(link_reach="public", link_role="editor")

url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = client.post(url, {"text": "Hello", "language": "invalid"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "invalid"}, format="json"
)

assert response.status_code == 400
assert response.json() == {"language": ['"invalid" is not a valid choice.']}
Expand All @@ -322,13 +328,17 @@ def test_api_documents_ai_translate_throttling_document(mock_create):
for _ in range(3):
user = factories.UserFactory()
client.force_login(user)
response = client.post(url, {"text": "Hello", "language": "es"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es"}, format="json"
)
assert response.status_code == 200
assert response.json() == {"answer": "Salut"}

user = factories.UserFactory()
client.force_login(user)
response = client.post(url, {"text": "Hello", "language": "es"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es"}, format="json"
)

assert response.status_code == 429
assert response.json() == {
Expand Down Expand Up @@ -356,13 +366,17 @@ def test_api_documents_ai_translate_throttling_user(mock_create):
for _ in range(3):
document = factories.DocumentFactory(link_reach="public", link_role="editor")
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = client.post(url, {"text": "Hello", "language": "es"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es"}, format="json"
)
assert response.status_code == 200
assert response.json() == {"answer": "Salut"}

document = factories.DocumentFactory(link_reach="public", link_role="editor")
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = client.post(url, {"text": "Hello", "language": "es"})
response = client.post(
url, {"text": {"tid-1": "Hello"}, "language": "es"}, format="json"
)

assert response.status_code == 429
assert response.json() == {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ test.describe('Doc Editor', () => {
if (request.method().includes('POST')) {
await route.fulfill({
json: {
answer: 'Bonjour le monde',
answer: { 'tid-0': 'Bonjour le monde' },
},
});
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import { APIError, errorCauses, fetchAPI } from '@/api';

export type DocAITranslate = {
docId: string;
text: string;
text: Record<string, string>;
language: string;
};

export type DocAITranslateResponse = {
answer: string;
answer: Record<string, string>;
};

export const docAITranslate = async ({
Expand Down
Loading
Loading