Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨Export pdf / docx front side #537

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/impress.yml
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,11 @@ jobs:
- name: Install development dependencies
run: pip install --user .[dev]

- name: Install gettext (required to compile messages)
- name: Install gettext (required to compile messages) and MIME support
run: |
sudo apt-get update
sudo apt-get install -y gettext pandoc
sudo apt-get install -y gettext pandoc shared-mime-info
sudo wget https://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types -O /etc/mime.types

- name: Generate a MO file from strings extracted from the project
run: python manage.py compilemessages
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ and this project adheres to
- 💄(frontend) add filtering to left panel #475
- ✨(frontend) new share modal ui #489
- ✨(frontend) add favorite feature #515
- 🏷️(backend) add content-type to uploaded files #552
- ✨(frontend) export pdf docx front side #537
- 📝(documentation) Documentation about self-hosted installation #530
- ✨(helm) helm versioning #530

## Changed

Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ RUN apk add \
gettext \
gdk-pixbuf \
libffi-dev \
pandoc \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the initial commit, I think more dependencies can be removed that are linked to exports.

pango \
shared-mime-info

RUN wget https://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types -O /etc/mime.types

# Copy entrypoint
COPY ./docker/files/usr/local/bin/entrypoint /usr/local/bin/entrypoint

Expand Down
48 changes: 13 additions & 35 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=too-many-lines

import logging
import mimetypes
import re
import uuid
from urllib.parse import urlparse
Expand Down Expand Up @@ -604,8 +605,19 @@ def attachment_upload(self, request, *args, **kwargs):
extension = serializer.validated_data["expected_extension"]
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"

# Determine the content type of the file
file = serializer.validated_data["file"]
content_type, _ = mimetypes.guess_type(file.name)

# Fallback if MIME type cannot be determined
if not content_type:
content_type = "application/octet-stream"

# Prepare metadata for storage
extra_args = {"Metadata": {"owner": str(request.user.id)}}
extra_args = {
"Metadata": {"owner": str(request.user.id)},
"ContentType": content_type,
}
if serializer.validated_data["is_unsafe"]:
extra_args["Metadata"]["is_unsafe"] = "true"

Expand Down Expand Up @@ -936,40 +948,6 @@ def perform_create(self, serializer):
role=models.RoleChoices.OWNER,
)

@drf.decorators.action(
    detail=True,
    methods=["post"],
    url_path="generate-document",
    permission_classes=[permissions.AccessPermission],
)
# pylint: disable=unused-argument
def generate_document(self, request, pk=None):
    """
    Build a document from the request body, wrapped in this template.

    The request payload is validated with ``DocumentGenerationSerializer``;
    validation errors are returned as a 400 response.

    Accepted body types:
    - HTML: body_type = "html"
    - Markdown: body_type = "markdown"

    Available output formats:
    - PDF: format = "pdf"
    - Docx: format = "docx"
    """
    payload = serializers.DocumentGenerationSerializer(data=request.data)

    if payload.is_valid():
        cleaned = payload.validated_data
        # Read the validated fields before resolving the template object,
        # in the same order as the fields are consumed below.
        body = cleaned["body"]
        body_type = cleaned["body_type"]
        export_format = cleaned["format"]
        return self.get_object().generate_document(body, body_type, export_format)

    return drf.response.Response(
        payload.errors, status=drf.status.HTTP_400_BAD_REQUEST
    )


class TemplateAccessViewSet(
ResourceAccessViewsetMixin,
Expand Down
113 changes: 1 addition & 112 deletions src/backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@

import hashlib
import smtplib
import tempfile
import textwrap
import uuid
from datetime import timedelta
from io import BytesIO
from logging import getLogger

from django.conf import settings
Expand All @@ -20,19 +17,12 @@
from django.core.files.storage import default_storage
from django.core.mail import send_mail
from django.db import models
from django.http import FileResponse
from django.template.base import Template as DjangoTemplate
from django.template.context import Context
from django.template.loader import render_to_string
from django.utils import html, timezone
from django.utils import timezone
from django.utils.functional import cached_property, lazy
from django.utils.translation import get_language, override
from django.utils.translation import gettext_lazy as _

import frontmatter
import markdown
import pypandoc
import weasyprint
from botocore.exceptions import ClientError
from timezone_field import TimeZoneField

Expand Down Expand Up @@ -754,107 +744,6 @@ def get_abilities(self, user):
"retrieve": can_get,
}

def generate_pdf(self, body_html, metadata):
    """
    Generate and return a pdf document wrapped around the current template.

    ``body_html`` is exposed to the template stored in ``self.code`` as the
    ``body`` variable, together with every entry of ``metadata``. The rendered
    HTML is styled with ``self.css``, converted to PDF by WeasyPrint and
    returned as a ``FileResponse`` attachment named ``<self.title>.pdf``.
    """
    # pylint: disable=import-outside-toplevel
    from django.utils.safestring import mark_safe

    # The body is already HTML, so mark it safe verbatim. Using it as the
    # format string of format_html() would run str.format() on it: literal
    # braces in the body would raise or be altered, and calling format_html()
    # without arguments is deprecated since Django 5.0.
    rendered = DjangoTemplate(self.code).render(
        Context({"body": mark_safe(body_html), **metadata})
    )
    document_html = weasyprint.HTML(string=rendered)
    css = weasyprint.CSS(
        string=self.css,
        font_config=weasyprint.text.fonts.FontConfiguration(),
    )

    pdf_content = document_html.write_pdf(stylesheets=[css], zoom=1)

    # Let FileResponse build Content-Disposition so that titles containing
    # spaces, quotes or non-ASCII characters are quoted/encoded correctly
    # instead of being interpolated raw into the header.
    return FileResponse(
        BytesIO(pdf_content),
        as_attachment=True,
        filename=f"{self.title}.pdf",
        content_type="application/pdf",
    )

def generate_word(self, body_html, metadata):
    """
    Generate and return a docx document wrapped around the current template.

    ``body_html`` is exposed to the template stored in ``self.code`` as the
    ``body`` variable, together with every entry of ``metadata``. The rendered
    fragment is embedded in a minimal HTML page carrying ``self.css``,
    converted to docx with pandoc (using the reference document for styling)
    and returned as a ``FileResponse`` attachment named ``<self.title>.docx``.
    """
    # pylint: disable=import-outside-toplevel
    from django.utils.safestring import mark_safe

    # The body is already HTML, so mark it safe verbatim. Using it as the
    # format string of format_html() would run str.format() on it: literal
    # braces in the body would raise or be altered, and calling format_html()
    # without arguments is deprecated since Django 5.0.
    template_string = DjangoTemplate(self.code).render(
        Context({"body": mark_safe(body_html), **metadata})
    )

    html_string = f"""
<!DOCTYPE html>
<html>
<head>
<style>
{self.css}
</style>
</head>
<body>
{template_string}
</body>
</html>
"""

    # NOTE(review): relative path, presumably resolved against the backend
    # working directory -- confirm against the deployment layout.
    reference_docx = "core/static/reference.docx"

    # Convert the HTML to a temporary docx file
    with tempfile.NamedTemporaryFile(suffix=".docx", prefix="docx_") as tmp_file:
        output_path = tmp_file.name

        pypandoc.convert_text(
            html_string,
            "docx",
            format="html",
            outputfile=output_path,
            extra_args=["--reference-doc", reference_docx],
        )

        # Load the converted file in memory while the temporary file still
        # exists; a fresh BytesIO is already positioned at offset 0.
        with open(output_path, "rb") as f:
            output = BytesIO(f.read())

    # Let FileResponse build Content-Disposition so that titles containing
    # spaces, quotes or non-ASCII characters are quoted/encoded correctly
    # instead of being interpolated raw into the header.
    return FileResponse(
        output,
        as_attachment=True,
        filename=f"{self.title}.docx",
        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

def generate_document(self, body, body_type, export_format):
    """
    Render ``body`` inside this template and return the generated file.

    Front matter found at the top of ``body`` is parsed out and handed to the
    renderer as metadata; the remaining content is the document body.

    Accepted body types:
    - HTML: body_type = "html" (used as-is once stripped)
    - Markdown: any other body_type (converted to HTML)

    Available output formats:
    - PDF: export_format = "pdf"
    - Docx: any other export_format
    """
    parsed = frontmatter.loads(body)
    metadata = parsed.metadata
    body_html = parsed.content.strip()

    if body_type != "html":
        # An empty body short-circuits the markdown conversion.
        body_html = markdown.markdown(textwrap.dedent(body_html)) if body_html else ""

    render = self.generate_pdf if export_format == "pdf" else self.generate_word
    return render(body_html, metadata)


class TemplateAccess(BaseAccess):
"""Relation model to give access to a template for a user or a team with a role."""
Expand Down
Binary file removed src/backend/core/static/reference.docx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,22 @@ def test_api_documents_attachment_upload_anonymous_success():
assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_path = response.json()["file"]
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)

assert file_head["Metadata"] == {"owner": "None"}
assert file_head["ContentType"] == "image/png"


@pytest.mark.parametrize(
"reach, role",
Expand Down Expand Up @@ -206,6 +216,7 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id)}
assert file_head["ContentType"] == "image/png"


def test_api_documents_attachment_upload_invalid(client):
Expand Down Expand Up @@ -247,16 +258,18 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):


@pytest.mark.parametrize(
"name,content,extension",
"name,content,extension,content_type",
[
("test.exe", b"text", "exe"),
("test", b"text", "txt"),
("test.aaaaaa", b"test", "txt"),
("test.txt", PIXEL, "txt"),
("test.py", b"#!/usr/bin/python", "py"),
("test.exe", b"text", "exe", "application/x-msdownload"),
("test", b"text", "txt", "application/octet-stream"),
("test.aaaaaa", b"test", "txt", "application/octet-stream"),
("test.txt", PIXEL, "txt", "text/plain"),
("test.py", b"#!/usr/bin/python", "py", "text/x-python"),
],
)
def test_api_documents_attachment_upload_fix_extension(name, content, extension):
def test_api_documents_attachment_upload_fix_extension(
name, content, extension, content_type
):
"""
A file with no extension or a wrong extension is accepted and the extension
is corrected in storage.
Expand Down Expand Up @@ -287,6 +300,7 @@ def test_api_documents_attachment_upload_fix_extension(name, content, extension)
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
assert file_head["ContentType"] == content_type


def test_api_documents_attachment_upload_empty_file():
Expand Down Expand Up @@ -335,3 +349,4 @@ def test_api_documents_attachment_upload_unsafe():
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
assert file_head["ContentType"] == "application/x-msdownload"
Loading
Loading