From b4f78eac29c258471481cc833466430ef2f85572 Mon Sep 17 00:00:00 2001 From: "v.kaukin" Date: Thu, 17 Sep 2020 13:36:14 +0500 Subject: [PATCH] Attachment content_id content_disposition _part->part, email.utils.parsedate_to_datetime used in utils.parse_email_addresses, BaseMailBox.fetch limit argument now can receive slice object, mailbox.last_search_ids --- README.rst | 21 +++-- docs/dev_notes.txt | 17 ++-- docs/release_notes.rst | 10 +++ docs/todo.txt | 2 - imap_tools/__init__.py | 8 +- imap_tools/mailbox.py | 14 +-- imap_tools/message.py | 24 +++-- imap_tools/utils.py | 11 ++- tests/data.py | 67 ++++++++++++-- tests/messages/attachment_inline.eml | 128 +++++++++++++++++++++++++++ tests/test_message.py | 2 +- 11 files changed, 265 insertions(+), 39 deletions(-) create mode 100644 tests/messages/attachment_inline.eml diff --git a/README.rst b/README.rst index bf6b45a..0c26da2 100644 --- a/README.rst +++ b/README.rst @@ -48,11 +48,11 @@ Basic MailBox, MailBoxUnencrypted - for create mailbox instance. -MailBox.box - imaplib.IMAP4/IMAP4_SSL client instance. +BaseMailBox.box - imaplib.IMAP4/IMAP4_SSL client instance. -MailBox.login, MailBox.xoauth2 - authentication functions +BaseMailBox.login, MailBox.xoauth2 - authentication functions -MailBox.fetch - email message generator, first searches email ids by criteria, then fetch and yields emails by one: +BaseMailBox.fetch - email message generator, first searches email ids by criteria, then fetch and yields emails by one: * *criteria* = 'ALL', message search criteria, `docs <#search-criteria>`_ * *charset* = 'US-ASCII', indicates charset of the strings that appear in the search criteria. 
See rfc2978 @@ -63,6 +63,8 @@ MailBox.fetch - email message generator, first searches email ids by criteria, t * *reverse* = False, in order from the larger date to the smaller * *headers_only* = False, get only email headers (without text, html, attachments) +BaseMailBox instance has attribute mailbox.last_search_ids, it fills after each fetch - msg ids from search command + Email attributes ^^^^^^^^^^^^^^^^ @@ -86,9 +88,12 @@ Message and Attachment public attributes are cached by functools.lru_cache msg.headers # dict: {'Received': ('from 1.m.ru', 'from 2.m.ru'), 'AntiVirus': ('Clean',)} for att in msg.attachments: # list: [Attachment] - att.filename # str: 'cat.jpg' - att.content_type # str: 'image/jpeg' - att.payload # bytes: b'\xff\xd8\xff\xe0\' + att.filename # str: 'cat.jpg' + att.payload # bytes: b'\xff\xd8\xff\xe0\' + att.content_id # str: 'part45.06020801.00060008@mail.ru' + att.content_type # str: 'image/jpeg' + att.content_disposition # str: 'inline' + att.part # email.message.Message: original object msg.obj # email.message.Message: original object msg.from_values # dict or None: {'email': 'im@ya.ru', 'name': 'Ya 你', 'full': 'Ya 你 '} @@ -291,6 +296,8 @@ Big thanks to people who helped develop this library: `amarkham09 `_, `nixCodeX `_, `backelj `_, -`ohayak `_ +`ohayak `_, +`mwherman95926 `_, +`andyfensham `_ 💰 You may `thank me `_, if this library helped you. diff --git a/docs/dev_notes.txt b/docs/dev_notes.txt index 7450048..3af9e9c 100644 --- a/docs/dev_notes.txt +++ b/docs/dev_notes.txt @@ -13,13 +13,20 @@ False False _payload (*) the payload is returned as-is. -icons -===== -📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩 - - many uids command ================= error example for 22000 uids in yandex: imap_tools.errors.MailboxCopyError: Response status "OK" expected, but "NO" received. 
Data: [b'command or literal size is too large'] + + +email lib +========= +Доступен email.message.EmailMessage вместо Message, есть смысл туда заглядывать по логике разбора +self.obj = email.message_from_bytes(raw_message_data, _class=EmailMessage) можно так, пока не вижу смысла +утилиты https://docs.python.org/release/3.8.1/library/email.utils.html + + +icons +===== +📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩 diff --git a/docs/release_notes.rst b/docs/release_notes.rst index a9ba3f9..4fe6998 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -1,3 +1,13 @@ +0.22.0 +====== +* Added Attachment.content_id +* Added Attachment.content_disposition +* Attachment._part -> Attachment.part +* email.utils.parsedate_to_datetime used in utils.parse_email_date +* BaseMailBox.fetch limit argument now can receive slice object +* BaseMailBox instance now has attribute mailbox.last_search_ids, it is filled after each fetch with msg ids from the search command +* __init__.py refined + 0.21.0 ====== * Added MailBox.xoauth2 - authentication using OAuth 2.0 mechanism diff --git a/docs/todo.txt b/docs/todo.txt index 7ced31e..e69de29 100644 --- a/docs/todo.txt +++ b/docs/todo.txt @@ -1,2 +0,0 @@ - -check https://docs.python.org/release/3.8.1/library/email.utils.html diff --git a/imap_tools/__init__.py b/imap_tools/__init__.py index 9e6fec0..b6c65b1 100644 --- a/imap_tools/__init__.py +++ b/imap_tools/__init__.py @@ -1,7 +1,7 @@ from .query import Q, AND, OR, NOT, Header, A, O, N, H -from .mailbox import * -from .message import * -from .folder import * +from .mailbox import BaseMailBox, MailBox, MailBoxUnencrypted +from .message import MailMessage, Attachment, MailMessageFlags +from .folder import MailBoxFolderManager, MailBoxFolderStatusOptions from .errors import * -__version__ = '0.21.0' +__version__ = '0.22.0' diff --git a/imap_tools/mailbox.py b/imap_tools/mailbox.py index 384c24e..ebe7d80 100644 --- a/imap_tools/mailbox.py +++ b/imap_tools/mailbox.py @@ -21,6 +21,7 @@ class BaseMailBox: def 
__init__(self): self.folder = None # folder manager self.login_result = None + self.last_search_ids = [] self.box = self._get_mailbox_client() def _get_mailbox_client(self) -> imaplib.IMAP4: @@ -44,13 +45,14 @@ def _criteria_encoder(criteria: str or bytes, charset: str) -> str or bytes: """logic for encoding search criteria by default""" return criteria if type(criteria) is bytes else str(criteria).encode(charset) - def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int = None, + def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int or slice = None, miss_defect=True, miss_no_uid=True, mark_seen=True, reverse=False, headers_only=False) -> iter: """ Mail message generator in current folder by search criteria :param criteria: message search criteria (see examples at ./doc/imap_search_criteria.txt) :param charset: IANA charset, indicates charset of the strings that appear in the search criteria. See rfc2978 - :param limit: limit number of read emails, useful for actions with a large number of messages, like "move" + :param limit: int | slice - limit number of read emails | slice emails range for read + useful for actions with a large number of messages, like "move" | paging :param miss_defect: miss emails with defects :param miss_no_uid: miss emails without uid :param mark_seen: mark emails as seen on fetch @@ -61,11 +63,11 @@ def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit search_result = self.box.search(charset, self._criteria_encoder(criteria, charset)) check_command_status(search_result, MailboxSearchError) # first element is string with email numbers through the gap - message_id_set = search_result[1][0].decode().split(' ') if search_result[1][0] else () + self.last_search_ids = search_result[1][0].decode().split(' ') if search_result[1][0] else [] message_parts = "(BODY{}[{}] UID FLAGS)".format('' if mark_seen else '.PEEK', 'HEADER' if headers_only else '') - for i, 
message_id in enumerate((reversed if reverse else iter)(message_id_set)): - if limit and i >= limit: - break + limit_range = slice(0, limit) if type(limit) is int else limit or slice(None) + assert type(limit_range) is slice + for message_id in (reversed if reverse else iter)(self.last_search_ids[limit_range]): # get message by id fetch_result = self.box.fetch(message_id, message_parts) check_command_status(fetch_result, MailboxFetchError) diff --git a/imap_tools/message.py b/imap_tools/message.py index 1a7d1dd..557a9d2 100644 --- a/imap_tools/message.py +++ b/imap_tools/message.py @@ -222,7 +222,7 @@ class Attachment: """An attachment for a MailMessage""" def __init__(self, part): - self._part = part + self.part = part @property @lru_cache() @@ -234,27 +234,37 @@ def filename(self) -> str: forwarded message (Content-Type = message/rfc822) :return: filename """ - filename = self._part.get_filename() or '' - return ''.join(decode_value(*part) for part in decode_header(filename)) + filename = self.part.get_filename() or '' + return ''.join(decode_value(*head_part) for head_part in decode_header(filename)) + + @property + @lru_cache() + def content_id(self) -> str: + return self.part.get('Content-ID', '').lstrip('<').rstrip('>') @property @lru_cache() def content_type(self) -> str: - return self._part.get_content_type() + return self.part.get_content_type() + + @property + @lru_cache() + def content_disposition(self) -> str: + return self.part.get_content_disposition() or '' @property @lru_cache() def payload(self) -> bytes: - payload = self._part.get_payload(decode=True) + payload = self.part.get_payload(decode=True) if payload: return payload # multipart payload, such as .eml (see get_payload) - multipart_payload = self._part.get_payload() + multipart_payload = self.part.get_payload() if isinstance(multipart_payload, list): for payload_item in multipart_payload: if hasattr(payload_item, 'as_bytes'): payload_item_bytes = payload_item.as_bytes() # noqa - cte = 
str(self._part.get('content-transfer-encoding', '')).lower().strip() + cte = str(self.part.get('content-transfer-encoding', '')).lower().strip() if cte == 'base64': return base64.b64decode(payload_item_bytes) elif cte in ('7bit', '8bit', 'quoted-printable', 'binary', ''): diff --git a/imap_tools/utils.py b/imap_tools/utils.py index c20428c..75fcb1c 100644 --- a/imap_tools/utils.py +++ b/imap_tools/utils.py @@ -1,7 +1,7 @@ import re import inspect import datetime -from email.utils import getaddresses +from email.utils import getaddresses, parsedate_to_datetime from email.header import decode_header, Header SHORT_MONTH_NAMES = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') @@ -85,7 +85,14 @@ def parse_email_addresses(raw_header: str or Header) -> (dict,): def parse_email_date(value: str) -> datetime.datetime: - """Parsing the date described in rfc2822""" + """ + Parsing the date described in rfc2822 + 1900-1-1 for unparsed, may be naive or with tzinfo + """ + try: + return parsedate_to_datetime(value) + except Exception: # noqa + pass match = re.search(r'(?P\d{1,2}\s+(' + '|'.join(SHORT_MONTH_NAMES) + r')\s+\d{4})\s+' + r'(?P