Attachment content_id content_disposition _part->part,

email.utils.parsedate_to_datetime used in utils.parse_email_date, BaseMailBox.fetch limit argument can now receive a slice object, added mailbox.last_search_ids
ikvk · Sep 17, 2020 · b4f78ea · b4f78ea
1 parent 5867f9b
commit b4f78ea
Show file tree

Hide file tree

Showing 11 changed files with 265 additions and 39 deletions.
diff --git a/README.rst b/README.rst
@@ -48,11 +48,11 @@ Basic
 
 MailBox, MailBoxUnencrypted - for creating a mailbox instance.
 
-MailBox.box - imaplib.IMAP4/IMAP4_SSL client instance.
+BaseMailBox.box - imaplib.IMAP4/IMAP4_SSL client instance.
 
-MailBox.login, MailBox.xoauth2 - authentication functions
+BaseMailBox.login, MailBox.xoauth2 - authentication functions
 
-MailBox.fetch - email message generator, first searches email ids by criteria, then fetch and yields emails by one:
+BaseMailBox.fetch - email message generator, first searches email ids by criteria, then fetches and yields emails one by one:
 
 * *criteria* = 'ALL', message search criteria, `docs <#search-criteria>`_
 * *charset* = 'US-ASCII', indicates charset of the strings that appear in the search criteria. See rfc2978
@@ -63,6 +63,8 @@ MailBox.fetch - email message generator, first searches email ids by criteria, t
 * *reverse* = False, in order from the larger date to the smaller
 * *headers_only* = False, get only email headers (without text, html, attachments)
 
+A BaseMailBox instance has the attribute mailbox.last_search_ids; it is filled on each fetch with the msg ids returned by the search command
+
 Email attributes
 ^^^^^^^^^^^^^^^^
 
@@ -86,9 +88,12 @@ Message and Attachment public attributes are cached by functools.lru_cache
         msg.headers          # dict: {'Received': ('from 1.m.ru', 'from 2.m.ru'), 'AntiVirus': ('Clean',)}
 
         for att in msg.attachments:  # list: [Attachment]
-            att.filename         # str: 'cat.jpg'
-            att.content_type     # str: 'image/jpeg'
-            att.payload          # bytes: b'\xff\xd8\xff\xe0\'
+            att.filename             # str: 'cat.jpg'
+            att.payload              # bytes: b'\xff\xd8\xff\xe0\'
+            att.content_id           # str: '[email protected]'
+            att.content_type         # str: 'image/jpeg'
+            att.content_disposition  # str: 'inline'
+            att.part                 # email.message.Message: original object
 
         msg.obj              # email.message.Message: original object
         msg.from_values      # dict or None: {'email': '[email protected]', 'name': 'Ya 你', 'full': 'Ya 你 <[email protected]>'}
@@ -291,6 +296,8 @@ Big thanks to people who helped develop this library:
 `amarkham09 <https://github.com/amarkham09>`_,
 `nixCodeX <https://github.com/nixCodeX>`_,
 `backelj <https://github.com/backelj>`_,
-`ohayak <https://github.com/ohayak>`_
+`ohayak <https://github.com/ohayak>`_,
+`mwherman95926 <https://github.com/mwherman95926>`_,
+`andyfensham <https://github.com/andyfensham>`_
 
 💰 You may `thank me <https://github.com/ikvk/imap_tools/blob/master/docs/donate.rst>`_, if this library helped you.
diff --git a/docs/dev_notes.txt b/docs/dev_notes.txt
@@ -13,13 +13,20 @@ False   False         _payload (*)
     the payload is returned as-is.
 
 
-icons
-=====
-📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩
-
-
 many uids command
 =================
 error example for 22000 uids in yandex:
     imap_tools.errors.MailboxCopyError: Response status "OK" expected, but "NO" received.
     Data: [b'command or literal size is too large']
+
+
+email lib
+=========
+Доступен email.message.EmailMessage вместо Message, есть смысл туда заглядывать по логике разбора
+self.obj = email.message_from_bytes(raw_message_data, _class=EmailMessage) можно так, пока не вижу смысла
+утилиты https://docs.python.org/release/3.8.1/library/email.utils.html
+
+
+icons
+=====
+📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
@@ -1,3 +1,13 @@
+0.22.0
+======
+* Added Attachment.content_id
+* Added Attachment.content_disposition
+* Attachment._part -> Attachment.part
+* email.utils.parsedate_to_datetime used in utils.parse_email_date
+* BaseMailBox.fetch limit argument can now receive a slice object
+* BaseMailBox instance now has the attribute mailbox.last_search_ids; it is filled on each fetch with the msg ids returned by the search command
+* __init__.py refined
+
 0.21.0
 ======
 * Added MailBox.xoauth2 - authentication using OAuth 2.0 mechanism

diff --git a/docs/todo.txt b/docs/todo.txt
@@ -1,2 +0,0 @@
-
-check https://docs.python.org/release/3.8.1/library/email.utils.html

diff --git a/imap_tools/__init__.py b/imap_tools/__init__.py
@@ -1,7 +1,7 @@
 from .query import Q, AND, OR, NOT, Header, A, O, N, H
-from .mailbox import *
-from .message import *
-from .folder import *
+from .mailbox import BaseMailBox, MailBox, MailBoxUnencrypted
+from .message import MailMessage, Attachment, MailMessageFlags
+from .folder import MailBoxFolderManager, MailBoxFolderStatusOptions
 from .errors import *
 
-__version__ = '0.21.0'
+__version__ = '0.22.0'
diff --git a/imap_tools/mailbox.py b/imap_tools/mailbox.py
@@ -21,6 +21,7 @@ class BaseMailBox:
     def __init__(self):
         self.folder = None  # folder manager
         self.login_result = None
+        self.last_search_ids = []
         self.box = self._get_mailbox_client()
 
     def _get_mailbox_client(self) -> imaplib.IMAP4:
@@ -44,13 +45,14 @@ def _criteria_encoder(criteria: str or bytes, charset: str) -> str or bytes:
         """logic for encoding search criteria by default"""
         return criteria if type(criteria) is bytes else str(criteria).encode(charset)
 
-    def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int = None,
+    def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int or slice = None,
               miss_defect=True, miss_no_uid=True, mark_seen=True, reverse=False, headers_only=False) -> iter:
         """
         Mail message generator in current folder by search criteria
         :param criteria: message search criteria (see examples at ./doc/imap_search_criteria.txt)
         :param charset: IANA charset, indicates charset of the strings that appear in the search criteria. See rfc2978
-        :param limit: limit number of read emails, useful for actions with a large number of messages, like "move"
+        :param limit: int | slice - limit number of read emails | slice emails range for read
+                      useful for actions with a large number of messages, like "move" | paging
         :param miss_defect: miss emails with defects
         :param miss_no_uid: miss emails without uid
         :param mark_seen: mark emails as seen on fetch
@@ -61,11 +63,11 @@ def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit
         search_result = self.box.search(charset, self._criteria_encoder(criteria, charset))
         check_command_status(search_result, MailboxSearchError)
         # first element is string with email numbers through the gap
-        message_id_set = search_result[1][0].decode().split(' ') if search_result[1][0] else ()
+        self.last_search_ids = search_result[1][0].decode().split(' ') if search_result[1][0] else []
         message_parts = "(BODY{}[{}] UID FLAGS)".format('' if mark_seen else '.PEEK', 'HEADER' if headers_only else '')
-        for i, message_id in enumerate((reversed if reverse else iter)(message_id_set)):
-            if limit and i >= limit:
-                break
+        limit_range = slice(0, limit) if type(limit) is int else limit or slice(None)
+        assert type(limit_range) is slice
+        for message_id in (reversed if reverse else iter)(self.last_search_ids[limit_range]):
             # get message by id
             fetch_result = self.box.fetch(message_id, message_parts)
             check_command_status(fetch_result, MailboxFetchError)

diff --git a/imap_tools/message.py b/imap_tools/message.py
@@ -222,7 +222,7 @@ class Attachment:
     """An attachment for a MailMessage"""
 
     def __init__(self, part):
-        self._part = part
+        self.part = part
 
     @property
     @lru_cache()
@@ -234,27 +234,37 @@ def filename(self) -> str:
             forwarded message (Content-Type = message/rfc822)
         :return: filename
         """
-        filename = self._part.get_filename() or ''
-        return ''.join(decode_value(*part) for part in decode_header(filename))
+        filename = self.part.get_filename() or ''
+        return ''.join(decode_value(*head_part) for head_part in decode_header(filename))
+
+    @property
+    @lru_cache()
+    def content_id(self) -> str:
+        return self.part.get('Content-ID', '').lstrip('<').rstrip('>')
 
     @property
     @lru_cache()
     def content_type(self) -> str:
-        return self._part.get_content_type()
+        return self.part.get_content_type()
+
+    @property
+    @lru_cache()
+    def content_disposition(self) -> str:
+        return self.part.get_content_disposition() or ''
 
     @property
     @lru_cache()
     def payload(self) -> bytes:
-        payload = self._part.get_payload(decode=True)
+        payload = self.part.get_payload(decode=True)
         if payload:
             return payload
         # multipart payload, such as .eml (see get_payload)
-        multipart_payload = self._part.get_payload()
+        multipart_payload = self.part.get_payload()
         if isinstance(multipart_payload, list):
             for payload_item in multipart_payload:
                 if hasattr(payload_item, 'as_bytes'):
                     payload_item_bytes = payload_item.as_bytes()  # noqa
-                    cte = str(self._part.get('content-transfer-encoding', '')).lower().strip()
+                    cte = str(self.part.get('content-transfer-encoding', '')).lower().strip()
                     if cte == 'base64':
                         return base64.b64decode(payload_item_bytes)
                     elif cte in ('7bit', '8bit', 'quoted-printable', 'binary', ''):

diff --git a/imap_tools/utils.py b/imap_tools/utils.py
@@ -1,7 +1,7 @@
 import re
 import inspect
 import datetime
-from email.utils import getaddresses
+from email.utils import getaddresses, parsedate_to_datetime
 from email.header import decode_header, Header
 
 SHORT_MONTH_NAMES = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
@@ -85,7 +85,14 @@ def parse_email_addresses(raw_header: str or Header) -> (dict,):
 
 
 def parse_email_date(value: str) -> datetime.datetime:
-    """Parsing the date described in rfc2822"""
+    """
+    Parsing the date described in rfc2822
+    1900-1-1 for unparsed, may be naive or with tzinfo
+    """
+    try:
+        return parsedate_to_datetime(value)
+    except Exception:  # noqa
+        pass
     match = re.search(r'(?P<date>\d{1,2}\s+(' + '|'.join(SHORT_MONTH_NAMES) + r')\s+\d{4})\s+' +
                       r'(?P<time>\d{1,2}:\d{1,2}(:\d{1,2})?)\s*' +
                       r'(?P<zone_sign>[+-])?(?P<zone>\d{4})?', value)
Original file line number	Diff line number	Diff line change
		@@ -1,2 +0,0 @@

		check https://docs.python.org/release/3.8.1/library/email.utils.html