Skip to content

Commit

Permalink
Attachment content_id content_disposition _part->part,
Browse files Browse the repository at this point in the history
email.utils.parsedate_to_datetime used in utils.parse_email_addresses,
BaseMailBox.fetch limit argument now can receive slice object,
mailbox.last_search_ids
  • Loading branch information
ikvk committed Sep 17, 2020
1 parent 5867f9b commit b4f78ea
Show file tree
Hide file tree
Showing 11 changed files with 265 additions and 39 deletions.
21 changes: 14 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ Basic
MailBox, MailBoxUnencrypted - for creating a mailbox instance.

MailBox.box - imaplib.IMAP4/IMAP4_SSL client instance.
BaseMailBox.box - imaplib.IMAP4/IMAP4_SSL client instance.

MailBox.login, MailBox.xoauth2 - authentication functions
BaseMailBox.login, MailBox.xoauth2 - authentication functions

MailBox.fetch - email message generator, first searches email ids by criteria, then fetch and yields emails by one:
BaseMailBox.fetch - email message generator, first searches email ids by criteria, then fetches and yields emails one by one:

* *criteria* = 'ALL', message search criteria, `docs <#search-criteria>`_
* *charset* = 'US-ASCII', indicates charset of the strings that appear in the search criteria. See rfc2978
Expand All @@ -63,6 +63,8 @@ MailBox.fetch - email message generator, first searches email ids by criteria, t
* *reverse* = False, in order from the larger date to the smaller
* *headers_only* = False, get only email headers (without text, html, attachments)

A BaseMailBox instance has the attribute mailbox.last_search_ids; it is filled after each fetch with the msg ids returned by the search command

Email attributes
^^^^^^^^^^^^^^^^

Expand All @@ -86,9 +88,12 @@ Message and Attachment public attributes are cached by functools.lru_cache
msg.headers # dict: {'Received': ('from 1.m.ru', 'from 2.m.ru'), 'AntiVirus': ('Clean',)}
for att in msg.attachments: # list: [Attachment]
att.filename # str: 'cat.jpg'
att.content_type # str: 'image/jpeg'
att.payload # bytes: b'\xff\xd8\xff\xe0\'
att.filename # str: 'cat.jpg'
att.payload # bytes: b'\xff\xd8\xff\xe0'
att.content_id # str: '[email protected]'
att.content_type # str: 'image/jpeg'
att.content_disposition # str: 'inline'
att.part # email.message.Message: original object
msg.obj # email.message.Message: original object
msg.from_values # dict or None: {'email': '[email protected]', 'name': 'Ya 你', 'full': 'Ya 你 <[email protected]>'}
Expand Down Expand Up @@ -291,6 +296,8 @@ Big thanks to people who helped develop this library:
`amarkham09 <https://github.com/amarkham09>`_,
`nixCodeX <https://github.com/nixCodeX>`_,
`backelj <https://github.com/backelj>`_,
`ohayak <https://github.com/ohayak>`_
`ohayak <https://github.com/ohayak>`_,
`mwherman95926 <https://github.com/mwherman95926>`_,
`andyfensham <https://github.com/andyfensham>`_

💰 You may `thank me <https://github.com/ikvk/imap_tools/blob/master/docs/donate.rst>`_, if this library helped you.
17 changes: 12 additions & 5 deletions docs/dev_notes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@ False False _payload (*)
the payload is returned as-is.


icons
=====
📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩


many uids command
=================
error example for 22000 uids in yandex:
imap_tools.errors.MailboxCopyError: Response status "OK" expected, but "NO" received.
Data: [b'command or literal size is too large']


email lib
=========
Доступен email.message.EmailMessage вместо Message, есть смысл туда заглядывать по логике разбора
self.obj = email.message_from_bytes(raw_message_data, _class=EmailMessage) можно так, пока не вижу смысла
утилиты https://docs.python.org/release/3.8.1/library/email.utils.html


icons
=====
📨 📬 📪 📭 📫 ✉ 📧 🖂 🖃 🖅 📩
10 changes: 10 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
0.22.0
======
* Added Attachment.content_id
* Added Attachment.content_disposition
* Attachment._part -> Attachment.part
* email.utils.parsedate_to_datetime used in utils.parse_email_date
* BaseMailBox.fetch limit argument can now receive a slice object
* BaseMailBox instance now has the attribute mailbox.last_search_ids; it is filled after each fetch with the msg ids returned by the search command
* __init__.py refined

0.21.0
======
* Added MailBox.xoauth2 - authentication using OAuth 2.0 mechanism
Expand Down
2 changes: 0 additions & 2 deletions docs/todo.txt
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@

check https://docs.python.org/release/3.8.1/library/email.utils.html
8 changes: 4 additions & 4 deletions imap_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .query import Q, AND, OR, NOT, Header, A, O, N, H
from .mailbox import *
from .message import *
from .folder import *
from .mailbox import BaseMailBox, MailBox, MailBoxUnencrypted
from .message import MailMessage, Attachment, MailMessageFlags
from .folder import MailBoxFolderManager, MailBoxFolderStatusOptions
from .errors import *

__version__ = '0.21.0'
__version__ = '0.22.0'
14 changes: 8 additions & 6 deletions imap_tools/mailbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class BaseMailBox:
def __init__(self):
self.folder = None # folder manager
self.login_result = None
self.last_search_ids = []
self.box = self._get_mailbox_client()

def _get_mailbox_client(self) -> imaplib.IMAP4:
Expand All @@ -44,13 +45,14 @@ def _criteria_encoder(criteria: str or bytes, charset: str) -> str or bytes:
"""logic for encoding search criteria by default"""
return criteria if type(criteria) is bytes else str(criteria).encode(charset)

def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int = None,
def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit: int or slice = None,
miss_defect=True, miss_no_uid=True, mark_seen=True, reverse=False, headers_only=False) -> iter:
"""
Mail message generator in current folder by search criteria
:param criteria: message search criteria (see examples at ./doc/imap_search_criteria.txt)
:param charset: IANA charset, indicates charset of the strings that appear in the search criteria. See rfc2978
:param limit: limit number of read emails, useful for actions with a large number of messages, like "move"
:param limit: int | slice - limit number of read emails | slice emails range for read
useful for actions with a large number of messages, like "move" | paging
:param miss_defect: miss emails with defects
:param miss_no_uid: miss emails without uid
:param mark_seen: mark emails as seen on fetch
Expand All @@ -61,11 +63,11 @@ def fetch(self, criteria: str or bytes = 'ALL', charset: str = 'US-ASCII', limit
search_result = self.box.search(charset, self._criteria_encoder(criteria, charset))
check_command_status(search_result, MailboxSearchError)
# first element is string with email numbers through the gap
message_id_set = search_result[1][0].decode().split(' ') if search_result[1][0] else ()
self.last_search_ids = search_result[1][0].decode().split(' ') if search_result[1][0] else []
message_parts = "(BODY{}[{}] UID FLAGS)".format('' if mark_seen else '.PEEK', 'HEADER' if headers_only else '')
for i, message_id in enumerate((reversed if reverse else iter)(message_id_set)):
if limit and i >= limit:
break
limit_range = slice(0, limit) if type(limit) is int else limit or slice(None)
assert type(limit_range) is slice
for message_id in (reversed if reverse else iter)(self.last_search_ids[limit_range]):
# get message by id
fetch_result = self.box.fetch(message_id, message_parts)
check_command_status(fetch_result, MailboxFetchError)
Expand Down
24 changes: 17 additions & 7 deletions imap_tools/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class Attachment:
"""An attachment for a MailMessage"""

def __init__(self, part):
self._part = part
self.part = part

@property
@lru_cache()
Expand All @@ -234,27 +234,37 @@ def filename(self) -> str:
forwarded message (Content-Type = message/rfc822)
:return: filename
"""
filename = self._part.get_filename() or ''
return ''.join(decode_value(*part) for part in decode_header(filename))
filename = self.part.get_filename() or ''
return ''.join(decode_value(*head_part) for head_part in decode_header(filename))

@property
@lru_cache()
def content_id(self) -> str:
return self.part.get('Content-ID', '').lstrip('<').rstrip('>')

@property
@lru_cache()
def content_type(self) -> str:
return self._part.get_content_type()
return self.part.get_content_type()

@property
@lru_cache()
def content_disposition(self) -> str:
return self.part.get_content_disposition() or ''

@property
@lru_cache()
def payload(self) -> bytes:
payload = self._part.get_payload(decode=True)
payload = self.part.get_payload(decode=True)
if payload:
return payload
# multipart payload, such as .eml (see get_payload)
multipart_payload = self._part.get_payload()
multipart_payload = self.part.get_payload()
if isinstance(multipart_payload, list):
for payload_item in multipart_payload:
if hasattr(payload_item, 'as_bytes'):
payload_item_bytes = payload_item.as_bytes() # noqa
cte = str(self._part.get('content-transfer-encoding', '')).lower().strip()
cte = str(self.part.get('content-transfer-encoding', '')).lower().strip()
if cte == 'base64':
return base64.b64decode(payload_item_bytes)
elif cte in ('7bit', '8bit', 'quoted-printable', 'binary', ''):
Expand Down
11 changes: 9 additions & 2 deletions imap_tools/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
import inspect
import datetime
from email.utils import getaddresses
from email.utils import getaddresses, parsedate_to_datetime
from email.header import decode_header, Header

SHORT_MONTH_NAMES = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
Expand Down Expand Up @@ -85,7 +85,14 @@ def parse_email_addresses(raw_header: str or Header) -> (dict,):


def parse_email_date(value: str) -> datetime.datetime:
"""Parsing the date described in rfc2822"""
"""
Parsing the date described in rfc2822
1900-1-1 for unparsed, may be naive or with tzinfo
"""
try:
return parsedate_to_datetime(value)
except Exception: # noqa
pass
match = re.search(r'(?P<date>\d{1,2}\s+(' + '|'.join(SHORT_MONTH_NAMES) + r')\s+\d{4})\s+' +
r'(?P<time>\d{1,2}:\d{1,2}(:\d{1,2})?)\s*' +
r'(?P<zone_sign>[+-])?(?P<zone>\d{4})?', value)
Expand Down
Loading

0 comments on commit b4f78ea

Please sign in to comment.