From 1431e03e6754c804c403ed7b9300a317a7412aaa Mon Sep 17 00:00:00 2001 From: Doran Smestad Date: Wed, 2 Sep 2020 19:08:26 -0400 Subject: [PATCH 1/3] Add STARTTLS modes, source-mailbox flag, IMAP IDLE for destination, progress messages, and error handling --- README.md | 5 +- pymap-copy.py | 246 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 205 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 4938d1c..1f55953 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ SSL/TLS support i wrote my own python-based version. I hope you like it! ## Features - Copies folders and subfolders - Copies mails even with flags (seen, answered, ...) -- Connecting via SSL/TLS (by default) +- Connecting via SSL/TLS (by default), or optionally use STARTTLS - Supports incremental copy (copies only new mails/folders) - User specific redirections (with wildcard support) - Auto subscribe new folders (by default) @@ -85,6 +85,9 @@ You could change the buffer size with `-b`/`--buffer-size` to increase the downl If you know the source mailbox has a lot of small mails use a higher size. In the case of lager mails use a lower size to counter timeouts. For bad internet connections you also should use a lower sized buffer. +#### Use of source-mailbox argument +As a further optimization you can target specific mailboxes you want to sync to the destination (versus the default of everything). Use `--source-mailbox ` to only sync that one mailbox. The flag can be specified multiple times to indicate multiple mailboxes to sync. The flag is NOT recursive and will only sync the contents of the folder. + ## Microsoft Exchange Server IMAP bug If your destination is an Exchange Server (EX) you'll properly get an `bad command` exception while coping some mails. This happens because the EX analyse (and in some cases modify) new mails. 
There is a bug in this lookup process (since diff --git a/pymap-copy.py b/pymap-copy.py index acd6110..ed9a280 100755 --- a/pymap-copy.py +++ b/pymap-copy.py @@ -32,6 +32,8 @@ default=False) parser.add_argument('--source-no-ssl', help='use this option if the destination server does not support TLS/SSL', action="store_true") +parser.add_argument('--source-use-starttls', help='use this option when connecting to an optional tls port (143) but require starttls', + action="store_true", default=False) parser.add_argument('--source-port', help='the IMAP port of the source server (default: 993)', nargs='?', default=993, type=int) parser.add_argument('--source-root', help='defines the source root (case sensitive)', nargs='?', default='', type=str) @@ -41,8 +43,14 @@ required=True) parser.add_argument('--destination-no-ssl', help='use this option if the destination server does not support TLS/SSL', action="store_true", default=False) +parser.add_argument('--destination-use-starttls', help='use this option when connecting to an optional tls port (143) but require starttls', + action="store_true", default=False) +parser.add_argument('--destination-use-idle', help='use IMAP IDLE to keep the destination connection alive (important for massive source mailboxes)', + action="store_true", default=True) parser.add_argument('--destination-port', help='the IMAP port of the destination server (default: 993)', nargs='?', default=993, type=int) +parser.add_argument('--source-mailbox', help='if specified, only sync this folder (case sensitive). 
Can be repeated ' + 'multiple times to source multiple mailboxes.', action='append', nargs='?', default=list(), type=str) parser.add_argument('--destination-root', help='defines the destination root (case sensitive)', nargs='?', default='', type=str) parser.add_argument('--destination-root-merge', help='ignores the destination root if the folder is already part of it', @@ -52,7 +60,6 @@ args = parser.parse_args() - def colorize(s, color=None, bold=False, clear=False): colors = {'red': '\x1b[31m', 'green': '\x1b[32m', @@ -109,25 +116,62 @@ def colorize(s, color=None, bold=False, clear=False): try: print('\nConnecting source : {}, '.format(args.source_server), end='', flush=True) - source = IMAPClient(host=args.source_server, port=args.source_port, ssl=not args.source_no_ssl, - ssl_context=ssl_context) - print(colorize('OK', color='green')) -except Exception as e: - print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) - error = True -try: - print('Connecting destination : {}, '.format(args.destination_server), end='', flush=True) - destination = IMAPClient(host=args.destination_server, port=args.destination_port, ssl=not args.destination_no_ssl, - ssl_context=ssl_context) - print(colorize('OK', color='green')) + # if we are using starttls, then we must connect without an SSL'd connection + if args.source_use_starttls: + use_ssl = False + else: + use_ssl = not args.source_no_ssl + + source = IMAPClient(host=args.source_server, port=args.source_port, ssl=use_ssl, ssl_context=ssl_context) + + if args.source_use_starttls: + source.starttls(ssl_context=ssl_context) + print(colorize('OK - STARTTLS', color='green')) + + elif not args.source_use_starttls and not use_ssl: + print(colorize('OK', color='green'), "-", colorize('NOT ENCRYPTED', color='red')) + + else: # default is ssl + print(colorize('OK', color='green')) + except Exception as e: print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) error = True 
-if error: - print('\nAbort!') - exit() +def connect_to_destination(args): + error = False + try: + print('Connecting destination : {}, '.format(args.destination_server), end='', flush=True) + + # if we are using starttls, then we must connect without an SSL'd connection + if args.destination_use_starttls: + use_ssl = False + else: + use_ssl = not args.destination_no_ssl + + destination = IMAPClient(host=args.destination_server, port=args.destination_port, ssl=use_ssl, ssl_context=ssl_context) + + if args.destination_use_starttls: + destination.starttls(ssl_context=ssl_context) + print(colorize('OK - STARTTLS', color='green')) + + elif not args.destination_use_starttls and not use_ssl: + print(colorize('OK', color='green'), "-", colorize('NOT ENCRYPTED', color='red')) + + else: # default is ssl + print(colorize('OK', color='green')) + + except Exception as e: + print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) + error = True + + if error: + print('\nAbort!') + exit() + + return destination +destination = connect_to_destination(args) print() @@ -140,18 +184,24 @@ def colorize(s, color=None, bold=False, clear=False): error = True print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) -try: - #: Login destination - print('Login destination : {}, '.format(args.destination_user), end='', flush=True) - destination.login(args.destination_user, args.destination_pass) - print(colorize('OK', color='green')) -except (exceptions.LoginError, IMAP4.error) as e: - error = True - print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) +def login_to_destination(destination, args): + error = False + try: + #: Login destination + print('Login destination : {}, '.format(args.destination_user), end='', flush=True) + destination.login(args.destination_user, args.destination_pass) + print(colorize('OK', color='green')) + except (exceptions.LoginError, IMAP4.error) as e: + error = True + 
print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) -if error: - print('\nAbort!') - exit() + if error: + print('\nAbort!') + exit() +login_to_destination(destination, args) + +destination = connect_to_destination(args) +login_to_destination(destination, args) print() @@ -167,7 +217,8 @@ def colorize(s, color=None, bold=False, clear=False): #: get quota from destination print('Getting destination quota : ', end='', flush=True) -if destination.has_capability('QUOTA'): +if destination.has_capability('QUOTA') and not args.ignore_quota: + print(destination.get_quota(), flush=True) destination_quota = destination.get_quota()[0] print('{}/{} ({:.0f}%)'.format(beautysized(destination_quota.usage*1000), beautysized(destination_quota.limit*1000), @@ -196,9 +247,52 @@ def colorize(s, color=None, bold=False, clear=False): print() +# attempt to drop the destination connection into idle mode, this hopefully will keep the connection +# open for us and prevent a logout/timeout from happening while we parse particularly large source mailboxes +did_idle_fail = True +def start_imap_idle(client): + try: + if args.destination_use_idle: + # must select a folder before invoking idle. we simply select the first folder to idle on + flags, delimiter, name = destination.list_folders(args.destination_root)[0] + destination.select_folder(name, readonly=True) + client.idle() + did_idle_fail = False + except Exception as e: + did_idle_fail = True + print(f'WARNING: Unable to idle on destination imap connection. Will continue without it. (got exception {type(e)}: {e})') + +def end_imap_idle(client): + """ + Rather simple: stop idle mode to allow normal commands (if idle did not fail) + """ + if not did_idle_fail: + client.idle_done() + +def restart_imap_idle(client): + """ + Restart the idle session so we don't timeout. 
intended to be invoked by long running code where needed to keep the + connection alive + """ + if not did_idle_fail: + end_imap_idle(client) + start_imap_idle(client) + +if args.destination_use_idle: + start_imap_idle(destination) + restart_imap_idle(destination) +else: + print(f' Skipping destination IMAP IDLE command') + + #: get source folders -print('Getting source folders : ', end='', flush=True) +print('Getting source folders : ', flush=True) for flags, delimiter, name in source.list_folders(args.source_root): + print(f' > Processing folder "{name}"...', flush=True) + if args.source_mailbox: + if name not in args.source_mailbox: + print(f' >> Skipping folder "{name}" as it does not in the source mailbox restrictions of "{args.source_mailbox}"') + continue source.select_folder(name, readonly=True) mails = source.search() @@ -216,7 +310,10 @@ def colorize(s, color=None, bold=False, clear=False): db['source']['folders'][name]['buffer'].append(mails[:args.buffer_size]) for mail_id, data in source.fetch(mails[:args.buffer_size], ['RFC822.SIZE', 'ENVELOPE']).items(): - if data[b'ENVELOPE'].subject: + if b'ENVELOPE' not in data: + print(f' Encountered message with no ENVELOPE? Skipping it. Folder: "{name}", Mail Id: "{mail_id}", Data: "{data}"') + continue + elif data[b'ENVELOPE'].subject: subject = decode_mime(data[b'ENVELOPE'].subject) else: subject = '(no subject)' @@ -227,6 +324,14 @@ def colorize(s, color=None, bold=False, clear=False): db['source']['folders'][name]['size'] += data[b'RFC822.SIZE'] stats['source_mails'] += 1 + if stats['source_mails'] % 1000 == 0: # add message to show progress when processing massive folders + print(f" - Loaded {stats['source_mails']} total emails so far..") + + # adding a check to refresh our imap idle session on the destination imap connection so we do not + # get logged out. This is possibly too frequently, but just taking a guess here. 
+ if stats['source_mails'] % 10000 == 0: + restart_imap_idle(destination) + del mails[:args.buffer_size] if not source_delimiter: @@ -237,9 +342,19 @@ def colorize(s, color=None, bold=False, clear=False): #: get destination folders print('Getting destination folders : ', end='', flush=True) +end_imap_idle(destination) +destination = connect_to_destination(args) # refresh the dest connection as the old session might have timed out in large source mailboxes +login_to_destination(destination, args) # re-login to the destination now that we have a new connection for flags, delimiter, name in destination.list_folders(args.destination_root): db['destination']['folders'][name] = {'flags': flags, 'mails': {}, 'size': 0} + # no need to process the source destination mailbox if we skipped the source for it + if args.source_mailbox: + destination_mailbox_without_root = name.replace(f'{args.destination_root}/', "") + if destination_mailbox_without_root not in args.source_mailbox: + print(f' >> Skipping destination folder "{name}" which with root removed ("{destination_mailbox_without_root}"), does not match an entry in the source mailbox restrictions of "{args.source_mailbox}".') + continue + destination.select_folder(name, readonly=True) mails = destination.search() @@ -249,6 +364,7 @@ def colorize(s, color=None, bold=False, clear=False): while mails: for mail_id, data in destination.fetch(mails[:args.buffer_size], fetch_data).items(): + print(f' > Processing folder "{name}"', flush=True) db['destination']['folders'][name]['mails'][mail_id] = {'size': data[b'RFC822.SIZE']} db['destination']['folders'][name]['size'] += data[b'RFC822.SIZE'] @@ -259,6 +375,7 @@ def colorize(s, color=None, bold=False, clear=False): del mails[:args.buffer_size] if not destination_delimiter: + print(delimiter.decode(), flush=True) destination_delimiter = delimiter.decode() print('{} mails in {} folders ({})\n'.format( @@ -385,13 +502,39 @@ def colorize(s, color=None, bold=False, clear=False): for 
i, fetch in enumerate(source.fetch(buffer, ['FLAGS', 'RFC822', 'INTERNALDATE']).items()): progress = stats['processed'] / stats['source_mails'] * 100 mail_id, data = fetch + + # placeholders, so we can still attempt to use them in error reporting + flags = "(unk)" + msg = "(unk)" + date = "(unk)" + msg_id = b"(unk)" + size = "(unk)" + subject = "(unk)" + + # adding this try statement because I ran into issues where a message wouldn't have any flags defined + # I think it's related to an issue with reading a flag with no envelope above + try: + msg_id = db['source']['folders'][sf_name]['mails'][mail_id]['msg_id'] + size = db['source']['folders'][sf_name]['mails'][mail_id]['size'] + subject = db['source']['folders'][sf_name]['mails'][mail_id]['subject'] + + flags = data[b'FLAGS'] + msg = data[b'RFC822'] + date = data[b'INTERNALDATE'] + + + except KeyError as e: + print(colorize(f' Error "{e}" when reading data from message, will skip: ' + f'Mail_id "{mail_id}" in source folder "{sf_name}", got data "{data}"', color='red')) + + stats['errors'].append({'size': size, + 'subject': subject, + 'exception': f'{type(e).__name__}: {e}', + 'folder': df_name, + 'date': date, + 'id': msg_id.decode()}) - flags = data[b'FLAGS'] - msg = data[b'RFC822'] - date = data[b'INTERNALDATE'] - msg_id = db['source']['folders'][sf_name]['mails'][mail_id]['msg_id'] - size = db['source']['folders'][sf_name]['mails'][mail_id]['size'] - subject = db['source']['folders'][sf_name]['mails'][mail_id]['subject'] + continue #: copy mail print(colorize('[{:>5.1f}%] Progressing... 
(buffer {}/{}) (mail {}/{}) ({}) ({}): {}'.format( @@ -422,22 +565,35 @@ def colorize(s, color=None, bold=False, clear=False): status = destination.append(df_name, msg, (flag for flag in flags if flag.lower() not in denied_flags), msg_time=date) - if b'append completed' in status.lower(): + + # dovecot returns completed; gmail returns success + success_messages = [b'append completed', b'(success)'] + if any([msg in status.lower() for msg in success_messages]): stats['copied_mails'] += 1 else: - raise exceptions.IMAPClientError(status.decode()) + raise exceptions.IMAPClientError(f'Unable to confirm append success via status message! Got "{status.decode()}"') except exceptions.IMAPClientError as e: - e = imaperror_decode(e) - stats['errors'].append({'size': beautysized(size), - 'subject': subject, - 'exception': e, - 'folder': df_name, - 'date': date, - 'id': msg_id.decode()}) - print('\n{} {}\n'.format(colorize('Error:', color='red', bold=True), e)) + e_decoded = imaperror_decode(e) + try: + msg_id_decoded = msg_id.decode() + except Exception as sub_exception: + msg_id_decoded = f'(decode failure: {sub_exception})' + error_information = {'size': beautysized(size), + 'subject': subject, + 'exception': f'{type(e).__name__}: {e_decoded}', + 'folder': df_name, + 'date': date, + 'id': msg_id_decoded} + stats['errors'].append(error_information) + print('\n{} {} {}\n'.format(colorize('Error:', color='red', bold=True), e, error_information)) if args.abort_on_error: raise KeyboardInterrupt + # reconnect the imap session as gmail disconnects on certain error conditions + print(' Reconnecting after error to ensure IMAP connection to destination is still alive..') + destination = connect_to_destination(args) + login_to_destination(destination, args) + finally: stats['processed'] += 1 From a112155ba7acb699ca4cc4f0e98be24434c61727 Mon Sep 17 00:00:00 2001 From: Lukas Schulte-Tickmann <43608073+Schluggi@users.noreply.github.com> Date: Thu, 3 Sep 2020 08:57:51 +0200 Subject: [PATCH 
2/3] added: STARTTLS-note in the description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f55953..1a0aa19 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ In our company we often have to copy mailboxes from one to another server. For this we used [IMAPCopy](http://www.ardiehl.de/imapcopy/) as so far. Due to compatibility issues, first of all the missing -SSL/TLS support i wrote my own python-based version. I hope you like it! +SSL/TLS and STARTTLS support i wrote my own python-based version. I hope you like it! ## Features - Copies folders and subfolders From fbb60dd53e984d2aa45787217688ce93154c9717 Mon Sep 17 00:00:00 2001 From: Lukas Schulte-Tickmann <43608073+Schluggi@users.noreply.github.com> Date: Sat, 26 Sep 2020 19:54:25 +0200 Subject: [PATCH 3/3] Update pymap-copy.py Added: Stats for skipped mails with no envelope Added: Stats for skipped folders based on the source-mailbox argument Added: IMAP idle for the source server Changed: f-strings to strings combined with format for better compatibility with python3 < 3.6 Changed: The IMAP idle functions Improved: The output Improved: Folder skipping (use the correct delimiter) Removed: Reconnects after "everything" (see #16) Removed: IMAP idle from arguments, now it's default (i don't know any case where idle is a bad idea) --- pymap-copy.py | 205 +++++++++++++++++++++++++------------------------- 1 file changed, 104 insertions(+), 101 deletions(-) diff --git a/pymap-copy.py b/pymap-copy.py index ed9a280..5485a27 100755 --- a/pymap-copy.py +++ b/pymap-copy.py @@ -32,7 +32,8 @@ default=False) parser.add_argument('--source-no-ssl', help='use this option if the destination server does not support TLS/SSL', action="store_true") -parser.add_argument('--source-use-starttls', help='use this option when connecting to an optional tls port (143) but require starttls', +parser.add_argument('--source-use-starttls', + help='use this option when connecting to
an optional tls port (143) but require starttls', action="store_true", default=False) parser.add_argument('--source-port', help='the IMAP port of the source server (default: 993)', nargs='?', default=993, type=int) @@ -43,14 +44,14 @@ required=True) parser.add_argument('--destination-no-ssl', help='use this option if the destination server does not support TLS/SSL', action="store_true", default=False) -parser.add_argument('--destination-use-starttls', help='use this option when connecting to an optional tls port (143) but require starttls', +parser.add_argument('--destination-use-starttls', + help='use this option when connecting to an optional tls port (143) but require starttls', action="store_true", default=False) -parser.add_argument('--destination-use-idle', help='use IMAP IDLE to keep the destination connection alive (important for massive source mailboxes)', - action="store_true", default=True) parser.add_argument('--destination-port', help='the IMAP port of the destination server (default: 993)', nargs='?', default=993, type=int) parser.add_argument('--source-mailbox', help='if specified, only sync this folder (case sensitive). 
Can be repeated ' - 'multiple times to source multiple mailboxes.', action='append', nargs='?', default=list(), type=str) + 'multiple times to source multiple mailboxes.', action='append', nargs='?', default=list(), + type=str) parser.add_argument('--destination-root', help='defines the destination root (case sensitive)', nargs='?', default='', type=str) parser.add_argument('--destination-root-merge', help='ignores the destination root if the folder is already part of it', @@ -60,6 +61,7 @@ args = parser.parse_args() + def colorize(s, color=None, bold=False, clear=False): colors = {'red': '\x1b[31m', 'green': '\x1b[32m', @@ -94,12 +96,14 @@ def colorize(s, color=None, bold=False, clear=False): 'skipped_folders': { 'already_exists': 0, 'empty': 0, - 'dry-run': 0 + 'dry-run': 0, + 'by_mailbox': 0 }, 'skipped_mails': { 'already_exists': 0, 'zero_size': 0, - 'max_line_length': 0 + 'max_line_length': 0, + 'no_envelope': 0 }, 'copied_mails': 0, 'copied_folders': 0 @@ -117,7 +121,7 @@ def colorize(s, color=None, bold=False, clear=False): try: print('\nConnecting source : {}, '.format(args.source_server), end='', flush=True) - # if we are using starttls, then we must connect without an SSL'd connection + #: if we are using starttls, then we must connect without an SSL'd connection if args.source_use_starttls: use_ssl = False else: @@ -127,18 +131,19 @@ def colorize(s, color=None, bold=False, clear=False): if args.source_use_starttls: source.starttls(ssl_context=ssl_context) - print(colorize('OK - STARTTLS', color='green')) + print(colorize('OK (STARTTLS)', color='green')) elif not args.source_use_starttls and not use_ssl: - print(colorize('OK', color='green'), "-", colorize('NOT ENCRYPTED', color='red')) + print('{} ({})'.format(colorize('OK ', color='green'), colorize('NOT ENCRYPTED', color='red'))) else: # default is ssl - print(colorize('OK', color='green')) + print(colorize('OK (SSL/TLS)', color='green')) except Exception as e: print('{} {}'.format(colorize('Error:', 
color='red', bold=True), imaperror_decode(e))) error = True + def connect_to_destination(args): error = False try: @@ -150,17 +155,18 @@ def connect_to_destination(args): else: use_ssl = not args.destination_no_ssl - destination = IMAPClient(host=args.destination_server, port=args.destination_port, ssl=use_ssl, ssl_context=ssl_context) + destination = IMAPClient(host=args.destination_server, port=args.destination_port, ssl=use_ssl, + ssl_context=ssl_context) if args.destination_use_starttls: destination.starttls(ssl_context=ssl_context) - print(colorize('OK - STARTTLS', color='green')) + print(colorize('OK (STARTTLS)', color='green')) + + elif not args.source_use_starttls and not use_ssl: + print('{} ({})'.format(colorize('OK ', color='green'), colorize('NOT ENCRYPTED', color='red'))) - elif not args.destination_use_starttls and not use_ssl: - print(colorize('OK', color='green'), "-", colorize('NOT ENCRYPTED', color='red')) - else: # default is ssl - print(colorize('OK', color='green')) + print(colorize('OK (SSL/TLS)', color='green')) except Exception as e: print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) @@ -171,8 +177,9 @@ def connect_to_destination(args): exit() return destination -destination = connect_to_destination(args) + +destination = connect_to_destination(args) print() try: @@ -184,6 +191,7 @@ def connect_to_destination(args): error = True print('{} {}'.format(colorize('Error:', color='red', bold=True), imaperror_decode(e))) + def login_to_destination(destination, args): error = False try: @@ -198,11 +206,9 @@ def login_to_destination(destination, args): if error: print('\nAbort!') exit() -login_to_destination(destination, args) -destination = connect_to_destination(args) -login_to_destination(destination, args) +login_to_destination(destination, args) print() #: get quota from source @@ -247,51 +253,43 @@ def login_to_destination(destination, args): print() -# attempt to drop the destination connection into idle 
mode, this hopefully will keep the connection -# open for us and prevent a logout/timeout from happening while we parse particularly large source mailboxes -did_idle_fail = True + def start_imap_idle(client): - try: - if args.destination_use_idle: - # must select a folder before invoking idle. we simply select the first folder to idle on - flags, delimiter, name = destination.list_folders(args.destination_root)[0] - destination.select_folder(name, readonly=True) - client.idle() - did_idle_fail = False - except Exception as e: - did_idle_fail = True - print(f'WARNING: Unable to idle on destination imap connection. Will continue without it. (got exception {type(e)}: {e})') + #: must select a folder before invoking idle. we simply select the first folder to idle on + _, _, some_folder = client.list_folders()[0] + client.select_folder(some_folder, readonly=True) + client.idle() + def end_imap_idle(client): """ - Rather simple: stop idle mode to allow normal commands (if idle did not fail) + Rather simple: stop idle mode to allow normal commands """ - if not did_idle_fail: - client.idle_done() + client.idle_done() + def restart_imap_idle(client): """ Restart the idle session so we don't timeout. 
intended to be invoked by long running code where needed to keep the connection alive """ - if not did_idle_fail: - end_imap_idle(client) - start_imap_idle(client) + end_imap_idle(client) + start_imap_idle(client) -if args.destination_use_idle: - start_imap_idle(destination) - restart_imap_idle(destination) -else: - print(f' Skipping destination IMAP IDLE command') +start_imap_idle(destination) #: get source folders -print('Getting source folders : ', flush=True) +print(colorize('Getting source folders : loading (this can take a while)', clear=True), flush=True, end='') for flags, delimiter, name in source.list_folders(args.source_root): - print(f' > Processing folder "{name}"...', flush=True) + + if not source_delimiter: + source_delimiter = delimiter.decode() + if args.source_mailbox: if name not in args.source_mailbox: - print(f' >> Skipping folder "{name}" as it does not in the source mailbox restrictions of "{args.source_mailbox}"') + print(colorize('Getting source folders : Progressing ({} mails) (skipping): {}'. + format(stats['source_mails'], name), clear=True), flush=True, end='') continue source.select_folder(name, readonly=True) @@ -310,8 +308,8 @@ def restart_imap_idle(client): db['source']['folders'][name]['buffer'].append(mails[:args.buffer_size]) for mail_id, data in source.fetch(mails[:args.buffer_size], ['RFC822.SIZE', 'ENVELOPE']).items(): - if b'ENVELOPE' not in data: - print(f' Encountered message with no ENVELOPE? Skipping it. Folder: "{name}", Mail Id: "{mail_id}", Data: "{data}"') + if b'ENVELOPE' not in data: # Encountered message with no ENVELOPE? 
Skipping it + stats['skipped_mails']['no_envelope'] += 1 continue elif data[b'ENVELOPE'].subject: subject = decode_mime(data[b'ENVELOPE'].subject) @@ -324,8 +322,8 @@ def restart_imap_idle(client): db['source']['folders'][name]['size'] += data[b'RFC822.SIZE'] stats['source_mails'] += 1 - if stats['source_mails'] % 1000 == 0: # add message to show progress when processing massive folders - print(f" - Loaded {stats['source_mails']} total emails so far..") + print(colorize('Getting source folders : Progressing ({} mails): {}'. + format(stats['source_mails'], name), clear=True), flush=True, end='') # adding a check to refresh our imap idle session on the destination imap connection so we do not # get logged out. This is possibly too frequently, but just taking a guess here. @@ -334,27 +332,32 @@ def restart_imap_idle(client): del mails[:args.buffer_size] - if not source_delimiter: - source_delimiter = delimiter.decode() +print(colorize('Getting source folders : {} mails in {} folders ({})'. 
+ format(stats['source_mails'], len(db['source']['folders']), + beautysized(sum([f['size'] for f in db['source']['folders'].values()]))), clear=True)) -print('{} mails in {} folders ({})'.format(stats['source_mails'], len(db['source']['folders']), - beautysized(sum([f['size'] for f in db['source']['folders'].values()])))) -#: get destination folders -print('Getting destination folders : ', end='', flush=True) end_imap_idle(destination) -destination = connect_to_destination(args) # refresh the dest connection as the old session might have timed out in large source mailboxes -login_to_destination(destination, args) # re-login to the destination now that we have a new connection +start_imap_idle(source) + + +#: get destination folders +print(colorize('Getting destination folders : loading (this can take a while)', clear=True), flush=True, end='') for flags, delimiter, name in destination.list_folders(args.destination_root): - db['destination']['folders'][name] = {'flags': flags, 'mails': {}, 'size': 0} - # no need to process the source destination mailbox if we skipped the source for it + if not destination_delimiter: + destination_delimiter = delimiter.decode() + + #: no need to process the source destination mailbox if we skipped the source for it if args.source_mailbox: - destination_mailbox_without_root = name.replace(f'{args.destination_root}/', "") - if destination_mailbox_without_root not in args.source_mailbox: - print(f' >> Skipping destination folder "{name}" which with root removed ("{destination_mailbox_without_root}"), does not match an entry in the source mailbox restrictions of "{args.source_mailbox}".') + if name.replace(destination_delimiter, source_delimiter) not in args.source_mailbox: + print(colorize('Getting destination folders : Progressing ({} mails) (skipping): {}'. 
+ format(stats['destination_mails'], name), clear=True), flush=True, end='') + stats['skipped_folders']['by_mailbox'] += 1 continue + db['destination']['folders'][name] = {'flags': flags, 'mails': {}, 'size': 0} + destination.select_folder(name, readonly=True) mails = destination.search() @@ -364,7 +367,6 @@ def restart_imap_idle(client): while mails: for mail_id, data in destination.fetch(mails[:args.buffer_size], fetch_data).items(): - print(f' > Processing folder "{name}"', flush=True) db['destination']['folders'][name]['mails'][mail_id] = {'size': data[b'RFC822.SIZE']} db['destination']['folders'][name]['size'] += data[b'RFC822.SIZE'] @@ -372,16 +374,14 @@ def restart_imap_idle(client): db['destination']['folders'][name]['mails'][mail_id]['msg_id'] = data[b'ENVELOPE'].message_id stats['destination_mails'] += 1 + print(colorize('Getting destination folders : Progressing ({} mails): {}'. + format(stats['destination_mails'], name), clear=True), flush=True, end='') del mails[:args.buffer_size] - if not destination_delimiter: - print(delimiter.decode(), flush=True) - destination_delimiter = delimiter.decode() - -print('{} mails in {} folders ({})\n'.format( - stats['destination_mails'], len(db['destination']['folders']), - beautysized(sum([f['size'] for f in db['destination']['folders'].values()])))) +print(colorize('Getting destination folders : {} mails in {} folders ({})\n'. + format(stats['destination_mails'], len(db['destination']['folders']), + beautysized(sum([f['size'] for f in db['destination']['folders'].values()]))), clear=True)) #: list mode if args.list: @@ -395,7 +395,11 @@ def restart_imap_idle(client): print('{} ({} mails, {})'.format(name, len(db['destination']['folders'][name]['mails']), beautysized(db['destination']['folders'][name]['size']))) - print('\n{}'.format(colorize('Everything skipped! (list mode)', color='cyan'))) + if args.source_mailbox: + print('\n{}'.format(colorize('Everything skipped! 
(list mode, list was filtered by the source mailbox argument)', + color='cyan'))) + else: + print('\n{}'.format(colorize('Everything skipped! (list mode)', color='cyan'))) exit() @@ -428,6 +432,8 @@ def restart_imap_idle(client): print('\n{} Source folder not found: {}\n'.format(colorize('Error:', color='red', bold=True), ', '.join(not_found))) exit() +end_imap_idle(source) + try: for sf_name in sorted(db['source']['folders'], key=lambda x: x.lower()): source.select_folder(sf_name, readonly=True) @@ -502,17 +508,11 @@ def restart_imap_idle(client): for i, fetch in enumerate(source.fetch(buffer, ['FLAGS', 'RFC822', 'INTERNALDATE']).items()): progress = stats['processed'] / stats['source_mails'] * 100 mail_id, data = fetch - - # placeholders, so we can still attempt to use them in error reporting - flags = "(unk)" - msg = "(unk)" - date = "(unk)" - msg_id = b"(unk)" - size = "(unk)" - subject = "(unk)" - - # adding this try statement because I ran into issues where a message wouldn't have any flags defined - # I think it's related to an issue with reading a flag with no envelope above + + #: placeholders, so we can still attempt to use them in error reporting + flags = msg = date = size = subject = "(unknown)" + msg_id = b"(unknown)" + try: msg_id = db['source']['folders'][sf_name]['mails'][mail_id]['msg_id'] size = db['source']['folders'][sf_name]['mails'][mail_id]['size'] @@ -522,18 +522,19 @@ def restart_imap_idle(client): msg = data[b'RFC822'] date = data[b'INTERNALDATE'] - except KeyError as e: - print(colorize(f' Error "{e}" when reading data from message, will skip: ' - f'Mail_id "{mail_id}" in source folder "{sf_name}", got data "{data}"', color='red')) + try: + msg_id_decoded = msg_id.decode() + except Exception as sub_exception: + msg_id_decoded = '(decode failure): {}'.format(sub_exception) stats['errors'].append({'size': size, 'subject': subject, - 'exception': f'{type(e).__name__}: {e}', + 'exception': '{}: {}'.format(type(e).__name__, e), 'folder': 
df_name, 'date': date, - 'id': msg_id.decode()}) - + 'id': msg_id_decoded}) + print('\n{} {}\n'.format(colorize('Error:', color='red', bold=True), e)) continue #: copy mail @@ -566,34 +567,34 @@ def restart_imap_idle(client): status = destination.append(df_name, msg, (flag for flag in flags if flag.lower() not in denied_flags), msg_time=date) - # dovecot returns completed; gmail returns success + #: differed IMAP servers have differed return codes success_messages = [b'append completed', b'(success)'] if any([msg in status.lower() for msg in success_messages]): stats['copied_mails'] += 1 else: - raise exceptions.IMAPClientError(f'Unable to confirm append success via status message! Got "{status.decode()}"') + raise exceptions.IMAPClientError('Unknown success message: {}'.format(status.decode())) + except exceptions.IMAPClientError as e: e_decoded = imaperror_decode(e) + try: msg_id_decoded = msg_id.decode() except Exception as sub_exception: - msg_id_decoded = f'(decode failure: {sub_exception})' + msg_id_decoded = '(decode failure): {}'.format(sub_exception) + error_information = {'size': beautysized(size), 'subject': subject, - 'exception': f'{type(e).__name__}: {e_decoded}', + 'exception': '{}: {}'.format(type(e).__name__, e), 'folder': df_name, 'date': date, 'id': msg_id_decoded} + stats['errors'].append(error_information) - print('\n{} {} {}\n'.format(colorize('Error:', color='red', bold=True), e, error_information)) + print('\n{} {}\n'.format(colorize('Error:', color='red', bold=True), e)) + if args.abort_on_error: raise KeyboardInterrupt - # reconnect the imap session as gmail disconnects on certain error conditions - print(' Reconnecting after error to ensure IMAP connection to destination is still alive..') - destination = connect_to_destination(args) - login_to_destination(destination, args) - finally: stats['processed'] += 1 @@ -633,10 +634,12 @@ def restart_imap_idle(client): else: print('Skipped folders : {}'.format(sum([stats['skipped_folders'][c] for 
c in stats['skipped_folders']]))) print('├─ Empty : {} (skip-empty-folders mode only)'.format(stats['skipped_folders']['empty'])) + print('├─ By mailbox : {} (source-mailbox mode only)'.format(stats['skipped_folders']['by_mailbox'])) print('└─ Already exists : {} '.format(stats['skipped_folders']['already_exists'])) print() print('Skipped mails : {}'.format(sum([stats['skipped_mails'][c] for c in stats['skipped_mails']]))) print('├─ Zero sized : {}'.format(stats['skipped_mails']['zero_size'])) + print('├─ No envelope : {}'.format(stats['skipped_mails']['no_envelope'])) print('├─ Line length : {} (max-line-length mode only)'.format(stats['skipped_mails']['max_line_length'])) print('└─ Already exists : {} (incremental mode only)'.format(stats['skipped_mails']['already_exists']))