#! /usr/bin/env python3

# Copyright (C) 2023 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

'Check for possible hyperkitty_import exceptions.'

import argparse
import mailbox
from email import message_from_bytes, policy
from email.utils import unquote


def parseargs():
    """Parse the command line and return the resulting namespace.

    The script takes a single positional argument, ``mbox``, which is the
    path of the mbox file to check.  The long description below is what
    ``--help`` displays.
    """
    summary = """\
This script is used to check an archive mbox file for
messages that might cause hyperkitty_import to skip the message or fail
altogether. It will only catch and report exceptions in getting messages
from the mbox or converting them to email.message.Message instances. There
can be other issues with mboxes such as unescaped 'From ' lines and
unparseable Date: headers. The Mailman 2.1 cleanarch script can check for
unescaped 'From ' lines and the script at
https://www.msapiro.net/scripts/cleanarch2 can do that and check for
unparseable Date: headers as well."""
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument(
        'mbox',
        type=str,
        help='The path to the mbox file to be checked.',
    )
    options = cli.parse_args()
    return options


def _printable_mid(msg):
    """Return msg's Message-ID as plain text with enclosing <> removed.

    The value is passed through str() first: under the compat32 policy
    Message.get() returns an email.header.Header object rather than a str
    when the header contains raw 8-bit bytes, and email.utils.unquote()
    calls len()/startswith() on its argument, which a Header does not
    support.
    """
    return unquote(str(msg.get("Message-Id", 'n/a')))


def loop(start, end, mbox, mb_keys):
    """Check the messages at positions start..end-1 of mb_keys.

    For each position the message is fetched from mbox, serialized to
    bytes, re-parsed with policy.default, and its Message-ID and
    Date/Resent-Date headers are read.  A failure in any of the steps after
    the fetch is reported on stdout and the next message is tried.

    Any other exception (typically from mbox.get_message()) is reported as
    a retrieval failure and ends this call; the caller is expected to call
    again starting at count + 1.

    Args:
        start: index into mb_keys of the first message to check.
        end: index into mb_keys one past the last message to check.
        mbox: object providing get_message(key), e.g. a mailbox.mbox.
        mb_keys: sequence of keys accepted by mbox.get_message().

    Side effects:
        Sets the module-level global ``count`` to the index of the message
        most recently attempted.
    """
    global count
    try:
        for count in range(start, end):
            msg = mbox.get_message(mb_keys[count])
            try:
                msg_raw = msg.as_bytes(unixfrom=False)
            except Exception as e:
                print('Failed to convert message number {}, '
                      'message=id {} to bytes\n'
                      '    {}'.format(count, _printable_mid(msg), e))
                continue
            # NOTE(review): the result is unused; presumably this only
            # exercises the accessor -- confirm before removing.
            msg.get_from()
            # Remember the id as seen by the original (mailbox) message so
            # it can still be reported if reading it from the re-parsed
            # message fails below.
            mid = msg.get('message-id', 'n/a')
            try:
                msg = message_from_bytes(msg_raw, policy=policy.default)
            except Exception as e:
                # The assignment above did not happen, so msg is still the
                # original mailbox message here.
                print('Failed to convert message number {}, '
                      'message=id {} to email.message.Message\n'
                      '    {}'.format(count, _printable_mid(msg), e))
                continue
            try:
                mid = msg.get('message-id', 'n/a')
            except Exception as e:
                print('Failed to get mid from converted message number {}, '
                      'message=id {}\n'
                      '    {}'.format(count,
                                      mid,
                                      e))
                continue
            try:
                # Only the header access matters (it triggers parsing under
                # policy.default); the value itself is not needed.
                _ = msg.get('date') or msg.get('resent-date')
            except Exception as e:
                print('Failed to get date from converted message number {}, '
                      'message=id {}\n'
                      '    {}'.format(count,
                                      mid,
                                      e))
                continue
    except Exception as e:
        print('Failed to retrieve message number {}\n    {}'.format(count, e))


def main():
    """Check every message in the mbox named on the command line.

    Opens the mbox (it must already exist; create=False), then calls
    loop() repeatedly until the module-level global ``count`` reaches the
    last index.  Repeated calls are needed because loop() returns early
    after a retrieval failure; each call resumes at count + 1.  Finally
    prints the number of messages processed.
    """
    global count
    ns = parseargs()
    mbox = mailbox.mbox(ns.mbox, create=False)
    try:
        mb_keys = mbox.keys()
        total = len(mb_keys)
        # -1 so that the first pass starts at index 0 and an empty mbox
        # skips the loop entirely.
        count = -1
        while count < total - 1:
            loop(count+1, total, mbox, mb_keys)
    finally:
        # Release the underlying file even if a pass raised.
        mbox.close()
    print('Processed {} messages.'.format(count+1))


# Run the check only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
