#!/usr/bin/python
# Lists duplicate files under given Maildir based on Message-IDs
#
# Author: Filippo Giunchedi <filippo@esaurito.net>
# Version: 0.1
#
# this file is public domain 
#
# this script does NOT remove any file itself; pipe its output to "xargs rm" to delete the duplicates
import os, sys, re
from os.path import join

# Header field names are case-insensitive, so "Message-Id:" and "MESSAGE-ID:"
# must match as well; the whitespace after the colon is optional.
msgid_re = re.compile(r"^Message-ID:\s*<(\S+)>", re.IGNORECASE)
# A blank line marks the end of the headers.
empty_re = re.compile(r"^\s*$")
# Maps each Message-ID seen so far to the path of the first file carrying it.
ids = {}


def _read_msgid(fp):
    """Return the Message-ID found in the headers read from fp, or ""."""
    # email.message_from_file reads the whole file, bad idea: stop at the
    # first match or at the end of the headers, whichever comes first
    for line in fp:
        match = msgid_re.search(line)
        if match:
            return match.group(1)

        # uh-oh end of headers!
        if empty_re.search(line):
            break

    return ""


def record_msgid(dir):
    """Print the files directly under dir whose Message-ID was already seen.

    The first file found with a given Message-ID is remembered in the
    module-level ``ids`` dict; the full path of every later file carrying
    the same Message-ID is printed to stdout, one per line. Files without
    a Message-ID header are ignored, and subdirectories are not scanned.

    dir -- path of the directory to scan
    """
    # only the first entry of the walk, we are not interested in
    # subdirectories; the default covers a directory that cannot be listed
    files = next(os.walk(dir), (dir, [], []))[2]

    # sorted so that which copy counts as the original is reproducible
    for f in sorted(files):
        fullpath = join(dir, f)

        # the with-block closes the file even if reading fails
        with open(fullpath, 'r') as fp:
            msgid = _read_msgid(fp)

        if not msgid:
            continue

        if msgid not in ids:
            ids[msgid] = fullpath
        else:
            print(fullpath)

def init():
    """Check the command line and scan the given Maildir for duplicates.

    The Maildir path is taken from the first command-line argument; it must
    contain the "cur", "new" and "tmp" subdirectories, each of which is
    passed to record_msgid().

    Diagnostics are written to stderr: stdout carries only the names of
    duplicate files and is meant to be piped into "xargs rm", so nothing
    else may appear on it.

    Exits with status 1 when the argument is missing or the directory is
    not a valid Maildir.
    """
    me = sys.argv[0]
    if len(sys.argv) < 2:
        sys.stderr.write("%s: usage %s maildir\n" % (me, me))
        sys.exit(1)

    maildir = sys.argv[1]

    subdirs = [join(maildir, name) for name in ("cur", "new", "tmp")]
    toscan = [path for path in subdirs if os.path.isdir(path)]

    # a Maildir must have all three subdirectories
    if len(toscan) < 3:
        sys.stderr.write("%s: %s not a valid maildir\n" % (me, maildir))
        sys.exit(1)

    for path in toscan:
        record_msgid(path)

# Run the scan only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    init()

# Generated with vim2html
# Copyright (c) 2003-2004 by Chip Cuccio <http://norlug.org/~chipster/finger>