#!/usr/bin/python
# Lists duplicate files under given Maildir based on Message-IDs
#
# Author: Filippo Giunchedi <filippo@esaurito.net>
# Version: 0.1
#
# this file is public domain
#
# This script will NOT remove any file itself; it only prints the paths of
# the duplicates. Pipe its output to "xargs rm" to actually delete them.
import os, sys, re
from os.path import join
# Matches a Message-ID header line and captures the id between the angle
# brackets.  Header field names are case-insensitive, so "Message-Id:" and
# "message-id:" must match too, and the whitespace after the colon is
# optional.  These are bytes patterns because mail files are read in binary
# mode: a message may contain raw 8-bit data that is not valid in whatever
# text encoding the interpreter would otherwise assume.
msgid_re = re.compile(br"^Message-ID:[ \t]*<(\S+)>", re.IGNORECASE)
# A blank (or whitespace-only) line marks the end of the header section.
# It also matches the empty string, i.e. end of file.
empty_re = re.compile(br"^\s*$")
# Maps every Message-ID seen so far to the path of the first file carrying it.
ids = {}


def _read_msgid(path):
    """Return the Message-ID found in the headers of the file at *path*.

    Returns None when the header section ends (blank line or end of file)
    before any Message-ID header is seen.
    """
    # email.message_from_file reads the whole file, which is a bad idea for
    # large mails; reading line by line stops as soon as the headers end.
    # (email.Parser.HeaderParser would be another option.)
    with open(path, 'rb') as fp:
        for line in fp:
            match = msgid_re.search(line)
            if match:
                return match.group(1)
            if empty_re.search(line):
                # uh-oh, end of headers without a Message-ID
                return None
    return None


def record_msgid(dir):
    """Record the Message-ID of every file directly inside *dir*.

    The first file seen with a given Message-ID is remembered in the
    module-level ``ids`` dict.  Any later file carrying an already known
    Message-ID is a duplicate and its full path is printed on stdout.
    Files without a Message-ID header are skipped.  Nothing is deleted.

    dir -- path of the directory to scan; subdirectories are not visited.
    """
    # Only the first triple yielded by os.walk() is needed: we are not
    # interested in subdirectories.  The default value covers a missing or
    # unreadable directory, for which os.walk() yields nothing at all.
    files = next(os.walk(dir), (dir, [], []))[2]
    for f in files:
        fullpath = join(dir, f)
        msgid = _read_msgid(fullpath)
        if msgid is None:
            continue
        if msgid in ids:
            print(fullpath)
        else:
            ids[msgid] = fullpath
def init():
    """Validate the command line and scan the given Maildir for duplicates.

    The Maildir path is taken from the first command-line argument.  It must
    contain the three subdirectories "cur", "new" and "tmp"; each of them is
    passed to record_msgid() in that order.

    Diagnostics are written to stderr, never to stdout: stdout carries only
    the paths of duplicate files and may be piped straight into "xargs rm".

    Exits with status 1 when the argument is missing or the directory is not
    a valid Maildir.
    """
    me = sys.argv[0]
    if len(sys.argv) < 2:
        sys.stderr.write("%s: usage %s maildir\n" % (me, me))
        sys.exit(1)
    maildir = sys.argv[1]
    subdirs = [join(maildir, name) for name in ("cur", "new", "tmp")]
    # A Maildir is only valid when all three subdirectories exist.
    if not all(os.path.isdir(path) for path in subdirs):
        sys.stderr.write("%s: %s not a valid maildir\n" % (me, maildir))
        sys.exit(1)
    for path in subdirs:
        record_msgid(path)
# Run the scan only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    init()
# HTML rendering of this file generated with vim2html,
# Copyright (c) 2003-2004 by Chip Cuccio
# <http://norlug.org/~chipster/finger>