Source code for wapmail.py.
The plain text source is here.
#!/usr/bin/python -u
#!D:\Programs\Python\python.exe -u
#
# wapmail.py is released under the terms of the Sleepycat License, reproduced
# below. Commercial licensing is available for a fee - please contact
# richie@entrian.com for information.
#
# Copyright (c) 2002 Entrian Solutions. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# o Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# o Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# o Redistributions in any form must be accompanied by information on how to
# obtain complete source code for the software and any accompanying
# software that uses the software. The source code must either be included
# in the distribution or be available for no more than the cost of
# distribution plus a nominal fee, and must be freely redistributable under
# reasonable conditions. For an executable file, complete source code means
# the source code for all modules it contains. It does not include source
# code for modules or files that typically accompany the major components
# of the operating system on which the executable file runs.
#
# THIS SOFTWARE IS PROVIDED BY ENTRIAN SOLUTIONS ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
# ARE DISCLAIMED. IN NO EVENT SHALL ENTRIAN SOLUTIONS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
r"""Entrian wapmail lets you access a POP3 email account via WAP on your
mobile phone or PDA. Use it by adding an address to your phone's bookmarks
like this:
http://entrian.com/cgi-bin/wapmail.py/server/username/password
replacing 'server', 'username' and 'password' with your POP3 account details.
[demo]
*Advantages:*
o Because your login details go into a bookmark, you don't need to type them
in every time.
o As well as saving time and effort, this makes the WAP call much shorter and
cheaper, if you pay by the minute. Checking your email needs only a few
seconds of airtime.
o wapmail hides rather than deletes messages - see below.
o It's free!
*Hiding messages:* Rather than deleting emails, wapmail lets you hide them so
they don't appear in the list while still remaining in your POP3 account for
later retrieval. There are several reasons for this:
o You can't accidentally delete an email because of finger trouble on a
tiny phone keypad.
o Lots of messages, like mailing list postings, aren't important enough
to view on the move. They clutter your email list, but you don't want
to delete them. Hiding them is the answer.
o Wading through lots of emails on a phone is tedious. By hiding any
uninteresting messages, you make the whole idea a lot more practical.
o It gives the option of automatically hiding suspected spam, though
wapmail doesn't (yet) do that.
*Security:* Your password is held only in the bookmark within your
phone. It isn't stored by wapmail, just used transiently to access your POP3
account. It does pass in plain text across the internet, so if that worries
you, you shouldn't use wapmail.
*Legal:* Use of the wapmail server on _entrian.com_ is free but comes with no
warranty - see the disclaimer in _wapmail.py_. Entrian wapmail is implemented
in _wapmail.py_, which is freely downloadable for use in Open Source projects
under the Sleepycat License. Commercial licensing is also available for a fee
- please contact _richie@entrian.com_ for information.
"""
developersOverview = """
*Configuration:* Change `HIDE_LIST_LOCATION` to point to a directory where
the users' Hide lists can be saved. CGI scripts must have read-write access
to it. The OUTPUT_HTML option makes the script output HTML rather than WML,
which is useful for testing and also provides the HTML demo at
http://entrian.com/wapmail/wapmaildemo.html
*Identification of messages:* This is done by taking an md5 hash of the
Message-Id, base64-encoding it, and changing some characters around to make
it URL-safe. See `messageIDToHash`. Whenever a message needs to be
identified, eg. for viewing or hiding, a hashed Message-Id is used.
Message-Ids are hashed to save space - you only get 1400 bytes to play with
in a WAP page (only on some devices, but I err on the side of caution). If
you contrive to receive a message with no Message-Id, it will show up in the
summary list but you won't be able to view it or hide it.
*URLs:* These can be one of the following forms:
o `.../wapmail.py/server/username/password` - this gives a summary page,
possibly truncated due to WAP size limits. The summary page has a 'View'
link for all messages that include a plain text body, and a 'Hide' link
for all messages.
o `.../wapmail.py/server/username/password?s=N` - this gives a summary
page starting at index N, again possibly truncated due to WAP size limits.
o `.../wapmail.py/server/username/password/hashed-message-id?s=N` - this
displays an email, including the From, Subject and Date headers, possibly
truncated due to WAP size limits. There's a 'Hide' link at the end of the
page. Message-Id URLs inherit the `s` parameter from the summary page, so
that hitting the 'Hide' link can go back to the right place in the summary.
o `.../wapmail.py/server/username/password?h=hashed-message-id&s=N` - this
adds the named message to the user's Hide list and displays a summary page
starting at N. This means that I'm using GET requests that return content
in response to commands, which sounds like I'm committing the cardinal sin
of breaking idempotency, but in fact it's safe because hitting Reload will
simply hide the message again, which has no effect (whether the message
still exists or not).
*Hide lists:* Each user has a 'Hide list', which is a list of all the messages
that they have marked as hidden and still exist on the POP3 server. When a
user makes a request, the system first logs him into his POP3 account then
reads his Hide list (thus ensuring that the password needs to be right before
the Hide list is loaded). At the end of processing, any Message-Ids in the
Hide list that are no longer on the POP3 server are removed from the Hide
list. This means that Hide lists don't keep growing over time.
Hide lists are implemented as dictionaries, where the keys are hashed
Message-Ids and the values are all None. They are saved as pickles into
HIDE_LIST_LOCATION with filenames of 'server_username.hide', with any
non-alphanumeric characters replaced with '-'.
*Testing:* There's a commented-out section to help with testing - uncomment
it to run the program outside of a CGI environment. You can also give a
server/username/password of 'test/test/test' in order to work on a fake
POP3 account (implemented in _FakePoplib.py_) rather than a real one. The
OUTPUT_HTML configuration variable is also useful for testing.
*POP3 assumptions:* wapmail uses the POP3 `top` command, which is optional
according to the POP3 RFC but I've never come across a POP3 server that
didn't support it. Unlike some POP3 gateways (that will remain nameless),
wapmail *doesn't* assume that the POP3 server will assign the same message
number to the same message across different sessions. Messages are only
identified by number within a session, and by hashed Message-Id at all other
times. wapmail doesn't use the POP3 `UIDL` extension.
*Data structures:* emails are represented by a simple list of lines, as
returned by `poplib`. Lists of emails are represented by lists of lists of
lines. See also *Identification of messages* and *Hide lists* above.
*MIME handling:* wapmail handles MIME messages itself, because it only reads
the first few lines of each message from the server and so the Python standard
library MIME tools won't parse the messages properly. It handles multipart
messages, and even nested multipart messages, provided that the plain text
body is within the first MIME part. It only displays plain text sections,
rather than trying to convert HTML sections into WML or anything like that
(virtually all "real" emails have a plain text section, and who wants to read
spam on their phone?) Quoted printable plain text sections are decoded.
"""
# This is used on entrian.com to provide an online demo of wapmail.
# It is a static HTML page: a GET form that submits 'server', 'username' and
# 'password' (plus 'demo=Go' from the submit button) to
# /cgi-bin/wapmaildemo.py, with the response targeted at the 'wapmaildemo'
# iframe.  main() looks for 'demo=Go' in the query string and turns those
# three fields into a PATH_INFO.  The onGo() script only replaces the
# iframe's content with 'Connecting...' when the form is submitted.
demoForm = """
<html>
<head><script><!--
function onGo()
{
if ( window && window.frames && window.frames[ 'wapmaildemo' ] &&
window.frames[ 'wapmaildemo' ].document && window.frames[ 'wapmaildemo' ].document.body )
{
window.frames[ 'wapmaildemo' ].document.body.innerHTML = '<p><b>Connecting...</b></p>';
}
return true;
}
//-->
</script></head>
<body>
<p><b>Demonstration:</b> Enter your POP3 account details here to try out
wapmail online:</p>
<table style='background: #e8f0f8; border: 1px solid black'
border='0' cellpadding='5' cellspacing='0'>
<tr><td valign='top'>
<form method='GET' action='/cgi-bin/wapmaildemo.py' target='wapmaildemo' onsubmit='onGo()'>
<table border='0' cellpadding='0' cellspacing='4'>
<tr><td>Server: </td><td><input type='text' name='server' size='20'></td></tr>
<tr><td>Username: </td><td><input type='text' name='username' size='20'></td></tr>
<tr><td>Password: </td><td><input type='password' name='password' size='20'></td></tr>
<tr><td colspan='2'><center><input type='submit' name='demo' value='Go'></td></tr>
</table>
</form>
</td><td>
<table border='0' cellpadding='0' cellspacing='4'><tr><td>
<iframe width='250' height='150' name='wapmaildemo' src='demointro.html'></iframe>
</td></tr></table>
</td></tr></table>
</body></html>
"""
import os, sys
# ============================== Configuration ==============================
# Outputs HTML rather than WML, for testing or demonstration purposes.
# Tested for truthiness by main() (to pick the content type and the
# opening/closing markup) and by WMLEscape() (which only doubles '$' for WML).
OUTPUT_HTML = 0
# HIDE_LIST_LOCATION determines where each user's Hide list is saved.
# CGI scripts must have read-write access to it.  One pickle file per
# server/username pair is read from and written to this directory by main().
if os.name == 'nt':
    HIDE_LIST_LOCATION = r'D:\Projects\cgi-bin'
else:
    HIDE_LIST_LOCATION = '/home/entrian/sandbox'
# ===========================================================================
import os, sys, string, re, time, operator, cPickle, cStringIO, traceback
import cgi, urllib, poplib, quopri, rfc822, md5, base64
# Limits on the sizes of various things.  All are character counts, applied
# with trim() or compared against an estimated page size.
HEADER_LIMIT = 32        # Longest header value shown (see getTrimmedHeader).
SHORT_BODY_LIMIT = 60    # Body excerpt length in the summary list.
FULL_BODY_LIMIT = 1100   # Body length when viewing a single message.
TOTAL_LIMIT = 1400 # The smallest device's limit of compiled WML size.
# This is output when we can't find a plain text section in the body of an email.
# buildSummary() also compares against it to decide whether to offer a
# 'View' link.
NO_PLAIN_TEXT_BODY = "(no plain text body)"
def main():
"""Main driver function; ensures that exceptions go to the browser."""
sys.stderr = sys.stdout
if OUTPUT_HTML:
sys.stdout.write( "Content-Type: text/html\r\nCache-Control: no-cache, no-store\r\n\r\n" )
print "<html><head><style>"
print "a { font-weight: bold }"
print "</style>"
print "<body leftmargin='2' topmargin='2' marginwidth='2' marginheight='2'"
print " bgcolor='#90f0c0' link='#000000' alink='#000000' vlink='#000000'>"
print "<font face='arial, swiss, heletica'>"
else:
sys.stdout.write( "Content-Type: text/vnd.wap.wml\r\nCache-Control: no-cache, no-store\r\n\r\n" )
print '<?xml version="1.0"?>'
print '<!DOCTYPE wml PUBLIC "-//WAPFORUM//DTD WML 1.1//EN" "http://www.wapforum.org/DTD/wml_1.1.xml">'
print '<wml><card title="entrian wapmail">'
# Wrap the whole thing in a 'try' block so we can send errors to the browser.
try:
# The demo requires that the server, username and password are passed
# in CGI parameters, so we fake up a PATH_INFO from that here.
if string.find( os.environ[ 'QUERY_STRING' ], 'demo=Go' ) != -1:
params = cgi.FieldStorage()
try:
server = params[ 'server' ].value
username = params[ 'username' ].value
password = params[ 'password' ].value
os.environ[ 'QUERY_STRING' ] = ''
os.environ[ 'PATH_INFO' ] = '/%s/%s/%s' % ( server, username, password )
except KeyError:
pass
# Is this a well-formed request, .../wapmail.py/server/username/password[/message-id]?
args = string.split( os.environ.get( 'PATH_INFO', '' ), '/' )
## # Uncomment these for testing outside of a CGI environment.
## args = [ '', 'test', 'test', 'test' ] #, '-XZ0y4sDLLt4OWxkN78tVQ::' ]
## os.environ[ 'QUERY_STRING' ] = 's=2' ## 'h=-XZ0y4sDLLt4OWxkN78tVQ::'
if len( args ) not in [ 4, 5 ]:
print "<p><b>Usage: </b>.../wapmail.py/server/username/password</p>"
return
# Decode the request - first the login details.
unused, server, username, password = args[ :4 ]
if ( server, username, password ) == ( 'test', 'test', 'test' ):
import FakePoplib
global poplib
poplib = FakePoplib
# Is there a message ID to show?
if len( args ) == 5:
messageIDHash = args[ 4 ]
else:
messageIDHash = None
# Is there a starting index for the summary list?
params = cgi.FieldStorage()
if params.has_key( 's' ):
startIndex = int( params[ 's' ].value )
else:
startIndex = 0
# Is there a message ID to hide?
if params.has_key( 'h' ):
hideIDHash = params[ 'h' ].value
else:
hideIDHash = None
popServer = None
try:
# Log into the POP server
popServer = poplib.POP3( server )
popServer.user( username )
popServer.pass_( password )
# Now that we've verified the password, we can read this user's
# Hide list. This is a dictionary whose keys are the message-id
# hashes of the messages he's hidden. Entries are purged once the
# message no longer appears on the POP3 server - that happens in
# readEmails().
hideDict = {}
hidePickleName = "%s_%s.hide" % ( server, username )
hidePickleName = re.sub( r'[^a-zA-Z0-9_\.]', '-', hidePickleName )
hidePicklePath = os.path.join( HIDE_LIST_LOCATION, hidePickleName )
try:
hidePickleFile = open( hidePicklePath, 'rt' )
hideDict = cPickle.load( hidePickleFile )
hidePickleFile.close()
except (IOError, EOFError):
pass
# What is it that's being requested?
if messageIDHash:
# A single message is being requested; ignore the hide list
# because they're explicitly asking for this message.
emails = readEmails( popServer, 0 )
messageNumber = findMessageNumber( emails, messageIDHash )
if messageNumber:
messageLines = popServer.top( messageNumber, 200 )[ 1 ]
print buildMessage( messageLines, messageIDHash, startIndex )
else:
print "<p><b>Message not found.</b></p>"
else:
# A message summary list is being requested, possibly via Hide.
emails = readEmails( popServer, 20, hideDict )
if hideIDHash:
# We're being asked to hide a message - add it to the hide
# list and remove it from the list of emails.
messageNumber = findMessageNumber( emails, hideIDHash )
if messageNumber:
hideDict[ hideIDHash ] = None
del emails[ messageNumber - 1 ]
# Fix up the start index so that deleting a single final message
# doesn't then give you an empty list.
if startIndex >= len( emails ) and len( emails ) > 0:
startIndex = len( emails ) - 1
# Sort the emails in reverse date order and display them.
sortEmails( emails )
print buildSummary( emails, startIndex )
# Write out the updated Hide list.
hidePickleFile = open( hidePicklePath, 'wt' )
cPickle.dump( hideDict, hidePickleFile )
hidePickleFile.close()
finally:
if popServer:
popServer.quit()
# Handle POP3 errors.
except poplib.error_proto, e:
print "<p>POP3 error: %s</p>" % WMLEscape( e )
# Handle unexpected errors by returning a traceback to the browser.
except:
print "<p>"
t = cStringIO.StringIO()
traceback.print_exc( file=t )
print string.replace( WMLEscape( t.getvalue() ), '\n', '<br/>\n' )
print "</p>"
# Close off the document - the content generation and error handlers
# are all carefully sync'd so that this will result in a valid WML document
# regardless of how we get here.
if OUTPUT_HTML:
print "</font></body></html>\n"
else:
print "</card></wml>\n"
def WMLEscape( text ):
    """Returns str( text ) made safe for inclusion in the page body.

    '&', '<' and '>' are escaped as cgi.escape does.  When generating WML
    (OUTPUT_HTML is false) every '$' is doubled as well, because a single
    '$' introduces a variable reference in WML."""
    escaped = cgi.escape( str( text ) )
    if OUTPUT_HTML:
        return escaped
    # cgi.escape neither adds nor removes '$', so doubling afterwards gives
    # the same result as doubling first.
    return escaped.replace( '$', '$$' )
def trim( text, limit ):
    """Trims the given text to a size limit, appending '...' if necessary.

    Text that already fits is returned unchanged.  Otherwise the result is
    exactly `limit` characters long and ends in '...'.  When `limit` is
    smaller than 3 there is no room for the ellipsis, so the text is simply
    cut to `limit` characters (or to nothing for a negative limit); the
    result is never longer than the limit."""
    if len( text ) <= limit:
        return text
    if limit < 3:
        # text[ :limit-3 ] would be a negative slice here and would return
        # nearly the whole string plus '...'.
        return text[ :max( limit, 0 ) ]
    return text[ :limit - 3 ] + '...'
def readEmails( popServer, bodyLines, hideDict=None ):
    """Reads the emails from the given POP3 server.

    popServer is a logged-in poplib.POP3-style object; its list() and top()
    methods are used.  The headers and the first bodyLines lines of the
    body of each message are read.  The emails are returned as a list of
    lists of lines, in the order the server lists them.

    hideDict is used to decide which emails to hide; its keys should be
    hashed message IDs, and messages whose hashed Message-Id is a key are
    left out of the result.  Messages without a Message-Id are never
    hidden.  hideDict is modified in place: any keys for messages that
    don't appear on the server are deleted from it.  If it is omitted,
    nothing is hidden."""
    if hideDict is None:
        hideDict = {}
    emails = []
    messageIDHashes = {}
    for listing in popServer.list()[ 1 ]:
        # Each listing is "number size", possibly followed by extra
        # server-specific fields; only the number is needed.
        number = int( listing.split()[ 0 ] )
        messageLines = popServer.top( number, bodyLines )[ 1 ]

        messageID = findHeader( messageLines, 'Message-Id' )
        if not messageID:
            # It can't be identified, so it can't have been hidden.
            emails.append( messageLines )
            continue

        # Keep a list of all the message ID hashes from the server.
        idHash = messageIDToHash( messageID )
        messageIDHashes[ idHash ] = None
        if idHash not in hideDict:
            # It's not hidden; add it to the list.
            emails.append( messageLines )

    # Clear the Hide list of any messages that don't exist any more.  The
    # keys are copied first because the dictionary is changed in the loop.
    for hidden in list( hideDict.keys() ):
        if hidden not in messageIDHashes:
            del hideDict[ hidden ]

    # Return the list of emails.
    return emails
def findMessageNumber( emails, messageIDHash ):
    """Finds the message with the given hashed ID in a list of emails.

    Returns the 1-based position of the first email whose hashed Message-Id
    equals messageIDHash, or 0 if there is no such email.  Emails without a
    Message-Id never match."""
    for position in range( len( emails ) ):
        candidateID = findHeader( emails[ position ], 'Message-Id' )
        if not candidateID:
            continue
        if messageIDToHash( candidateID ) == messageIDHash:
            return position + 1
    return 0
def sortEmails( emails ):
    """Given a list of emails from readEmails(), sort the list into
    descending date order.

    The list is sorted in place and nothing is returned.  Emails with the
    same date keep their existing relative order.  Each email's date is
    worked out once, up front, rather than every time two emails are
    compared."""
    # Decorate with ( negated date, original position ) so that a plain
    # ascending sort yields newest-first, ties stay in their original
    # order, and the emails themselves are never compared.
    decorated = [ ( -findDate( emails[ i ] ), i, emails[ i ] )
                  for i in range( len( emails ) ) ]
    decorated.sort()
    emails[ : ] = [ entry[ 2 ] for entry in decorated ]
def getTrimmedHeader( messageLines, name ):
    """Returns the named header's value cut down to HEADER_LIMIT characters,
    or "(none)" if the message has no such header."""
    value = findHeader( messageLines, name )
    if not value:
        value = "(none)"
    return trim( value, HEADER_LIMIT )
def formatHeader( name, value ):
    """Returns the markup for one header line: the name in bold, then the
    value, then a line break.  Neither argument is escaped here; callers
    are expected to pass text that is already safe to emit."""
    pieces = ( name, value )
    return "<b>%s: </b>%s<br/>\n" % pieces
def formatBody( body ):
    """Returns the body with a '<br/>' placed before every newline and one
    more '<br/>' appended at the end.  The text is not escaped here."""
    bodyLines = body.split( '\n' )
    return '<br/>\n'.join( bodyLines ) + "<br/>"
def buildURL( tail ):
"""Builds a URL from the login details and the given tail."""
# Strip off any message ID that's hanging off the end.
pathInfo = os.environ.get( 'PATH_INFO', '/t/t/t' )
loginPath = string.join( string.split( pathInfo, '/' )[ :4 ], '/' )
urlBase = os.environ.get( 'SCRIPT_NAME', 'test' ) + loginPath
if os.environ.get('HTTP_X_FORWARDED_SERVER') == 'ssl4.westserver.net':
urlBase = '/entrian.com' + urlBase
return urlBase + tail
def messageIDToHash( messageID ):
"""Converts a potentially long message ID to a compact url-encoded
base64-encoded md5 hash."""
# Replace '/', '+' and '=' because they're meaningful in URLs. I've seen
# '/' confuse Apache even when url-encoded, so we do it by hand,
# converting the special characters into other, harmless, characters
# rather than using urllib.quote.
hash = md5.new( messageID ).digest()
value = base64.encodestring( hash )
value = string.replace( value, '/', '_' )
value = string.replace( value, '+', '-' )
value = string.replace( value, '=', '~' )
return value[ :-1 ] # Lose the trailing base64 newline.
def buildMessage( messageLines, messageIDHash, startIndex ):
    """Builds WML for the given message.

    messageLines is the message as a list of lines.  The result is a single
    '<p>...</p>' string holding the Subject, From and Date headers (each
    trimmed to HEADER_LIMIT), the plain text body trimmed to
    FULL_BODY_LIMIT, a '(Message too long)' note if the body had to be cut,
    and a 'Hide' link.  In HTML mode a 'Back' link is added too.
    messageIDHash and startIndex are only used to build those links."""
    # Get all the headers.
    lines = []
    subject = getTrimmedHeader( messageLines, 'Subject' )
    fromAddress = getTrimmedHeader( messageLines, 'From' )
    date = getTrimmedHeader( messageLines, 'Date' )

    # Decide whether the body is too long *before* trimming it; comparing
    # the trimmed length with the limit would also flag a body that is
    # exactly FULL_BODY_LIMIT characters long and wasn't cut at all.
    fullBody = findBody( messageLines )
    isBodyTruncated = ( len( fullBody ) > FULL_BODY_LIMIT )
    body = trim( fullBody, FULL_BODY_LIMIT )

    # Escape everything and stitch it all together.
    subject, fromAddress, date, body = \
        map( WMLEscape, [ subject, fromAddress, date, body ] )
    lines.append( formatHeader( "Subject", subject ) )
    lines.append( formatHeader( "From", fromAddress ) )
    lines.append( formatHeader( "Date", date ) )
    lines.append( formatBody( body ) )
    if isBodyTruncated:
        lines.append( "<b>(Message too long)</b>" )

    # Append the 'Hide' link and a 'Back' link in the HTML version
    # (because the demo needs one).  The URLs are escaped for use inside a
    # double-quoted attribute: the '&' between parameters must be written
    # as '&amp;' and messageIDHash comes straight from the request.
    hideUrl = buildURL( '?h=%s&s=%d' % ( messageIDHash, startIndex ) )
    hideUrl = WMLEscape( hideUrl ).replace( '"', '&quot;' )
    lines.append( '<br/><a href="%s">Hide</a>' % hideUrl )
    if OUTPUT_HTML:
        backUrl = buildURL( '?s=%d' % startIndex )
        backUrl = WMLEscape( backUrl ).replace( '"', '&quot;' )
        lines.append( ' <a href="%s">Back</a>' % backUrl )

    return string.join( [ "<p>" ] + lines + [ "</p>" ], '' )
def buildSummary( emails, startIndex ):
    """Builds a WML summary of the given emails, chunked according to the
    maximum size of a WML document, with View and Hide links.

    emails is a list of emails (each a list of lines) and startIndex is the
    index of the first one to show.  As many emails as fit within the
    estimated size budget are listed, each with its Subject, From and a
    short body excerpt.  A 'Hide' link is given for every email that has a
    Message-Id, and a 'View' link as well if a plain text body was found.
    The result is a single '<p>...</p>' string that starts with a count
    line and ends with either a 'Next messages' link or 'End of messages.',
    plus a 'Home' link when startIndex is greater than zero."""
    # Start building a list of output lines and keeping track of the
    # estimated size of the compiled WML.
    lines = []
    wmlSize = 0
    messageCount = 0

    # Loop through the emails.
    for messageLines in emails[ startIndex: ]:
        # Extract the Subject, From and body, trimming appropriately.
        subject = getTrimmedHeader( messageLines, 'Subject' )
        fromAddress = getTrimmedHeader( messageLines, 'From' )
        body = trim( findBody( messageLines ), SHORT_BODY_LIMIT )

        # Build links to view/hide the message if we can.  The URLs are
        # escaped for use inside a double-quoted attribute; in particular
        # the '&' between parameters must be written as '&amp;'.
        viewLink = ''
        hideLink = ''
        messageID = findHeader( messageLines, 'Message-Id' )
        if messageID:
            idHash = messageIDToHash( messageID )
            if body != NO_PLAIN_TEXT_BODY:
                viewUrl = buildURL( '/%s?s=%d' % ( idHash, startIndex ) )
                viewUrl = WMLEscape( viewUrl ).replace( '"', '&quot;' )
                viewLink = '<a href="%s">View</a> ' % viewUrl
            hideUrl = buildURL( '?h=%s&s=%d' % ( idHash, startIndex ) )
            hideUrl = WMLEscape( hideUrl ).replace( '"', '&quot;' )
            hideLink = '<a href="%s">Hide</a>' % hideUrl

        # Add up the size; the '25' is for the 'Subject:' and 'From:' labels and
        # the pieces of compiled markup.  The '-12's are for the compilation of
        # the links.  Once we hit TOTAL_LIMIT-200 (to leave space for the rest
        # of the document), break out - but always show at least one
        # message, otherwise the 'Next messages' link would point straight
        # back at this same empty page.
        lengths = len( subject ) + len( fromAddress ) + len( body )
        wmlSize = wmlSize + 25 + len( viewLink ) - 12 + len( hideLink ) - 12 + lengths
        if wmlSize > TOTAL_LIMIT - 200 and messageCount > 0:
            break

        # Build the output lines and append them to the list.
        subject, fromAddress, body = map( WMLEscape, [ subject, fromAddress, body ] )
        lines.append( formatHeader( "Subject", subject ) )
        lines.append( formatHeader( "From", fromAddress ) )
        lines.append( formatBody( body ) + "\n" + viewLink + hideLink + "<br/> <br/>\n" )
        messageCount = messageCount + 1

    # Prepend the header line.
    if messageCount == len( emails ):
        if messageCount == 1:
            lines.insert( 0, "<b>1 message:</b><br/> <br/>" )
        else:
            lines.insert( 0, "<b>%d messages:</b><br/> <br/>" % messageCount )
    else:
        lines.insert( 0, "<b>Messages %d-%d of %d:</b><br/> <br/>" % \
                         ( startIndex + 1, startIndex + messageCount, len( emails ) ) )

    # Append either a link to the next batch of messages or a closing footer.
    if startIndex + messageCount < len( emails ):
        url = buildURL( '?s=%d' % ( startIndex + messageCount ) )
        url = WMLEscape( url ).replace( '"', '&quot;' )
        lines.append( '<a href="%s">Next messages</a>' % url )
    else:
        lines.append( "<b>End of messages.</b>" )
    if startIndex > 0:
        homeUrl = WMLEscape( buildURL( '' ) ).replace( '"', '&quot;' )
        lines.append( ' <a href="%s">Home</a>' % homeUrl )

    # Join the output lines together and return the WML as a single string.
    # Don't join the lines using \n because there's no point.
    return ''.join( [ "<p>" ] + lines + [ "</p>" ] )
def findHeader( lines, headerName ):
    """Finds a header in a list of lines.

    lines is a message (or MIME section) as a list of lines with no line
    endings.  headerName is matched case-insensitively.  Returns the value
    of the first matching header that has a non-empty value, with any
    continuation lines folded in and separated by single spaces, or None
    if there isn't one.

    Only the header block is searched: the search stops at the first empty
    line, so text in the body that happens to look like a header is never
    returned.  Continuation lines (those starting with a space or tab) are
    only ever treated as part of the header they continue, even if they
    contain a colon."""
    # Just like findBody, we can't safely use the standard library because
    # the message might be truncated.
    wanted = headerName.lower()
    index = 0
    while index < len( lines ):
        line = lines[ index ]
        if not line:
            # An empty line ends the headers.
            break

        # Find where this header's continuation lines end.
        nextIndex = index + 1
        while nextIndex < len( lines ) and lines[ nextIndex ] \
              and lines[ nextIndex ][ 0 ] in ' \t':
            nextIndex = nextIndex + 1

        # A line that itself starts with whitespace is a stray continuation
        # with nothing to continue; skip it.
        if line[ 0 ] not in ' \t' and ':' in line:
            thisName, thisValue = line.split( ':', 1 )
            if thisName.strip().lower() == wanted:
                parts = [ thisValue ] + lines[ index + 1 : nextIndex ]
                folded = ' '.join( [ part.strip() for part in parts ] ).strip()
                if folded:
                    return folded
        index = nextIndex
    return None
def findDate( lines ):
    """Finds the unix datestamp of a message, or returns zero.

    The Date header is parsed together with its timezone offset, so that
    messages sent from different timezones compare in true chronological
    order.  Zero is returned if there is no Date header, if it can't be
    parsed, or if it names a date that can't be converted to a timestamp."""
    date = findHeader( lines, 'Date' )
    if not date:
        return 0
    try:
        dateTuple = rfc822.parsedate_tz( date )
        if dateTuple:
            return rfc822.mktime_tz( dateTuple )
    except ( ValueError, OverflowError ):
        # The conversion rejects dates outside the range the platform can
        # represent - bogus years are common in spam - and one bad header
        # mustn't take the whole page down.
        pass
    return 0
def findBody( lines ):
    """Finds the body from the given list of lines.  The body is defined
    as the text following the first blank line, for text/plain or
    content-type-less emails, or the first text/plain section for other
    emails.  If no text/plain section exists, NO_PLAIN_TEXT_BODY is
    returned.

    lines is a message, or a single MIME section, as a list of lines with
    no line endings; it may be cut short.  If there is no blank line at
    all the body is empty.  Only the lines before the blank line are
    consulted for Content-Type and Content-Transfer-Encoding, so header-like
    text inside the body can't change how the body is interpreted.  A
    quoted-printable body is decoded.  For multipart content only the first
    section is examined (recursively, so nested multiparts work)."""
    # Find the first line of the body.  With no blank line there is no
    # body, rather than the last header line being mistaken for one.
    bodyStart = len( lines )
    for index in range( len( lines ) ):
        if not lines[ index ]:
            bodyStart = index + 1
            break

    # Find out what kind of thing we're dealing with.  We can't use the
    # standard library MIME stuff because the document might be incomplete.
    headerLines = lines[ :bodyStart ]
    contentType = findHeader( headerLines, 'Content-Type' )
    contentTransferEncoding = findHeader( headerLines, 'Content-Transfer-Encoding' )

    # Rebuild the full body.
    fullBody = string.join( lines[ bodyStart: ], '\n' )
    if contentTransferEncoding and string.lower( contentTransferEncoding ) == 'quoted-printable':
        decoded = cStringIO.StringIO()
        quopri.decode( cStringIO.StringIO( fullBody ), decoded )
        fullBody = decoded.getvalue()

    # Return the body verbatim for text/plain or content-type-less emails.
    if not contentType or string.lower( contentType[ :10 ] ) == 'text/plain':
        return fullBody

    # Find the multipart boundary.
    match = re.search( r'(?i)boundary=(.*?)(;|$)', contentType )
    if not match:
        # It's either broken or a content-type we don't understand.
        return NO_PLAIN_TEXT_BODY
    boundary = match.group( 1 )
    if len( boundary ) >= 2 and boundary[ 0 ] == boundary[ -1 ] \
       and boundary[ 0 ] in [ '"', "'" ]:
        boundary = boundary[ 1:-1 ]
    if not boundary:
        # 'boundary=' with nothing after it; broken.
        return NO_PLAIN_TEXT_BODY
    boundary = '--' + boundary

    # Assume that the first MIME section is the one we're
    # interested in, or is itself a multipart MIME message that
    # contains a text/plain section.
    boundaryOffset1 = string.find( fullBody, boundary )
    if boundaryOffset1 == -1:
        # Broken or otherwise incomprehensible.
        return NO_PLAIN_TEXT_BODY

    # Find the end of the section; if there's no end (the message was cut
    # short) the section runs to the end of what we have.
    sectionStart = boundaryOffset1 + len( boundary )
    sectionEnd = string.find( fullBody, boundary, sectionStart )
    if sectionEnd == -1:
        sectionEnd = len( fullBody )
    section = fullBody[ sectionStart:sectionEnd ]

    # Drop whatever is left of the boundary line itself.  The section's
    # own leading blank line (present when it has no headers) is kept, so
    # that the recursive call can tell headers from body.
    lineEnd = string.find( section, '\n' )
    if lineEnd == -1:
        section = ''
    else:
        section = section[ lineEnd + 1: ]

    # Recurse to find the plain text body; for real text/plain sections
    # this will simply be the lines following the first blank line, else
    # we'll search again for a text/plain section.
    return findBody( string.split( string.rstrip( section ), '\n' ) )
# Generate the page when run directly (as a CGI script); do nothing when
# the file is imported as a module.
if __name__ == '__main__':
    main()