# Source code for PyMeld.py.
# The plain text source is here.
#
# PyMeld is released under the terms of the following MIT-style license:
#
# Copyright (c) Richie Hindle 2002
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
r"""Manipulates HTML (and XML, informally) using an object model based on `id`
attributes. `Container` objects are created from HTML, and provide access to
all the tags with `id="name"` attributes like this: `container.name`.
Individual tags within a container are represented as `Tag` objects, which
provide access to their HTML attributes like this: `tag.name`. You can also
create `Tag`s directly: `tag = Tag( html="<img src='pog.gif' id='pog'>" )`.
`Tag`s have a magic attribute `content` which represents the text between the
opening and closing tags.
Here's an example that takes some HTML, changes some of the content and tag
attributes, and inserts a modified clone of a tag into another part of the
HTML:
>>> import PyMeld
>>> html = '''<html><body>
... <textarea id='message' rows='2' cols='50'>Enter a message here.</textarea>
... <input type='button' id='pushme' value='Push me!'>
... <span id='duplicate'>Duplicate of 'pushme' goes here.</span>
... </body></html>'''
>>> page = PyMeld.Container( html ) # Create a Container from the HTML.
>>> print page.message # Access a tag within the container.
<textarea id='message' rows='2' cols='50'>Enter a message here.</textarea>
>>> print page.message.rows
2
>>> page.message.content = "New message." # Change the content of a tag.
>>> print page.message.content
New message.
>>> print page.message
<textarea id='message' rows='2' cols='50'>New message.</textarea>
>>> pushme2 = page.pushme.clone() # Clone a tag...
>>> pushme2.id = 'pushme2' # ...rename the clone...
>>> print pushme2
<input type='button' id='pushme2' value='Push me!'>
>>> page.duplicate.content = pushme2 # ...and add it to the container.
>>> page.pushme.value = "Don't push me" # Change the original tag.
>>> page.pushme2.value = "Nor me" # Change the clone in the container.
>>> print page # Print the resulting page.
<html><body>
<textarea id='message' rows='2' cols='50'>New message.</textarea>
<input type='button' id='pushme' value='Don't push me'>
<span id='duplicate'><input type='button' id='pushme2' value='Nor me'></span>
</body></html>
Advantages:
o No special requirements for the HTML (or just one: attribute values must be
quoted) - so you can use any HTML/XML editor.
o No logic embedded in HTML - separation of visual design from code and data.
o You can include placeholders in the HTML and replace them with new content
or modified clones at runtime (see the data-driven example below).
o Works by string substitution, rather than by decomposing and rebuilding the
HTML, hence has no impact on the parts of the page you don't manipulate.
o Does nothing but manipulate HTML, hence fits in with any other Web
toolkits you're using.
o Tracebacks always point to the right place - many Python-HTML mixing
systems use exec or eval, making bugs hard to track down.
Here's a data-driven example populating a table from a data source, basing the
table on sample data put in by the page designer. Note that in the real world
the HTML would normally be a larger page read from an external file, keeping
the data and presentation separate, and the data would come from an external
source like an RDBMS:
>>> html = '''<table id='people'>
... <tr id='header'><th>Name</th><th>Age</th></tr>
... <tr id='row'><td id='name'>Example name</td><td id='age'>21</td></tr>
... </table>'''
>>> table = PyMeld.Container( html )
>>> templateRow = PyMeld.Container( table.row )
>>> outputLines = [ str( table.header ) ]
>>> for name, age in [ ('Richie', 30), ('Dave', 39), ('John', 78) ]:
... newRow = templateRow.clone()
... newRow.name.content = name
... newRow.age.content = age
... outputLines.append( str( newRow ) )
>>> table.people.content = '\n'+string.join( outputLines, '\n' )+'\n'
>>> print table
<table id='people'>
<tr id='header'><th>Name</th><th>Age</th></tr>
<tr id='row'><td id='name'>Richie</td><td id='age'>30</td></tr>
<tr id='row'><td id='name'>Dave</td><td id='age'>39</td></tr>
<tr id='row'><td id='name'>John</td><td id='age'>78</td></tr>
</table>
Note that if you were going to subsequently manipulate the table, using
PyMeld or JavaScript for instance, you'd need to rename each `row`,
`name` and `age` tag to have a unique name - you can do that by assigning
to the `id` attribute but I've skipped that to make the example simpler.
Here's an example that mixes single and double quotes, uses the wrong case to
access a tag and an attribute, sets a numeric attribute value using a number
rather than a string, manipulates an unclosed tag (`<img>`), manipulates
nested tags, adds content to previously-empty tag, adds a new attribute to a
tag, and introduces a double-quote character into a double-quoted attribute
value, just to prove it all works:
>>> html = '''<img src="pog.gif" alt='Pog' id="pog" width="5" height='5'>
... <span id='one'>Hello.
... <span id='two'>This <span id='three'>is</span> doubly nested.</span>
... </span>
... <span id='more'></span>'''
>>> doc = Container( html )
>>> doc.Pog.Height = 10
>>> doc.one.content = "New text."
>>> doc.more.content = "More."
>>> doc.more.title = 'She cried, "More, more, more!"'
>>> print doc
<img src="pog.gif" alt='Pog' id="pog" width="5" height='10'>
<span id='one'>New text.</span>
<span title="She cried, &quot;More, more, more!&quot;" id='more'>More.</span>
"""
import re, string
# Regular expressions for tags and attributes.
#
# openTagRE and attributeRE are templates, not finished patterns: each holds
# a single %s that callers fill in with the `%` operator before searching.
# The (?i) flag makes matching case-insensitive; the (?x) flag (verbose mode)
# makes the regex engine ignore the unescaped whitespace and the `#` comments
# written inside the pattern text.
#
# All three patterns require attribute values to be enclosed in matching
# single or double quotes, and openTagRE only recognises attribute names made
# of `\w` characters.

# Matches the opening tag whose `id` attribute equals the substituted name.
# The `tag` group captures the tag's name; the match span is the whole
# opening tag, from `<` to `>`.
openTagRE = r"""(?ix)
<(?P<tag>\w+) # Tag opens; capture its name
(?:\s+\w+=(?P<quote1>["']).*?(?P=quote1))* # Attributes preceding 'id'
\s+id=(?P<quote2>["'])%s(?P=quote2) # The 'id' tag
(?:\s+\w+=(?P<quote3>["']).*?(?P=quote3))* # Attributes following 'id'
\s*/?> # Tag closes
"""
# Matches one attribute, whose name is substituted for the %s, inside an
# opening tag. Groups: `space` is the whitespace before the attribute, `name`
# is the attribute name as written in the HTML, `quote` is the quote character
# used, and `value` is the text between the quotes.
attributeRE = r"""(?ix)
(?P<space>\s+)
(?P<name>%s)=(?P<quote>["'])(?P<value>.*?)(?P=quote)
"""
# Matches an `id` attribute (case-insensitively) and captures its value in
# the `id` group. Used as-is; it has no %s placeholder.
idRE = r"""(?i)\s+id=(?P<quote>["'])(?P<id>.*?)(?P=quote)"""
class Container:
    """Represents an HTML document, or a fragment of one.

    Pass your HTML (or a `Container` or `Tag`) to the constructor. You can
    then access all the tags with `id="name"` attributes as `container.name`.
    You can set the content of such a tag using
    `container.name.content = content`, where `content` can be a string or
    another `Container` or `Tag`.

    The HTML text is held in the `html` attribute; `Tag` objects read and
    rewrite it in place.
    """

    def __init__(self, html):
        """Creates a `Container` from HTML or another `Container` or `Tag`."""
        # str() turns a `Container` or `Tag` into its HTML text. Strings are
        # immutable, so no further copy is needed.
        self.html = str(html)

    def clone(self):
        """Creates a clone of a Container, holding the same HTML text."""
        return Container(self.html)

    def __getattr__(self, name):
        """Returns the contained `Tag` that has the given name as its `id`
        attribute.

        Raises `AttributeError` if the HTML has no tag with that `id`.
        """
        # Read `html` from the instance dictionary rather than as `self.html`:
        # if it has not been set yet (for example while the object is being
        # copied or unpickled), `self.html` would re-enter this method and
        # recurse without end.
        html = self.__dict__.get('html')
        # re.escape() makes the name match literally, so ids containing
        # regular expression metacharacters neither fail to compile nor match
        # the wrong tag.
        if html is None or not re.search(openTagRE % re.escape(name), html):
            raise AttributeError(name)
        return Tag(self, name)

    def __str__(self):
        """Returns the HTML that this `Container` represents."""
        return self.html
class Tag:
    """Represents a named tag in a `Container`, named by its `id` attribute.

    You can access all the HTML attributes of the `Tag` as `tag.name`, and
    the text between its opening and closing tags as `tag.content`.
    Create `Tag`s either via a `Container` using `container.name`, or directly
    from a snippet of HTML: `tag = Tag( html="<img src='pog.gif' id='pog'>" )`.

    Attribute names that begin with an underscore are the object's own
    bookkeeping and are stored on the instance, not in the HTML. A `Tag`
    keeps no copy of its HTML: every access locates the tag afresh inside
    its parent's `html` text.
    """

    def __init__(self, parent=None, name=None, html=None):
        """Pass either `parent` and `name` to access a tag within a
        `Container`, or 'html' to create a standalone `Tag` from a snippet of
        HTML.

        Raises `ValueError` for any other combination of arguments, and
        `AttributeError` if the HTML snippet has no `id` attribute.
        """
        if parent and name and not html:
            self._parent = parent
            self._name = name
        elif not parent and not name and html:
            # A standalone tag lives in a private Container of its own.
            self._parent = Container(html)
            # Search the Container's text, which is always a string, so that
            # `html` may be anything the Container constructor accepts.
            idMatch = re.search(idRE, self._parent.html)
            if not idMatch:
                raise AttributeError("Tag HTML must have an 'id' attribute")
            self._name = idMatch.group('id')
        else:
            raise ValueError("Tag() takes either 'parent' and 'name', or 'html'")

    def _updateParentRange(self):
        """Updates the object's idea of where it lives within its
        Container's HTML.

        Sets `_tagName`, `_openTagStart` and `_openTagEnd` (the span of the
        opening tag) and `_closingTagStart` and `_closingTagEnd` (the span of
        the closing tag). When there is no closing tag, both closing offsets
        equal `_openTagEnd`, giving the tag empty content.

        Raises `AttributeError` if the parent's HTML no longer contains a tag
        with this `id`.
        """
        html = self._parent.html
        openTagMatch = re.search(openTagRE % re.escape(self._name), html)
        if not openTagMatch:
            raise AttributeError(self._name)
        self._tagName = openTagMatch.group('tag')
        self._openTagStart, self._openTagEnd = openTagMatch.span()

        # Now find the closing tag in the remainder of the HTML. Most of this
        # code deals with nested tags - counting up nested opening tags and
        # counting down the closing tags until it gets to zero. Searching
        # from an offset avoids copying the remainder on every iteration.
        tagPattern = re.escape(self._tagName)
        nextOpenRE = re.compile(r'(?i)<%s(>|\s)' % tagPattern)
        nextCloseRE = re.compile(r'(?i)</%s>' % tagPattern)
        depth = 1
        pos = self._openTagEnd
        while True:
            nextOpenMatch = nextOpenRE.search(html, pos)
            nextCloseMatch = nextCloseRE.search(html, pos)
            if not nextCloseMatch:
                # There's no matching closing tag.
                self._closingTagStart = self._openTagEnd
                self._closingTagEnd = self._openTagEnd
                break
            if nextOpenMatch and nextOpenMatch.start() < nextCloseMatch.start():
                # We've found a nested opening tag.
                depth += 1
                pos = nextOpenMatch.end()
                continue
            depth -= 1
            if depth == 0:
                # We've found the matching closing tag.
                self._closingTagStart, self._closingTagEnd = nextCloseMatch.span()
                break
            # We've found a closing tag, but it's for a nested opening tag.
            pos = nextCloseMatch.end()

    def __getattr__(self, name):
        """Returns the `Tag`s HTML content for the special attribute
        `content`, or returns the value of the given attribute.

        Raises `AttributeError` if the tag has no such attribute.
        """
        if name[:1] == '_':
            # This method only runs when normal lookup has already failed, so
            # a private name that reaches here does not exist. Report that as
            # AttributeError so that hasattr(), getattr() with a default and
            # truth-testing all behave normally.
            raise AttributeError(name)
        self._updateParentRange()
        html = self._parent.html
        if name == 'content':
            return html[self._openTagEnd:self._closingTagStart]
        openTag = html[self._openTagStart:self._openTagEnd]
        attributeMatch = re.search(attributeRE % re.escape(name), openTag)
        if not attributeMatch:
            raise AttributeError(name)
        # Undo the escaping applied by __setattr__.
        return attributeMatch.group('value').replace('&quot;', '"')

    def __setattr__(self, name, value):
        """Sets the `Tag`s HTML content for the special attribute `content`,
        or sets the value of the given attribute.

        `value` is converted with `str()`. An existing attribute keeps its
        original spelling, spacing and quote character; a new attribute is
        inserted, double-quoted, immediately after the tag name. Assigning to
        `id` also renames this `Tag`.
        """
        if name[:1] == '_':
            self.__dict__[name] = value
            return
        self._updateParentRange()
        html = self._parent.html
        if name == 'content':
            self._parent.html = (html[:self._openTagEnd] +
                                 str(value) +
                                 html[self._closingTagStart:])
            return

        openTag = html[self._openTagStart:self._openTagEnd]
        attributeMatch = re.search(attributeRE % re.escape(name), openTag)
        # A literal double quote would end a double-quoted attribute value
        # early, so store it as a character entity.
        # NOTE(review): single quotes are not escaped, so a value containing
        # one is written as-is into an attribute that uses single quotes.
        escapedValue = str(value).replace('"', '&quot;')
        if attributeMatch:
            # This is a change to an existing attribute.
            attributeStart, attributeEnd = attributeMatch.span()
            quote = attributeMatch.group('quote')
            newAttribute = '%s%s=%s%s%s' % (attributeMatch.group('space'),
                                            attributeMatch.group('name'),
                                            quote, escapedValue, quote)
            newOpenTag = (openTag[:attributeStart] +
                          newAttribute +
                          openTag[attributeEnd:])
            self._parent.html = (html[:self._openTagStart] +
                                 newOpenTag +
                                 html[self._openTagEnd:])
        else:
            # This is introducing a new attribute, placed straight after the
            # '<' and the tag name.
            newAttributePos = self._openTagStart + 1 + len(self._tagName)
            newAttribute = ' %s="%s"' % (name, escapedValue)
            self._parent.html = (html[:newAttributePos] +
                                 newAttribute +
                                 html[newAttributePos:])
        if name.lower() == 'id':
            # The tag is located by its id, so follow the rename.
            self._name = str(value)

    def __str__(self):
        """Gets the whole tag's HTML, from the start of the opening tag to
        the end of the closing tag."""
        self._updateParentRange()
        return self._parent.html[self._openTagStart:self._closingTagEnd]

    def clone(self):
        """Creates a clone of a `Tag`. Useful for copying tags between
        `Containers` or for populating template HTML tags with real data
        (see the data-driven example in the main module documentation)."""
        return Tag(html=str(self))
def test():
    """Runs doctest over the `PyMeld` module and returns doctest's result."""
    import doctest
    import PyMeld
    results = doctest.testmod(PyMeld)
    return results
if __name__ == '__main__':
    # Run the doctests when executed as a script and report the tally.
    failures, attempted = test()
    print("%d of %d doctests successful." % (attempted - failures, attempted))