# $Id$
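# Generate element and reserved-word attribute definitions (as Python source
# printed to stdout) from the index pages of the HTML 4 specification.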
from urllib import urlopen
from ElementSoup import ET, parse
import keyword
# URL of the HTML 4 specification's element index page; its table rows are
# scanned to produce one generated binding per element.
ELEMENTS = "http://www.w3.org/TR/html4/index/elements.html"
# URL of the HTML 4 specification's attribute index page; scanned for
# attribute names that collide with Python reserved words.
ATTRIBUTES = "http://www.w3.org/TR/html4/index/attributes.html"
def gettext(elem):
    """Return the text contained in *elem*, in document order.

    The result is the element's own leading text followed, for every child,
    by the child's recursively collected text and then the child's tail
    (the text that follows the child's end tag).  The tail of *elem* itself
    is not included.  Missing text or tails contribute nothing, so an
    element without any text yields an empty string.
    """
    # Collect the fragments and join once instead of growing a string with
    # repeated "+=" at every level of the recursion.
    parts = [elem.text or ""]
    for child in elem:
        parts.append(gettext(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)
# Names of the elements whose "Empty" column is filled in; collected while
# walking the element table and printed at the very end of the script.
empty = []

# Header of the generated source.  This is a Python 2 script; print is
# called with a single parenthesised argument, which produces the same
# output as the bare print statement.
print("from builder import E, ET")
print("")
print("# elements")

# Download and parse the element index, then close the HTTP response as
# soon as the table rows have been extracted.
response = urlopen(ELEMENTS)
try:
    rows = parse(response).findall(".//tr")
finally:
    response.close()

for row in rows:
    # Map each cell's "title" attribute (used here as the column name) to
    # the cell's stripped text content.
    d = {}
    for cell in row:
        d[cell.get("title", "").strip()] = gettext(cell).strip()
    name = d.get("Name")
    if not name:
        # Rows without a "Name" cell do not describe an element.
        continue
    name = name.lower().encode("ascii")
    # Collapse runs of whitespace; a row lacking a "Description" cell gets
    # an empty description instead of failing on None.
    description = " ".join(d.get("Description", "").split())
    if keyword.iskeyword(name):
        # A reserved word cannot be written as an attribute access, so the
        # generated code looks it up with getattr instead.
        binding = "getattr(E, %r)" % name
    else:
        binding = "E." + name
    # One generated line per element: NAME = <binding> # description
    fields = [name.upper(), "=", binding, "#", description]
    if d.get("Depr."):
        fields.append("(DEPRECATED)")
    print(" ".join(fields))
    if d.get("Empty"):
        empty.append(name)
print("")
# Attribute section of the generated source.  Only attribute names that are
# Python reserved words get a helper; each helper returns a one-item dict
# keyed by the attribute name.  This is a Python 2 script; print is called
# with a single parenthesised argument, which produces the same output as
# the bare print statement.
print("# attributes (only reserved words are included here)")
print("ATTR = dict")

# Download and parse the attribute index, then close the HTTP response as
# soon as the table rows have been extracted.
response = urlopen(ATTRIBUTES)
try:
    rows = parse(response).findall(".//tr")
finally:
    response.close()

# Name of the most recently emitted helper; used to skip further rows for
# the same attribute name.
last = None
for row in rows:
    # Map each cell's "title" attribute (used here as the column name) to
    # the cell's stripped text content.
    d = {}
    for cell in row:
        d[cell.get("title", "").strip()] = gettext(cell).strip()
    name = d.get("Name")
    if not name:
        # Rows without a "Name" cell do not describe an attribute.
        continue
    name = name.lower().encode("ascii")
    # Skip names already emitted and names that are not reserved words.
    if name == last or not keyword.iskeyword(name):
        continue
    print("def %s(v): return {%r: v}" % (name.upper(), name))
    last = name
print("")
# Finish with the list of element names collected in `empty`.
print("# elements that don't have an end tag")
print("empty = %s" % (empty,))