# $Id$
# XHTML/HTML loader

import os
import subprocess

# slightly silly
try:
    import xml.etree.cElementTree as ET
except ImportError:
    try:
        # Python 3.9 removed the cElementTree alias; fall back to the
        # standard module before trying the old third-party packages.
        import xml.etree.ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            import elementtree.ElementTree as ET

NS_XHTML = "{http://www.w3.org/1999/xhtml}"


##
# Runs the external "tidy" tool on a file, converting it to XHTML in
# place.  Tidy's standard output and standard error are written to
# "tidy.out" and "tidy.err" in the current directory.
#
# @param file Name of the file to convert.
# @param encoding If "utf-8", tidy is told to treat the file as UTF-8.

def _run_tidy(file, encoding=None):
    command = ["tidy", "-qmn", "-asxml"]
    if encoding == "utf-8":
        command.append("-utf8")
    # Pass the name as a single argv entry (no shell), so quotes or
    # semicolons in it cannot be interpreted as shell syntax.
    command.append("%s" % (file,))
    try:
        with open("tidy.out", "wb") as out:
            with open("tidy.err", "wb") as err:
                subprocess.call(command, stdout=out, stderr=err)
    except (IOError, OSError):
        # Deliberately best-effort: if tidy is missing or the log files
        # cannot be created, the caller's second parse attempt raises
        # the real error.
        pass


##
# Loads an XHTML or HTML file into an Element structure.  Note that
# HTML files are converted to XHTML in place, via tidy.
#
# @param file Name of the file to load; handed to the loader as is.
# @param loader Callable that parses the file.  Defaults to ET.parse.
# @param encoding Encoding hint; only "utf-8" changes how tidy is run.
# @return Whatever the loader returned, with the XHTML namespace
#     prefix removed from every tag.
# @exception Exception Whatever the loader raises on the second
#     attempt, if the file cannot be parsed even after running tidy.

def load(file, loader=None, encoding=None):
    if not loader:
        loader = ET.parse
    try:
        elem = loader(file)
    except Exception:
        # FIXME: needs locking! (atomic rename should be good enough)
        _run_tidy(file, encoding)
        elem = loader(file)  # if this fails, the file was too broken

    # clean up namespace
    iterate = getattr(elem, "iter", None)
    if iterate is None:
        # older ElementTree releases only provide getiterator()
        iterate = elem.getiterator
    prefix_length = len(NS_XHTML)
    for node in iterate():
        tag = node.tag
        if callable(tag):
            # comments and processing instructions use a factory
            # function as their tag; there is nothing to strip
            continue
        if tag.startswith(NS_XHTML):
            node.tag = tag[prefix_length:]
    return elem