"""Merge the pages listed in ``pyref/contents.xml`` into a single HTML file.

Every ``<a href>`` in the contents file names a page ``pyref/<name>.xml``.
The children of each page's ``<body>`` are appended to the template body,
headings without an ``id`` get one (for bookmarking), a table of contents
is inserted after the element whose id is ``node2``, paragraphs starting
with ``COMMENT:`` are blanked, and the result is written to ``pyref.html``.
"""

try:
    import cElementTree as ET
except ImportError:
    # The standalone cElementTree package is not available on current
    # Python versions; the standard library module has the same API.
    import xml.etree.ElementTree as ET
import os
import re

SOURCE_DIR = "pyref"
OUTPUT_FILE = "pyref.html"
TOC_ANCHOR_ID = "node2"
HEADING_TAGS = ("h1", "h2", "h3")

# NOTE(review): the markup of this template appears to have been lost (only
# the text "Python Reference" survived, which is not parseable XML).  This
# minimal skeleton provides what the code below requires: a root element
# with a <body> child carrying id="body".  Confirm against the real template.
template, idmap = ET.XMLID(
"""
<html>
<head>
<title>Python Reference</title>
</head>
<body id="body">
</body>
</html>
"""
)


def iter_elements(elem, tag=None):
    """Iterate over *elem* and all its descendants, optionally only *tag*.

    Uses ``Element.iter`` where it exists and falls back to the older
    ``getiterator`` spelling otherwise, so both old and new ElementTree
    implementations work.
    """
    walker = getattr(elem, "iter", None) or elem.getiterator
    return walker(tag)


def flatten(elem):
    """Return the text of *elem* and all its descendants, markup removed."""
    parts = [elem.text or ""]
    for child in elem:
        parts.append(flatten(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)


def merge_page(body, page, page_id):
    """Append the body content of the parsed *page* to *body*.

    If the page has a ``head/title``, an ``<h1>`` with that title is put in
    front of the page content.  The first content element gets *page_id*
    as its ``id`` so the table of contents can link to it.

    Returns the ``(id, title)`` table-of-contents entry, or ``None`` when
    the page has no ``<body>`` or nothing to contribute.  The title falls
    back to *page_id* when the page has none, so the link is never empty.
    """
    title = page.findtext("head/title")
    content = page.find("body")
    # "is None" on purpose: an element without children is falsy.
    if content is None:
        return None
    if title:
        heading = ET.Element("h1")
        heading.text = title
        content.insert(0, heading)
    if not len(content):
        return None
    content[0].set("id", page_id)
    body[len(body):] = list(content)
    return (page_id, title or page_id)


def collect_pages(body, source_dir=SOURCE_DIR):
    """Merge every page linked from ``<source_dir>/contents.xml`` into *body*.

    Anchors without an ``href`` and pages whose file cannot be opened are
    skipped.  Returns the list of ``(id, title)`` entries of merged pages.
    """
    toc = []
    index = ET.parse(source_dir + "/contents.xml")
    for link in index.findall(".//a"):
        href = link.get("href")
        if not href:
            continue
        page_id = os.path.splitext(href)[0].strip("/")
        try:
            page = ET.parse(source_dir + "/" + page_id + ".xml")
        except IOError:
            continue
        entry = merge_page(body, page, page_id)
        if entry is not None:
            toc.append(entry)
    return toc


def number_headings(root):
    """Give every h1/h2/h3 under *root* that lacks an ``id`` one (h0, h1, ...)."""
    counter = 0
    for elem in iter_elements(root):
        if elem.tag in HEADING_TAGS and not elem.get("id"):
            elem.set("id", "h%d" % counter)
            counter += 1


def insert_toc(root, toc, anchor_id=TOC_ANCHOR_ID):
    """Insert a ``<dl>`` of links right after the body child with *anchor_id*.

    *toc* is a sequence of ``(id, title)`` pairs.  Returns True if the list
    was inserted, False if *root* has no ``<body>`` or no direct child of
    the body carries *anchor_id* (the document is then left untouched).
    """
    container = root.find("body")
    if container is None:
        return False
    for position, elem in enumerate(container):
        if elem.get("id") == anchor_id:
            listing = ET.Element("dl")
            for href, title in toc:
                entry = ET.SubElement(listing, "dt")
                ET.SubElement(entry, "a", href="#" + href).text = title
            container.insert(position + 1, listing)
            return True
    return False


def zap_comments(root):
    """Turn every ``<p>`` whose text starts with ``COMMENT:`` into an empty div."""
    # Materialise the walk first: clearing elements while a lazy iterator
    # is still descending the tree would mutate what it is traversing.
    for para in list(iter_elements(root, "p")):
        if para.text and para.text.startswith("COMMENT:"):
            # clear() also resets the tail, which is text that follows the
            # paragraph and is not part of the comment; keep it.
            tail = para.tail
            para.clear()
            para.tail = tail
            para.tag = "div"


def main():
    """Build ``pyref.html`` from the pages under ``pyref/``."""
    toc = collect_pages(idmap["body"])

    # fixup internal links
    # FIXME: look internal links (not implemented)

    # add id attributes to headings (for bookmarking)
    number_headings(template)

    # add table of contents
    insert_toc(template, toc)

    # zap comment paragraphs
    zap_comments(template)

    ET.ElementTree(template).write(OUTPUT_FILE)


if __name__ == "__main__":
    main()