import cElementTree as ET ## # Removes start and stop tags for any element for which the filter # function returns false. If you want to remove the entire element, # including all subelements, use the clear method inside the # filter callable. #
# Note that this function modifies the tree in place.
#
# @param elem An element tree.
# @param filter A filter function. This should be a callable that
# takes an element as its single argument.
def cleanup(elem, filter):
    """Remove the tags of every descendant for which *filter* is false.

    A rejected element is replaced, in place, by its own content: its
    text, its (already cleaned) children and its tail are spliced into
    the parent where the element used to be.  Elements for which *filter*
    returns a true value are kept as they are.  *elem* itself is never
    passed to *filter*; only its descendants are.

    The tree is modified in place and nothing is returned.

    elem -- the element whose subtree should be cleaned.
    filter -- a callable taking a single element; a false result means
        "drop this element's start and end tags".  It is called after
        the element's own subtree has been cleaned.
    """
    out = []

    def append_text(text):
        # Character data belongs to the tail of the last node kept so
        # far, or to the parent's text when no child precedes it.
        # ``text``/``tail`` are None when absent, so treat None as ""
        # rather than concatenating onto it (which raises TypeError).
        if out:
            out[-1].tail = (out[-1].tail or "") + text
        else:
            elem.text = (elem.text or "") + text

    for e in elem:
        # Clean depth-first so that e's surviving children are final
        # before they are hoisted into this level.
        cleanup(e, filter)
        if filter(e):
            out.append(e)
            continue
        if e.text:
            append_text(e.text)
        # Hoist the children of the rejected element into its place.
        out.extend(e)
        if e.tail:
            append_text(e.tail)

    elem[:] = out
# --------------------------------------------------------------------
# demo code
import copy
DOC2 = """\