# $Id$
# a simple interface to the W3C validator
#
# The import fallbacks below let the module load on both Python 2
# (urllib/urllib2) and Python 3 (urllib.parse/urllib.request).

import urllib

try:
    import urllib2
    urlencode = urllib.urlencode
except ImportError:
    # no urllib2 module: take the same functionality from the
    # urllib package instead
    import urllib.parse
    import urllib.request as urllib2
    urlencode = urllib.parse.urlencode

try:
    import xml.etree.cElementTree as ET
except ImportError:
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            import elementtree.ElementTree as ET

NS = "{http://www.w3.org/2005/10/markup-validator}"


##
# Validation result.  Attributes left at their class-level defaults
# were not reported by the call that produced the result.  A result
# is true only if its validity attribute is true; an unset
# validity (None) counts as false.

class Result(object):
    uri = None
    status = None  # set by check
    validity = None
    checkedby = None
    doctype = None
    charset = None
    errorcount = 0
    errorlist = ()
    warningcount = 0
    warninglist = ()
    recursion = None  # only set by check

    def __nonzero__(self):
        # validity may still be None; the truth-value hook has to
        # return a real boolean, so coerce it.
        return bool(self.validity)

    # Python 3 looks up __bool__ instead of __nonzero__
    __bool__ = __nonzero__


##
# (internal) Convert element text to an integer.
#
# @param text Element text, or None if the element was missing.
# @return The integer value, or None if text is None.

def _optional_int(text):
    if text is None:
        return None
    return int(text)


##
# (internal) Convert an errorlist/warninglist element to a list of
# ((line, column), message) tuples.
#
# @param container The list element, or None if it was missing.
# @return A list with one tuple per child element.  line and column
#     are None for entries that carry no position.
#     NOTE(review): presumably some validator warnings have no
#     line/col elements -- confirm against real validator output.

def _message_list(container):
    if container is None:
        return []
    out = []
    for item in container:
        line = _optional_int(item.findtext(NS + "line"))
        column = _optional_int(item.findtext(NS + "col"))
        message = item.findtext(NS + "message")
        out.append(((line, column), message))
    return out


##
# Validator client.
#
# @param validator URI of the validator's check script.

class Validator(object):

    def __init__(self, validator="http://validator.w3.org/check"):
        self.validator = validator

    ##
    # (internal) Convert parameter set to validator URI.
    #
    # @param **params Query parameters, URL-encoded and appended
    #     to the validator URI.
    # @return The complete request URI.

    def geturi(self, **params):
        return self.validator + "?" + urlencode(params)

    ##
    # Do a quick check, using a HTTP HEAD request.  This call sets
    # the uri, status, validity, errorcount and
    # recursion result attributes only.
    #
    # @param uri URI of the document to validate.
    # @param **params Extra query parameters for the validator.
    # @return A Result instance.

    def check(self, uri, **params):
        # just do a HEAD request
        # FIXME: enforce 1 second delay!
        request = urllib2.Request(self.geturi(uri=uri, **params))
        request.get_method = lambda: "HEAD"
        http_file = urllib2.urlopen(request)
        try:
            headers = http_file.headers
            res = Result()
            res.uri = uri
            res.status = headers["X-W3C-Validator-Status"]
            res.validity = res.status == "Valid"
            res.errorcount = int(headers["X-W3C-Validator-Errors"])
            res.recursion = headers["X-W3C-Validator-Recursion"]
        finally:
            # always release the connection, even if a header is
            # missing or malformed
            http_file.close()
        return res

    ##
    # Do full check, using the SOAP interface.  This call sets as many
    # result attributes as possible, including errorlist and
    # warninglist.
    #
    # @param uri URI of the document to validate.
    # @param **params Extra query parameters for the validator.  The
    #     "output" parameter is always overridden with "soap12".
    # @return A Result instance.

    def check_full(self, uri, **params):
        # FIXME: enforce 1 second delay!
        params["output"] = "soap12"
        request = urllib2.Request(self.geturi(uri=uri, **params))
        http_file = urllib2.urlopen(request)
        try:
            tree = ET.parse(http_file)
        finally:
            http_file.close()
        body = tree.find("{http://www.w3.org/2003/05/soap-envelope}Body")
        response = body.find(NS + "markupvalidationresponse")
        res = Result()
        for elem in response:
            if not elem.tag.startswith(NS):
                continue  # not part of the validator vocabulary
            tag = elem.tag[len(NS):]
            if tag == "validity":
                res.validity = elem.text == "true"
            elif tag == "warnings":
                res.warningcount = int(elem.findtext(NS + "warningcount"))
                res.warninglist = _message_list(elem.find(NS + "warninglist"))
            elif tag == "errors":
                res.errorcount = int(elem.findtext(NS + "errorcount"))
                res.errorlist = _message_list(elem.find(NS + "errorlist"))
            else:
                # any other element is copied over under its own name
                setattr(res, tag, elem.text)
        return res


if __name__ == "__main__":
    v = Validator()
    print(vars(v.check("http://www.python.org")))
    print(vars(v.check_full("http://www.cnn.com")))