# $Id$
# a simple interface to the W3C validator
import urllib, urllib2
try:
import xml.etree.cElementTree as ET
except ImportError:
try:
import cElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
# XML namespace of the validator's response elements, written in
# ElementTree's "{uri}" notation so it can be prefixed directly to tag
# names passed to find()/findtext().
NS = "{http://www.w3.org/2005/10/markup-validator}"
##
# Container for the outcome of a validation request.
#
# All fields are plain attributes with the class-level defaults below;
# the code that builds a Result overwrites the ones it has data for.
# An instance is true exactly when its validity attribute is true.

class Result(object):
    uri = None
    # True/False once a validation result is known, None before that.
    validity = None
    # NOTE(review): checkedby, doctype and charset are presumably filled
    # in from same-named elements of the validator response -- confirm.
    checkedby = None
    doctype = None
    charset = None
    errorcount = 0
    # Sequences of ((line, column), message) entries when populated.
    errorlist = ()
    warningcount = 0
    warninglist = ()
    recursion = None  # only set by check

    ##
    # Truth value of the result.
    #
    # @return True if the document was reported valid, False otherwise
    #     (including when no validity has been recorded yet).

    def __nonzero__(self):
        # validity defaults to None; returning None from __nonzero__
        # raises TypeError, so coerce to a real boolean.
        return bool(self.validity)

    # Same truth test under the name looked up by Python 3.
    __bool__ = __nonzero__
##
# Simple client for a W3C markup validator service.
#
# @param validator URI of the validator's check endpoint.  Request
#     parameters are appended to it as a query string.

class Validator(object):

    def __init__(self, validator="http://validator.w3.org/check"):
        self.validator = validator

    ##
    # (internal) Convert parameter set to validator URI.
    #
    # @param **params Request parameters, given as keyword arguments.
    # @return The validator URI, followed by "?" and the URL-encoded
    #     parameters.

    def geturi(self, **params):
        return self.validator + "?" + urllib.urlencode(params)

    ##
    # (internal) Convert a message list element (error list or warning
    # list) to a Python list.
    #
    # @param container Element whose children are the individual
    #     messages, or None if the response carried no such list.
    # @param ns Namespace prefix, in "{uri}" form, of the child tags.
    # @return A list of ((line, column), message) tuples.  Line and
    #     column are integers, or None for a message that carries no
    #     position.  An absent container gives an empty list.

    @staticmethod
    def _parse_messages(container, ns):
        def to_int(text):
            # findtext() returns None when the child element is absent;
            # int(None) would raise TypeError.
            if text is None:
                return None
            return int(text)

        out = []
        if container is None:
            return out
        for item in container:
            position = (
                to_int(item.findtext(ns + "line")),
                to_int(item.findtext(ns + "col")),
            )
            out.append((position, item.findtext(ns + "message")))
        return out

    ##
    # Do a quick check, using an HTTP HEAD request.  This call sets the
    # uri, status, validity, errorcount and
    # recursion result attributes only; everything comes
    # from the X-W3C-Validator-* response headers.
    #
    # @param uri URI of the document to validate.
    # @param **params Additional request parameters for the validator.
    # @return A Result instance.

    def check(self, uri, **params):
        # FIXME: enforce 1 second delay!
        request = urllib2.Request(self.geturi(uri=uri, **params))
        # urllib2 has no direct way to pick the HTTP verb; overriding
        # get_method on the request turns it into a HEAD request.
        request.get_method = lambda: "HEAD"
        http_file = urllib2.urlopen(request)
        try:
            headers = http_file.headers
            res = Result()
            res.uri = uri
            res.status = headers["X-W3C-Validator-Status"]
            res.validity = res.status == "Valid"
            res.errorcount = int(headers["X-W3C-Validator-Errors"])
            res.recursion = headers["X-W3C-Validator-Recursion"]
        finally:
            # always release the connection, even if a header is missing
            http_file.close()
        return res

    ##
    # Do full check, using the SOAP interface.  This call sets as many
    # result attributes as possible, including errorlist and
    # warninglist.
    #
    # @param uri URI of the document to validate.
    # @param **params Additional request parameters for the validator.
    #     The "output" parameter is always forced to "soap12".
    # @return A Result instance.  Besides validity and the error and
    #     warning counts/lists, every other element of the response
    #     that lives in the validator namespace is stored as a string
    #     attribute named after its tag.

    def check_full(self, uri, **params):
        # FIXME: enforce 1 second delay!
        params["output"] = "soap12"
        request = urllib2.Request(self.geturi(uri=uri, **params))
        http_file = urllib2.urlopen(request)
        try:
            tree = ET.parse(http_file)
        finally:
            # the document is fully parsed (or parsing failed); either
            # way the connection is no longer needed
            http_file.close()
        body = tree.find("{http://www.w3.org/2003/05/soap-envelope}Body")
        response = body.find(NS + "markupvalidationresponse")
        res = Result()
        for elem in response:
            if not elem.tag.startswith(NS):
                # elements outside the validator namespace are ignored
                continue
            tag = elem.tag[len(NS):]
            if tag == "validity":
                res.validity = elem.text == "true"
            elif tag == "warnings":
                res.warningcount = int(elem.findtext(NS + "warningcount"))
                res.warninglist = self._parse_messages(
                    elem.find(NS + "warninglist"), NS)
            elif tag == "errors":
                res.errorcount = int(elem.findtext(NS + "errorcount"))
                res.errorlist = self._parse_messages(
                    elem.find(NS + "errorlist"), NS)
            else:
                setattr(res, tag, elem.text)
        return res
if __name__ == "__main__":
    # Demo run against the live validator: a quick header-only check
    # first, then a full check, dumping each result's attributes.
    validator = Validator()
    quick_result = validator.check("http://www.python.org")
    print(vars(quick_result))
    full_result = validator.check_full("http://www.cnn.com")
    print(vars(full_result))