# $Id$
# simple wrapper for the Yahoo Term Extraction API
#
# written by Fredrik Lundh, November 2005
#
# see:
# http://developer.yahoo.net/search/content/V1/termExtraction.html

import urllib

from elementtree import ElementTree

URI = "http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction"

##
# Calls the Yahoo Term Extraction service with a search context, and
# returns a list of search terms.
# <p>
# The parameters are UTF-8 encoded, form-encoded and passed to
# <b>urlopen</b> as request data.  The XML response is parsed
# incrementally, and the connection is always closed before this
# function returns or raises.
#
# @param appid The application identity.  See the Yahoo site for
#     details.
# @param context The context to extract terms from.  For non-ASCII
#     text, use a Unicode string.
# @param query Optional query string.  For non-ASCII text, use a
#     Unicode string.  Left out of the request if empty or None.
# @return A list of search terms: the text of every
#     <b>{urn:yahoo:cate}Result</b> element, in document order.

def term_extraction(appid, context, query=None):
    params = dict(
        appid=appid,
        context=context.encode("utf-8"),
    )
    if query:
        params["query"] = query.encode("utf-8")
    response = urllib.urlopen(URI, urllib.urlencode(params))
    # FIXME: check return code ?
    try:
        # iterparse reports each element once its end tag has been read,
        # so elem.text is complete by the time we look at it.
        return [
            elem.text
            for event, elem in ElementTree.iterparse(response)
            if elem.tag == "{urn:yahoo:cate}Result"
        ]
    finally:
        # Release the socket even if the response is not well-formed XML.
        response.close()