19c19 < VERSION = "1.9" --- > VERSION = "1.9" ###@@@ nimrod: except for where you see stuff with "###@@@" in it 25a26,36 > ###@@@ nimrod: these aliases is missing from some old python distros > ###@@@ broke on whatsup an cyberlaw at tripod :( > > import encodings.aliases > encodings.aliases.aliases.update( > {'iso8859_8_i':'cp1255', > 'iso_8859_8_i':'cp1255', > 'iso8859_8i':'cp1255', > 'iso_8859_8i':'cp1255', > }) > 64c75 < def sanitise_html(html, baseurl, inline = 0): --- > def sanitise_html(html, baseurl, inline=0, acceptable_elements=['a', 'b','br','i','img','p']): 75a87 > p.acceptable_elements = acceptable_elements ###@@@ nimrod 237,238c249 < if description is None and item.has_key("content"): < description = self.decode(select_content(item["content"])) --- > ###@@@ nimrod: switched between description and content (would rather see description) 240a252,253 > if description is None and item.has_key("content"): > description = self.decode(select_content(item["content"])) 302c315,316 < s = str(feed) + str(title) + str(link) + str(description) --- > #s = str(feed) + str(title) + str(link) + str(description) > s = str(Feed) + str(link) ### nimrod: Some sites (e.g. BBC) keep rewriting their title and desc :) 401a416,417 > ###@@@ nimrod: added templateencoding and rtlitemtemplate > "templateencoding" : "iso-8859-1", 402a419 > "rtlitemtemplate" : None, 466a484,485 > elif l[0] == "templateencoding": > self["templateencoding"] = l[1] 468a488,489 > elif l[0] == "rtlitemtemplate": > self["rtlitemtemplate"] = l[1] 482a504 > ORPHAN_ENTITY_PAT=re.compile('(&[^;]{0,6}$)') 552c574,575 < return load_file(config["template"]) --- > ###@@@ nimrod: added templateencoding > return load_file(config["template"]).decode(config["templateencoding"]) 590c613 < def get_itemtemplate(self, config): --- > def get_itemtemplate(self, config,feed=None): 592c615,620 < return load_file(config["itemtemplate"]) --- > ###@@@ nimrod: added dir=rtl and templateencoding > if feed and feed.args.has_key("dir") and feed.args["dir"] == "rtl": > thetemplate=config["rtlitemtemplate"] or config["itemtemplate"] > else: > thetemplate=config["itemtemplate"] > return load_file(thetemplate).decode(config["templateencoding"]) 628,642c656,663 < def compare(a, b): < """Compare two articles to decide how they < should be sorted. Sort by added date, then < by feed, then by sequence, then by hash.""" < i = cmp(b.added, a.added) < if i != 0: < return i < i = cmp(a.feed, b.feed) < if i != 0: < return i < i = cmp(a.get_sequence(), b.get_sequence()) < if i != 0: < return i < return cmp(a.hash, b.hash) < articles.sort(compare) --- > ###@@@ nimrod: a different sort function > def comp(a,b): > """order function for article sort. > when you start running rawdog, there's advantage to feeds without item date > because as fallback - we look at the date WE first saw them, > so we give "added" a 24h penalty. But after a while - things settle down :)""" > return cmp(b.date or (b.added-60*60*24), a.date or (a.added-60*60*24)) or cmp(b.added,a.added) or cmp(b.hash,a.hash) > articles.sort(comp) 647d667 < itemtemplate = self.get_itemtemplate(config) 657,658d676 < dw.time(article.added) < 661a680,681 > ##@@@ nimrod: added feed as optional arg to get_itemtemplate > itemtemplate = self.get_itemtemplate(config,feed=feed) 669c689,690 < if feed.args.has_key("format") and feed.args["format"] == "text": --- > if feed.args.has_key("format"): > if feed.args["format"] == "text": 671,672c692,701 < else: < description = sanitise_html(article.description, baseurl, 0) --- > elif feed.args["format"] == "wmlattr": # for attributes inside wml: plain text, truncated, '"' and '$' "defused" > description = article.description or "" # against 'None' and evil eye > if len(description)>128: > description=description[:128-3] # leave space for ... > description=self.ORPHAN_ENTITY_PAT.sub('',description)+'...' > description = sanitise_html(description,'',1,[]).replace('$','$$').replace('"','"') > else: # unknown format, just sanitise and hope for the best > description = sanitise_html(description,baseurl,1,[]) > else: # default format. allow some html tags > description = sanitise_html(article.description, baseurl, 1) 683c712,715 < itembits["title_no_link"] = title --- > if feed.args.has_key("format") and feed.args["format"]=="wmlattr": > itembits["title_no_link"] = title.replace('"','"').replace('$','$$') > else: > itembits["title_no_link"] = title 720,721c752,753 < print >>f, '' < print >>f, '' + feed.get_html_link() + '' --- > print >>f, '' > print >>f, '' + feed.get_html_link() + '' 729a762 > s=s.encode('utf-8') ###@@@ nimrod 736c769,773 < f.close() --- > f.close() > try: # windows doesn't clobber on rename. sorry) > os.remove(outputfile) > except OSError: > pass 772c809,812 < statedir = os.environ["HOME"] + "/.rawdog" --- > try: > statedir = os.environ["HOME"] + "/.rawdog" > except: # windows > statedir='.'