From 21f8ced3dc2acec4ac692adee184685d69aba54a Mon Sep 17 00:00:00 2001 From: Louis Abel Date: Thu, 7 Sep 2023 02:12:08 -0700 Subject: [PATCH] move to use elementtree --- mangle/generators/rss.py | 208 ++++++++++++++++++++++----------------- 1 file changed, 118 insertions(+), 90 deletions(-) diff --git a/mangle/generators/rss.py b/mangle/generators/rss.py index 984ce17..9157c01 100644 --- a/mangle/generators/rss.py +++ b/mangle/generators/rss.py @@ -8,10 +8,15 @@ import os import argparse import time import binascii +# The old yum-utils repo-rss used string manipulation. We're instead going to +# use the XML python library to do the work for us. This is cleaner, imo. +from xml.sax.saxutils import escape as xmlescape +from xml.etree.ElementTree import ElementTree, TreeBuilder, tostring +from xml.dom import minidom import dnf import dnf.exceptions #from dnf.comps import Comps -import libxml2 +#import libxml2 def to_unicode(string: str) -> str: """ @@ -43,6 +48,8 @@ class DnfQuiet(dnf.Base): else: available = self.sack.query().available().filter(latest_per_arch=1) + available.run() + for package in available: ftime = int(package.buildtime) if ftime > recentlimit: @@ -60,108 +67,126 @@ class DnfQuiet(dnf.Base): class RepoRSS: def __init__(self, filename='repo-rss.xml'): self.description = 'Repository RSS' - self.link = 'http://dnf.baseurl.org' + self.link = 'https://github.com/rpm-software-management/dnf' self.title = 'Recent Packages' - self.do_file(filename) - self.do_doc() - - def do_doc(self): - self.doc = libxml2.newDoc('1.0') - self.xmlescape = self.doc.encodeEntitiesReentrant - rss = self.doc.newChild(None, 'rss', None) - rss.setProp('version', '2.0') - self.rssnode = rss.newChild(None, 'channel', None) - - def do_file(self, filename): if filename[0] != '/': cwd = os.getcwd() self.filename = os.path.join(cwd, filename) else: self.filename = filename - try: - self.file_open = open(self.filename, 'w+') - except IOError as exc: - print(f'Error opening file {self.filename}: {exc}', file=sys.stderr) - sys.exit(1) - - def rsspackage(self, package): - rfc822_format = "%a, %d %b %Y %X GMT" - changelog_format = "%a, %d %b %Y GMT" - package_hex = binascii.hexlify(package.chksum[1]).decode() - item = self.rssnode.newChild(None, 'item', None) - title = self.xmlescape(str(package)) - description = package.description - item.newChild(None, 'title', title) - date = time.gmtime(float(package.buildtime)) - item.newChild(None, 'pubDate', time.strftime(rfc822_format, date)) - # pylint: disable=line-too-long - item.newChild(None, 'guid', package_hex).setProp("isPermaLink", "false") - link = package.remote_location() - item.newChild(None, 'link', self.xmlescape(link)) - changelog = '' - count = 0 - if package.changelogs is not None: - changelog_list = package.changelogs - else: - changelog_list = [] - for meta in changelog_list: - count += 1 - if count > 3: - changelog += '...' - break - date = meta['timestamp'].strftime(changelog_format) - author = meta['author'] - desc = meta['text'] - changelog += f'{date} - {author}\n{desc}\n\n' - # pylint: disable=line-too-long,consider-using-f-string - description = '

{} - {}

\n\n'.format(self.xmlescape(package.name), self.xmlescape(package.summary)) - description += '

%s

\n\n

Change Log:

\n\n' % self.xmlescape(description.replace("\n", "
\n")) - description += self.xmlescape('
%s
' % self.xmlescape(changelog)) - item.newChild(None, 'description', description) - return item - - def start_rss(self): - """return string representation of rss preamble""" + def rsspackage(self, packages): + file = self.filename rfc822_format = "%a, %d %b %Y %X GMT" now = time.strftime(rfc822_format, time.gmtime()) - rssheader = f""" - - - {self.title} - {self.link} - {self.description} - {now} - DNF - """ + etbobj = TreeBuilder() + # start rss + etbobj.start('rss', {'version': '2.0'}) + # start channel + etbobj.start('channel', {}) + # start title + etbobj.start('title', {}) + etbobj.data(self.title) + etbobj.end('title') + # end title + # start link + etbobj.start('link', {}) + etbobj.data(self.link) + etbobj.end('link') + # end link + # start description + etbobj.start('description', {}) + etbobj.data(self.description) + etbobj.end('description') + # end description + # start pubDate + etbobj.start('pubDate', {}) + etbobj.data(now) + etbobj.end('pubDate') + # end pubDate + # start generator + etbobj.start('generator', {}) + etbobj.data('DNF') + etbobj.end('generator') + # end generator - self.file_open.write(rssheader) + rfc822_format = "%a, %d %b %Y %X GMT" + changelog_format = "%a, %d %b %Y GMT" + for package in packages: + package_hex = binascii.hexlify(package.chksum[1]).decode() + title = xmlescape(str(package)) + date = time.gmtime(float(package.buildtime)) + description = package.description + link = package.remote_location() + # form description + changelog = '' + count = 0 + if package.changelogs is not None: + changelog_list = package.changelogs + else: + changelog_list = [] + for meta in changelog_list: + count += 1 + if count > 3: + changelog += '...' + break + date = meta['timestamp'].strftime(changelog_format) + author = meta['author'] + desc = meta['text'] + changelog += f'{date} - {author}\n{desc}\n\n' + description = f'

{package.name} - {package.summary}

\n\n' + description += '

%s

\n\n

Change Log:

\n\n' % description.replace("\n", "
\n") + description += f'
{changelog}
' - def do_package(self, package): - item = self.rsspackage(package) - self.file_open.write(item.serialize("utf-8", 1)) - item.unlinkNode() - item.freeNode() - del item + # start item + etbobj.start('item', {}) + # start title + etbobj.start('title', {}) + etbobj.data(title) + etbobj.end('title') + # end title + # start pubDate + etbobj.start('pubDate', {}) + etbobj.data(date) + etbobj.end('pubDate') + # end pubDate + # start guid + etbobj.start('guid', {'isPermaLink': 'false'}) + etbobj.data(package_hex) + etbobj.end('guid',) + # end guid + # start link + etbobj.start('link', {}) + etbobj.data(link) + etbobj.end('link') + # end link + # start description + etbobj.start('description', {}) + etbobj.data(xmlescape(description)) + etbobj.end('description') + # end description + etbobj.end('item') + # end item - def close_rss(self): - end="\n
\n
\n" - self.file_open.write(end) - self.file_open.close() - del self.file_open - self.doc.freeDoc() - del self.doc + etbobj.end('channel') + # end channel + etbobj.end('rss') + # end rss + rss = etbobj.close() + etree = ElementTree(rss) + some_string = tostring(etree.getroot(), encoding='utf-8') + xmlstr = minidom.parseString(some_string).toprettyxml(indent=" ") + #etree.write(file, encoding='utf-8') + with open(file, 'w+', encoding='utf-8') as f: + f.write(xmlstr) + f.close() -def make_rss_feed(filename, title, link, description, recent, dnfobj): +def make_rss_feed(filename, title, link, description, recent): rssobj = RepoRSS(filename) rssobj.title = title rssobj.link = link rssobj.description = description - rssobj.start_rss() - if len(recent) > 0: - for package in recent: - rssobj.do_package(package) - rssobj.close_rss() + rssobj.rsspackage(recent) def main(options): days = options.days @@ -198,7 +223,7 @@ def main(options): repoobj.load_metadata_other = True - print('Getting repo data') + print('Getting repo data for requested repos') try: dnfobj.fill_sack() except: @@ -206,10 +231,13 @@ def main(options): sys.exit(1) sack_query = dnfobj.sack.query().available() - recent = sack_query.filter(latest_per_arch=1) - sorted_recents = sorted(set(recent.run()), key=lambda pkg: pkg.buildtime) + #recent = sack_query.filter(latest_per_arch=1) + recent = dnfobj.get_recent(days=days) + #sorted_recents = sorted(set(recent.run()), key=lambda pkg: pkg.buildtime) + sorted_recents = sorted(set(recent), key=lambda pkg: pkg.buildtime) sorted_recents.reverse() - make_rss_feed(options.filename, options.title, options.link, options.description, sorted_recents, dnfobj) + make_rss_feed(options.filename, options.title, options.link, + options.description, sorted_recents) if __name__ == "__main__": parser = argparse.ArgumentParser()