Source code for resyndicator.resyndicators

import requests
import xmltodict
from datetime import datetime
from operator import attrgetter
from slugify import slugify
from utilofies.stdlib import isoformat, canonicalized
from . import settings
from .utils.logger import logger
from .utils import urn_from_string, FeedTemplate
from .models import Entry, DefaultSession


class Resyndicator(object):
    """
    The Resyndicator class represents a feed that is generated from the
    retrieved data on the basis of an SQLAlchemy query. It is identified
    by the title that you specify on instantiation, so do not change it,
    because that’ll be tantamount to creating a new resyndicator. (A
    usage sketch follows the source listing below.)
    """

    Entry = Entry

    def __init__(self, title, query, session=DefaultSession, past=None,
                 length=settings.DEFAULT_LENGTH, **kwargs):
        self.title = title  # Don't change it
        self.slug = slugify(title, to_lower=True)
        self.length = length
        self.metadata = kwargs
        self.id = urn_from_string(self.title)
        self.query = query
        self.past = past
        self.url = '{}{}.atom'.format(settings.BASE_URL, self.slug)
        self.session = session()
    def get_entries(self):
        """Query all relevant entries from the database."""
        query = self.session.query(self.Entry)
        if self.past:
            query = query.filter(
                self.Entry.updated > datetime.utcnow() - self.past)
        return query.filter(self.query) \
            .order_by(self.Entry.updated.desc())[:settings.DEFAULT_LENGTH]
    def feed(self):
        """Generate the serialized feed."""
        # Long-running function. I’ve seen it take 13 s to complete.
        entries = self.get_entries()
        feed = FeedTemplate.feed()
        feed['feed']['id'] = self.id
        feed['feed']['title'] = self.title
        feed['feed']['updated'] = \
            isoformat(entries[0].updated) if entries else None
        feed['feed']['link'][0]['@href'] = self.url
        feed['feed'].update(self.metadata)
        for entry in entries[:self.length]:
            feed_entry = FeedTemplate.entry()
            feed_entry['id'] = urn_from_string(entry.id)
            feed_entry['updated'] = isoformat(entry.updated)
            feed_entry['published'] = isoformat(entry.published)
            feed_entry['title'] = entry.title
            feed_entry['author']['name'] = entry.author
            feed_entry['link']['@href'] = entry.link
            feed_entry['summary']['@type'] = entry.summary_type
            feed_entry['summary']['#text'] = entry.summary
            feed_entry['content']['@type'] = entry.content_type
            feed_entry['content']['#text'] = entry.content
            feed_entry['source']['id'] = entry.source_id
            feed_entry['source']['title'] = entry.source_title
            feed_entry['source']['link']['@href'] = entry.source_link
            feed['feed']['entry'].append(feed_entry)
        feed = canonicalized(feed, blacklist=(None, '', {}))
        return xmltodict.unparse(feed, pretty=True)
    def publish(self):
        """Write the serialized feed to a file."""
        # Generate the serialized feed first: opening the file in write
        # mode truncates it, so it stays empty for the duration of the
        # write, and that duration should be as short as we can make it.
        feed = self.feed()
        with open(settings.WEBROOT + self.slug + '.atom',
                  encoding='utf-8', mode='w') as feedfile:
            feedfile.write(feed)
    def pubsub(self, fresh_entries):
        """Publish new entries to a hub like PubSubHubbub."""
        if not settings.HUB or not fresh_entries:
            # Skip if PubSubHubbub is deactivated or there are no new entries
            return
        entry_ids = set(map(attrgetter('id'), self.get_entries()))
        fresh_entry_ids = set(map(attrgetter('id'), fresh_entries))
        if entry_ids & fresh_entry_ids:
            logger.info('Publishing %s to %s', self.title, settings.HUB)
            try:
                response = requests.post(
                    settings.HUB,
                    data={'hub.mode': 'publish', 'hub.url': self.url})
            except (IOError, requests.RequestException) as excp:
                logger.error('Request exception %r for %s while publishing %s',
                             excp, settings.HUB, self.title)
            else:
                if response.status_code != 204:
                    logger.error('Publishing %s to %s failed: %s',
                                 self.title, settings.HUB, response.text)
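
For context, here is a minimal usage sketch. It is not part of the module: the feed title, the filter expression, and the fetch_new_entries helper are made up for illustration, and it assumes settings is configured with BASE_URL, WEBROOT, and optionally HUB.

from datetime import timedelta

from resyndicator.models import Entry
from resyndicator.resyndicators import Resyndicator

resyndicator = Resyndicator(
    title='Example Digest',  # hypothetical; fixes the feed's identity
    query=Entry.source_title == 'Example Blog',  # any SQLAlchemy filter expression
    past=timedelta(days=30),  # only entries updated within the last 30 days
)

fresh_entries = fetch_new_entries()  # hypothetical: entries just stored by your fetchers
resyndicator.publish()               # writes <WEBROOT>/example-digest.atom
resyndicator.pubsub(fresh_entries)   # pings settings.HUB if a fresh entry matches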
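
feed() relies on FeedTemplate from resyndicator.utils to supply nested dictionaries that xmltodict.unparse can serialize to Atom. The actual templates are not shown on this page; the sketch below is inferred purely from the keys feed() assigns and may differ from the real implementation, for example in attributes and namespace declarations.

# Sketch of the dictionary shape that feed() appears to expect from
# FeedTemplate; the real templates in resyndicator.utils may differ.
def feed_template():
    return {'feed': {
        '@xmlns': 'http://www.w3.org/2005/Atom',
        'id': None,
        'title': None,
        'updated': None,
        'link': [{'@rel': 'self', '@href': None}],
        'entry': [],
    }}

def entry_template():
    return {
        'id': None,
        'updated': None,
        'published': None,
        'title': None,
        'author': {'name': None},
        'link': {'@href': None},
        'summary': {'@type': None, '#text': None},
        'content': {'@type': None, '#text': None},
        'source': {'id': None, 'title': None, 'link': {'@href': None}},
    }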
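
Similarly, canonicalized comes from utilofies.stdlib and is called here with a blacklist of empty values. Judging from the call site alone, it presumably prunes those values recursively so that no empty Atom elements are serialized; the stand-in below captures that assumption and is not the real function.

# Illustrative stand-in for utilofies.stdlib.canonicalized as used above:
# recursively drop values that compare equal to a blacklisted value, so
# that xmltodict.unparse does not emit empty elements.
def canonicalized_sketch(obj, blacklist=(None, '', {})):
    if isinstance(obj, dict):
        pruned = {key: canonicalized_sketch(value, blacklist)
                  for key, value in obj.items()}
        return {key: value for key, value in pruned.items()
                if value not in blacklist}
    if isinstance(obj, list):
        return [canonicalized_sketch(item, blacklist) for item in obj]
    return obj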